1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
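// For example, with these encodings a register-to-register MOV ECX,EAX is
// emitted as 8B C8: opcode 0x8B followed by a ModRM byte with mod=11,
// reg=001 (ECX) and r/m=000 (EAX).  See emit_rm() in the source block below
// for how the ModRM byte is assembled.  (Illustrative note.)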
  61 
  62 // General Registers
  63 // Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
  64 // SOE was turned off in Java code due to frequent use of uncommon-traps.
  65 // Now that the allocator is better, ESI and EDI are turned on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emission of assembly for a machnode. During the emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering forces the
  86 // instruction encoding to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
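// For example, FPR2 (encoding 2) is st(1) from the oopMap viewpoint, so the
// flt->flt copy in MachSpillCopyNode::implementation below emits FLD st(1)
// by using the register encoding minus one (0xC0 + 2 - 1).  (Illustrative note.)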
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
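// This relies on the encodings chosen in the register definition block above:
// for each Long pair (EDX:EAX, EBX:ECX, EDI:EBP) the high register's encoding
// is exactly two above the low register's (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).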
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
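// The extra 128 bits of slack let double_quadword() round each address down
// to a 16-byte boundary while still staying inside the pool.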
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
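// Typical use (illustrative): AbsF/AbsD AND the value with a signmask entry to
// clear the sign bit, and NegF/NegD XOR it with a signflip entry to toggle the
// sign bit, e.g. via ANDPS/XORPS (or the PD forms) with the 16-byte aligned
// pool entry as a memory operand.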
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     if(UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
 301 // !!!!! Special hack to get all types of calls to specify the byte offset
 302 //       from the start of the call to the point where the return address
 303 //       will point.
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
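// (A direct CALL rel32 is 5 bytes: one opcode byte plus a 4-byte displacement.
// The dynamic call is preceded by a 5-byte MOV immediate that sets up the
// inline cache, giving the 10-byte offset; compare compute_padding() below,
// which skips the same 5-byte MOV plus the call opcode byte.)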
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
 345 
 346 // EMIT_RM()
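// (ModRM byte layout: f1 = mod in bits 7..6, f2 = reg/opcode-extension in
// bits 5..3, f3 = r/m in bits 2..0.)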
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
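// ESP-relative addressing always needs a SIB byte: r/m = 100b (ESP) in the
// ModRM byte means "SIB follows", and index = 100b (ESP) in the SIB byte
// means "no index".  Hence the paired emit_rm calls below for each form.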
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement
 414   }
 415 }
 416 
 417    // rRegI ereg, memory mem) %{    // emit_reg_mem
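// Summary of the IA-32 ModRM/SIB cases handled below (illustrative):
//   mod=00: no displacement, unless the base is EBP, which instead selects a
//           32-bit displacement with no base register (absolute address);
//   mod=01: 8-bit displacement follows;
//   mod=10: 32-bit displacement follows;
//   r/m = ESP (100b): a SIB byte follows (ESP as a base always uses SIB).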
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // There is no index & no scale, use form without SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
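// Produces a -1/0/+1 compare result in 'dst': -1 if the comparison was
// unordered (parity set) or below, 0 if equal, +1 if above.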
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
 584     }
 585   }
 586 
 587   if (VerifyStackAtCalls) {
 588     st->print("\n\t");
 589     framesize -= wordSize;
 590     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 591   }
 592 
 593   if( C->in_24_bit_fp_mode() ) {
 594     st->print("\n\t");
 595     st->print("FLDCW  \t# load 24 bit fpu control word");
 596   }
 597   if (UseSSE >= 2 && VerifyFPU) {
 598     st->print("\n\t");
 599     st->print("# verify FPU stack (must be clean on entry)");
 600   }
 601 
 602 #ifdef ASSERT
 603   if (VerifyStackAtCalls) {
 604     st->print("\n\t");
 605     st->print("# stack alignment check");
 606   }
 607 #endif
 608   st->cr();
 609 }
 610 #endif
 611 
 612 
 613 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 614   Compile* C = ra_->C;
 615   MacroAssembler _masm(&cbuf);
 616 
 617   int framesize = C->frame_size_in_bytes();
 618   int bangsize = C->bang_size_in_bytes();
 619 
 620   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 621 
 622   C->set_frame_complete(cbuf.insts_size());
 623 
 624   if (C->has_mach_constant_base_node()) {
 625     // NOTE: We set the table base offset here because users might be
 626     // emitted before MachConstantBaseNode.
 627     Compile::ConstantTable& constant_table = C->constant_table();
 628     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 629   }
 630 }
 631 
 632 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 633   return MachNode::size(ra_); // too many variables; just compute it the hard way
 634 }
 635 
 636 int MachPrologNode::reloc() const {
 637   return 0; // a large enough number
 638 }
 639 
 640 //=============================================================================
 641 #ifndef PRODUCT
 642 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 643   Compile *C = ra_->C;
 644   int framesize = C->frame_size_in_bytes();
 645   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 646   // Remove two words for return addr and rbp.
 647   framesize -= 2*wordSize;
 648 
 649   if (C->max_vector_size() > 16) {
 650     st->print("VZEROUPPER");
 651     st->cr(); st->print("\t");
 652   }
 653   if (C->in_24_bit_fp_mode()) {
 654     st->print("FLDCW  standard control word");
 655     st->cr(); st->print("\t");
 656   }
 657   if (framesize) {
 658     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 659     st->cr(); st->print("\t");
 660   }
 661   st->print_cr("POPL   EBP"); st->print("\t");
 662   if (do_polling() && C->is_method_compilation()) {
 663     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 664     st->cr(); st->print("\t");
 665   }
 666 }
 667 #endif
 668 
 669 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 670   Compile *C = ra_->C;
 671 
 672   if (C->max_vector_size() > 16) {
 673     // Clear upper bits of YMM registers when current compiled code uses
 674     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 675     MacroAssembler masm(&cbuf);
 676     masm.vzeroupper();
 677   }
 678   // If method set FPU control word, restore to standard control word
 679   if (C->in_24_bit_fp_mode()) {
 680     MacroAssembler masm(&cbuf);
 681     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 682   }
 683 
 684   int framesize = C->frame_size_in_bytes();
 685   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 686   // Remove two words for return addr and rbp.
 687   framesize -= 2*wordSize;
 688 
 689   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 690 
 691   if (framesize >= 128) {
 692     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 693     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 694     emit_d32(cbuf, framesize);
 695   } else if (framesize) {
 696     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 698     emit_d8(cbuf, framesize);
 699   }
 700 
 701   emit_opcode(cbuf, 0x58 | EBP_enc);
 702 
 703   if (do_polling() && C->is_method_compilation()) {
 704     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 705     emit_opcode(cbuf,0x85);
 706     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 707     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 708   }
 709 }
 710 
 711 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 712   Compile *C = ra_->C;
 713   // If method set FPU control word, restore to standard control word
 714   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 715   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 716   if (do_polling() && C->is_method_compilation()) size += 6;
 717 
 718   int framesize = C->frame_size_in_bytes();
 719   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 720   // Remove two words for return addr and rbp.
 721   framesize -= 2*wordSize;
 722 
 723   size++; // popl rbp
 724 
 725   if (framesize >= 128) {
 726     size += 6;
 727   } else {
 728     size += framesize ? 3 : 0;
 729   }
 730   return size;
 731 }
 732 
 733 int MachEpilogNode::reloc() const {
 734   return 0; // a large enough number
 735 }
 736 
 737 const Pipeline * MachEpilogNode::pipeline() const {
 738   return MachNode::pipeline_class();
 739 }
 740 
 741 int MachEpilogNode::safepoint_offset() const { return 0; }
 742 
 743 //=============================================================================
 744 
 745 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 746 static enum RC rc_class( OptoReg::Name reg ) {
 747 
 748   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 749   if (OptoReg::is_stack(reg)) return rc_stack;
 750 
 751   VMReg r = OptoReg::as_VMReg(reg);
 752   if (r->is_Register()) return rc_int;
 753   if (r->is_FloatRegister()) {
 754     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 755     return rc_float;
 756   }
 757   assert(r->is_XMMRegister(), "must be");
 758   return rc_xmm;
 759 }
 760 
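// Note: 'reg' is an OptoReg number whose encoding lands in the ModRM reg
// field, so callers sometimes pass a register whose encoding is really an
// opcode extension (e.g. ESI -> /6 for PUSH, EAX -> /0 for POP,
// EBX -> /3 for FSTP, EDX -> /2 for FST).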
 761 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 762                         int opcode, const char *op_str, int size, outputStream* st ) {
 763   if( cbuf ) {
 764     emit_opcode  (*cbuf, opcode );
 765     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 766 #ifndef PRODUCT
 767   } else if( !do_size ) {
 768     if( size != 0 ) st->print("\n\t");
 769     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 770       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 771       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 772     } else { // FLD, FST, PUSH, POP
 773       st->print("%s [ESP + #%d]",op_str,offset);
 774     }
 775 #endif
 776   }
 777   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 778   return size+3+offset_size;
 779 }
 780 
 781 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 782 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 783                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 784   int in_size_in_bits = Assembler::EVEX_32bit;
 785   int evex_encoding = 0;
 786   if (reg_lo+1 == reg_hi) {
 787     in_size_in_bits = Assembler::EVEX_64bit;
 788     evex_encoding = Assembler::VEX_W;
 789   }
 790   if (cbuf) {
 791     MacroAssembler _masm(cbuf);
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 843       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 844                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 845     } else {
 846       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 847                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 848     }
 849 #ifndef PRODUCT
 850   } else if (!do_size) {
 851     if (size != 0) st->print("\n\t");
 852     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 853       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 855       } else {
 856         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       }
 858     } else {
 859       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 860         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     }
 865 #endif
 866   }
 867   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 868   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 869   int sz = (UseAVX > 2) ? 6 : 4;
 870   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 871       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 872   return size + sz;
 873 }
 874 
 875 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 876                             int src_hi, int dst_hi, int size, outputStream* st ) {
 877   // 32-bit
 878   if (cbuf) {
 879     MacroAssembler _masm(cbuf);
 880     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 881              as_Register(Matcher::_regEncode[src_lo]));
 882 #ifndef PRODUCT
 883   } else if (!do_size) {
 884     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 885 #endif
 886   }
 887   return (UseAVX> 2) ? 6 : 4;
 888 }
 889 
 890 
 891 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 892                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 893   // 32-bit
 894   if (cbuf) {
 895     MacroAssembler _masm(cbuf);
 896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 897              as_XMMRegister(Matcher::_regEncode[src_lo]));
 898 #ifndef PRODUCT
 899   } else if (!do_size) {
 900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 901 #endif
 902   }
 903   return (UseAVX> 2) ? 6 : 4;
 904 }
 905 
 906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 907   if( cbuf ) {
 908     emit_opcode(*cbuf, 0x8B );
 909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 910 #ifndef PRODUCT
 911   } else if( !do_size ) {
 912     if( size != 0 ) st->print("\n\t");
 913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 914 #endif
 915   }
 916   return size+2;
 917 }
 918 
 919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 920                                  int offset, int size, outputStream* st ) {
 921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 922     if( cbuf ) {
 923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 925 #ifndef PRODUCT
 926     } else if( !do_size ) {
 927       if( size != 0 ) st->print("\n\t");
 928       st->print("FLD    %s",Matcher::regName[src_lo]);
 929 #endif
 930     }
 931     size += 2;
 932   }
 933 
 934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 935   const char *op_str;
 936   int op;
 937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 939     op = 0xDD;
 940   } else {                   // 32-bit store
 941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 942     op = 0xD9;
 943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 944   }
 945 
 946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 947 }
 948 
 949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 950 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 952 
 953 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 954                             int stack_offset, int reg, uint ireg, outputStream* st);
 955 
 956 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 957                                      int dst_offset, uint ireg, outputStream* st) {
 958   int calc_size = 0;
 959   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 960   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 961   switch (ireg) {
 962   case Op_VecS:
 963     calc_size = 3+src_offset_size + 3+dst_offset_size;
 964     break;
 965   case Op_VecD:
 966     calc_size = 3+src_offset_size + 3+dst_offset_size;
 967     src_offset += 4;
 968     dst_offset += 4;
 969     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 970     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 971     calc_size += 3+src_offset_size + 3+dst_offset_size;
 972     break;
 973   case Op_VecX:
 974   case Op_VecY:
 975   case Op_VecZ:
 976     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 977     break;
 978   default:
 979     ShouldNotReachHere();
 980   }
 981   if (cbuf) {
 982     MacroAssembler _masm(cbuf);
 983     int offset = __ offset();
 984     switch (ireg) {
 985     case Op_VecS:
 986       __ pushl(Address(rsp, src_offset));
 987       __ popl (Address(rsp, dst_offset));
 988       break;
 989     case Op_VecD:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       __ pushl(Address(rsp, src_offset+4));
 993       __ popl (Address(rsp, dst_offset+4));
 994       break;
 995     case Op_VecX:
 996       __ movdqu(Address(rsp, -16), xmm0);
 997       __ movdqu(xmm0, Address(rsp, src_offset));
 998       __ movdqu(Address(rsp, dst_offset), xmm0);
 999       __ movdqu(xmm0, Address(rsp, -16));
1000       break;
1001     case Op_VecY:
1002       __ vmovdqu(Address(rsp, -32), xmm0);
1003       __ vmovdqu(xmm0, Address(rsp, src_offset));
1004       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1005       __ vmovdqu(xmm0, Address(rsp, -32));
      break;
1006     case Op_VecZ:
1007       __ evmovdqu(Address(rsp, -64), xmm0, 2);
1008       __ evmovdqu(xmm0, Address(rsp, src_offset), 2);
1009       __ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
1010       __ evmovdqu(xmm0, Address(rsp, -64), 2);
1011       break;
1012     default:
1013       ShouldNotReachHere();
1014     }
1015     int size = __ offset() - offset;
1016     assert(size == calc_size, "incorrect size calculation");
1017     return size;
1018 #ifndef PRODUCT
1019   } else if (!do_size) {
1020     switch (ireg) {
1021     case Op_VecS:
1022       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1023                 "popl    [rsp + #%d]",
1024                 src_offset, dst_offset);
1025       break;
1026     case Op_VecD:
1027       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1028                 "popl    [rsp + #%d]\n\t"
1029                 "pushl   [rsp + #%d]\n\t"
1030                 "popl    [rsp + #%d]",
1031                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1032       break;
1033      case Op_VecX:
1034       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1035                 "movdqu  xmm0, [rsp + #%d]\n\t"
1036                 "movdqu  [rsp + #%d], xmm0\n\t"
1037                 "movdqu  xmm0, [rsp - #16]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecY:
1041       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #32]",
1045                 src_offset, dst_offset);
      break;
1046     case Op_VecZ:
1047       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1048                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1049                 "vmovdqu [rsp + #%d], xmm0\n\t"
1050                 "vmovdqu xmm0, [rsp - #64]",
1051                 src_offset, dst_offset);
1052       break;
1053     default:
1054       ShouldNotReachHere();
1055     }
1056 #endif
1057   }
1058   return calc_size;
1059 }
1060 
1061 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1062   // Get registers to move
1063   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1064   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1065   OptoReg::Name dst_second = ra_->get_reg_second(this );
1066   OptoReg::Name dst_first = ra_->get_reg_first(this );
1067 
1068   enum RC src_second_rc = rc_class(src_second);
1069   enum RC src_first_rc = rc_class(src_first);
1070   enum RC dst_second_rc = rc_class(dst_second);
1071   enum RC dst_first_rc = rc_class(dst_first);
1072 
1073   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1074 
1075   // Generate spill code!
1076   int size = 0;
1077 
1078   if( src_first == dst_first && src_second == dst_second )
1079     return size;            // Self copy, no move
1080 
1081   if (bottom_type()->isa_vect() != NULL) {
1082     uint ireg = ideal_reg();
1083     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1084     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1085     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1086     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1087       // mem -> mem
1088       int src_offset = ra_->reg2offset(src_first);
1089       int dst_offset = ra_->reg2offset(dst_first);
1090       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1091     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1092       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1093     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1094       int stack_offset = ra_->reg2offset(dst_first);
1095       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1096     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1097       int stack_offset = ra_->reg2offset(src_first);
1098       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1099     } else {
1100       ShouldNotReachHere();
1101     }
1102   }
1103 
1104   // --------------------------------------
1105   // Check for mem-mem move.  push/pop to move.
1106   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1107     if( src_second == dst_first ) { // overlapping stack copy ranges
1108       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1109       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1110       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1111       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1112     }
1113     // move low bits
1114     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1115     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1116     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1117       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1118       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1119     }
1120     return size;
1121   }
1122 
1123   // --------------------------------------
1124   // Check for integer reg-reg copy
1125   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1126     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1127 
1128   // Check for integer store
1129   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1130     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1131 
1132   // Check for integer load
1133   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1134     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1135 
1136   // Check for integer reg-xmm reg copy
1137   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1138     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1139             "no 64 bit integer-float reg moves" );
1140     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1141   }
1142   // --------------------------------------
1143   // Check for float reg-reg copy
1144   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1145     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1146             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1147     if( cbuf ) {
1148 
1149       // Note the mucking with the register encode to compensate for the 0/1
1150       // indexing issue mentioned in a comment in the reg_def sections
1151       // for FPR registers many lines above here.
1152 
1153       if( src_first != FPR1L_num ) {
1154         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1155         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1156         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1157         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1158      } else {
1159         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1160         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1161      }
1162 #ifndef PRODUCT
1163     } else if( !do_size ) {
1164       if( size != 0 ) st->print("\n\t");
1165       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1166       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1167 #endif
1168     }
1169     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1170   }
1171 
1172   // Check for float store
1173   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1174     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1175   }
1176 
1177   // Check for float load
1178   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1179     int offset = ra_->reg2offset(src_first);
1180     const char *op_str;
1181     int op;
1182     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1183       op_str = "FLD_D";
1184       op = 0xDD;
1185     } else {                   // 32-bit load
1186       op_str = "FLD_S";
1187       op = 0xD9;
1188       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1189     }
1190     if( cbuf ) {
1191       emit_opcode  (*cbuf, op );
1192       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1193       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1194       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1195 #ifndef PRODUCT
1196     } else if( !do_size ) {
1197       if( size != 0 ) st->print("\n\t");
1198       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1199 #endif
1200     }
1201     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1202     return size + 3+offset_size+2;
1203   }
1204 
1205   // Check for xmm reg-reg copy
1206   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1207     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1208             (src_first+1 == src_second && dst_first+1 == dst_second),
1209             "no non-adjacent float-moves" );
1210     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1211   }
1212 
1213   // Check for xmm reg-integer reg copy
1214   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1215     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1216             "no 64 bit float-integer reg moves" );
1217     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1218   }
1219 
1220   // Check for xmm store
1221   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1222     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1223   }
1224 
1225   // Check for float xmm load
1226   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1227     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1228   }
1229 
1230   // Copy from float reg to xmm reg
1231   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1232     // copy to the top of stack from floating point reg
1233     // and use LEA to preserve flags
1234     if( cbuf ) {
1235       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1236       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1237       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1238       emit_d8(*cbuf,0xF8);
1239 #ifndef PRODUCT
1240     } else if( !do_size ) {
1241       if( size != 0 ) st->print("\n\t");
1242       st->print("LEA    ESP,[ESP-8]");
1243 #endif
1244     }
1245     size += 4;
1246 
1247     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1248 
1249     // Copy from the temp memory to the xmm reg.
1250     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1251 
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0x08);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP+8]");
1261 #endif
1262     }
1263     size += 4;
1264     return size;
1265   }
1266 
1267   assert( size > 0, "missed a case" );
1268 
1269   // --------------------------------------------------------------------
1270   // Check for second bits still needing moving.
1271   if( src_second == dst_second )
1272     return size;               // Self copy; no move
1273   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1274 
1275   // Check for second word int-int move
1276   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1277     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1278 
1279   // Check for second word integer store
1280   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1281     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1282 
1283   // Check for second word integer load
1284   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1285     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1286 
1287 
1288   Unimplemented();
1289   return 0; // Mute compiler
1290 }
1291 
1292 #ifndef PRODUCT
1293 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1294   implementation( NULL, ra_, false, st );
1295 }
1296 #endif
1297 
1298 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1299   implementation( &cbuf, ra_, false, NULL );
1300 }
1301 
1302 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1303   return implementation( NULL, ra_, true, NULL );
1304 }
1305 
1306 
1307 //=============================================================================
1308 #ifndef PRODUCT
1309 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1310   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1311   int reg = ra_->get_reg_first(this);
1312   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1313 }
1314 #endif
1315 
1316 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1318   int reg = ra_->get_encode(this);
1319   if( offset >= 128 ) {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x2, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d32(cbuf, offset);
1324   }
1325   else {
1326     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327     emit_rm(cbuf, 0x1, reg, 0x04);
1328     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329     emit_d8(cbuf, offset);
1330   }
1331 }
1332 
1333 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
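  // LEA reg,[ESP+offset] with a SIB byte is opcode + modrm + sib plus either a
  // 4-byte displacement (7 bytes total) or a 1-byte displacement (4 bytes).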
1335   if( offset >= 128 ) {
1336     return 7;
1337   }
1338   else {
1339     return 4;
1340   }
1341 }
1342 
1343 //=============================================================================
1344 #ifndef PRODUCT
1345 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1346   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1347   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1348   st->print_cr("\tNOP");
1349   st->print_cr("\tNOP");
1350   if( !OptoBreakpoint )
1351     st->print_cr("\tNOP");
1352 }
1353 #endif
1354 
1355 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1356   MacroAssembler masm(&cbuf);
1357 #ifdef ASSERT
1358   uint insts_size = cbuf.insts_size();
1359 #endif
1360   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1361   masm.jump_cc(Assembler::notEqual,
1362                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1363   /* WARNING these NOPs are critical so that verified entry point is properly
1364      aligned for patching by NativeJump::patch_verified_entry() */
1365   int nops_cnt = 2;
1366   if( !OptoBreakpoint ) // Leave space for int3
1367      nops_cnt += 1;
1368   masm.nop(nops_cnt);
1369 
1370   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1371 }
1372 
1373 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
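  // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 to the IC miss stub is 6 bytes, plus
  // 2 NOPs when OptoBreakpoint leaves room for an INT3, otherwise 3 NOPs.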
1374   return OptoBreakpoint ? 11 : 12;
1375 }
1376 
1377 
1378 //=============================================================================
1379 
1380 int Matcher::regnum_to_fpu_offset(int regnum) {
1381   return regnum - 32; // The FP registers are in the second chunk
1382 }
1383 
// This is UltraSPARC specific; true just means we have fast l2f conversion
1385 const bool Matcher::convL2FSupported(void) {
1386   return true;
1387 }
1388 
1389 // Is this branch offset short enough that a short branch can be used?
1390 //
1391 // NOTE: If the platform does not provide any short branch variants, then
1392 //       this method should return false for offset 0.
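//
// Illustrative example: with a 2-byte short branch (br_size == 2), a target
// 129 bytes past the branch address gives offset - br_size == 127, which still
// fits in a signed byte; one byte further gives 128 and forces the long form.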
1393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1397   offset -= br_size;
1398 
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
1401   if (rule == jmpConUCF2_rule)
1402     return (-126 <= offset && offset <= 125);
1403   return (-128 <= offset && offset <= 127);
1404 }
1405 
1406 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1408   return false;
1409 }
1410 
1411 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1412 const bool Matcher::init_array_count_is_in_bytes = false;
1413 
1414 // Threshold size for cleararray.
1415 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1416 
1417 // Needs 2 CMOV's for longs.
1418 const int Matcher::long_cmove_cost() { return 1; }
1419 
1420 // No CMOVF/CMOVD with SSE/SSE2
1421 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1422 
1423 // Does the CPU require late expand (see block.cpp for description of late expand)?
1424 const bool Matcher::require_postalloc_expand = false;
1425 
1426 // Should the Matcher clone shifts on addressing modes, expecting them to
1427 // be subsumed into complex addressing expressions or compute them into
1428 // registers?  True for Intel but false for most RISCs
1429 const bool Matcher::clone_shift_expressions = true;
1430 
1431 // Do we need to mask the count passed to shift instructions or does
1432 // the cpu only look at the lower 5/6 bits anyway?
1433 const bool Matcher::need_masked_shift_count = false;
1434 
1435 bool Matcher::narrow_oop_use_complex_address() {
1436   ShouldNotCallThis();
1437   return true;
1438 }
1439 
1440 bool Matcher::narrow_klass_use_complex_address() {
1441   ShouldNotCallThis();
1442   return true;
1443 }
1444 
1445 
1446 // Is it better to copy float constants, or load them directly from memory?
1447 // Intel can load a float constant from a direct address, requiring no
1448 // extra registers.  Most RISCs will have to materialize an address into a
1449 // register first, so they would do better to copy the constant from stack.
1450 const bool Matcher::rematerialize_float_constants = true;
1451 
1452 // If CPU can load and store mis-aligned doubles directly then no fixup is
1453 // needed.  Else we split the double into 2 integer pieces and move it
1454 // piece-by-piece.  Only happens when passing doubles into C code as the
1455 // Java calling convention forces doubles to be aligned.
1456 const bool Matcher::misaligned_doubles_ok = true;
1457 
1458 
1459 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1460   // Get the memory operand from the node
1461   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1462   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1463   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1464   uint opcnt     = 1;                 // First operand
1465   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1466   while( idx >= skipped+num_edges ) {
1467     skipped += num_edges;
1468     opcnt++;                          // Bump operand count
1469     assert( opcnt < numopnds, "Accessing non-existent operand" );
1470     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1471   }
1472 
1473   MachOper *memory = node->_opnds[opcnt];
1474   MachOper *new_memory = NULL;
1475   switch (memory->opcode()) {
1476   case DIRECT:
1477   case INDOFFSET32X:
1478     // No transformation necessary.
1479     return;
1480   case INDIRECT:
1481     new_memory = new indirect_win95_safeOper( );
1482     break;
1483   case INDOFFSET8:
1484     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1485     break;
1486   case INDOFFSET32:
1487     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1488     break;
1489   case INDINDEXOFFSET:
1490     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1491     break;
1492   case INDINDEXSCALE:
1493     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1494     break;
1495   case INDINDEXSCALEOFFSET:
1496     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1497     break;
1498   case LOAD_LONG_INDIRECT:
1499   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1501     return;
1502   default:
1503     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1504     return;
1505   }
1506   node->_opnds[opcnt] = new_memory;
1507 }
1508 
1509 // Advertise here if the CPU requires explicit rounding operations
1510 // to implement the UseStrictFP mode.
1511 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1512 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x32 they are stored with conversion only when the FPU is used for floats.
1515 bool Matcher::float_in_double() { return (UseSSE == 0); }
1516 
1517 // Do ints take an entire long register or just half?
1518 const bool Matcher::int_in_long = false;
1519 
1520 // Return whether or not this register is ever used as an argument.  This
1521 // function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1524 bool Matcher::can_be_java_arg( int reg ) {
1525   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1526   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1527   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1528   return false;
1529 }
1530 
1531 bool Matcher::is_spillable_arg( int reg ) {
1532   return can_be_java_arg(reg);
1533 }
1534 
1535 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiplication,
  // and only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
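  // For example, divisors such as 7 or -10000 satisfy the check, while
  // min_jint and divisors that do not fit in 32 bits fall back to the
  // multiply-based expansion.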
1541   return VM_Version::has_fast_idiv() &&
1542          (divisor == (int)divisor && divisor != min_jint);
1543 }
1544 
1545 // Register for DIVI projection of divmodI
1546 RegMask Matcher::divI_proj_mask() {
1547   return EAX_REG_mask();
1548 }
1549 
1550 // Register for MODI projection of divmodI
1551 RegMask Matcher::modI_proj_mask() {
1552   return EDX_REG_mask();
1553 }
1554 
1555 // Register for DIVL projection of divmodL
1556 RegMask Matcher::divL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 // Register for MODL projection of divmodL
1562 RegMask Matcher::modL_proj_mask() {
1563   ShouldNotReachHere();
1564   return RegMask();
1565 }
1566 
1567 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1568   return NO_REG_mask();
1569 }
1570 
// Returns true if the high 32 bits of the value are known to be zero.
1572 bool is_operand_hi32_zero(Node* n) {
1573   int opc = n->Opcode();
1574   if (opc == Op_AndL) {
1575     Node* o2 = n->in(2);
1576     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1577       return true;
1578     }
1579   }
1580   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1581     return true;
1582   }
1583   return false;
1584 }
1585 
1586 %}
1587 
1588 //----------ENCODING BLOCK-----------------------------------------------------
1589 // This block specifies the encoding classes used by the compiler to output
1590 // byte streams.  Encoding classes generate functions which are called by
1591 // Machine Instruction Nodes in order to generate the bit encoding of the
1592 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1595 // operand to generate a function which returns its register number when
1596 // queried.   CONST_INTER causes an operand to generate a function which
1597 // returns the value of the constant when queried.  MEMORY_INTER causes an
1598 // operand to generate four functions which return the Base Register, the
1599 // Index Register, the Scale Value, and the Offset Value of the operand when
1600 // queried.  COND_INTER causes an operand to generate six functions which
1601 // return the encoding code (ie - encoding bits for the instruction)
1602 // associated with each basic boolean condition for a conditional instruction.
1603 // Instructions specify two basic values for encoding.  They use the
1604 // ins_encode keyword to specify their encoding class (which must be one of
1605 // the class names specified in the encoding block), and they use the
1606 // opcode keyword to specify, in order, their primary, secondary, and
1607 // tertiary opcode.  Only the opcode sections which a particular instruction
1608 // needs for encoding need to be specified.
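//
// For illustration only (the real instruct rules appear later in this file):
// a typical integer-add rule pairs an opcode section with an ins_encode list
// of the enc_classes defined below, roughly:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                        // ADD r32,r/m32 (primary opcode)
//     ins_encode( OpcP, RegReg(dst,src) ); // emit opcode, then mod/rm byte
//     ins_pipe( ialu_reg_reg );
//   %}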
1609 encode %{
1610   // Build emit functions for each basic byte or larger field in the intel
1611   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1612   // code in the enc_class source block.  Emit functions will live in the
1613   // main source block for now.  In future, we can generalize this by
1614   // adding a syntax that specifies the sizes of fields in an order,
1615   // so that the adlc can build the emit functions automagically
1616 
1617   // Emit primary opcode
1618   enc_class OpcP %{
1619     emit_opcode(cbuf, $primary);
1620   %}
1621 
1622   // Emit secondary opcode
1623   enc_class OpcS %{
1624     emit_opcode(cbuf, $secondary);
1625   %}
1626 
1627   // Emit opcode directly
1628   enc_class Opcode(immI d8) %{
1629     emit_opcode(cbuf, $d8$$constant);
1630   %}
1631 
1632   enc_class SizePrefix %{
1633     emit_opcode(cbuf,0x66);
1634   %}
1635 
1636   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1637     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1638   %}
1639 
1640   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1641     emit_opcode(cbuf,$opcode$$constant);
1642     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1643   %}
1644 
1645   enc_class mov_r32_imm0( rRegI dst ) %{
1646     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1647     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1648   %}
1649 
1650   enc_class cdq_enc %{
1651     // Full implementation of Java idiv and irem; checks for
1652     // special case as described in JVM spec., p.243 & p.271.
1653     //
1654     //         normal case                           special case
1655     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
1667     //  83 F9 FF             cmp         rcx,0FFh
1668     //  0F 84 03 00 00 00    je          done
1669     //                  normal_case:
1670     //  99                   cdq
1671     //  F7 F9                idiv        rax,ecx
1672     //                  done:
1673     //
1674     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1675     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1676     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1677     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1678     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1679     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
1681     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1682     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1683     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1685     // normal_case:
1686     emit_opcode(cbuf,0x99);                                         // cdq
1687     // idiv (note: must be emitted by the user of this rule)
1688     // normal:
1689   %}
1690 
1691   // Dense encoding for older common ops
1692   enc_class Opc_plus(immI opcode, rRegI reg) %{
1693     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1694   %}
1695 
1696 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1698   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1699     // Check for 8-bit immediate, and set sign extend bit in opcode
1700     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1701       emit_opcode(cbuf, $primary | 0x02);
1702     }
1703     else {                          // If 32-bit immediate
1704       emit_opcode(cbuf, $primary);
1705     }
1706   %}
1707 
1708   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1709     // Emit primary opcode and set sign-extend bit
1710     // Check for 8-bit immediate, and set sign extend bit in opcode
1711     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716     // Emit r/m byte with secondary opcode, after primary opcode.
1717     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1718   %}
1719 
1720   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1721     // Check for 8-bit immediate, and set sign extend bit in opcode
1722     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1723       $$$emit8$imm$$constant;
1724     }
1725     else {                          // If 32-bit immediate
1726       // Output immediate
1727       $$$emit32$imm$$constant;
1728     }
1729   %}
1730 
1731   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1732     // Emit primary opcode and set sign-extend bit
1733     // Check for 8-bit immediate, and set sign extend bit in opcode
1734     int con = (int)$imm$$constant; // Throw away top bits
1735     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1736     // Emit r/m byte with secondary opcode, after primary opcode.
1737     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1738     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1739     else                               emit_d32(cbuf,con);
1740   %}
1741 
1742   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1743     // Emit primary opcode and set sign-extend bit
1744     // Check for 8-bit immediate, and set sign extend bit in opcode
1745     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1746     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1747     // Emit r/m byte with tertiary opcode, after primary opcode.
1748     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1749     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1750     else                               emit_d32(cbuf,con);
1751   %}
1752 
1753   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1754     emit_cc(cbuf, $secondary, $dst$$reg );
1755   %}
1756 
1757   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1758     int destlo = $dst$$reg;
1759     int desthi = HIGH_FROM_LOW(destlo);
1760     // bswap lo
1761     emit_opcode(cbuf, 0x0F);
1762     emit_cc(cbuf, 0xC8, destlo);
1763     // bswap hi
1764     emit_opcode(cbuf, 0x0F);
1765     emit_cc(cbuf, 0xC8, desthi);
1766     // xchg lo and hi
1767     emit_opcode(cbuf, 0x87);
1768     emit_rm(cbuf, 0x3, destlo, desthi);
1769   %}
1770 
1771   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1772     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1773   %}
1774 
1775   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1776     $$$emit8$primary;
1777     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1778   %}
1779 
1780   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1781     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1782     emit_d8(cbuf, op >> 8 );
1783     emit_d8(cbuf, op & 255);
1784   %}
1785 
1786   // emulate a CMOV with a conditional branch around a MOV
1787   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1788     // Invert sense of branch from sense of CMOV
1789     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1790     emit_d8( cbuf, $brOffs$$constant );
1791   %}
1792 
1793   enc_class enc_PartialSubtypeCheck( ) %{
1794     Register Redi = as_Register(EDI_enc); // result register
1795     Register Reax = as_Register(EAX_enc); // super class
1796     Register Recx = as_Register(ECX_enc); // killed
1797     Register Resi = as_Register(ESI_enc); // sub class
1798     Label miss;
1799 
1800     MacroAssembler _masm(&cbuf);
1801     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1802                                      NULL, &miss,
1803                                      /*set_cond_codes:*/ true);
1804     if ($primary) {
1805       __ xorptr(Redi, Redi);
1806     }
1807     __ bind(miss);
1808   %}
1809 
1810   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1811     MacroAssembler masm(&cbuf);
1812     int start = masm.offset();
1813     if (UseSSE >= 2) {
1814       if (VerifyFPU) {
1815         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1816       }
1817     } else {
1818       // External c_calling_convention expects the FPU stack to be 'clean'.
1819       // Compiled code leaves it dirty.  Do cleanup now.
1820       masm.empty_FPU_stack();
1821     }
1822     if (sizeof_FFree_Float_Stack_All == -1) {
1823       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1824     } else {
1825       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1826     }
1827   %}
1828 
1829   enc_class Verify_FPU_For_Leaf %{
1830     if( VerifyFPU ) {
1831       MacroAssembler masm(&cbuf);
1832       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1833     }
1834   %}
1835 
1836   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1837     // This is the instruction starting address for relocation info.
1838     cbuf.set_insts_mark();
1839     $$$emit8$primary;
1840     // CALL directly to the runtime
1841     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1842                 runtime_call_Relocation::spec(), RELOC_IMM32 );
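    // The displacement is relative to the next instruction, i.e. the address
    // just past the 4-byte immediate field being emitted here.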
1843 
1844     if (UseSSE >= 2) {
1845       MacroAssembler _masm(&cbuf);
1846       BasicType rt = tf()->return_type();
1847 
1848       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1849         // A C runtime call where the return value is unused.  In SSE2+
1850         // mode the result needs to be removed from the FPU stack.  It's
1851         // likely that this function call could be removed by the
1852         // optimizer if the C function is a pure function.
1853         __ ffree(0);
1854       } else if (rt == T_FLOAT) {
1855         __ lea(rsp, Address(rsp, -4));
1856         __ fstp_s(Address(rsp, 0));
1857         __ movflt(xmm0, Address(rsp, 0));
1858         __ lea(rsp, Address(rsp,  4));
1859       } else if (rt == T_DOUBLE) {
1860         __ lea(rsp, Address(rsp, -8));
1861         __ fstp_d(Address(rsp, 0));
1862         __ movdbl(xmm0, Address(rsp, 0));
1863         __ lea(rsp, Address(rsp,  8));
1864       }
1865     }
1866   %}
1867 
1868 
1869   enc_class pre_call_resets %{
1870     // If method sets FPU control word restore it here
1871     debug_only(int off0 = cbuf.insts_size());
1872     if (ra_->C->in_24_bit_fp_mode()) {
1873       MacroAssembler _masm(&cbuf);
1874       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1875     }
1876     if (ra_->C->max_vector_size() > 16) {
1877       // Clear upper bits of YMM registers when current compiled code uses
1878       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1879       MacroAssembler _masm(&cbuf);
1880       __ vzeroupper();
1881     }
1882     debug_only(int off1 = cbuf.insts_size());
1883     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1884   %}
1885 
1886   enc_class post_call_FPU %{
1887     // If method sets FPU control word do it here also
1888     if (Compile::current()->in_24_bit_fp_mode()) {
1889       MacroAssembler masm(&cbuf);
1890       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891     }
1892   %}
1893 
1894   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1895     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1896     // who we intended to call.
1897     cbuf.set_insts_mark();
1898     $$$emit8$primary;
1899     if (!_method) {
1900       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1901                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1902     } else if (_optimized_virtual) {
1903       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1904                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1905     } else {
1906       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1907                      static_call_Relocation::spec(), RELOC_IMM32 );
1908     }
1909     if (_method) {  // Emit stub for static call.
1910       CompiledStaticCall::emit_to_interp_stub(cbuf);
1911     }
1912   %}
1913 
1914   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1915     MacroAssembler _masm(&cbuf);
1916     __ ic_call((address)$meth$$method);
1917   %}
1918 
1919   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1920     int disp = in_bytes(Method::from_compiled_offset());
1921     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1922 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1924     cbuf.set_insts_mark();
1925     $$$emit8$primary;
1926     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1927     emit_d8(cbuf, disp);             // Displacement
1928 
1929   %}
1930 
1931 //   Following encoding is no longer used, but may be restored if calling
1932 //   convention changes significantly.
1933 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1934 //
1935 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1936 //     // int ic_reg     = Matcher::inline_cache_reg();
1937 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1938 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1939 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1940 //
1941 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1942 //     // // so we load it immediately before the call
1943 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1944 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1945 //
1946 //     // xor rbp,ebp
1947 //     emit_opcode(cbuf, 0x33);
1948 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1949 //
1950 //     // CALL to interpreter.
1951 //     cbuf.set_insts_mark();
1952 //     $$$emit8$primary;
1953 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1954 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1955 //   %}
1956 
1957   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1958     $$$emit8$primary;
1959     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1960     $$$emit8$shift$$constant;
1961   %}
1962 
1963   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1964     // Load immediate does not have a zero or sign extended version
1965     // for 8-bit immediates
1966     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1967     $$$emit32$src$$constant;
1968   %}
1969 
1970   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1971     // Load immediate does not have a zero or sign extended version
1972     // for 8-bit immediates
1973     emit_opcode(cbuf, $primary + $dst$$reg);
1974     $$$emit32$src$$constant;
1975   %}
1976 
1977   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1978     // Load immediate does not have a zero or sign extended version
1979     // for 8-bit immediates
1980     int dst_enc = $dst$$reg;
1981     int src_con = $src$$constant & 0x0FFFFFFFFL;
1982     if (src_con == 0) {
1983       // xor dst, dst
1984       emit_opcode(cbuf, 0x33);
1985       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1986     } else {
1987       emit_opcode(cbuf, $primary + dst_enc);
1988       emit_d32(cbuf, src_con);
1989     }
1990   %}
1991 
1992   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     int dst_enc = $dst$$reg + 2;
1996     int src_con = ((julong)($src$$constant)) >> 32;
1997     if (src_con == 0) {
1998       // xor dst, dst
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2001     } else {
2002       emit_opcode(cbuf, $primary + dst_enc);
2003       emit_d32(cbuf, src_con);
2004     }
2005   %}
2006 
2007 
2008   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2009   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2010     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2011   %}
2012 
2013   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2014     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2015   %}
2016 
2017   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2018     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2019   %}
2020 
2021   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2022     $$$emit8$primary;
2023     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2024   %}
2025 
2026   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2027     $$$emit8$secondary;
2028     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2029   %}
2030 
2031   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2033   %}
2034 
2035   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2040     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2044     // Output immediate
2045     $$$emit32$src$$constant;
2046   %}
2047 
2048   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2049     // Output Float immediate bits
2050     jfloat jf = $src$$constant;
2051     int    jf_as_bits = jint_cast( jf );
2052     emit_d32(cbuf, jf_as_bits);
2053   %}
2054 
2055   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2056     // Output Float immediate bits
2057     jfloat jf = $src$$constant;
2058     int    jf_as_bits = jint_cast( jf );
2059     emit_d32(cbuf, jf_as_bits);
2060   %}
2061 
2062   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2063     // Output immediate
2064     $$$emit16$src$$constant;
2065   %}
2066 
2067   enc_class Con_d32(immI src) %{
2068     emit_d32(cbuf,$src$$constant);
2069   %}
2070 
2071   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2072     // Output immediate memory reference
2073     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2074     emit_d32(cbuf, 0x00);
2075   %}
2076 
2077   enc_class lock_prefix( ) %{
2078     if( os::is_MP() )
2079       emit_opcode(cbuf,0xF0);         // [Lock]
2080   %}
2081 
2082   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2087   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2088 
2089     // XCHG  rbx,ecx
2090     emit_opcode(cbuf,0x87);
2091     emit_opcode(cbuf,0xD9);
2092     // [Lock]
2093     if( os::is_MP() )
2094       emit_opcode(cbuf,0xF0);
2095     // CMPXCHG8 [Eptr]
2096     emit_opcode(cbuf,0x0F);
2097     emit_opcode(cbuf,0xC7);
2098     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2099     // XCHG  rbx,ecx
2100     emit_opcode(cbuf,0x87);
2101     emit_opcode(cbuf,0xD9);
2102   %}
2103 
2104   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2105     // [Lock]
2106     if( os::is_MP() )
2107       emit_opcode(cbuf,0xF0);
2108 
2109     // CMPXCHG [Eptr]
2110     emit_opcode(cbuf,0x0F);
2111     emit_opcode(cbuf,0xB1);
2112     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2113   %}
2114 
2115   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2116     int res_encoding = $res$$reg;
2117 
2118     // MOV  res,0
2119     emit_opcode( cbuf, 0xB8 + res_encoding);
2120     emit_d32( cbuf, 0 );
2121     // JNE,s  fail
2122     emit_opcode(cbuf,0x75);
2123     emit_d8(cbuf, 5 );
2124     // MOV  res,1
2125     emit_opcode( cbuf, 0xB8 + res_encoding);
2126     emit_d32( cbuf, 1 );
2127     // fail:
2128   %}
2129 
2130   enc_class set_instruction_start( ) %{
2131     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2132   %}
2133 
2134   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2135     int reg_encoding = $ereg$$reg;
2136     int base  = $mem$$base;
2137     int index = $mem$$index;
2138     int scale = $mem$$scale;
2139     int displace = $mem$$disp;
2140     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2141     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2142   %}
2143 
2144   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2145     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2146     int base  = $mem$$base;
2147     int index = $mem$$index;
2148     int scale = $mem$$scale;
2149     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2150     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2151     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2152   %}
2153 
2154   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
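    // $tertiary selects SHLD (0x0F,0xA4, used by the left-shift rules) or
    // SHRD (0x0F,0xAC, used by the right-shift rules); the register order is
    // swapped accordingly.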
2155     int r1, r2;
2156     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2157     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2158     emit_opcode(cbuf,0x0F);
2159     emit_opcode(cbuf,$tertiary);
2160     emit_rm(cbuf, 0x3, r1, r2);
2161     emit_d8(cbuf,$cnt$$constant);
2162     emit_d8(cbuf,$primary);
2163     emit_rm(cbuf, 0x3, $secondary, r1);
2164     emit_d8(cbuf,$cnt$$constant);
2165   %}
2166 
2167   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2168     emit_opcode( cbuf, 0x8B ); // Move
2169     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2170     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2171       emit_d8(cbuf,$primary);
2172       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2173       emit_d8(cbuf,$cnt$$constant-32);
2174     }
2175     emit_d8(cbuf,$primary);
2176     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2177     emit_d8(cbuf,31);
2178   %}
2179 
2180   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2181     int r1, r2;
2182     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2183     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2184 
2185     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2186     emit_rm(cbuf, 0x3, r1, r2);
2187     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2188       emit_opcode(cbuf,$primary);
2189       emit_rm(cbuf, 0x3, $secondary, r1);
2190       emit_d8(cbuf,$cnt$$constant-32);
2191     }
2192     emit_opcode(cbuf,0x33);  // XOR r2,r2
2193     emit_rm(cbuf, 0x3, r2, r2);
2194   %}
2195 
2196   // Clone of RegMem but accepts an extra parameter to access each
2197   // half of a double in memory; it never needs relocation info.
2198   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2199     emit_opcode(cbuf,$opcode$$constant);
2200     int reg_encoding = $rm_reg$$reg;
2201     int base     = $mem$$base;
2202     int index    = $mem$$index;
2203     int scale    = $mem$$scale;
2204     int displace = $mem$$disp + $disp_for_half$$constant;
2205     relocInfo::relocType disp_reloc = relocInfo::none;
2206     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2207   %}
2208 
2209   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2210   //
2211   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2212   // and it never needs relocation information.
2213   // Frequently used to move data between FPU's Stack Top and memory.
2214   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2215     int rm_byte_opcode = $rm_opcode$$constant;
2216     int base     = $mem$$base;
2217     int index    = $mem$$index;
2218     int scale    = $mem$$scale;
2219     int displace = $mem$$disp;
2220     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2221     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2222   %}
2223 
2224   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2225     int rm_byte_opcode = $rm_opcode$$constant;
2226     int base     = $mem$$base;
2227     int index    = $mem$$index;
2228     int scale    = $mem$$scale;
2229     int displace = $mem$$disp;
2230     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2231     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2232   %}
2233 
2234   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2235     int reg_encoding = $dst$$reg;
2236     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2237     int index        = 0x04;            // 0x04 indicates no index
2238     int scale        = 0x00;            // 0x00 indicates no scale
2239     int displace     = $src1$$constant; // 0x00 indicates no displacement
2240     relocInfo::relocType disp_reloc = relocInfo::none;
2241     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2242   %}
2243 
2244   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2245     // Compare dst,src
2246     emit_opcode(cbuf,0x3B);
2247     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2248     // jmp dst < src around move
2249     emit_opcode(cbuf,0x7C);
2250     emit_d8(cbuf,2);
2251     // move dst,src
2252     emit_opcode(cbuf,0x8B);
2253     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2254   %}
2255 
2256   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2257     // Compare dst,src
2258     emit_opcode(cbuf,0x3B);
2259     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2260     // jmp dst > src around move
2261     emit_opcode(cbuf,0x7F);
2262     emit_d8(cbuf,2);
2263     // move dst,src
2264     emit_opcode(cbuf,0x8B);
2265     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2266   %}
2267 
2268   enc_class enc_FPR_store(memory mem, regDPR src) %{
2269     // If src is FPR1, we can just FST to store it.
2270     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2271     int reg_encoding = 0x2; // Just store
2272     int base  = $mem$$base;
2273     int index = $mem$$index;
2274     int scale = $mem$$scale;
2275     int displace = $mem$$disp;
2276     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2277     if( $src$$reg != FPR1L_enc ) {
2278       reg_encoding = 0x3;  // Store & pop
2279       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2280       emit_d8( cbuf, 0xC0-1+$src$$reg );
2281     }
2282     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2283     emit_opcode(cbuf,$primary);
2284     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2285   %}
2286 
2287   enc_class neg_reg(rRegI dst) %{
2288     // NEG $dst
2289     emit_opcode(cbuf,0xF7);
2290     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2291   %}
2292 
2293   enc_class setLT_reg(eCXRegI dst) %{
2294     // SETLT $dst
2295     emit_opcode(cbuf,0x0F);
2296     emit_opcode(cbuf,0x9C);
2297     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2298   %}
2299 
2300   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2301     int tmpReg = $tmp$$reg;
2302 
2303     // SUB $p,$q
2304     emit_opcode(cbuf,0x2B);
2305     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2306     // SBB $tmp,$tmp
2307     emit_opcode(cbuf,0x1B);
2308     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2309     // AND $tmp,$y
2310     emit_opcode(cbuf,0x23);
2311     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2312     // ADD $p,$tmp
2313     emit_opcode(cbuf,0x03);
2314     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2315   %}
2316 
2317   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2318     // TEST shift,32
2319     emit_opcode(cbuf,0xF7);
2320     emit_rm(cbuf, 0x3, 0, ECX_enc);
2321     emit_d32(cbuf,0x20);
2322     // JEQ,s small
2323     emit_opcode(cbuf, 0x74);
2324     emit_d8(cbuf, 0x04);
2325     // MOV    $dst.hi,$dst.lo
2326     emit_opcode( cbuf, 0x8B );
2327     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2328     // CLR    $dst.lo
2329     emit_opcode(cbuf, 0x33);
2330     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2331 // small:
2332     // SHLD   $dst.hi,$dst.lo,$shift
2333     emit_opcode(cbuf,0x0F);
2334     emit_opcode(cbuf,0xA5);
2335     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2336     // SHL    $dst.lo,$shift"
2337     emit_opcode(cbuf,0xD3);
2338     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2339   %}
2340 
2341   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2342     // TEST shift,32
2343     emit_opcode(cbuf,0xF7);
2344     emit_rm(cbuf, 0x3, 0, ECX_enc);
2345     emit_d32(cbuf,0x20);
2346     // JEQ,s small
2347     emit_opcode(cbuf, 0x74);
2348     emit_d8(cbuf, 0x04);
2349     // MOV    $dst.lo,$dst.hi
2350     emit_opcode( cbuf, 0x8B );
2351     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2352     // CLR    $dst.hi
2353     emit_opcode(cbuf, 0x33);
2354     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2355 // small:
2356     // SHRD   $dst.lo,$dst.hi,$shift
2357     emit_opcode(cbuf,0x0F);
2358     emit_opcode(cbuf,0xAD);
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2360     // SHR    $dst.hi,$shift"
2361     emit_opcode(cbuf,0xD3);
2362     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2363   %}
2364 
2365   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2366     // TEST shift,32
2367     emit_opcode(cbuf,0xF7);
2368     emit_rm(cbuf, 0x3, 0, ECX_enc);
2369     emit_d32(cbuf,0x20);
2370     // JEQ,s small
2371     emit_opcode(cbuf, 0x74);
2372     emit_d8(cbuf, 0x05);
2373     // MOV    $dst.lo,$dst.hi
2374     emit_opcode( cbuf, 0x8B );
2375     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2376     // SAR    $dst.hi,31
2377     emit_opcode(cbuf, 0xC1);
2378     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2379     emit_d8(cbuf, 0x1F );
2380 // small:
2381     // SHRD   $dst.lo,$dst.hi,$shift
2382     emit_opcode(cbuf,0x0F);
2383     emit_opcode(cbuf,0xAD);
2384     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2385     // SAR    $dst.hi,$shift"
2386     emit_opcode(cbuf,0xD3);
2387     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2388   %}
2389 
2390 
2391   // ----------------- Encodings for floating point unit -----------------
2392   // May leave result in FPU-TOS or FPU reg depending on opcodes
2393   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2394     $$$emit8$primary;
2395     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2396   %}
2397 
2398   // Pop argument in FPR0 with FSTP ST(0)
2399   enc_class PopFPU() %{
2400     emit_opcode( cbuf, 0xDD );
2401     emit_d8( cbuf, 0xD8 );
2402   %}
2403 
2404   // !!!!! equivalent to Pop_Reg_F
2405   enc_class Pop_Reg_DPR( regDPR dst ) %{
2406     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2407     emit_d8( cbuf, 0xD8+$dst$$reg );
2408   %}
2409 
2410   enc_class Push_Reg_DPR( regDPR dst ) %{
2411     emit_opcode( cbuf, 0xD9 );
2412     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2413   %}
2414 
2415   enc_class strictfp_bias1( regDPR dst ) %{
2416     emit_opcode( cbuf, 0xDB );           // FLD m80real
2417     emit_opcode( cbuf, 0x2D );
2418     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2419     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2420     emit_opcode( cbuf, 0xC8+$dst$$reg );
2421   %}
2422 
2423   enc_class strictfp_bias2( regDPR dst ) %{
2424     emit_opcode( cbuf, 0xDB );           // FLD m80real
2425     emit_opcode( cbuf, 0x2D );
2426     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2427     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2428     emit_opcode( cbuf, 0xC8+$dst$$reg );
2429   %}
2430 
2431   // Special case for moving an integer register to a stack slot.
2432   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2433     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2434   %}
2435 
2436   // Special case for moving a register to a stack slot.
2437   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2438     // Opcode already emitted
2439     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2440     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2441     emit_d32(cbuf, $dst$$disp);   // Displacement
2442   %}
2443 
2444   // Push the integer in stackSlot 'src' onto FP-stack
2445   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2446     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2447   %}
2448 
2449   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2450   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2451     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2452   %}
2453 
2454   // Same as Pop_Mem_F except for opcode
2455   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2456   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2457     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2458   %}
2459 
2460   enc_class Pop_Reg_FPR( regFPR dst ) %{
2461     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2462     emit_d8( cbuf, 0xD8+$dst$$reg );
2463   %}
2464 
2465   enc_class Push_Reg_FPR( regFPR dst ) %{
2466     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2467     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2468   %}
2469 
2470   // Push FPU's float to a stack-slot, and pop FPU-stack
2471   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2472     int pop = 0x02;
2473     if ($src$$reg != FPR1L_enc) {
2474       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2475       emit_d8( cbuf, 0xC0-1+$src$$reg );
2476       pop = 0x03;
2477     }
2478     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2479   %}
2480 
2481   // Push FPU's double to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2483     int pop = 0x02;
2484     if ($src$$reg != FPR1L_enc) {
2485       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2486       emit_d8( cbuf, 0xC0-1+$src$$reg );
2487       pop = 0x03;
2488     }
2489     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2490   %}
2491 
2492   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2493   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2494     int pop = 0xD0 - 1; // -1 since we skip FLD
2495     if ($src$$reg != FPR1L_enc) {
2496       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2497       emit_d8( cbuf, 0xC0-1+$src$$reg );
2498       pop = 0xD8;
2499     }
2500     emit_opcode( cbuf, 0xDD );
2501     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2502   %}
2503 
2504 
2505   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2506     // load dst in FPR0
2507     emit_opcode( cbuf, 0xD9 );
2508     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2509     if ($src$$reg != FPR1L_enc) {
2510       // fincstp
2511       emit_opcode (cbuf, 0xD9);
2512       emit_opcode (cbuf, 0xF7);
2513       // swap src with FPR1:
2514       // FXCH FPR1 with src
2515       emit_opcode(cbuf, 0xD9);
2516       emit_d8(cbuf, 0xC8-1+$src$$reg );
2517       // fdecstp
2518       emit_opcode (cbuf, 0xD9);
2519       emit_opcode (cbuf, 0xF6);
2520     }
2521   %}
2522 
2523   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2524     MacroAssembler _masm(&cbuf);
2525     __ subptr(rsp, 8);
2526     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2527     __ fld_d(Address(rsp, 0));
2528     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2529     __ fld_d(Address(rsp, 0));
2530   %}
2531 
2532   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2533     MacroAssembler _masm(&cbuf);
2534     __ subptr(rsp, 4);
2535     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2536     __ fld_s(Address(rsp, 0));
2537     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2538     __ fld_s(Address(rsp, 0));
2539   %}
2540 
2541   enc_class Push_ResultD(regD dst) %{
2542     MacroAssembler _masm(&cbuf);
2543     __ fstp_d(Address(rsp, 0));
2544     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2545     __ addptr(rsp, 8);
2546   %}
2547 
2548   enc_class Push_ResultF(regF dst, immI d8) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ fstp_s(Address(rsp, 0));
2551     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2552     __ addptr(rsp, $d8$$constant);
2553   %}
2554 
2555   enc_class Push_SrcD(regD src) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560   %}
2561 
2562   enc_class push_stack_temp_qword() %{
2563     MacroAssembler _masm(&cbuf);
2564     __ subptr(rsp, 8);
2565   %}
2566 
2567   enc_class pop_stack_temp_qword() %{
2568     MacroAssembler _masm(&cbuf);
2569     __ addptr(rsp, 8);
2570   %}
2571 
2572   enc_class push_xmm_to_fpr1(regD src) %{
2573     MacroAssembler _masm(&cbuf);
2574     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class Push_Result_Mod_DPR( regDPR src) %{
2579     if ($src$$reg != FPR1L_enc) {
2580       // fincstp
2581       emit_opcode (cbuf, 0xD9);
2582       emit_opcode (cbuf, 0xF7);
2583       // FXCH FPR1 with src
2584       emit_opcode(cbuf, 0xD9);
2585       emit_d8(cbuf, 0xC8-1+$src$$reg );
2586       // fdecstp
2587       emit_opcode (cbuf, 0xD9);
2588       emit_opcode (cbuf, 0xF6);
2589     }
2590     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2591     // // FSTP   FPR$dst$$reg
2592     // emit_opcode( cbuf, 0xDD );
2593     // emit_d8( cbuf, 0xD8+$dst$$reg );
2594   %}
2595 
2596   enc_class fnstsw_sahf_skip_parity() %{
2597     // fnstsw ax
2598     emit_opcode( cbuf, 0xDF );
2599     emit_opcode( cbuf, 0xE0 );
2600     // sahf
2601     emit_opcode( cbuf, 0x9E );
2602     // jnp  ::skip
2603     emit_opcode( cbuf, 0x7B );
2604     emit_opcode( cbuf, 0x05 );
2605   %}
2606 
2607   enc_class emitModDPR() %{
2608     // fprem must be iterative
2609     // :: loop
2610     // fprem
2611     emit_opcode( cbuf, 0xD9 );
2612     emit_opcode( cbuf, 0xF8 );
2613     // wait
2614     emit_opcode( cbuf, 0x9b );
2615     // fnstsw ax
2616     emit_opcode( cbuf, 0xDF );
2617     emit_opcode( cbuf, 0xE0 );
2618     // sahf
2619     emit_opcode( cbuf, 0x9E );
2620     // jp  ::loop
2621     emit_opcode( cbuf, 0x0F );
2622     emit_opcode( cbuf, 0x8A );
2623     emit_opcode( cbuf, 0xF4 );
2624     emit_opcode( cbuf, 0xFF );
2625     emit_opcode( cbuf, 0xFF );
2626     emit_opcode( cbuf, 0xFF );
2627   %}
2628 
2629   enc_class fpu_flags() %{
2630     // fnstsw_ax
2631     emit_opcode( cbuf, 0xDF);
2632     emit_opcode( cbuf, 0xE0);
2633     // test ax,0x0400
2634     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2635     emit_opcode( cbuf, 0xA9 );
2636     emit_d16   ( cbuf, 0x0400 );
2637     // // // This sequence works, but stalls for 12-16 cycles on PPro
2638     // // test rax,0x0400
2639     // emit_opcode( cbuf, 0xA9 );
2640     // emit_d32   ( cbuf, 0x00000400 );
2641     //
2642     // jz exit (no unordered comparison)
2643     emit_opcode( cbuf, 0x74 );
2644     emit_d8    ( cbuf, 0x02 );
2645     // mov ah,1 - treat as LT case (set carry flag)
2646     emit_opcode( cbuf, 0xB4 );
2647     emit_d8    ( cbuf, 0x01 );
2648     // sahf
2649     emit_opcode( cbuf, 0x9E);
2650   %}
2651 
2652   enc_class cmpF_P6_fixup() %{
2653     // Fixup the integer flags in case comparison involved a NaN
2654     //
2655     // JNP exit (no unordered comparison, P-flag is set by NaN)
2656     emit_opcode( cbuf, 0x7B );
2657     emit_d8    ( cbuf, 0x03 );
2658     // MOV AH,1 - treat as LT case (set carry flag)
2659     emit_opcode( cbuf, 0xB4 );
2660     emit_d8    ( cbuf, 0x01 );
2661     // SAHF
2662     emit_opcode( cbuf, 0x9E);
2663     // NOP     // target for branch to avoid branch to branch
2664     emit_opcode( cbuf, 0x90);
2665   %}
2666 
2667 //     fnstsw_ax();
2668 //     sahf();
2669 //     movl(dst, nan_result);
2670 //     jcc(Assembler::parity, exit);
2671 //     movl(dst, less_result);
2672 //     jcc(Assembler::below, exit);
2673 //     movl(dst, equal_result);
2674 //     jcc(Assembler::equal, exit);
2675 //     movl(dst, greater_result);
2676 
2677 // less_result     =  1;
2678 // greater_result  = -1;
2679 // equal_result    = 0;
2680 // nan_result      = -1;
2681 
2682   enc_class CmpF_Result(rRegI dst) %{
2683     // fnstsw_ax();
2684     emit_opcode( cbuf, 0xDF);
2685     emit_opcode( cbuf, 0xE0);
2686     // sahf
2687     emit_opcode( cbuf, 0x9E);
2688     // movl(dst, nan_result);
2689     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2690     emit_d32( cbuf, -1 );
2691     // jcc(Assembler::parity, exit);
2692     emit_opcode( cbuf, 0x7A );
2693     emit_d8    ( cbuf, 0x13 );
2694     // movl(dst, less_result);
2695     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2696     emit_d32( cbuf, -1 );
2697     // jcc(Assembler::below, exit);
2698     emit_opcode( cbuf, 0x72 );
2699     emit_d8    ( cbuf, 0x0C );
2700     // movl(dst, equal_result);
2701     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2702     emit_d32( cbuf, 0 );
2703     // jcc(Assembler::equal, exit);
2704     emit_opcode( cbuf, 0x74 );
2705     emit_d8    ( cbuf, 0x05 );
2706     // movl(dst, greater_result);
2707     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2708     emit_d32( cbuf, 1 );
2709   %}
2710 
2711 
2712   // Compare the longs and set flags
2713   // BROKEN!  Do Not use as-is
2714   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2715     // CMP    $src1.hi,$src2.hi
2716     emit_opcode( cbuf, 0x3B );
2717     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2718     // JNE,s  done
2719     emit_opcode(cbuf,0x75);
2720     emit_d8(cbuf, 2 );
2721     // CMP    $src1.lo,$src2.lo
2722     emit_opcode( cbuf, 0x3B );
2723     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2724 // done:
2725   %}
2726 
2727   enc_class convert_int_long( regL dst, rRegI src ) %{
2728     // mov $dst.lo,$src
2729     int dst_encoding = $dst$$reg;
2730     int src_encoding = $src$$reg;
2731     encode_Copy( cbuf, dst_encoding  , src_encoding );
2732     // mov $dst.hi,$src
2733     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2734     // sar $dst.hi,31
2735     emit_opcode( cbuf, 0xC1 );
2736     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2737     emit_d8(cbuf, 0x1F );
2738   %}
2739 
2740   enc_class convert_long_double( eRegL src ) %{
2741     // push $src.hi
2742     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2743     // push $src.lo
2744     emit_opcode(cbuf, 0x50+$src$$reg  );
2745     // fild 64-bits at [SP]
2746     emit_opcode(cbuf,0xdf);
2747     emit_d8(cbuf, 0x6C);
2748     emit_d8(cbuf, 0x24);
2749     emit_d8(cbuf, 0x00);
2750     // pop stack
2751     emit_opcode(cbuf, 0x83); // add  SP, #8
2752     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2753     emit_d8(cbuf, 0x8);
2754   %}
2755 
2756   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2757     // IMUL   EDX:EAX,$src1
2758     emit_opcode( cbuf, 0xF7 );
2759     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2760     // SAR    EDX,$cnt-32
2761     int shift_count = ((int)$cnt$$constant) - 32;
2762     if (shift_count > 0) {
2763       emit_opcode(cbuf, 0xC1);
2764       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2765       emit_d8(cbuf, shift_count);
2766     }
2767   %}
2768 
  // Same as convert_long_double above, but without the trailing ADD SP,8
2770   enc_class convert_long_double2( eRegL src ) %{
2771     // push $src.hi
2772     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2773     // push $src.lo
2774     emit_opcode(cbuf, 0x50+$src$$reg  );
2775     // fild 64-bits at [SP]
2776     emit_opcode(cbuf,0xdf);
2777     emit_d8(cbuf, 0x6C);
2778     emit_d8(cbuf, 0x24);
2779     emit_d8(cbuf, 0x00);
2780   %}
2781 
2782   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2783     // Basic idea: long = (long)int * (long)int
2784     // IMUL EDX:EAX, src
2785     emit_opcode( cbuf, 0xF7 );
2786     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2787   %}
2788 
2789   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2790     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2791     // MUL EDX:EAX, src
2792     emit_opcode( cbuf, 0xF7 );
2793     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2794   %}
2795 
2796   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2797     // Basic idea: lo(result) = lo(x_lo * y_lo)
2798     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2799     // MOV    $tmp,$src.lo
2800     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2801     // IMUL   $tmp,EDX
2802     emit_opcode( cbuf, 0x0F );
2803     emit_opcode( cbuf, 0xAF );
2804     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2805     // MOV    EDX,$src.hi
2806     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2807     // IMUL   EDX,EAX
2808     emit_opcode( cbuf, 0x0F );
2809     emit_opcode( cbuf, 0xAF );
2810     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2811     // ADD    $tmp,EDX
2812     emit_opcode( cbuf, 0x03 );
2813     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2814     // MUL   EDX:EAX,$src.lo
2815     emit_opcode( cbuf, 0xF7 );
2816     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2818     emit_opcode( cbuf, 0x03 );
2819     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2820   %}
2821 
2822   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2823     // Basic idea: lo(result) = lo(src * y_lo)
2824     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2825     // IMUL   $tmp,EDX,$src
2826     emit_opcode( cbuf, 0x6B );
2827     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2828     emit_d8( cbuf, (int)$src$$constant );
2829     // MOV    EDX,$src
2830     emit_opcode(cbuf, 0xB8 + EDX_enc);
2831     emit_d32( cbuf, (int)$src$$constant );
2832     // MUL   EDX:EAX,EDX
2833     emit_opcode( cbuf, 0xF7 );
2834     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2836     emit_opcode( cbuf, 0x03 );
2837     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2838   %}
2839 
2840   enc_class long_div( eRegL src1, eRegL src2 ) %{
2841     // PUSH src1.hi
2842     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2843     // PUSH src1.lo
2844     emit_opcode(cbuf,               0x50+$src1$$reg  );
2845     // PUSH src2.hi
2846     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2847     // PUSH src2.lo
2848     emit_opcode(cbuf,               0x50+$src2$$reg  );
2849     // CALL directly to the runtime
2850     cbuf.set_insts_mark();
2851     emit_opcode(cbuf,0xE8);       // Call into runtime
2852     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2853     // Restore stack
2854     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2855     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2856     emit_d8(cbuf, 4*4);
2857   %}
2858 
2859   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2860     // PUSH src1.hi
2861     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2862     // PUSH src1.lo
2863     emit_opcode(cbuf,               0x50+$src1$$reg  );
2864     // PUSH src2.hi
2865     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2866     // PUSH src2.lo
2867     emit_opcode(cbuf,               0x50+$src2$$reg  );
2868     // CALL directly to the runtime
2869     cbuf.set_insts_mark();
2870     emit_opcode(cbuf,0xE8);       // Call into runtime
2871     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2872     // Restore stack
2873     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2874     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2875     emit_d8(cbuf, 4*4);
2876   %}
2877 
2878   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2879     // MOV   $tmp,$src.lo
2880     emit_opcode(cbuf, 0x8B);
2881     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2882     // OR    $tmp,$src.hi
2883     emit_opcode(cbuf, 0x0B);
2884     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2885   %}
2886 
2887   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2888     // CMP    $src1.lo,$src2.lo
2889     emit_opcode( cbuf, 0x3B );
2890     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2891     // JNE,s  skip
2892     emit_cc(cbuf, 0x70, 0x5);
2893     emit_d8(cbuf,2);
2894     // CMP    $src1.hi,$src2.hi
2895     emit_opcode( cbuf, 0x3B );
2896     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2897   %}
2898 
2899   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2900     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2901     emit_opcode( cbuf, 0x3B );
2902     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2903     // MOV    $tmp,$src1.hi
2904     emit_opcode( cbuf, 0x8B );
2905     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2906     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2907     emit_opcode( cbuf, 0x1B );
2908     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2909   %}
2910 
2911   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2912     // XOR    $tmp,$tmp
2913     emit_opcode(cbuf,0x33);  // XOR
2914     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2915     // CMP    $tmp,$src.lo
2916     emit_opcode( cbuf, 0x3B );
2917     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2918     // SBB    $tmp,$src.hi
2919     emit_opcode( cbuf, 0x1B );
2920     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2921   %}
2922 
2923  // Sniff, sniff... smells like Gnu Superoptimizer
2924   enc_class neg_long( eRegL dst ) %{
2925     emit_opcode(cbuf,0xF7);    // NEG hi
2926     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2927     emit_opcode(cbuf,0xF7);    // NEG lo
2928     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2929     emit_opcode(cbuf,0x83);    // SBB hi,0
2930     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2931     emit_d8    (cbuf,0 );
2932   %}
2933 
2934   enc_class enc_pop_rdx() %{
2935     emit_opcode(cbuf,0x5A);
2936   %}
2937 
2938   enc_class enc_rethrow() %{
2939     cbuf.set_insts_mark();
2940     emit_opcode(cbuf, 0xE9);        // jmp    entry
2941     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2942                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2943   %}
2944 
2945 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  In the corner cases the hardware stores a
  // sentinel value, which we test for and patch up with a runtime call that
  // writes the correct value directly to the stack.
2951   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2955     // However, I2C adapters and other float-stack manglers leave pending
2956     // invalid-op exceptions hanging.  We would have to clear them before
2957     // enabling them and that is more expensive than just testing for the
2958     // invalid value Intel stores down in the corner cases.
2959     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2960     emit_opcode(cbuf,0x2D);
2961     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2962     // Allocate a word
2963     emit_opcode(cbuf,0x83);            // SUB ESP,4
2964     emit_opcode(cbuf,0xEC);
2965     emit_d8(cbuf,0x04);
2966     // Encoding assumes a double has been pushed into FPR0.
2967     // Store down the double as an int, popping the FPU stack
2968     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2969     emit_opcode(cbuf,0x1C);
2970     emit_d8(cbuf,0x24);
2971     // Restore the rounding mode; mask the exception
2972     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2973     emit_opcode(cbuf,0x2D);
2974     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2975         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2976         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2977 
2978     // Load the converted int; adjust CPU stack
2979     emit_opcode(cbuf,0x58);       // POP EAX
2980     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2981     emit_d32   (cbuf,0x80000000); //         0x80000000
2982     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2983     emit_d8    (cbuf,0x07);       // Size of slow_call
2984     // Push src onto stack slow-path
2985     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2986     emit_d8    (cbuf,0xC0-1+$src$$reg );
2987     // CALL directly to the runtime
2988     cbuf.set_insts_mark();
2989     emit_opcode(cbuf,0xE8);       // Call into runtime
2990     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2991     // Carry on here...
2992   %}
2993 
2994   enc_class DPR2L_encoding( regDPR src ) %{
2995     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2996     emit_opcode(cbuf,0x2D);
2997     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
2999     emit_opcode(cbuf,0x83);            // SUB ESP,8
3000     emit_opcode(cbuf,0xEC);
3001     emit_d8(cbuf,0x08);
3002     // Encoding assumes a double has been pushed into FPR0.
3003     // Store down the double as a long, popping the FPU stack
3004     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3005     emit_opcode(cbuf,0x3C);
3006     emit_d8(cbuf,0x24);
3007     // Restore the rounding mode; mask the exception
3008     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3009     emit_opcode(cbuf,0x2D);
3010     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3011         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3012         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3013 
    // Load the converted long; adjust CPU stack
3015     emit_opcode(cbuf,0x58);       // POP EAX
3016     emit_opcode(cbuf,0x5A);       // POP EDX
3017     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3018     emit_d8    (cbuf,0xFA);       // rdx
3019     emit_d32   (cbuf,0x80000000); //         0x80000000
3020     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3021     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3022     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3023     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3024     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3025     emit_d8    (cbuf,0x07);       // Size of slow_call
3026     // Push src onto stack slow-path
3027     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3028     emit_d8    (cbuf,0xC0-1+$src$$reg );
3029     // CALL directly to the runtime
3030     cbuf.set_insts_mark();
3031     emit_opcode(cbuf,0xE8);       // Call into runtime
3032     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3033     // Carry on here...
3034   %}
3035 
3036   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3037     // Operand was loaded from memory into fp ST (stack top)
3038     // FMUL   ST,$src  /* D8 C8+i */
3039     emit_opcode(cbuf, 0xD8);
3040     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3041   %}
3042 
3043   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3044     // FADDP  ST,src2  /* D8 C0+i */
3045     emit_opcode(cbuf, 0xD8);
3046     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
3048   %}
3049 
3050   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3051     // FADDP  src2,ST  /* DE C0+i */
3052     emit_opcode(cbuf, 0xDE);
3053     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3054   %}
3055 
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
3066 
3067   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3068     // Operand was loaded from memory into fp ST (stack top)
3069     // FADD   ST,$src  /* D8 C0+i */
3070     emit_opcode(cbuf, 0xD8);
3071     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3072 
3073     // FMUL  ST,src2  /* D8 C*+i */
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3076   %}
3077 
3078 
3079   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3080     // Operand was loaded from memory into fp ST (stack top)
3081     // FADD   ST,$src  /* D8 C0+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3084 
3085     // FMULP  src2,ST  /* DE C8+i */
3086     emit_opcode(cbuf, 0xDE);
3087     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3088   %}
3089 
3090   // Atomically load the volatile long
3091   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3092     emit_opcode(cbuf,0xDF);
3093     int rm_byte_opcode = 0x05;
3094     int base     = $mem$$base;
3095     int index    = $mem$$index;
3096     int scale    = $mem$$scale;
3097     int displace = $mem$$disp;
3098     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3099     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3100     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3101   %}
3102 
3103   // Volatile Store Long.  Must be atomic, so move it into
3104   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3105   // target address before the store (for null-ptr checks)
3106   // so the memory operand is used twice in the encoding.
3107   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3108     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3109     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3110     emit_opcode(cbuf,0xDF);
3111     int rm_byte_opcode = 0x07;
3112     int base     = $mem$$base;
3113     int index    = $mem$$index;
3114     int scale    = $mem$$scale;
3115     int displace = $mem$$disp;
3116     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3117     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3118   %}
3119 
  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the
  // condition codes in the process.
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3125 
3126   enc_class Safepoint_Poll() %{
3127     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3128     emit_opcode(cbuf,0x85);
3129     emit_rm (cbuf, 0x0, 0x7, 0x5);
3130     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3131   %}
3132 %}
3133 
3134 
3135 //----------FRAME--------------------------------------------------------------
3136 // Definition of frame structure and management information.
3137 //
3138 //  S T A C K   L A Y O U T    Allocators stack-slot number
3139 //                             |   (to get allocators register number
3140 //  G  Owned by    |        |  v    add OptoReg::stack0())
3141 //  r   CALLER     |        |
3142 //  o     |        +--------+      pad to even-align allocators stack-slot
3143 //  w     V        |  pad0  |        numbers; owned by CALLER
3144 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3145 //  h     ^        |   in   |  5
3146 //        |        |  args  |  4   Holes in incoming args owned by SELF
3147 //  |     |        |        |  3
3148 //  |     |        +--------+
3149 //  V     |        | old out|      Empty on Intel, window on Sparc
3150 //        |    old |preserve|      Must be even aligned.
3151 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3152 //        |        |   in   |  3   area for Intel ret address
3153 //     Owned by    |preserve|      Empty on Sparc.
3154 //       SELF      +--------+
3155 //        |        |  pad2  |  2   pad to align old SP
3156 //        |        +--------+  1
3157 //        |        | locks  |  0
3158 //        |        +--------+----> OptoReg::stack0(), even aligned
3159 //        |        |  pad1  | 11   pad to align new SP
3160 //        |        +--------+
3161 //        |        |        | 10
3162 //        |        | spills |  9   spills
3163 //        V        |        |  8   (pad0 slot for callee)
3164 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3165 //        ^        |  out   |  7
3166 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3167 //     Owned by    +--------+
3168 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3169 //        |    new |preserve|      Must be even-aligned.
3170 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3171 //        |        |        |
3172 //
3173 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3174 //         known from SELF's arguments and the Java calling convention.
3175 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3183 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3184 //         even aligned with pad0 as needed.
3185 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3186 //         region 6-11 is even aligned; it may be padded out more so that
3187 //         the region from SP to FP meets the minimum stack alignment.
3188 
3189 frame %{
3190   // What direction does stack grow in (assumed to be same for C & Java)
3191   stack_direction(TOWARDS_LOW);
3192 
3193   // These three registers define part of the calling convention
3194   // between compiled code and the interpreter.
3195   inline_cache_reg(EAX);                // Inline Cache Register
3196   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3197 
3198   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3199   cisc_spilling_operand_name(indOffset32);
3200 
3201   // Number of stack slots consumed by locking an object
3202   sync_stack_slots(1);
3203 
3204   // Compiled code's Frame Pointer
3205   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // saved to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
3209   interpreter_frame_pointer(EBP);
3210 
3211   // Stack alignment requirement
3212   // Alignment size in bytes (128-bit -> 16 bytes)
3213   stack_alignment(StackAlignmentInBytes);
3214 
3215   // Number of stack slots between incoming argument block and the start of
3216   // a new frame.  The PROLOG must add this many slots to the stack.  The
3217   // EPILOG must remove this many slots.  Intel needs one slot for
3218   // return address and one for rbp, (must save rbp)
3219   in_preserve_stack_slots(2+VerifyStackAtCalls);
3220 
3221   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3222   // for calls to C.  Supports the var-args backing area for register parms.
3223   varargs_C_out_slots_killed(0);
3224 
3225   // The after-PROLOG location of the return address.  Location of
3226   // return address specifies a type (REG or STACK) and a number
3227   // representing the register number (i.e. - use a register name) or
3228   // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
3231   return_addr(STACK - 1 +
3232               round_to((Compile::current()->in_preserve_stack_slots() +
3233                         Compile::current()->fixed_slots()),
3234                        stack_alignment_in_slots()));
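  // Worked example (illustrative only; actual numbers depend on VM flags and
  // StackAlignmentInBytes): with in_preserve_stack_slots() == 2,
  // fixed_slots() == 0 and 4 slots of stack alignment, round_to(2, 4) == 4,
  // so the return address lives at STACK - 1 + 4, i.e. stack slot 3.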
3235 
3236   // Body of function which returns an integer array locating
3237   // arguments either in registers or in stack slots.  Passed an array
3238   // of ideal registers called "sig" and a "length" count.  Stack-slot
3239   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3240   // arguments for a CALLEE.  Incoming stack arguments are
3241   // automatically biased by the preserve_stack_slots field above.
3242   calling_convention %{
    // No difference between ingoing/outgoing, so just pass false
3244     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3245   %}
3246 
3247 
3248   // Body of function which returns an integer array locating
3249   // arguments either in registers or in stack slots.  Passed an array
3250   // of ideal registers called "sig" and a "length" count.  Stack-slot
3251   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3252   // arguments for a CALLEE.  Incoming stack arguments are
3253   // automatically biased by the preserve_stack_slots field above.
3254   c_calling_convention %{
3255     // This is obviously always outgoing
3256     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3257   %}
3258 
3259   // Location of C & interpreter return values
3260   c_return_value %{
3261     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3262     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3263     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3264 
3265     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3266     // that C functions return float and double results in XMM0.
3267     if( ideal_reg == Op_RegD && UseSSE>=2 )
3268       return OptoRegPair(XMM0b_num,XMM0_num);
3269     if( ideal_reg == Op_RegF && UseSSE>=2 )
3270       return OptoRegPair(OptoReg::Bad,XMM0_num);
3271 
3272     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3273   %}
3274 
3275   // Location of return values
3276   return_value %{
3277     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3278     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3279     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3280     if( ideal_reg == Op_RegD && UseSSE>=2 )
3281       return OptoRegPair(XMM0b_num,XMM0_num);
3282     if( ideal_reg == Op_RegF && UseSSE>=1 )
3283       return OptoRegPair(OptoReg::Bad,XMM0_num);
3284     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3285   %}
3286 
3287 %}
3288 
3289 //----------ATTRIBUTES---------------------------------------------------------
3290 //----------Operand Attributes-------------------------------------------------
3291 op_attrib op_cost(0);        // Required cost attribute
3292 
3293 //----------Instruction Attributes---------------------------------------------
3294 ins_attrib ins_cost(100);       // Required cost attribute
3295 ins_attrib ins_size(8);         // Required size attribute (in bits)
3296 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3297                                 // non-matching short branch variant of some
                                // long branch?
3299 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3300                                 // specifies the alignment that some part of the instruction (not
3301                                 // necessarily the start) requires.  If > 1, a compute_padding()
3302                                 // function must be provided for the instruction
3303 
3304 //----------OPERANDS-----------------------------------------------------------
3305 // Operand definitions must precede instruction definitions for correct parsing
3306 // in the ADLC because operands constitute user defined types which are used in
3307 // instruction definitions.
3308 
3309 //----------Simple Operands----------------------------------------------------
3310 // Immediate Operands
3311 // Integer Immediate
3312 operand immI() %{
3313   match(ConI);
3314 
3315   op_cost(10);
3316   format %{ %}
3317   interface(CONST_INTER);
3318 %}
3319 
3320 // Constant for test vs zero
3321 operand immI0() %{
3322   predicate(n->get_int() == 0);
3323   match(ConI);
3324 
3325   op_cost(0);
3326   format %{ %}
3327   interface(CONST_INTER);
3328 %}
3329 
3330 // Constant for increment
3331 operand immI1() %{
3332   predicate(n->get_int() == 1);
3333   match(ConI);
3334 
3335   op_cost(0);
3336   format %{ %}
3337   interface(CONST_INTER);
3338 %}
3339 
3340 // Constant for decrement
3341 operand immI_M1() %{
3342   predicate(n->get_int() == -1);
3343   match(ConI);
3344 
3345   op_cost(0);
3346   format %{ %}
3347   interface(CONST_INTER);
3348 %}
3349 
3350 // Valid scale values for addressing modes
3351 operand immI2() %{
3352   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3353   match(ConI);
3354 
3355   format %{ %}
3356   interface(CONST_INTER);
3357 %}
3358 
3359 operand immI8() %{
3360   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3361   match(ConI);
3362 
3363   op_cost(5);
3364   format %{ %}
3365   interface(CONST_INTER);
3366 %}
3367 
3368 operand immI16() %{
3369   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3370   match(ConI);
3371 
3372   op_cost(10);
3373   format %{ %}
3374   interface(CONST_INTER);
3375 %}
3376 
3377 // Int Immediate non-negative
3378 operand immU31()
3379 %{
3380   predicate(n->get_int() >= 0);
3381   match(ConI);
3382 
3383   op_cost(0);
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 // Constant for long shifts
3389 operand immI_32() %{
3390   predicate( n->get_int() == 32 );
3391   match(ConI);
3392 
3393   op_cost(0);
3394   format %{ %}
3395   interface(CONST_INTER);
3396 %}
3397 
3398 operand immI_1_31() %{
3399   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3400   match(ConI);
3401 
3402   op_cost(0);
3403   format %{ %}
3404   interface(CONST_INTER);
3405 %}
3406 
3407 operand immI_32_63() %{
3408   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3409   match(ConI);
3410   op_cost(0);
3411 
3412   format %{ %}
3413   interface(CONST_INTER);
3414 %}
3415 
3416 operand immI_1() %{
3417   predicate( n->get_int() == 1 );
3418   match(ConI);
3419 
3420   op_cost(0);
3421   format %{ %}
3422   interface(CONST_INTER);
3423 %}
3424 
3425 operand immI_2() %{
3426   predicate( n->get_int() == 2 );
3427   match(ConI);
3428 
3429   op_cost(0);
3430   format %{ %}
3431   interface(CONST_INTER);
3432 %}
3433 
3434 operand immI_3() %{
3435   predicate( n->get_int() == 3 );
3436   match(ConI);
3437 
3438   op_cost(0);
3439   format %{ %}
3440   interface(CONST_INTER);
3441 %}
3442 
3443 // Pointer Immediate
3444 operand immP() %{
3445   match(ConP);
3446 
3447   op_cost(10);
3448   format %{ %}
3449   interface(CONST_INTER);
3450 %}
3451 
3452 // NULL Pointer Immediate
3453 operand immP0() %{
3454   predicate( n->get_ptr() == 0 );
3455   match(ConP);
3456   op_cost(0);
3457 
3458   format %{ %}
3459   interface(CONST_INTER);
3460 %}
3461 
3462 // Long Immediate
3463 operand immL() %{
3464   match(ConL);
3465 
3466   op_cost(20);
3467   format %{ %}
3468   interface(CONST_INTER);
3469 %}
3470 
3471 // Long Immediate zero
3472 operand immL0() %{
3473   predicate( n->get_long() == 0L );
3474   match(ConL);
3475   op_cost(0);
3476 
3477   format %{ %}
3478   interface(CONST_INTER);
3479 %}
3480 
// Long Immediate minus one
3482 operand immL_M1() %{
3483   predicate( n->get_long() == -1L );
3484   match(ConL);
3485   op_cost(0);
3486 
3487   format %{ %}
3488   interface(CONST_INTER);
3489 %}
3490 
3491 // Long immediate from 0 to 127.
3492 // Used for a shorter form of long mul by 10.
3493 operand immL_127() %{
3494   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3495   match(ConL);
3496   op_cost(0);
3497 
3498   format %{ %}
3499   interface(CONST_INTER);
3500 %}
3501 
3502 // Long Immediate: low 32-bit mask
3503 operand immL_32bits() %{
3504   predicate(n->get_long() == 0xFFFFFFFFL);
3505   match(ConL);
3506   op_cost(0);
3507 
3508   format %{ %}
3509   interface(CONST_INTER);
3510 %}
3511 
// Long Immediate: value fits in a sign-extended 32-bit immediate
3513 operand immL32() %{
3514   predicate(n->get_long() == (int)(n->get_long()));
3515   match(ConL);
3516   op_cost(20);
3517 
3518   format %{ %}
3519   interface(CONST_INTER);
3520 %}
3521 
// Double Immediate zero
3523 operand immDPR0() %{
3524   // Do additional (and counter-intuitive) test against NaN to work around VC++
3525   // bug that generates code such that NaNs compare equal to 0.0
3526   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3527   match(ConD);
3528 
3529   op_cost(5);
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Double Immediate one
3535 operand immDPR1() %{
3536   predicate( UseSSE<=1 && n->getd() == 1.0 );
3537   match(ConD);
3538 
3539   op_cost(5);
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Double Immediate
3545 operand immDPR() %{
3546   predicate(UseSSE<=1);
3547   match(ConD);
3548 
3549   op_cost(5);
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 operand immD() %{
3555   predicate(UseSSE>=2);
3556   match(ConD);
3557 
3558   op_cost(5);
3559   format %{ %}
3560   interface(CONST_INTER);
3561 %}
3562 
3563 // Double Immediate zero
3564 operand immD0() %{
3565   // Do additional (and counter-intuitive) test against NaN to work around VC++
3566   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3567   // compare equal to -0.0.
3568   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3569   match(ConD);
3570 
3571   format %{ %}
3572   interface(CONST_INTER);
3573 %}
3574 
3575 // Float Immediate zero
3576 operand immFPR0() %{
3577   predicate(UseSSE == 0 && n->getf() == 0.0F);
3578   match(ConF);
3579 
3580   op_cost(5);
3581   format %{ %}
3582   interface(CONST_INTER);
3583 %}
3584 
3585 // Float Immediate one
3586 operand immFPR1() %{
3587   predicate(UseSSE == 0 && n->getf() == 1.0F);
3588   match(ConF);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Float Immediate
3596 operand immFPR() %{
3597   predicate( UseSSE == 0 );
3598   match(ConF);
3599 
3600   op_cost(5);
3601   format %{ %}
3602   interface(CONST_INTER);
3603 %}
3604 
3605 // Float Immediate
3606 operand immF() %{
3607   predicate(UseSSE >= 1);
3608   match(ConF);
3609 
3610   op_cost(5);
3611   format %{ %}
3612   interface(CONST_INTER);
3613 %}
3614 
3615 // Float Immediate zero.  Zero and not -0.0
3616 operand immF0() %{
3617   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3618   match(ConF);
3619 
3620   op_cost(5);
3621   format %{ %}
3622   interface(CONST_INTER);
3623 %}
3624 
3625 // Immediates for special shifts (sign extend)
3626 
// Constants for sign-extending shifts
3628 operand immI_16() %{
3629   predicate( n->get_int() == 16 );
3630   match(ConI);
3631 
3632   format %{ %}
3633   interface(CONST_INTER);
3634 %}
3635 
3636 operand immI_24() %{
3637   predicate( n->get_int() == 24 );
3638   match(ConI);
3639 
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 // Constant for byte-wide masking
3645 operand immI_255() %{
3646   predicate( n->get_int() == 255 );
3647   match(ConI);
3648 
3649   format %{ %}
3650   interface(CONST_INTER);
3651 %}
3652 
3653 // Constant for short-wide masking
3654 operand immI_65535() %{
3655   predicate(n->get_int() == 65535);
3656   match(ConI);
3657 
3658   format %{ %}
3659   interface(CONST_INTER);
3660 %}
3661 
3662 // Register Operands
3663 // Integer Register
3664 operand rRegI() %{
3665   constraint(ALLOC_IN_RC(int_reg));
3666   match(RegI);
3667   match(xRegI);
3668   match(eAXRegI);
3669   match(eBXRegI);
3670   match(eCXRegI);
3671   match(eDXRegI);
3672   match(eDIRegI);
3673   match(eSIRegI);
3674 
3675   format %{ %}
3676   interface(REG_INTER);
3677 %}
3678 
3679 // Subset of Integer Register
3680 operand xRegI(rRegI reg) %{
3681   constraint(ALLOC_IN_RC(int_x_reg));
3682   match(reg);
3683   match(eAXRegI);
3684   match(eBXRegI);
3685   match(eCXRegI);
3686   match(eDXRegI);
3687 
3688   format %{ %}
3689   interface(REG_INTER);
3690 %}
3691 
3692 // Special Registers
3693 operand eAXRegI(xRegI reg) %{
3694   constraint(ALLOC_IN_RC(eax_reg));
3695   match(reg);
3696   match(rRegI);
3697 
3698   format %{ "EAX" %}
3699   interface(REG_INTER);
3700 %}
3701 
3702 // Special Registers
3703 operand eBXRegI(xRegI reg) %{
3704   constraint(ALLOC_IN_RC(ebx_reg));
3705   match(reg);
3706   match(rRegI);
3707 
3708   format %{ "EBX" %}
3709   interface(REG_INTER);
3710 %}
3711 
3712 operand eCXRegI(xRegI reg) %{
3713   constraint(ALLOC_IN_RC(ecx_reg));
3714   match(reg);
3715   match(rRegI);
3716 
3717   format %{ "ECX" %}
3718   interface(REG_INTER);
3719 %}
3720 
3721 operand eDXRegI(xRegI reg) %{
3722   constraint(ALLOC_IN_RC(edx_reg));
3723   match(reg);
3724   match(rRegI);
3725 
3726   format %{ "EDX" %}
3727   interface(REG_INTER);
3728 %}
3729 
3730 operand eDIRegI(xRegI reg) %{
3731   constraint(ALLOC_IN_RC(edi_reg));
3732   match(reg);
3733   match(rRegI);
3734 
3735   format %{ "EDI" %}
3736   interface(REG_INTER);
3737 %}
3738 
3739 operand naxRegI() %{
3740   constraint(ALLOC_IN_RC(nax_reg));
3741   match(RegI);
3742   match(eCXRegI);
3743   match(eDXRegI);
3744   match(eSIRegI);
3745   match(eDIRegI);
3746 
3747   format %{ %}
3748   interface(REG_INTER);
3749 %}
3750 
3751 operand nadxRegI() %{
3752   constraint(ALLOC_IN_RC(nadx_reg));
3753   match(RegI);
3754   match(eBXRegI);
3755   match(eCXRegI);
3756   match(eSIRegI);
3757   match(eDIRegI);
3758 
3759   format %{ %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 operand ncxRegI() %{
3764   constraint(ALLOC_IN_RC(ncx_reg));
3765   match(RegI);
3766   match(eAXRegI);
3767   match(eDXRegI);
3768   match(eSIRegI);
3769   match(eDIRegI);
3770 
3771   format %{ %}
3772   interface(REG_INTER);
3773 %}
3774 
3775 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3776 // //
3777 operand eSIRegI(xRegI reg) %{
3778    constraint(ALLOC_IN_RC(esi_reg));
3779    match(reg);
3780    match(rRegI);
3781 
3782    format %{ "ESI" %}
3783    interface(REG_INTER);
3784 %}
3785 
3786 // Pointer Register
3787 operand anyRegP() %{
3788   constraint(ALLOC_IN_RC(any_reg));
3789   match(RegP);
3790   match(eAXRegP);
3791   match(eBXRegP);
3792   match(eCXRegP);
3793   match(eDIRegP);
3794   match(eRegP);
3795 
3796   format %{ %}
3797   interface(REG_INTER);
3798 %}
3799 
3800 operand eRegP() %{
3801   constraint(ALLOC_IN_RC(int_reg));
3802   match(RegP);
3803   match(eAXRegP);
3804   match(eBXRegP);
3805   match(eCXRegP);
3806   match(eDIRegP);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
// On Windows 95, EBP is not safe to use for implicit null tests.
3813 operand eRegP_no_EBP() %{
3814   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3815   match(RegP);
3816   match(eAXRegP);
3817   match(eBXRegP);
3818   match(eCXRegP);
3819   match(eDIRegP);
3820 
3821   op_cost(100);
3822   format %{ %}
3823   interface(REG_INTER);
3824 %}
3825 
3826 operand naxRegP() %{
3827   constraint(ALLOC_IN_RC(nax_reg));
3828   match(RegP);
3829   match(eBXRegP);
3830   match(eDXRegP);
3831   match(eCXRegP);
3832   match(eSIRegP);
3833   match(eDIRegP);
3834 
3835   format %{ %}
3836   interface(REG_INTER);
3837 %}
3838 
3839 operand nabxRegP() %{
3840   constraint(ALLOC_IN_RC(nabx_reg));
3841   match(RegP);
3842   match(eCXRegP);
3843   match(eDXRegP);
3844   match(eSIRegP);
3845   match(eDIRegP);
3846 
3847   format %{ %}
3848   interface(REG_INTER);
3849 %}
3850 
3851 operand pRegP() %{
3852   constraint(ALLOC_IN_RC(p_reg));
3853   match(RegP);
3854   match(eBXRegP);
3855   match(eDXRegP);
3856   match(eSIRegP);
3857   match(eDIRegP);
3858 
3859   format %{ %}
3860   interface(REG_INTER);
3861 %}
3862 
3863 // Special Registers
3864 // Return a pointer value
3865 operand eAXRegP(eRegP reg) %{
3866   constraint(ALLOC_IN_RC(eax_reg));
3867   match(reg);
3868   format %{ "EAX" %}
3869   interface(REG_INTER);
3870 %}
3871 
3872 // Used in AtomicAdd
3873 operand eBXRegP(eRegP reg) %{
3874   constraint(ALLOC_IN_RC(ebx_reg));
3875   match(reg);
3876   format %{ "EBX" %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 // Tail-call (interprocedural jump) to interpreter
3881 operand eCXRegP(eRegP reg) %{
3882   constraint(ALLOC_IN_RC(ecx_reg));
3883   match(reg);
3884   format %{ "ECX" %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 operand eSIRegP(eRegP reg) %{
3889   constraint(ALLOC_IN_RC(esi_reg));
3890   match(reg);
3891   format %{ "ESI" %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Used in rep stosw
3896 operand eDIRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(edi_reg));
3898   match(reg);
3899   format %{ "EDI" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 operand eRegL() %{
3904   constraint(ALLOC_IN_RC(long_reg));
3905   match(RegL);
3906   match(eADXRegL);
3907 
3908   format %{ %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 operand eADXRegL( eRegL reg ) %{
3913   constraint(ALLOC_IN_RC(eadx_reg));
3914   match(reg);
3915 
3916   format %{ "EDX:EAX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eBCXRegL( eRegL reg ) %{
3921   constraint(ALLOC_IN_RC(ebcx_reg));
3922   match(reg);
3923 
3924   format %{ "EBX:ECX" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 // Special case for integer high multiply
3929 operand eADXRegL_low_only() %{
3930   constraint(ALLOC_IN_RC(eadx_reg));
3931   match(RegL);
3932 
3933   format %{ "EAX" %}
3934   interface(REG_INTER);
3935 %}
3936 
3937 // Flags register, used as output of compare instructions
3938 operand eFlagsReg() %{
3939   constraint(ALLOC_IN_RC(int_flags));
3940   match(RegFlags);
3941 
3942   format %{ "EFLAGS" %}
3943   interface(REG_INTER);
3944 %}
3945 
3946 // Flags register, used as output of FLOATING POINT compare instructions
3947 operand eFlagsRegU() %{
3948   constraint(ALLOC_IN_RC(int_flags));
3949   match(RegFlags);
3950 
3951   format %{ "EFLAGS_U" %}
3952   interface(REG_INTER);
3953 %}
3954 
3955 operand eFlagsRegUCF() %{
3956   constraint(ALLOC_IN_RC(int_flags));
3957   match(RegFlags);
3958   predicate(false);
3959 
3960   format %{ "EFLAGS_U_CF" %}
3961   interface(REG_INTER);
3962 %}
3963 
3964 // Condition Code Register used by long compare
3965 operand flagsReg_long_LTGE() %{
3966   constraint(ALLOC_IN_RC(int_flags));
3967   match(RegFlags);
3968   format %{ "FLAGS_LTGE" %}
3969   interface(REG_INTER);
3970 %}
3971 operand flagsReg_long_EQNE() %{
3972   constraint(ALLOC_IN_RC(int_flags));
3973   match(RegFlags);
3974   format %{ "FLAGS_EQNE" %}
3975   interface(REG_INTER);
3976 %}
3977 operand flagsReg_long_LEGT() %{
3978   constraint(ALLOC_IN_RC(int_flags));
3979   match(RegFlags);
3980   format %{ "FLAGS_LEGT" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 // Float register operands
3985 operand regDPR() %{
3986   predicate( UseSSE < 2 );
3987   constraint(ALLOC_IN_RC(fp_dbl_reg));
3988   match(RegD);
3989   match(regDPR1);
3990   match(regDPR2);
3991   format %{ %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 operand regDPR1(regDPR reg) %{
3996   predicate( UseSSE < 2 );
3997   constraint(ALLOC_IN_RC(fp_dbl_reg0));
3998   match(reg);
3999   format %{ "FPR1" %}
4000   interface(REG_INTER);
4001 %}
4002 
4003 operand regDPR2(regDPR reg) %{
4004   predicate( UseSSE < 2 );
4005   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4006   match(reg);
4007   format %{ "FPR2" %}
4008   interface(REG_INTER);
4009 %}
4010 
4011 operand regnotDPR1(regDPR reg) %{
4012   predicate( UseSSE < 2 );
4013   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4014   match(reg);
4015   format %{ %}
4016   interface(REG_INTER);
4017 %}
4018 
4019 // Float register operands
4020 operand regFPR() %{
4021   predicate( UseSSE < 2 );
4022   constraint(ALLOC_IN_RC(fp_flt_reg));
4023   match(RegF);
4024   match(regFPR1);
4025   format %{ %}
4026   interface(REG_INTER);
4027 %}
4028 
4029 // Float register operands
4030 operand regFPR1(regFPR reg) %{
4031   predicate( UseSSE < 2 );
4032   constraint(ALLOC_IN_RC(fp_flt_reg0));
4033   match(reg);
4034   format %{ "FPR1" %}
4035   interface(REG_INTER);
4036 %}
4037 
4038 // XMM Float register operands
4039 operand regF() %{
4040   predicate( UseSSE>=1 );
4041   constraint(ALLOC_IN_RC(float_reg_legacy));
4042   match(RegF);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 // XMM Double register operands
4048 operand regD() %{
4049   predicate( UseSSE>=2 );
4050   constraint(ALLOC_IN_RC(double_reg_legacy));
4051   match(RegD);
4052   format %{ %}
4053   interface(REG_INTER);
4054 %}
4055 
// Vectors: note, we use legacy registers here to avoid the extra (unneeded in
// the 32-bit VM) runtime code generation via reg_class_dynamic.
4058 operand vecS() %{
4059   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4060   match(VecS);
4061 
4062   format %{ %}
4063   interface(REG_INTER);
4064 %}
4065 
4066 operand vecD() %{
4067   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4068   match(VecD);
4069 
4070   format %{ %}
4071   interface(REG_INTER);
4072 %}
4073 
4074 operand vecX() %{
4075   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4076   match(VecX);
4077 
4078   format %{ %}
4079   interface(REG_INTER);
4080 %}
4081 
4082 operand vecY() %{
4083   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4084   match(VecY);
4085 
4086   format %{ %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 //----------Memory Operands----------------------------------------------------
4091 // Direct Memory Operand
4092 operand direct(immP addr) %{
4093   match(addr);
4094 
4095   format %{ "[$addr]" %}
4096   interface(MEMORY_INTER) %{
4097     base(0xFFFFFFFF);
4098     index(0x4);
4099     scale(0x0);
4100     disp($addr);
4101   %}
4102 %}
4103 
4104 // Indirect Memory Operand
4105 operand indirect(eRegP reg) %{
4106   constraint(ALLOC_IN_RC(int_reg));
4107   match(reg);
4108 
4109   format %{ "[$reg]" %}
4110   interface(MEMORY_INTER) %{
4111     base($reg);
4112     index(0x4);
4113     scale(0x0);
4114     disp(0x0);
4115   %}
4116 %}
4117 
4118 // Indirect Memory Plus Short Offset Operand
4119 operand indOffset8(eRegP reg, immI8 off) %{
4120   match(AddP reg off);
4121 
4122   format %{ "[$reg + $off]" %}
4123   interface(MEMORY_INTER) %{
4124     base($reg);
4125     index(0x4);
4126     scale(0x0);
4127     disp($off);
4128   %}
4129 %}
4130 
4131 // Indirect Memory Plus Long Offset Operand
4132 operand indOffset32(eRegP reg, immI off) %{
4133   match(AddP reg off);
4134 
4135   format %{ "[$reg + $off]" %}
4136   interface(MEMORY_INTER) %{
4137     base($reg);
4138     index(0x4);
4139     scale(0x0);
4140     disp($off);
4141   %}
4142 %}
4143 
4144 // Indirect Memory Plus Long Offset Operand
4145 operand indOffset32X(rRegI reg, immP off) %{
4146   match(AddP off reg);
4147 
4148   format %{ "[$reg + $off]" %}
4149   interface(MEMORY_INTER) %{
4150     base($reg);
4151     index(0x4);
4152     scale(0x0);
4153     disp($off);
4154   %}
4155 %}
4156 
4157 // Indirect Memory Plus Index Register Plus Offset Operand
4158 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4159   match(AddP (AddP reg ireg) off);
4160 
4161   op_cost(10);
4162   format %{"[$reg + $off + $ireg]" %}
4163   interface(MEMORY_INTER) %{
4164     base($reg);
4165     index($ireg);
4166     scale(0x0);
4167     disp($off);
4168   %}
4169 %}
4170 
4171 // Indirect Memory Plus Index Register Plus Offset Operand
4172 operand indIndex(eRegP reg, rRegI ireg) %{
4173   match(AddP reg ireg);
4174 
4175   op_cost(10);
4176   format %{"[$reg + $ireg]" %}
4177   interface(MEMORY_INTER) %{
4178     base($reg);
4179     index($ireg);
4180     scale(0x0);
4181     disp(0x0);
4182   %}
4183 %}
4184 
4185 // // -------------------------------------------------------------------------
4186 // // 486 architecture doesn't support "scale * index + offset" with out a base
4187 // // -------------------------------------------------------------------------
4188 // // Scaled Memory Operands
4189 // // Indirect Memory Times Scale Plus Offset Operand
4190 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4191 //   match(AddP off (LShiftI ireg scale));
4192 //
4193 //   op_cost(10);
4194 //   format %{"[$off + $ireg << $scale]" %}
4195 //   interface(MEMORY_INTER) %{
4196 //     base(0x4);
4197 //     index($ireg);
4198 //     scale($scale);
4199 //     disp($off);
4200 //   %}
4201 // %}
4202 
4203 // Indirect Memory Times Scale Plus Index Register
4204 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4205   match(AddP reg (LShiftI ireg scale));
4206 
4207   op_cost(10);
4208   format %{"[$reg + $ireg << $scale]" %}
4209   interface(MEMORY_INTER) %{
4210     base($reg);
4211     index($ireg);
4212     scale($scale);
4213     disp(0x0);
4214   %}
4215 %}
4216 
4217 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4218 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4219   match(AddP (AddP reg (LShiftI ireg scale)) off);
4220 
4221   op_cost(10);
4222   format %{"[$reg + $off + $ireg << $scale]" %}
4223   interface(MEMORY_INTER) %{
4224     base($reg);
4225     index($ireg);
4226     scale($scale);
4227     disp($off);
4228   %}
4229 %}
4230 
4231 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4233 // the first word of the long.  If the load-long destination overlaps with
4234 // registers used in the addressing expression, the 2nd half will be loaded
4235 // from a clobbered address.  Fix this by requiring that load-long use
4236 // address registers that do not overlap with the load-long target.
4237 
4238 // load-long support
4239 operand load_long_RegP() %{
4240   constraint(ALLOC_IN_RC(esi_reg));
4241   match(RegP);
4242   match(eSIRegP);
4243   op_cost(100);
4244   format %{  %}
4245   interface(REG_INTER);
4246 %}
4247 
4248 // Indirect Memory Operand Long
4249 operand load_long_indirect(load_long_RegP reg) %{
4250   constraint(ALLOC_IN_RC(esi_reg));
4251   match(reg);
4252 
4253   format %{ "[$reg]" %}
4254   interface(MEMORY_INTER) %{
4255     base($reg);
4256     index(0x4);
4257     scale(0x0);
4258     disp(0x0);
4259   %}
4260 %}
4261 
4262 // Indirect Memory Plus Long Offset Operand
4263 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4264   match(AddP reg off);
4265 
4266   format %{ "[$reg + $off]" %}
4267   interface(MEMORY_INTER) %{
4268     base($reg);
4269     index(0x4);
4270     scale(0x0);
4271     disp($off);
4272   %}
4273 %}
4274 
4275 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4276 
4277 
4278 //----------Special Memory Operands--------------------------------------------
4279 // Stack Slot Operand - This operand is used for loading and storing temporary
4280 //                      values on the stack where a match requires a value to
4281 //                      flow through memory.
4282 operand stackSlotP(sRegP reg) %{
4283   constraint(ALLOC_IN_RC(stack_slots));
4284   // No match rule because this operand is only generated in matching
4285   format %{ "[$reg]" %}
4286   interface(MEMORY_INTER) %{
4287     base(0x4);   // ESP
4288     index(0x4);  // No Index
4289     scale(0x0);  // No Scale
4290     disp($reg);  // Stack Offset
4291   %}
4292 %}
4293 
4294 operand stackSlotI(sRegI reg) %{
4295   constraint(ALLOC_IN_RC(stack_slots));
4296   // No match rule because this operand is only generated in matching
4297   format %{ "[$reg]" %}
4298   interface(MEMORY_INTER) %{
4299     base(0x4);   // ESP
4300     index(0x4);  // No Index
4301     scale(0x0);  // No Scale
4302     disp($reg);  // Stack Offset
4303   %}
4304 %}
4305 
4306 operand stackSlotF(sRegF reg) %{
4307   constraint(ALLOC_IN_RC(stack_slots));
4308   // No match rule because this operand is only generated in matching
4309   format %{ "[$reg]" %}
4310   interface(MEMORY_INTER) %{
4311     base(0x4);   // ESP
4312     index(0x4);  // No Index
4313     scale(0x0);  // No Scale
4314     disp($reg);  // Stack Offset
4315   %}
4316 %}
4317 
4318 operand stackSlotD(sRegD reg) %{
4319   constraint(ALLOC_IN_RC(stack_slots));
4320   // No match rule because this operand is only generated in matching
4321   format %{ "[$reg]" %}
4322   interface(MEMORY_INTER) %{
4323     base(0x4);   // ESP
4324     index(0x4);  // No Index
4325     scale(0x0);  // No Scale
4326     disp($reg);  // Stack Offset
4327   %}
4328 %}
4329 
4330 operand stackSlotL(sRegL reg) %{
4331   constraint(ALLOC_IN_RC(stack_slots));
4332   // No match rule because this operand is only generated in matching
4333   format %{ "[$reg]" %}
4334   interface(MEMORY_INTER) %{
4335     base(0x4);   // ESP
4336     index(0x4);  // No Index
4337     scale(0x0);  // No Scale
4338     disp($reg);  // Stack Offset
4339   %}
4340 %}
4341 
4342 //----------Memory Operands - Win95 Implicit Null Variants----------------
4343 // Indirect Memory Operand
4344 operand indirect_win95_safe(eRegP_no_EBP reg)
4345 %{
4346   constraint(ALLOC_IN_RC(int_reg));
4347   match(reg);
4348 
4349   op_cost(100);
4350   format %{ "[$reg]" %}
4351   interface(MEMORY_INTER) %{
4352     base($reg);
4353     index(0x4);
4354     scale(0x0);
4355     disp(0x0);
4356   %}
4357 %}
4358 
4359 // Indirect Memory Plus Short Offset Operand
4360 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4361 %{
4362   match(AddP reg off);
4363 
4364   op_cost(100);
4365   format %{ "[$reg + $off]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp($off);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Plus Long Offset Operand
4375 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4376 %{
4377   match(AddP reg off);
4378 
4379   op_cost(100);
4380   format %{ "[$reg + $off]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index(0x4);
4384     scale(0x0);
4385     disp($off);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Plus Index Register Plus Offset Operand
4390 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4391 %{
4392   match(AddP (AddP reg ireg) off);
4393 
4394   op_cost(100);
4395   format %{"[$reg + $off + $ireg]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index($ireg);
4399     scale(0x0);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 // Indirect Memory Times Scale Plus Index Register
4405 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4406 %{
4407   match(AddP reg (LShiftI ireg scale));
4408 
4409   op_cost(100);
4410   format %{"[$reg + $ireg << $scale]" %}
4411   interface(MEMORY_INTER) %{
4412     base($reg);
4413     index($ireg);
4414     scale($scale);
4415     disp(0x0);
4416   %}
4417 %}
4418 
4419 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4420 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4421 %{
4422   match(AddP (AddP reg (LShiftI ireg scale)) off);
4423 
4424   op_cost(100);
4425   format %{"[$reg + $off + $ireg << $scale]" %}
4426   interface(MEMORY_INTER) %{
4427     base($reg);
4428     index($ireg);
4429     scale($scale);
4430     disp($off);
4431   %}
4432 %}
4433 
4434 //----------Conditional Branch Operands----------------------------------------
4435 // Comparison Op  - This is the operation of the comparison, and is limited to
4436 //                  the following set of codes:
4437 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4438 //
4439 // Other attributes of the comparison, such as unsignedness, are specified
4440 // by the comparison instruction that sets a condition code flags register.
4441 // That result is represented by a flags operand whose subtype is appropriate
4442 // to the unsignedness (etc.) of the comparison.
4443 //
4444 // Later, the instruction which matches both the Comparison Op (a Bool) and
4445 // the flags (produced by the Cmp) specifies the coding of the comparison op
4446 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4447 
// Comparison Code
4449 operand cmpOp() %{
4450   match(Bool);
4451 
4452   format %{ "" %}
4453   interface(COND_INTER) %{
4454     equal(0x4, "e");
4455     not_equal(0x5, "ne");
4456     less(0xC, "l");
4457     greater_equal(0xD, "ge");
4458     less_equal(0xE, "le");
4459     greater(0xF, "g");
4460     overflow(0x0, "o");
4461     no_overflow(0x1, "no");
4462   %}
4463 %}
4464 
4465 // Comparison Code, unsigned compare.  Used by FP also, with
4466 // C2 (unordered) turned into GT or LT already.  The other bits
4467 // C0 and C3 are turned into Carry & Zero flags.
4468 operand cmpOpU() %{
4469   match(Bool);
4470 
4471   format %{ "" %}
4472   interface(COND_INTER) %{
4473     equal(0x4, "e");
4474     not_equal(0x5, "ne");
4475     less(0x2, "b");
4476     greater_equal(0x3, "nb");
4477     less_equal(0x6, "be");
4478     greater(0x7, "nbe");
4479     overflow(0x0, "o");
4480     no_overflow(0x1, "no");
4481   %}
4482 %}
4483 
4484 // Floating comparisons that don't require any fixup for the unordered case
4485 operand cmpOpUCF() %{
4486   match(Bool);
4487   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4488             n->as_Bool()->_test._test == BoolTest::ge ||
4489             n->as_Bool()->_test._test == BoolTest::le ||
4490             n->as_Bool()->_test._test == BoolTest::gt);
4491   format %{ "" %}
4492   interface(COND_INTER) %{
4493     equal(0x4, "e");
4494     not_equal(0x5, "ne");
4495     less(0x2, "b");
4496     greater_equal(0x3, "nb");
4497     less_equal(0x6, "be");
4498     greater(0x7, "nbe");
4499     overflow(0x0, "o");
4500     no_overflow(0x1, "no");
4501   %}
4502 %}
4503 
4504 
4505 // Floating comparisons that can be fixed up with extra conditional jumps
4506 operand cmpOpUCF2() %{
4507   match(Bool);
4508   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4509             n->as_Bool()->_test._test == BoolTest::eq);
4510   format %{ "" %}
4511   interface(COND_INTER) %{
4512     equal(0x4, "e");
4513     not_equal(0x5, "ne");
4514     less(0x2, "b");
4515     greater_equal(0x3, "nb");
4516     less_equal(0x6, "be");
4517     greater(0x7, "nbe");
4518     overflow(0x0, "o");
4519     no_overflow(0x1, "no");
4520   %}
4521 %}
4522 
4523 // Comparison Code for FP conditional move
4524 operand cmpOp_fcmov() %{
4525   match(Bool);
4526 
4527   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4528             n->as_Bool()->_test._test != BoolTest::no_overflow);
4529   format %{ "" %}
4530   interface(COND_INTER) %{
4531     equal        (0x0C8);
4532     not_equal    (0x1C8);
4533     less         (0x0C0);
4534     greater_equal(0x1C0);
4535     less_equal   (0x0D0);
4536     greater      (0x1D0);
4537     overflow(0x0, "o"); // not really supported by the instruction
4538     no_overflow(0x1, "no"); // not really supported by the instruction
4539   %}
4540 %}
4541 
4542 // Comparison Code used in long compares
4543 operand cmpOp_commute() %{
4544   match(Bool);
4545 
4546   format %{ "" %}
4547   interface(COND_INTER) %{
4548     equal(0x4, "e");
4549     not_equal(0x5, "ne");
4550     less(0xF, "g");
4551     greater_equal(0xE, "le");
4552     less_equal(0xD, "ge");
4553     greater(0xC, "l");
4554     overflow(0x0, "o");
4555     no_overflow(0x1, "no");
4556   %}
4557 %}
4558 
4559 //----------OPERAND CLASSES----------------------------------------------------
4560 // Operand Classes are groups of operands that are used to simplify
4561 // instruction definitions by not requiring the AD writer to specify separate
4562 // instructions for every form of operand when the instruction accepts
4563 // multiple operand types with the same basic encoding and format.  The classic
4564 // case of this is memory operands.
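//
// For example (illustrative sketch): a single load rule written against the
// 'memory' opclass declared below, such as
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// covers the direct, register-indirect, offset and scaled-index addressing
// forms with one definition instead of one instruct per addressing operand.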
4565 
4566 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4567                indIndex, indIndexScale, indIndexScaleOffset);
4568 
4569 // Long memory operations are encoded in 2 instructions and a +4 offset.
4570 // This means some kind of offset is always required and you cannot use
4571 // an oop as the offset (done when working on static globals).
4572 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4573                     indIndex, indIndexScale, indIndexScaleOffset);
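
// For example (illustrative, see loadL/storeL below): a non-atomic long access
// is emitted as two 32-bit moves against the same operand,
//
//   __ movl($dst$$Register, Amemlo);                 // at $mem$$disp
//   __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);  // at $mem$$disp + 4
//
// which is why the operand must always be able to carry a displacement.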
4574 
4575 
4576 //----------PIPELINE-----------------------------------------------------------
4577 // Rules which define the behavior of the target architecture's pipeline.
4578 pipeline %{
4579 
4580 //----------ATTRIBUTES---------------------------------------------------------
4581 attributes %{
4582   variable_size_instructions;        // Variable-size instructions
4583   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4584   instruction_unit_size = 1;         // An instruction is 1 byte long
4585   instruction_fetch_unit_size = 16;  // The processor fetches one line
4586   instruction_fetch_units = 1;       // of 16 bytes
4587 
4588   // List of nop instructions
4589   nops( MachNop );
4590 %}
4591 
4592 //----------RESOURCES----------------------------------------------------------
4593 // Resources are the functional units available to the machine
4594 
4595 // Generic P2/P3 pipeline
4596 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4597 // 3 instructions decoded per cycle.
4598 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4599 // 2 ALU ops, only ALU0 handles mul/div instructions.
4600 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4601            MS0, MS1, MEM = MS0 | MS1,
4602            BR, FPU,
4603            ALU0, ALU1, ALU = ALU0 | ALU1 );
4604 
4605 //----------PIPELINE DESCRIPTION-----------------------------------------------
4606 // Pipeline Description specifies the stages in the machine's pipeline
4607 
4608 // Generic P2/P3 pipeline
4609 pipe_desc(S0, S1, S2, S3, S4, S5);
4610 
4611 //----------PIPELINE CLASSES---------------------------------------------------
4612 // Pipeline Classes describe the stages in which input and output are
4613 // referenced by the hardware pipeline.
4614 
4615 // Naming convention: ialu or fpu
4616 // Then: _reg
4617 // Then: _reg if there is a 2nd register
4618 // Then: _long if it's a pair of instructions implementing a long operation
4619 // Then: _fat if it requires the big decoder
4620 //   Or: _mem if it requires the big decoder and a memory unit.
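//
// For example (reading a name, illustrative only): 'ialu_reg_mem' below is an
// integer ALU operation with a register destination and a memory source, so it
// claims the big decoder D0, an ALU and a memory unit; 'ialu_reg_reg_long' is
// the two-instruction pair implementing a long reg-reg ALU operation on both
// decoders and both ALUs.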
4621 
4622 // Integer ALU reg operation
4623 pipe_class ialu_reg(rRegI dst) %{
4624     single_instruction;
4625     dst    : S4(write);
4626     dst    : S3(read);
4627     DECODE : S0;        // any decoder
4628     ALU    : S3;        // any alu
4629 %}
4630 
4631 // Long ALU reg operation
4632 pipe_class ialu_reg_long(eRegL dst) %{
4633     instruction_count(2);
4634     dst    : S4(write);
4635     dst    : S3(read);
4636     DECODE : S0(2);     // any 2 decoders
4637     ALU    : S3(2);     // both alus
4638 %}
4639 
4640 // Integer ALU reg operation using big decoder
4641 pipe_class ialu_reg_fat(rRegI dst) %{
4642     single_instruction;
4643     dst    : S4(write);
4644     dst    : S3(read);
4645     D0     : S0;        // big decoder only
4646     ALU    : S3;        // any alu
4647 %}
4648 
4649 // Long ALU reg operation using big decoder
4650 pipe_class ialu_reg_long_fat(eRegL dst) %{
4651     instruction_count(2);
4652     dst    : S4(write);
4653     dst    : S3(read);
4654     D0     : S0(2);     // big decoder only; twice
4655     ALU    : S3(2);     // any 2 alus
4656 %}
4657 
4658 // Integer ALU reg-reg operation
4659 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4660     single_instruction;
4661     dst    : S4(write);
4662     src    : S3(read);
4663     DECODE : S0;        // any decoder
4664     ALU    : S3;        // any alu
4665 %}
4666 
4667 // Long ALU reg-reg operation
4668 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4669     instruction_count(2);
4670     dst    : S4(write);
4671     src    : S3(read);
4672     DECODE : S0(2);     // any 2 decoders
4673     ALU    : S3(2);     // both alus
4674 %}
4675 
4676 // Integer ALU reg-reg operation
4677 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4678     single_instruction;
4679     dst    : S4(write);
4680     src    : S3(read);
4681     D0     : S0;        // big decoder only
4682     ALU    : S3;        // any alu
4683 %}
4684 
4685 // Long ALU reg-reg operation
4686 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4687     instruction_count(2);
4688     dst    : S4(write);
4689     src    : S3(read);
4690     D0     : S0(2);     // big decoder only; twice
4691     ALU    : S3(2);     // both alus
4692 %}
4693 
4694 // Integer ALU reg-mem operation
4695 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4696     single_instruction;
4697     dst    : S5(write);
4698     mem    : S3(read);
4699     D0     : S0;        // big decoder only
4700     ALU    : S4;        // any alu
4701     MEM    : S3;        // any mem
4702 %}
4703 
4704 // Long ALU reg-mem operation
4705 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4706     instruction_count(2);
4707     dst    : S5(write);
4708     mem    : S3(read);
4709     D0     : S0(2);     // big decoder only; twice
4710     ALU    : S4(2);     // any 2 alus
4711     MEM    : S3(2);     // both mems
4712 %}
4713 
4714 // Integer mem operation (prefetch)
4715 pipe_class ialu_mem(memory mem)
4716 %{
4717     single_instruction;
4718     mem    : S3(read);
4719     D0     : S0;        // big decoder only
4720     MEM    : S3;        // any mem
4721 %}
4722 
4723 // Integer Store to Memory
4724 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4725     single_instruction;
4726     mem    : S3(read);
4727     src    : S5(read);
4728     D0     : S0;        // big decoder only
4729     ALU    : S4;        // any alu
4730     MEM    : S3;
4731 %}
4732 
4733 // Long Store to Memory
4734 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4735     instruction_count(2);
4736     mem    : S3(read);
4737     src    : S5(read);
4738     D0     : S0(2);     // big decoder only; twice
4739     ALU    : S4(2);     // any 2 alus
4740     MEM    : S3(2);     // Both mems
4741 %}
4742 
4743 // Integer Store to Memory
4744 pipe_class ialu_mem_imm(memory mem) %{
4745     single_instruction;
4746     mem    : S3(read);
4747     D0     : S0;        // big decoder only
4748     ALU    : S4;        // any alu
4749     MEM    : S3;
4750 %}
4751 
4752 // Integer ALU0 reg-reg operation
4753 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4754     single_instruction;
4755     dst    : S4(write);
4756     src    : S3(read);
4757     D0     : S0;        // Big decoder only
4758     ALU0   : S3;        // only alu0
4759 %}
4760 
4761 // Integer ALU0 reg-mem operation
4762 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4763     single_instruction;
4764     dst    : S5(write);
4765     mem    : S3(read);
4766     D0     : S0;        // big decoder only
4767     ALU0   : S4;        // ALU0 only
4768     MEM    : S3;        // any mem
4769 %}
4770 
4771 // Integer ALU reg-reg operation
4772 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4773     single_instruction;
4774     cr     : S4(write);
4775     src1   : S3(read);
4776     src2   : S3(read);
4777     DECODE : S0;        // any decoder
4778     ALU    : S3;        // any alu
4779 %}
4780 
4781 // Integer ALU reg-imm operation
4782 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4783     single_instruction;
4784     cr     : S4(write);
4785     src1   : S3(read);
4786     DECODE : S0;        // any decoder
4787     ALU    : S3;        // any alu
4788 %}
4789 
4790 // Integer ALU reg-mem operation
4791 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4792     single_instruction;
4793     cr     : S4(write);
4794     src1   : S3(read);
4795     src2   : S3(read);
4796     D0     : S0;        // big decoder only
4797     ALU    : S4;        // any alu
4798     MEM    : S3;
4799 %}
4800 
4801 // Conditional move reg-reg
4802 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4803     instruction_count(4);
4804     y      : S4(read);
4805     q      : S3(read);
4806     p      : S3(read);
4807     DECODE : S0(4);     // any decoder
4808 %}
4809 
4810 // Conditional move reg-reg
4811 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4812     single_instruction;
4813     dst    : S4(write);
4814     src    : S3(read);
4815     cr     : S3(read);
4816     DECODE : S0;        // any decoder
4817 %}
4818 
4819 // Conditional move reg-mem
4820 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4821     single_instruction;
4822     dst    : S4(write);
4823     src    : S3(read);
4824     cr     : S3(read);
4825     DECODE : S0;        // any decoder
4826     MEM    : S3;
4827 %}
4828 
4829 // Conditional move reg-reg long
4830 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4831     single_instruction;
4832     dst    : S4(write);
4833     src    : S3(read);
4834     cr     : S3(read);
4835     DECODE : S0(2);     // any 2 decoders
4836 %}
4837 
4838 // Conditional move double reg-reg
4839 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4840     single_instruction;
4841     dst    : S4(write);
4842     src    : S3(read);
4843     cr     : S3(read);
4844     DECODE : S0;        // any decoder
4845 %}
4846 
4847 // Float reg-reg operation
4848 pipe_class fpu_reg(regDPR dst) %{
4849     instruction_count(2);
4850     dst    : S3(read);
4851     DECODE : S0(2);     // any 2 decoders
4852     FPU    : S3;
4853 %}
4854 
4855 // Float reg-reg operation
4856 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4857     instruction_count(2);
4858     dst    : S4(write);
4859     src    : S3(read);
4860     DECODE : S0(2);     // any 2 decoders
4861     FPU    : S3;
4862 %}
4863 
4864 // Float reg-reg operation
4865 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4866     instruction_count(3);
4867     dst    : S4(write);
4868     src1   : S3(read);
4869     src2   : S3(read);
4870     DECODE : S0(3);     // any 3 decoders
4871     FPU    : S3(2);
4872 %}
4873 
4874 // Float reg-reg operation
4875 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4876     instruction_count(4);
4877     dst    : S4(write);
4878     src1   : S3(read);
4879     src2   : S3(read);
4880     src3   : S3(read);
4881     DECODE : S0(4);     // any 4 decoders
4882     FPU    : S3(2);
4883 %}
4884 
4885 // Float reg-reg operation
4886 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4887     instruction_count(4);
4888     dst    : S4(write);
4889     src1   : S3(read);
4890     src2   : S3(read);
4891     src3   : S3(read);
4892     DECODE : S1(3);     // any 3 decoders
4893     D0     : S0;        // Big decoder only
4894     FPU    : S3(2);
4895     MEM    : S3;
4896 %}
4897 
4898 // Float reg-mem operation
4899 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4900     instruction_count(2);
4901     dst    : S5(write);
4902     mem    : S3(read);
4903     D0     : S0;        // big decoder only
4904     DECODE : S1;        // any decoder for FPU POP
4905     FPU    : S4;
4906     MEM    : S3;        // any mem
4907 %}
4908 
4909 // Float reg-mem operation
4910 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4911     instruction_count(3);
4912     dst    : S5(write);
4913     src1   : S3(read);
4914     mem    : S3(read);
4915     D0     : S0;        // big decoder only
4916     DECODE : S1(2);     // any decoder for FPU POP
4917     FPU    : S4;
4918     MEM    : S3;        // any mem
4919 %}
4920 
4921 // Float mem-reg operation
4922 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4923     instruction_count(2);
4924     src    : S5(read);
4925     mem    : S3(read);
4926     DECODE : S0;        // any decoder for FPU PUSH
4927     D0     : S1;        // big decoder only
4928     FPU    : S4;
4929     MEM    : S3;        // any mem
4930 %}
4931 
4932 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4933     instruction_count(3);
4934     src1   : S3(read);
4935     src2   : S3(read);
4936     mem    : S3(read);
4937     DECODE : S0(2);     // any decoder for FPU PUSH
4938     D0     : S1;        // big decoder only
4939     FPU    : S4;
4940     MEM    : S3;        // any mem
4941 %}
4942 
4943 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4944     instruction_count(3);
4945     src1   : S3(read);
4946     src2   : S3(read);
4947     mem    : S4(read);
4948     DECODE : S0;        // any decoder for FPU PUSH
4949     D0     : S0(2);     // big decoder only
4950     FPU    : S4;
4951     MEM    : S3(2);     // any mem
4952 %}
4953 
4954 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4955     instruction_count(2);
4956     src1   : S3(read);
4957     dst    : S4(read);
4958     D0     : S0(2);     // big decoder only
4959     MEM    : S3(2);     // any mem
4960 %}
4961 
4962 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4963     instruction_count(3);
4964     src1   : S3(read);
4965     src2   : S3(read);
4966     dst    : S4(read);
4967     D0     : S0(3);     // big decoder only
4968     FPU    : S4;
4969     MEM    : S3(3);     // any mem
4970 %}
4971 
4972 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4973     instruction_count(3);
4974     src1   : S4(read);
4975     mem    : S4(read);
4976     DECODE : S0;        // any decoder for FPU PUSH
4977     D0     : S0(2);     // big decoder only
4978     FPU    : S4;
4979     MEM    : S3(2);     // any mem
4980 %}
4981 
4982 // Float load constant
4983 pipe_class fpu_reg_con(regDPR dst) %{
4984     instruction_count(2);
4985     dst    : S5(write);
4986     D0     : S0;        // big decoder only for the load
4987     DECODE : S1;        // any decoder for FPU POP
4988     FPU    : S4;
4989     MEM    : S3;        // any mem
4990 %}
4991 
4992 // Float load constant
4993 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4994     instruction_count(3);
4995     dst    : S5(write);
4996     src    : S3(read);
4997     D0     : S0;        // big decoder only for the load
4998     DECODE : S1(2);     // any decoder for FPU POP
4999     FPU    : S4;
5000     MEM    : S3;        // any mem
5001 %}
5002 
5003 // UnConditional branch
5004 pipe_class pipe_jmp( label labl ) %{
5005     single_instruction;
5006     BR   : S3;
5007 %}
5008 
5009 // Conditional branch
5010 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5011     single_instruction;
5012     cr    : S1(read);
5013     BR    : S3;
5014 %}
5015 
5016 // Allocation idiom
5017 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5018     instruction_count(1); force_serialization;
5019     fixed_latency(6);
5020     heap_ptr : S3(read);
5021     DECODE   : S0(3);
5022     D0       : S2;
5023     MEM      : S3;
5024     ALU      : S3(2);
5025     dst      : S5(write);
5026     BR       : S5;
5027 %}
5028 
5029 // Generic big/slow expanded idiom
5030 pipe_class pipe_slow(  ) %{
5031     instruction_count(10); multiple_bundles; force_serialization;
5032     fixed_latency(100);
5033     D0  : S0(2);
5034     MEM : S3(2);
5035 %}
5036 
5037 // The real do-nothing guy
5038 pipe_class empty( ) %{
5039     instruction_count(0);
5040 %}
5041 
5042 // Define the class for the Nop node
5043 define %{
5044    MachNop = empty;
5045 %}
5046 
5047 %}
5048 
5049 //----------INSTRUCTIONS-------------------------------------------------------
5050 //
5051 // match      -- States which machine-independent subtree may be replaced
5052 //               by this instruction.
5053 // ins_cost   -- The estimated cost of this instruction is used by instruction
5054 //               selection to identify a minimum cost tree of machine
5055 //               instructions that matches a tree of machine-independent
5056 //               instructions.
5057 // format     -- A string providing the disassembly for this instruction.
5058 //               The value of an instruction's operand may be inserted
5059 //               by referring to it with a '$' prefix.
5060 // opcode     -- Three instruction opcodes may be provided.  These are referred
5061 //               to within an encode class as $primary, $secondary, and $tertiary
5062 //               respectively.  The primary opcode is commonly used to
5063 //               indicate the type of machine instruction, while secondary
5064 //               and tertiary are often used for prefix options or addressing
5065 //               modes.
5066 // ins_encode -- A list of encode classes with parameters. The encode class
5067 //               name must have been defined in an 'enc_class' specification
5068 //               in the encode section of the architecture description.
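//
// Putting these together (a hedged illustrative sketch, not an extra rule in
// this file), a simple integer register-register add is typically written as:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));      // replaces the ideal AddI subtree
//     effect(KILL cr);                    // ADD clobbers the flags register
//     format %{ "ADD    $dst,$src" %}     // disassembly string
//     opcode(0x03);                       // referred to as $primary
//     ins_encode( OpcP, RegReg( dst, src ) );
//     ins_pipe( ialu_reg_reg );
//   %}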
5069 
5070 //----------BSWAP-Instruction--------------------------------------------------
5071 instruct bytes_reverse_int(rRegI dst) %{
5072   match(Set dst (ReverseBytesI dst));
5073 
5074   format %{ "BSWAP  $dst" %}
5075   opcode(0x0F, 0xC8);
5076   ins_encode( OpcP, OpcSReg(dst) );
5077   ins_pipe( ialu_reg );
5078 %}
5079 
5080 instruct bytes_reverse_long(eRegL dst) %{
5081   match(Set dst (ReverseBytesL dst));
5082 
5083   format %{ "BSWAP  $dst.lo\n\t"
5084             "BSWAP  $dst.hi\n\t"
5085             "XCHG   $dst.lo $dst.hi" %}
5086 
5087   ins_cost(125);
5088   ins_encode( bswap_long_bytes(dst) );
5089   ins_pipe( ialu_reg_reg);
5090 %}
5091 
5092 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5093   match(Set dst (ReverseBytesUS dst));
5094   effect(KILL cr);
5095 
5096   format %{ "BSWAP  $dst\n\t"
5097             "SHR    $dst,16\n\t" %}
5098   ins_encode %{
5099     __ bswapl($dst$$Register);
5100     __ shrl($dst$$Register, 16);
5101   %}
5102   ins_pipe( ialu_reg );
5103 %}
5104 
5105 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5106   match(Set dst (ReverseBytesS dst));
5107   effect(KILL cr);
5108 
5109   format %{ "BSWAP  $dst\n\t"
5110             "SAR    $dst,16\n\t" %}
5111   ins_encode %{
5112     __ bswapl($dst$$Register);
5113     __ sarl($dst$$Register, 16);
5114   %}
5115   ins_pipe( ialu_reg );
5116 %}
5117 
5118 
5119 //---------- Zeros Count Instructions ------------------------------------------
5120 
5121 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5122   predicate(UseCountLeadingZerosInstruction);
5123   match(Set dst (CountLeadingZerosI src));
5124   effect(KILL cr);
5125 
5126   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5127   ins_encode %{
5128     __ lzcntl($dst$$Register, $src$$Register);
5129   %}
5130   ins_pipe(ialu_reg);
5131 %}
5132 
5133 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5134   predicate(!UseCountLeadingZerosInstruction);
5135   match(Set dst (CountLeadingZerosI src));
5136   effect(KILL cr);
5137 
5138   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5139             "JNZ    skip\n\t"
5140             "MOV    $dst, -1\n"
5141       "skip:\n\t"
5142             "NEG    $dst\n\t"
5143             "ADD    $dst, 31" %}
5144   ins_encode %{
5145     Register Rdst = $dst$$Register;
5146     Register Rsrc = $src$$Register;
5147     Label skip;
5148     __ bsrl(Rdst, Rsrc);
5149     __ jccb(Assembler::notZero, skip);
5150     __ movl(Rdst, -1);
5151     __ bind(skip);
5152     __ negl(Rdst);
5153     __ addl(Rdst, BitsPerInt - 1);
5154   %}
5155   ins_pipe(ialu_reg);
5156 %}
5157 
5158 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5159   predicate(UseCountLeadingZerosInstruction);
5160   match(Set dst (CountLeadingZerosL src));
5161   effect(TEMP dst, KILL cr);
5162 
5163   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5164             "JNC    done\n\t"
5165             "LZCNT  $dst, $src.lo\n\t"
5166             "ADD    $dst, 32\n"
5167       "done:" %}
5168   ins_encode %{
5169     Register Rdst = $dst$$Register;
5170     Register Rsrc = $src$$Register;
5171     Label done;
5172     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5173     __ jccb(Assembler::carryClear, done);
5174     __ lzcntl(Rdst, Rsrc);
5175     __ addl(Rdst, BitsPerInt);
5176     __ bind(done);
5177   %}
5178   ins_pipe(ialu_reg);
5179 %}
5180 
5181 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5182   predicate(!UseCountLeadingZerosInstruction);
5183   match(Set dst (CountLeadingZerosL src));
5184   effect(TEMP dst, KILL cr);
5185 
5186   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5187             "JZ     msw_is_zero\n\t"
5188             "ADD    $dst, 32\n\t"
5189             "JMP    not_zero\n"
5190       "msw_is_zero:\n\t"
5191             "BSR    $dst, $src.lo\n\t"
5192             "JNZ    not_zero\n\t"
5193             "MOV    $dst, -1\n"
5194       "not_zero:\n\t"
5195             "NEG    $dst\n\t"
5196             "ADD    $dst, 63\n" %}
5197  ins_encode %{
5198     Register Rdst = $dst$$Register;
5199     Register Rsrc = $src$$Register;
5200     Label msw_is_zero;
5201     Label not_zero;
5202     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5203     __ jccb(Assembler::zero, msw_is_zero);
5204     __ addl(Rdst, BitsPerInt);
5205     __ jmpb(not_zero);
5206     __ bind(msw_is_zero);
5207     __ bsrl(Rdst, Rsrc);
5208     __ jccb(Assembler::notZero, not_zero);
5209     __ movl(Rdst, -1);
5210     __ bind(not_zero);
5211     __ negl(Rdst);
5212     __ addl(Rdst, BitsPerLong - 1);
5213   %}
5214   ins_pipe(ialu_reg);
5215 %}
5216 
5217 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5218   predicate(UseCountTrailingZerosInstruction);
5219   match(Set dst (CountTrailingZerosI src));
5220   effect(KILL cr);
5221 
5222   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5223   ins_encode %{
5224     __ tzcntl($dst$$Register, $src$$Register);
5225   %}
5226   ins_pipe(ialu_reg);
5227 %}
5228 
5229 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5230   predicate(!UseCountTrailingZerosInstruction);
5231   match(Set dst (CountTrailingZerosI src));
5232   effect(KILL cr);
5233 
5234   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5235             "JNZ    done\n\t"
5236             "MOV    $dst, 32\n"
5237       "done:" %}
5238   ins_encode %{
5239     Register Rdst = $dst$$Register;
5240     Label done;
5241     __ bsfl(Rdst, $src$$Register);
5242     __ jccb(Assembler::notZero, done);
5243     __ movl(Rdst, BitsPerInt);
5244     __ bind(done);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5250   predicate(UseCountTrailingZerosInstruction);
5251   match(Set dst (CountTrailingZerosL src));
5252   effect(TEMP dst, KILL cr);
5253 
5254   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5255             "JNC    done\n\t"
5256             "TZCNT  $dst, $src.hi\n\t"
5257             "ADD    $dst, 32\n"
5258       "done:" %}
5259   ins_encode %{
5260     Register Rdst = $dst$$Register;
5261     Register Rsrc = $src$$Register;
5262     Label done;
5263     __ tzcntl(Rdst, Rsrc);
5264     __ jccb(Assembler::carryClear, done);
5265     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5266     __ addl(Rdst, BitsPerInt);
5267     __ bind(done);
5268   %}
5269   ins_pipe(ialu_reg);
5270 %}
5271 
5272 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5273   predicate(!UseCountTrailingZerosInstruction);
5274   match(Set dst (CountTrailingZerosL src));
5275   effect(TEMP dst, KILL cr);
5276 
5277   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5278             "JNZ    done\n\t"
5279             "BSF    $dst, $src.hi\n\t"
5280             "JNZ    msw_not_zero\n\t"
5281             "MOV    $dst, 32\n"
5282       "msw_not_zero:\n\t"
5283             "ADD    $dst, 32\n"
5284       "done:" %}
5285   ins_encode %{
5286     Register Rdst = $dst$$Register;
5287     Register Rsrc = $src$$Register;
5288     Label msw_not_zero;
5289     Label done;
5290     __ bsfl(Rdst, Rsrc);
5291     __ jccb(Assembler::notZero, done);
5292     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5293     __ jccb(Assembler::notZero, msw_not_zero);
5294     __ movl(Rdst, BitsPerInt);
5295     __ bind(msw_not_zero);
5296     __ addl(Rdst, BitsPerInt);
5297     __ bind(done);
5298   %}
5299   ins_pipe(ialu_reg);
5300 %}
5301 
5302 
5303 //---------- Population Count Instructions -------------------------------------
5304 
5305 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5306   predicate(UsePopCountInstruction);
5307   match(Set dst (PopCountI src));
5308   effect(KILL cr);
5309 
5310   format %{ "POPCNT $dst, $src" %}
5311   ins_encode %{
5312     __ popcntl($dst$$Register, $src$$Register);
5313   %}
5314   ins_pipe(ialu_reg);
5315 %}
5316 
5317 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5318   predicate(UsePopCountInstruction);
5319   match(Set dst (PopCountI (LoadI mem)));
5320   effect(KILL cr);
5321 
5322   format %{ "POPCNT $dst, $mem" %}
5323   ins_encode %{
5324     __ popcntl($dst$$Register, $mem$$Address);
5325   %}
5326   ins_pipe(ialu_reg);
5327 %}
5328 
5329 // Note: Long.bitCount(long) returns an int.
5330 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5331   predicate(UsePopCountInstruction);
5332   match(Set dst (PopCountL src));
5333   effect(KILL cr, TEMP tmp, TEMP dst);
5334 
5335   format %{ "POPCNT $dst, $src.lo\n\t"
5336             "POPCNT $tmp, $src.hi\n\t"
5337             "ADD    $dst, $tmp" %}
5338   ins_encode %{
5339     __ popcntl($dst$$Register, $src$$Register);
5340     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5341     __ addl($dst$$Register, $tmp$$Register);
5342   %}
5343   ins_pipe(ialu_reg);
5344 %}
5345 
5346 // Note: Long.bitCount(long) returns an int.
5347 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5348   predicate(UsePopCountInstruction);
5349   match(Set dst (PopCountL (LoadL mem)));
5350   effect(KILL cr, TEMP tmp, TEMP dst);
5351 
5352   format %{ "POPCNT $dst, $mem\n\t"
5353             "POPCNT $tmp, $mem+4\n\t"
5354             "ADD    $dst, $tmp" %}
5355   ins_encode %{
5356     //__ popcntl($dst$$Register, $mem$$Address$$first);
5357     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5358     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5359     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5360     __ addl($dst$$Register, $tmp$$Register);
5361   %}
5362   ins_pipe(ialu_reg);
5363 %}
5364 
5365 
5366 //----------Load/Store/Move Instructions---------------------------------------
5367 //----------Load Instructions--------------------------------------------------
5368 // Load Byte (8bit signed)
5369 instruct loadB(xRegI dst, memory mem) %{
5370   match(Set dst (LoadB mem));
5371 
5372   ins_cost(125);
5373   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5374 
5375   ins_encode %{
5376     __ movsbl($dst$$Register, $mem$$Address);
5377   %}
5378 
5379   ins_pipe(ialu_reg_mem);
5380 %}
5381 
5382 // Load Byte (8bit signed) into Long Register
5383 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5384   match(Set dst (ConvI2L (LoadB mem)));
5385   effect(KILL cr);
5386 
5387   ins_cost(375);
5388   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5389             "MOV    $dst.hi,$dst.lo\n\t"
5390             "SAR    $dst.hi,7" %}
5391 
5392   ins_encode %{
5393     __ movsbl($dst$$Register, $mem$$Address);
5394     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5395     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
5396   %}
5397 
5398   ins_pipe(ialu_reg_mem);
5399 %}
5400 
5401 // Load Unsigned Byte (8bit UNsigned)
5402 instruct loadUB(xRegI dst, memory mem) %{
5403   match(Set dst (LoadUB mem));
5404 
5405   ins_cost(125);
5406   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5407 
5408   ins_encode %{
5409     __ movzbl($dst$$Register, $mem$$Address);
5410   %}
5411 
5412   ins_pipe(ialu_reg_mem);
5413 %}
5414 
5415 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5416 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5417   match(Set dst (ConvI2L (LoadUB mem)));
5418   effect(KILL cr);
5419 
5420   ins_cost(250);
5421   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5422             "XOR    $dst.hi,$dst.hi" %}
5423 
5424   ins_encode %{
5425     Register Rdst = $dst$$Register;
5426     __ movzbl(Rdst, $mem$$Address);
5427     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5428   %}
5429 
5430   ins_pipe(ialu_reg_mem);
5431 %}
5432 
5433 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5434 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5435   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5436   effect(KILL cr);
5437 
5438   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5439             "XOR    $dst.hi,$dst.hi\n\t"
5440             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5441   ins_encode %{
5442     Register Rdst = $dst$$Register;
5443     __ movzbl(Rdst, $mem$$Address);
5444     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5445     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5446   %}
5447   ins_pipe(ialu_reg_mem);
5448 %}
5449 
5450 // Load Short (16bit signed)
5451 instruct loadS(rRegI dst, memory mem) %{
5452   match(Set dst (LoadS mem));
5453 
5454   ins_cost(125);
5455   format %{ "MOVSX  $dst,$mem\t# short" %}
5456 
5457   ins_encode %{
5458     __ movswl($dst$$Register, $mem$$Address);
5459   %}
5460 
5461   ins_pipe(ialu_reg_mem);
5462 %}
5463 
5464 // Load Short (16 bit signed) to Byte (8 bit signed)
5465 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5466   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5467 
5468   ins_cost(125);
5469   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5470   ins_encode %{
5471     __ movsbl($dst$$Register, $mem$$Address);
5472   %}
5473   ins_pipe(ialu_reg_mem);
5474 %}
5475 
5476 // Load Short (16bit signed) into Long Register
5477 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5478   match(Set dst (ConvI2L (LoadS mem)));
5479   effect(KILL cr);
5480 
5481   ins_cost(375);
5482   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5483             "MOV    $dst.hi,$dst.lo\n\t"
5484             "SAR    $dst.hi,15" %}
5485 
5486   ins_encode %{
5487     __ movswl($dst$$Register, $mem$$Address);
5488     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5489     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
5490   %}
5491 
5492   ins_pipe(ialu_reg_mem);
5493 %}
5494 
5495 // Load Unsigned Short/Char (16bit unsigned)
5496 instruct loadUS(rRegI dst, memory mem) %{
5497   match(Set dst (LoadUS mem));
5498 
5499   ins_cost(125);
5500   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5501 
5502   ins_encode %{
5503     __ movzwl($dst$$Register, $mem$$Address);
5504   %}
5505 
5506   ins_pipe(ialu_reg_mem);
5507 %}
5508 
5509 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5510 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5511   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5512 
5513   ins_cost(125);
5514   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5515   ins_encode %{
5516     __ movsbl($dst$$Register, $mem$$Address);
5517   %}
5518   ins_pipe(ialu_reg_mem);
5519 %}
5520 
5521 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5522 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5523   match(Set dst (ConvI2L (LoadUS mem)));
5524   effect(KILL cr);
5525 
5526   ins_cost(250);
5527   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5528             "XOR    $dst.hi,$dst.hi" %}
5529 
5530   ins_encode %{
5531     __ movzwl($dst$$Register, $mem$$Address);
5532     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5533   %}
5534 
5535   ins_pipe(ialu_reg_mem);
5536 %}
5537 
5538 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5539 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5540   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5541   effect(KILL cr);
5542 
5543   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5544             "XOR    $dst.hi,$dst.hi" %}
5545   ins_encode %{
5546     Register Rdst = $dst$$Register;
5547     __ movzbl(Rdst, $mem$$Address);
5548     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5549   %}
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5554 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5555   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5556   effect(KILL cr);
5557 
5558   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5559             "XOR    $dst.hi,$dst.hi\n\t"
5560             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5561   ins_encode %{
5562     Register Rdst = $dst$$Register;
5563     __ movzwl(Rdst, $mem$$Address);
5564     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5565     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5566   %}
5567   ins_pipe(ialu_reg_mem);
5568 %}
5569 
5570 // Load Integer
5571 instruct loadI(rRegI dst, memory mem) %{
5572   match(Set dst (LoadI mem));
5573 
5574   ins_cost(125);
5575   format %{ "MOV    $dst,$mem\t# int" %}
5576 
5577   ins_encode %{
5578     __ movl($dst$$Register, $mem$$Address);
5579   %}
5580 
5581   ins_pipe(ialu_reg_mem);
5582 %}
5583 
5584 // Load Integer (32 bit signed) to Byte (8 bit signed)
5585 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5586   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5587 
5588   ins_cost(125);
5589   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5590   ins_encode %{
5591     __ movsbl($dst$$Register, $mem$$Address);
5592   %}
5593   ins_pipe(ialu_reg_mem);
5594 %}
5595 
5596 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5597 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5598   match(Set dst (AndI (LoadI mem) mask));
5599 
5600   ins_cost(125);
5601   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5602   ins_encode %{
5603     __ movzbl($dst$$Register, $mem$$Address);
5604   %}
5605   ins_pipe(ialu_reg_mem);
5606 %}
5607 
5608 // Load Integer (32 bit signed) to Short (16 bit signed)
5609 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5610   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5611 
5612   ins_cost(125);
5613   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5614   ins_encode %{
5615     __ movswl($dst$$Register, $mem$$Address);
5616   %}
5617   ins_pipe(ialu_reg_mem);
5618 %}
5619 
5620 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5621 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5622   match(Set dst (AndI (LoadI mem) mask));
5623 
5624   ins_cost(125);
5625   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5626   ins_encode %{
5627     __ movzwl($dst$$Register, $mem$$Address);
5628   %}
5629   ins_pipe(ialu_reg_mem);
5630 %}
5631 
5632 // Load Integer into Long Register
5633 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5634   match(Set dst (ConvI2L (LoadI mem)));
5635   effect(KILL cr);
5636 
5637   ins_cost(375);
5638   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5639             "MOV    $dst.hi,$dst.lo\n\t"
5640             "SAR    $dst.hi,31" %}
5641 
5642   ins_encode %{
5643     __ movl($dst$$Register, $mem$$Address);
5644     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5645     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5646   %}
5647 
5648   ins_pipe(ialu_reg_mem);
5649 %}
5650 
5651 // Load Integer with mask 0xFF into Long Register
5652 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5653   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5654   effect(KILL cr);
5655 
5656   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5657             "XOR    $dst.hi,$dst.hi" %}
5658   ins_encode %{
5659     Register Rdst = $dst$$Register;
5660     __ movzbl(Rdst, $mem$$Address);
5661     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5662   %}
5663   ins_pipe(ialu_reg_mem);
5664 %}
5665 
5666 // Load Integer with mask 0xFFFF into Long Register
5667 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5668   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5669   effect(KILL cr);
5670 
5671   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5672             "XOR    $dst.hi,$dst.hi" %}
5673   ins_encode %{
5674     Register Rdst = $dst$$Register;
5675     __ movzwl(Rdst, $mem$$Address);
5676     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5677   %}
5678   ins_pipe(ialu_reg_mem);
5679 %}
5680 
5681 // Load Integer with 31-bit mask into Long Register
5682 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5683   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5684   effect(KILL cr);
5685 
5686   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5687             "XOR    $dst.hi,$dst.hi\n\t"
5688             "AND    $dst.lo,$mask" %}
5689   ins_encode %{
5690     Register Rdst = $dst$$Register;
5691     __ movl(Rdst, $mem$$Address);
5692     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5693     __ andl(Rdst, $mask$$constant);
5694   %}
5695   ins_pipe(ialu_reg_mem);
5696 %}
5697 
5698 // Load Unsigned Integer into Long Register
5699 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5700   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5701   effect(KILL cr);
5702 
5703   ins_cost(250);
5704   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5705             "XOR    $dst.hi,$dst.hi" %}
5706 
5707   ins_encode %{
5708     __ movl($dst$$Register, $mem$$Address);
5709     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5710   %}
5711 
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Long.  Cannot clobber address while loading, so restrict address
5716 // register to ESI
5717 instruct loadL(eRegL dst, load_long_memory mem) %{
5718   predicate(!((LoadLNode*)n)->require_atomic_access());
5719   match(Set dst (LoadL mem));
5720 
5721   ins_cost(250);
5722   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5723             "MOV    $dst.hi,$mem+4" %}
5724 
5725   ins_encode %{
5726     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5727     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5728     __ movl($dst$$Register, Amemlo);
5729     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5730   %}
5731 
5732   ins_pipe(ialu_reg_long_mem);
5733 %}
5734 
5735 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5736 // then store it down to the stack and reload on the int
5737 // side.
5738 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5739   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5740   match(Set dst (LoadL mem));
5741 
5742   ins_cost(200);
5743   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5744             "FISTp  $dst" %}
5745   ins_encode(enc_loadL_volatile(mem,dst));
5746   ins_pipe( fpu_reg_mem );
5747 %}
5748 
5749 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5750   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5751   match(Set dst (LoadL mem));
5752   effect(TEMP tmp);
5753   ins_cost(180);
5754   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5755             "MOVSD  $dst,$tmp" %}
5756   ins_encode %{
5757     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5758     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5759   %}
5760   ins_pipe( pipe_slow );
5761 %}
5762 
5763 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5764   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5765   match(Set dst (LoadL mem));
5766   effect(TEMP tmp);
5767   ins_cost(160);
5768   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5769             "MOVD   $dst.lo,$tmp\n\t"
5770             "PSRLQ  $tmp,32\n\t"
5771             "MOVD   $dst.hi,$tmp" %}
5772   ins_encode %{
5773     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5774     __ movdl($dst$$Register, $tmp$$XMMRegister);
5775     __ psrlq($tmp$$XMMRegister, 32);
5776     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5777   %}
5778   ins_pipe( pipe_slow );
5779 %}
5780 
5781 // Load Range
5782 instruct loadRange(rRegI dst, memory mem) %{
5783   match(Set dst (LoadRange mem));
5784 
5785   ins_cost(125);
5786   format %{ "MOV    $dst,$mem" %}
5787   opcode(0x8B);
5788   ins_encode( OpcP, RegMem(dst,mem));
5789   ins_pipe( ialu_reg_mem );
5790 %}
5791 
5792 
5793 // Load Pointer
5794 instruct loadP(eRegP dst, memory mem) %{
5795   match(Set dst (LoadP mem));
5796 
5797   ins_cost(125);
5798   format %{ "MOV    $dst,$mem" %}
5799   opcode(0x8B);
5800   ins_encode( OpcP, RegMem(dst,mem));
5801   ins_pipe( ialu_reg_mem );
5802 %}
5803 
5804 // Load Klass Pointer
5805 instruct loadKlass(eRegP dst, memory mem) %{
5806   match(Set dst (LoadKlass mem));
5807 
5808   ins_cost(125);
5809   format %{ "MOV    $dst,$mem" %}
5810   opcode(0x8B);
5811   ins_encode( OpcP, RegMem(dst,mem));
5812   ins_pipe( ialu_reg_mem );
5813 %}
5814 
5815 // Load Double
5816 instruct loadDPR(regDPR dst, memory mem) %{
5817   predicate(UseSSE<=1);
5818   match(Set dst (LoadD mem));
5819 
5820   ins_cost(150);
5821   format %{ "FLD_D  ST,$mem\n\t"
5822             "FSTP   $dst" %}
5823   opcode(0xDD);               /* DD /0 */
5824   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5825               Pop_Reg_DPR(dst) );
5826   ins_pipe( fpu_reg_mem );
5827 %}
5828 
5829 // Load Double to XMM
5830 instruct loadD(regD dst, memory mem) %{
5831   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5832   match(Set dst (LoadD mem));
5833   ins_cost(145);
5834   format %{ "MOVSD  $dst,$mem" %}
5835   ins_encode %{
5836     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5837   %}
5838   ins_pipe( pipe_slow );
5839 %}
5840 
5841 instruct loadD_partial(regD dst, memory mem) %{
5842   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5843   match(Set dst (LoadD mem));
5844   ins_cost(145);
5845   format %{ "MOVLPD $dst,$mem" %}
5846   ins_encode %{
5847     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5848   %}
5849   ins_pipe( pipe_slow );
5850 %}
5851 
5852 // Load to XMM register (single-precision floating point)
5853 // MOVSS instruction
5854 instruct loadF(regF dst, memory mem) %{
5855   predicate(UseSSE>=1);
5856   match(Set dst (LoadF mem));
5857   ins_cost(145);
5858   format %{ "MOVSS  $dst,$mem" %}
5859   ins_encode %{
5860     __ movflt ($dst$$XMMRegister, $mem$$Address);
5861   %}
5862   ins_pipe( pipe_slow );
5863 %}
5864 
5865 // Load Float
5866 instruct loadFPR(regFPR dst, memory mem) %{
5867   predicate(UseSSE==0);
5868   match(Set dst (LoadF mem));
5869 
5870   ins_cost(150);
5871   format %{ "FLD_S  ST,$mem\n\t"
5872             "FSTP   $dst" %}
5873   opcode(0xD9);               /* D9 /0 */
5874   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5875               Pop_Reg_FPR(dst) );
5876   ins_pipe( fpu_reg_mem );
5877 %}
5878 
5879 // Load Effective Address
5880 instruct leaP8(eRegP dst, indOffset8 mem) %{
5881   match(Set dst mem);
5882 
5883   ins_cost(110);
5884   format %{ "LEA    $dst,$mem" %}
5885   opcode(0x8D);
5886   ins_encode( OpcP, RegMem(dst,mem));
5887   ins_pipe( ialu_reg_reg_fat );
5888 %}
5889 
5890 instruct leaP32(eRegP dst, indOffset32 mem) %{
5891   match(Set dst mem);
5892 
5893   ins_cost(110);
5894   format %{ "LEA    $dst,$mem" %}
5895   opcode(0x8D);
5896   ins_encode( OpcP, RegMem(dst,mem));
5897   ins_pipe( ialu_reg_reg_fat );
5898 %}
5899 
5900 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5901   match(Set dst mem);
5902 
5903   ins_cost(110);
5904   format %{ "LEA    $dst,$mem" %}
5905   opcode(0x8D);
5906   ins_encode( OpcP, RegMem(dst,mem));
5907   ins_pipe( ialu_reg_reg_fat );
5908 %}
5909 
5910 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5911   match(Set dst mem);
5912 
5913   ins_cost(110);
5914   format %{ "LEA    $dst,$mem" %}
5915   opcode(0x8D);
5916   ins_encode( OpcP, RegMem(dst,mem));
5917   ins_pipe( ialu_reg_reg_fat );
5918 %}
5919 
5920 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5921   match(Set dst mem);
5922 
5923   ins_cost(110);
5924   format %{ "LEA    $dst,$mem" %}
5925   opcode(0x8D);
5926   ins_encode( OpcP, RegMem(dst,mem));
5927   ins_pipe( ialu_reg_reg_fat );
5928 %}
5929 
5930 // Load Constant
5931 instruct loadConI(rRegI dst, immI src) %{
5932   match(Set dst src);
5933 
5934   format %{ "MOV    $dst,$src" %}
5935   ins_encode( LdImmI(dst, src) );
5936   ins_pipe( ialu_reg_fat );
5937 %}
5938 
5939 // Load Constant zero
5940 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5941   match(Set dst src);
5942   effect(KILL cr);
5943 
5944   ins_cost(50);
5945   format %{ "XOR    $dst,$dst" %}
5946   opcode(0x33);  /* + rd */
5947   ins_encode( OpcP, RegReg( dst, dst ) );
5948   ins_pipe( ialu_reg );
5949 %}
5950 
5951 instruct loadConP(eRegP dst, immP src) %{
5952   match(Set dst src);
5953 
5954   format %{ "MOV    $dst,$src" %}
5955   opcode(0xB8);  /* + rd */
5956   ins_encode( LdImmP(dst, src) );
5957   ins_pipe( ialu_reg_fat );
5958 %}
5959 
5960 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5961   match(Set dst src);
5962   effect(KILL cr);
5963   ins_cost(200);
5964   format %{ "MOV    $dst.lo,$src.lo\n\t"
5965             "MOV    $dst.hi,$src.hi" %}
5966   opcode(0xB8);
5967   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5968   ins_pipe( ialu_reg_long_fat );
5969 %}
5970 
5971 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5972   match(Set dst src);
5973   effect(KILL cr);
5974   ins_cost(150);
5975   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5976             "XOR    $dst.hi,$dst.hi" %}
5977   opcode(0x33,0x33);
5978   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5979   ins_pipe( ialu_reg_long );
5980 %}
5981 
5982 // The instruction usage is guarded by predicate in operand immFPR().
5983 instruct loadConFPR(regFPR dst, immFPR con) %{
5984   match(Set dst con);
5985   ins_cost(125);
5986   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5987             "FSTP   $dst" %}
5988   ins_encode %{
5989     __ fld_s($constantaddress($con));
5990     __ fstp_d($dst$$reg);
5991   %}
5992   ins_pipe(fpu_reg_con);
5993 %}
5994 
5995 // The instruction usage is guarded by predicate in operand immFPR0().
5996 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5997   match(Set dst con);
5998   ins_cost(125);
5999   format %{ "FLDZ   ST\n\t"
6000             "FSTP   $dst" %}
6001   ins_encode %{
6002     __ fldz();
6003     __ fstp_d($dst$$reg);
6004   %}
6005   ins_pipe(fpu_reg_con);
6006 %}
6007 
6008 // The instruction usage is guarded by predicate in operand immFPR1().
6009 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6010   match(Set dst con);
6011   ins_cost(125);
6012   format %{ "FLD1   ST\n\t"
6013             "FSTP   $dst" %}
6014   ins_encode %{
6015     __ fld1();
6016     __ fstp_d($dst$$reg);
6017   %}
6018   ins_pipe(fpu_reg_con);
6019 %}
6020 
6021 // The instruction usage is guarded by predicate in operand immF().
6022 instruct loadConF(regF dst, immF con) %{
6023   match(Set dst con);
6024   ins_cost(125);
6025   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6026   ins_encode %{
6027     __ movflt($dst$$XMMRegister, $constantaddress($con));
6028   %}
6029   ins_pipe(pipe_slow);
6030 %}
6031 
6032 // The instruction usage is guarded by predicate in operand immF0().
6033 instruct loadConF0(regF dst, immF0 src) %{
6034   match(Set dst src);
6035   ins_cost(100);
6036   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6037   ins_encode %{
6038     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6039   %}
6040   ins_pipe(pipe_slow);
6041 %}
6042 
6043 // The instruction usage is guarded by predicate in operand immDPR().
6044 instruct loadConDPR(regDPR dst, immDPR con) %{
6045   match(Set dst con);
6046   ins_cost(125);
6047 
6048   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6049             "FSTP   $dst" %}
6050   ins_encode %{
6051     __ fld_d($constantaddress($con));
6052     __ fstp_d($dst$$reg);
6053   %}
6054   ins_pipe(fpu_reg_con);
6055 %}
6056 
6057 // The instruction usage is guarded by predicate in operand immDPR0().
6058 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6059   match(Set dst con);
6060   ins_cost(125);
6061 
6062   format %{ "FLDZ   ST\n\t"
6063             "FSTP   $dst" %}
6064   ins_encode %{
6065     __ fldz();
6066     __ fstp_d($dst$$reg);
6067   %}
6068   ins_pipe(fpu_reg_con);
6069 %}
6070 
6071 // The instruction usage is guarded by predicate in operand immDPR1().
6072 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6073   match(Set dst con);
6074   ins_cost(125);
6075 
6076   format %{ "FLD1   ST\n\t"
6077             "FSTP   $dst" %}
6078   ins_encode %{
6079     __ fld1();
6080     __ fstp_d($dst$$reg);
6081   %}
6082   ins_pipe(fpu_reg_con);
6083 %}
6084 
6085 // The instruction usage is guarded by predicate in operand immD().
6086 instruct loadConD(regD dst, immD con) %{
6087   match(Set dst con);
6088   ins_cost(125);
6089   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6090   ins_encode %{
6091     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6092   %}
6093   ins_pipe(pipe_slow);
6094 %}
6095 
6096 // The instruction usage is guarded by predicate in operand immD0().
6097 instruct loadConD0(regD dst, immD0 src) %{
6098   match(Set dst src);
6099   ins_cost(100);
6100   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6101   ins_encode %{
6102     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6103   %}
6104   ins_pipe( pipe_slow );
6105 %}
6106 
6107 // Load Stack Slot
6108 instruct loadSSI(rRegI dst, stackSlotI src) %{
6109   match(Set dst src);
6110   ins_cost(125);
6111 
6112   format %{ "MOV    $dst,$src" %}
6113   opcode(0x8B);
6114   ins_encode( OpcP, RegMem(dst,src));
6115   ins_pipe( ialu_reg_mem );
6116 %}
6117 
6118 instruct loadSSL(eRegL dst, stackSlotL src) %{
6119   match(Set dst src);
6120 
6121   ins_cost(200);
6122   format %{ "MOV    $dst,$src.lo\n\t"
6123             "MOV    $dst+4,$src.hi" %}
6124   opcode(0x8B, 0x8B);
6125   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6126   ins_pipe( ialu_mem_long_reg );
6127 %}
6128 
6129 // Load Stack Slot
6130 instruct loadSSP(eRegP dst, stackSlotP src) %{
6131   match(Set dst src);
6132   ins_cost(125);
6133 
6134   format %{ "MOV    $dst,$src" %}
6135   opcode(0x8B);
6136   ins_encode( OpcP, RegMem(dst,src));
6137   ins_pipe( ialu_reg_mem );
6138 %}
6139 
6140 // Load Stack Slot
6141 instruct loadSSF(regFPR dst, stackSlotF src) %{
6142   match(Set dst src);
6143   ins_cost(125);
6144 
6145   format %{ "FLD_S  $src\n\t"
6146             "FSTP   $dst" %}
6147   opcode(0xD9);               /* D9 /0, FLD m32real */
6148   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6149               Pop_Reg_FPR(dst) );
6150   ins_pipe( fpu_reg_mem );
6151 %}
6152 
6153 // Load Stack Slot
6154 instruct loadSSD(regDPR dst, stackSlotD src) %{
6155   match(Set dst src);
6156   ins_cost(125);
6157 
6158   format %{ "FLD_D  $src\n\t"
6159             "FSTP   $dst" %}
6160   opcode(0xDD);               /* DD /0, FLD m64real */
6161   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6162               Pop_Reg_DPR(dst) );
6163   ins_pipe( fpu_reg_mem );
6164 %}
6165 
6166 // Prefetch instructions for allocation.
6167 // Must be safe to execute with invalid address (cannot fault).
6168 
6169 instruct prefetchAlloc0( memory mem ) %{
6170   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6171   match(PrefetchAllocation mem);
6172   ins_cost(0);
6173   size(0);
6174   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6175   ins_encode();
6176   ins_pipe(empty);
6177 %}
6178 
6179 instruct prefetchAlloc( memory mem ) %{
6180   predicate(AllocatePrefetchInstr==3);
6181   match( PrefetchAllocation mem );
6182   ins_cost(100);
6183 
6184   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6185   ins_encode %{
6186     __ prefetchw($mem$$Address);
6187   %}
6188   ins_pipe(ialu_mem);
6189 %}
6190 
6191 instruct prefetchAllocNTA( memory mem ) %{
6192   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6193   match(PrefetchAllocation mem);
6194   ins_cost(100);
6195 
6196   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6197   ins_encode %{
6198     __ prefetchnta($mem$$Address);
6199   %}
6200   ins_pipe(ialu_mem);
6201 %}
6202 
6203 instruct prefetchAllocT0( memory mem ) %{
6204   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6205   match(PrefetchAllocation mem);
6206   ins_cost(100);
6207 
6208   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6209   ins_encode %{
6210     __ prefetcht0($mem$$Address);
6211   %}
6212   ins_pipe(ialu_mem);
6213 %}
6214 
6215 instruct prefetchAllocT2( memory mem ) %{
6216   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6217   match(PrefetchAllocation mem);
6218   ins_cost(100);
6219 
6220   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6221   ins_encode %{
6222     __ prefetcht2($mem$$Address);
6223   %}
6224   ins_pipe(ialu_mem);
6225 %}
6226 
6227 //----------Store Instructions-------------------------------------------------
6228 
6229 // Store Byte
6230 instruct storeB(memory mem, xRegI src) %{
6231   match(Set mem (StoreB mem src));
6232 
6233   ins_cost(125);
6234   format %{ "MOV8   $mem,$src" %}
6235   opcode(0x88);
6236   ins_encode( OpcP, RegMem( src, mem ) );
6237   ins_pipe( ialu_mem_reg );
6238 %}
6239 
6240 // Store Char/Short
6241 instruct storeC(memory mem, rRegI src) %{
6242   match(Set mem (StoreC mem src));
6243 
6244   ins_cost(125);
6245   format %{ "MOV16  $mem,$src" %}
6246   opcode(0x89, 0x66);
6247   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6248   ins_pipe( ialu_mem_reg );
6249 %}
6250 
6251 // Store Integer
6252 instruct storeI(memory mem, rRegI src) %{
6253   match(Set mem (StoreI mem src));
6254 
6255   ins_cost(125);
6256   format %{ "MOV    $mem,$src" %}
6257   opcode(0x89);
6258   ins_encode( OpcP, RegMem( src, mem ) );
6259   ins_pipe( ialu_mem_reg );
6260 %}
6261 
6262 // Store Long
6263 instruct storeL(long_memory mem, eRegL src) %{
6264   predicate(!((StoreLNode*)n)->require_atomic_access());
6265   match(Set mem (StoreL mem src));
6266 
6267   ins_cost(200);
6268   format %{ "MOV    $mem,$src.lo\n\t"
6269             "MOV    $mem+4,$src.hi" %}
6270   opcode(0x89, 0x89);
6271   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6272   ins_pipe( ialu_mem_long_reg );
6273 %}
6274 
6275 // Store Long to Integer
6276 instruct storeL2I(memory mem, eRegL src) %{
6277   match(Set mem (StoreI mem (ConvL2I src)));
6278 
6279   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6280   ins_encode %{
6281     __ movl($mem$$Address, $src$$Register);
6282   %}
6283   ins_pipe(ialu_mem_reg);
6284 %}
6285 
6286 // Volatile Store Long.  Must be atomic, so move it into
6287 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6288 // target address before the store (for null-ptr checks)
6289 // so the memory operand is used twice in the encoding.
6290 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6291   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6292   match(Set mem (StoreL mem src));
6293   effect( KILL cr );
6294   ins_cost(400);
6295   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6296             "FILD   $src\n\t"
6297             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6298   opcode(0x3B);
6299   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6300   ins_pipe( fpu_reg_mem );
6301 %}
6302 
6303 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6304   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6305   match(Set mem (StoreL mem src));
6306   effect( TEMP tmp, KILL cr );
6307   ins_cost(380);
6308   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6309             "MOVSD  $tmp,$src\n\t"
6310             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6311   ins_encode %{
6312     __ cmpl(rax, $mem$$Address);
6313     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6314     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6315   %}
6316   ins_pipe( pipe_slow );
6317 %}
6318 
6319 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6320   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6321   match(Set mem (StoreL mem src));
6322   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6323   ins_cost(360);
6324   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6325             "MOVD   $tmp,$src.lo\n\t"
6326             "MOVD   $tmp2,$src.hi\n\t"
6327             "PUNPCKLDQ $tmp,$tmp2\n\t"
6328             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6329   ins_encode %{
6330     __ cmpl(rax, $mem$$Address);
6331     __ movdl($tmp$$XMMRegister, $src$$Register);
6332     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6333     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6334     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6335   %}
6336   ins_pipe( pipe_slow );
6337 %}
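
// In storeLX_reg_volatile above, the two MOVDs put $src.lo and $src.hi into
// the low dwords of $tmp and $tmp2; PUNPCKLDQ interleaves them so the low
// quadword of $tmp is hi:lo, letting a single MOVSD perform the required
// atomic 64-bit store.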
6338 
6339 // Store Pointer; for storing unknown oops and raw pointers
6340 instruct storeP(memory mem, anyRegP src) %{
6341   match(Set mem (StoreP mem src));
6342 
6343   ins_cost(125);
6344   format %{ "MOV    $mem,$src" %}
6345   opcode(0x89);
6346   ins_encode( OpcP, RegMem( src, mem ) );
6347   ins_pipe( ialu_mem_reg );
6348 %}
6349 
6350 // Store Integer Immediate
6351 instruct storeImmI(memory mem, immI src) %{
6352   match(Set mem (StoreI mem src));
6353 
6354   ins_cost(150);
6355   format %{ "MOV    $mem,$src" %}
6356   opcode(0xC7);               /* C7 /0 */
6357   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6358   ins_pipe( ialu_mem_imm );
6359 %}
6360 
6361 // Store Short/Char Immediate
6362 instruct storeImmI16(memory mem, immI16 src) %{
6363   predicate(UseStoreImmI16);
6364   match(Set mem (StoreC mem src));
6365 
6366   ins_cost(150);
6367   format %{ "MOV16  $mem,$src" %}
6368   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6369   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6370   ins_pipe( ialu_mem_imm );
6371 %}
6372 
6373 // Store Pointer Immediate; null pointers or constant oops that do not
6374 // need card-mark barriers.
6375 instruct storeImmP(memory mem, immP src) %{
6376   match(Set mem (StoreP mem src));
6377 
6378   ins_cost(150);
6379   format %{ "MOV    $mem,$src" %}
6380   opcode(0xC7);               /* C7 /0 */
6381   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6382   ins_pipe( ialu_mem_imm );
6383 %}
6384 
6385 // Store Byte Immediate
6386 instruct storeImmB(memory mem, immI8 src) %{
6387   match(Set mem (StoreB mem src));
6388 
6389   ins_cost(150);
6390   format %{ "MOV8   $mem,$src" %}
6391   opcode(0xC6);               /* C6 /0 */
6392   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6393   ins_pipe( ialu_mem_imm );
6394 %}
6395 
6396 // Store CMS card-mark Immediate
6397 instruct storeImmCM(memory mem, immI8 src) %{
6398   match(Set mem (StoreCM mem src));
6399 
6400   ins_cost(150);
6401   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6402   opcode(0xC6);               /* C6 /0 */
6403   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6404   ins_pipe( ialu_mem_imm );
6405 %}
6406 
6407 // Store Double
6408 instruct storeDPR( memory mem, regDPR1 src) %{
6409   predicate(UseSSE<=1);
6410   match(Set mem (StoreD mem src));
6411 
6412   ins_cost(100);
6413   format %{ "FST_D  $mem,$src" %}
6414   opcode(0xDD);       /* DD /2 */
6415   ins_encode( enc_FPR_store(mem,src) );
6416   ins_pipe( fpu_mem_reg );
6417 %}
6418 
6419 // Store double does rounding on x86
6420 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6421   predicate(UseSSE<=1);
6422   match(Set mem (StoreD mem (RoundDouble src)));
6423 
6424   ins_cost(100);
6425   format %{ "FST_D  $mem,$src\t# round" %}
6426   opcode(0xDD);       /* DD /2 */
6427   ins_encode( enc_FPR_store(mem,src) );
6428   ins_pipe( fpu_mem_reg );
6429 %}
6430 
6431 // Store XMM register to memory (double-precision floating point)
6432 // MOVSD instruction
6433 instruct storeD(memory mem, regD src) %{
6434   predicate(UseSSE>=2);
6435   match(Set mem (StoreD mem src));
6436   ins_cost(95);
6437   format %{ "MOVSD  $mem,$src" %}
6438   ins_encode %{
6439     __ movdbl($mem$$Address, $src$$XMMRegister);
6440   %}
6441   ins_pipe( pipe_slow );
6442 %}
6443 
6444 // Store XMM register to memory (single-precision floating point)
6445 // MOVSS instruction
6446 instruct storeF(memory mem, regF src) %{
6447   predicate(UseSSE>=1);
6448   match(Set mem (StoreF mem src));
6449   ins_cost(95);
6450   format %{ "MOVSS  $mem,$src" %}
6451   ins_encode %{
6452     __ movflt($mem$$Address, $src$$XMMRegister);
6453   %}
6454   ins_pipe( pipe_slow );
6455 %}
6456 
6457 // Store Float
6458 instruct storeFPR( memory mem, regFPR1 src) %{
6459   predicate(UseSSE==0);
6460   match(Set mem (StoreF mem src));
6461 
6462   ins_cost(100);
6463   format %{ "FST_S  $mem,$src" %}
6464   opcode(0xD9);       /* D9 /2 */
6465   ins_encode( enc_FPR_store(mem,src) );
6466   ins_pipe( fpu_mem_reg );
6467 %}
6468 
6469 // Store Float does rounding on x86
6470 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6471   predicate(UseSSE==0);
6472   match(Set mem (StoreF mem (RoundFloat src)));
6473 
6474   ins_cost(100);
6475   format %{ "FST_S  $mem,$src\t# round" %}
6476   opcode(0xD9);       /* D9 /2 */
6477   ins_encode( enc_FPR_store(mem,src) );
6478   ins_pipe( fpu_mem_reg );
6479 %}
6480 
6481 // Store Float does rounding on x86
6482 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6483   predicate(UseSSE<=1);
6484   match(Set mem (StoreF mem (ConvD2F src)));
6485 
6486   ins_cost(100);
6487   format %{ "FST_S  $mem,$src\t# D-round" %}
6488   opcode(0xD9);       /* D9 /2 */
6489   ins_encode( enc_FPR_store(mem,src) );
6490   ins_pipe( fpu_mem_reg );
6491 %}
6492 
6493 // Store immediate Float value (it is faster than store from FPU register)
6494 // The instruction usage is guarded by predicate in operand immFPR().
6495 instruct storeFPR_imm( memory mem, immFPR src) %{
6496   match(Set mem (StoreF mem src));
6497 
6498   ins_cost(50);
6499   format %{ "MOV    $mem,$src\t# store float" %}
6500   opcode(0xC7);               /* C7 /0 */
6501   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6502   ins_pipe( ialu_mem_imm );
6503 %}
6504 
6505 // Store immediate Float value (it is faster than store from XMM register)
6506 // The instruction usage is guarded by predicate in operand immF().
6507 instruct storeF_imm( memory mem, immF src) %{
6508   match(Set mem (StoreF mem src));
6509 
6510   ins_cost(50);
6511   format %{ "MOV    $mem,$src\t# store float" %}
6512   opcode(0xC7);               /* C7 /0 */
6513   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6514   ins_pipe( ialu_mem_imm );
6515 %}
6516 
6517 // Store Integer to stack slot
6518 instruct storeSSI(stackSlotI dst, rRegI src) %{
6519   match(Set dst src);
6520 
6521   ins_cost(100);
6522   format %{ "MOV    $dst,$src" %}
6523   opcode(0x89);
6524   ins_encode( OpcPRegSS( dst, src ) );
6525   ins_pipe( ialu_mem_reg );
6526 %}
6527 
6528 // Store Integer to stack slot
6529 instruct storeSSP(stackSlotP dst, eRegP src) %{
6530   match(Set dst src);
6531 
6532   ins_cost(100);
6533   format %{ "MOV    $dst,$src" %}
6534   opcode(0x89);
6535   ins_encode( OpcPRegSS( dst, src ) );
6536   ins_pipe( ialu_mem_reg );
6537 %}
6538 
6539 // Store Long to stack slot
6540 instruct storeSSL(stackSlotL dst, eRegL src) %{
6541   match(Set dst src);
6542 
6543   ins_cost(200);
6544   format %{ "MOV    $dst,$src.lo\n\t"
6545             "MOV    $dst+4,$src.hi" %}
6546   opcode(0x89, 0x89);
6547   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6548   ins_pipe( ialu_mem_long_reg );
6549 %}
6550 
6551 //----------MemBar Instructions-----------------------------------------------
6552 // Memory barrier flavors
6553 
6554 instruct membar_acquire() %{
6555   match(MemBarAcquire);
6556   match(LoadFence);
6557   ins_cost(400);
6558 
6559   size(0);
6560   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6561   ins_encode();
6562   ins_pipe(empty);
6563 %}
6564 
6565 instruct membar_acquire_lock() %{
6566   match(MemBarAcquireLock);
6567   ins_cost(0);
6568 
6569   size(0);
6570   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6571   ins_encode( );
6572   ins_pipe(empty);
6573 %}
6574 
6575 instruct membar_release() %{
6576   match(MemBarRelease);
6577   match(StoreFence);
6578   ins_cost(400);
6579 
6580   size(0);
6581   format %{ "MEMBAR-release ! (empty encoding)" %}
6582   ins_encode( );
6583   ins_pipe(empty);
6584 %}
6585 
6586 instruct membar_release_lock() %{
6587   match(MemBarReleaseLock);
6588   ins_cost(0);
6589 
6590   size(0);
6591   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6592   ins_encode( );
6593   ins_pipe(empty);
6594 %}
6595 
6596 instruct membar_volatile(eFlagsReg cr) %{
6597   match(MemBarVolatile);
6598   effect(KILL cr);
6599   ins_cost(400);
6600 
6601   format %{
6602     $$template
6603     if (os::is_MP()) {
6604       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6605     } else {
6606       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6607     }
6608   %}
6609   ins_encode %{
6610     __ membar(Assembler::StoreLoad);
6611   %}
6612   ins_pipe(pipe_slow);
6613 %}
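
// Note: a locked read-modify-write of a dead stack slot (LOCK ADDL [ESP],0)
// acts as a full StoreLoad barrier on x86 and is generally cheaper than
// MFENCE, which is why membar(Assembler::StoreLoad) is encoded this way on
// MP systems.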
6614 
6615 instruct unnecessary_membar_volatile() %{
6616   match(MemBarVolatile);
6617   predicate(Matcher::post_store_load_barrier(n));
6618   ins_cost(0);
6619 
6620   size(0);
6621   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6622   ins_encode( );
6623   ins_pipe(empty);
6624 %}
6625 
6626 instruct membar_storestore() %{
6627   match(MemBarStoreStore);
6628   ins_cost(0);
6629 
6630   size(0);
6631   format %{ "MEMBAR-storestore (empty encoding)" %}
6632   ins_encode( );
6633   ins_pipe(empty);
6634 %}
6635 
6636 //----------Move Instructions--------------------------------------------------
6637 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6638   match(Set dst (CastX2P src));
6639   format %{ "# X2P  $dst, $src" %}
6640   ins_encode( /*empty encoding*/ );
6641   ins_cost(0);
6642   ins_pipe(empty);
6643 %}
6644 
6645 instruct castP2X(rRegI dst, eRegP src ) %{
6646   match(Set dst (CastP2X src));
6647   ins_cost(50);
6648   format %{ "MOV    $dst, $src\t# CastP2X" %}
6649   ins_encode( enc_Copy( dst, src) );
6650   ins_pipe( ialu_reg_reg );
6651 %}
6652 
6653 //----------Conditional Move---------------------------------------------------
6654 // Conditional move
6655 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6656   predicate(!VM_Version::supports_cmov() );
6657   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6658   ins_cost(200);
6659   format %{ "J$cop,us skip\t# signed cmove\n\t"
6660             "MOV    $dst,$src\n"
6661       "skip:" %}
6662   ins_encode %{
6663     Label Lskip;
6664     // Invert sense of branch from sense of CMOV
6665     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6666     __ movl($dst$$Register, $src$$Register);
6667     __ bind(Lskip);
6668   %}
6669   ins_pipe( pipe_cmov_reg );
6670 %}
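
// The emulation above exploits the x86 condition-code encoding: the low bit
// of a condition code selects a condition or its negation (e.g. 0x4 = E,
// 0x5 = NE), so $cop$$cmpcode^1 yields the inverted jump and the MOV is
// skipped exactly when the CMOV would not have written $dst.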
6671 
6672 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6673   predicate(!VM_Version::supports_cmov() );
6674   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6675   ins_cost(200);
6676   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6677             "MOV    $dst,$src\n"
6678       "skip:" %}
6679   ins_encode %{
6680     Label Lskip;
6681     // Invert sense of branch from sense of CMOV
6682     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6683     __ movl($dst$$Register, $src$$Register);
6684     __ bind(Lskip);
6685   %}
6686   ins_pipe( pipe_cmov_reg );
6687 %}
6688 
6689 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6690   predicate(VM_Version::supports_cmov() );
6691   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6692   ins_cost(200);
6693   format %{ "CMOV$cop $dst,$src" %}
6694   opcode(0x0F,0x40);
6695   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6696   ins_pipe( pipe_cmov_reg );
6697 %}
6698 
6699 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6700   predicate(VM_Version::supports_cmov() );
6701   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6702   ins_cost(200);
6703   format %{ "CMOV$cop $dst,$src" %}
6704   opcode(0x0F,0x40);
6705   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6706   ins_pipe( pipe_cmov_reg );
6707 %}
6708 
6709 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6710   predicate(VM_Version::supports_cmov() );
6711   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6712   ins_cost(200);
6713   expand %{
6714     cmovI_regU(cop, cr, dst, src);
6715   %}
6716 %}
6717 
6718 // Conditional move
6719 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6720   predicate(VM_Version::supports_cmov() );
6721   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6722   ins_cost(250);
6723   format %{ "CMOV$cop $dst,$src" %}
6724   opcode(0x0F,0x40);
6725   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6726   ins_pipe( pipe_cmov_mem );
6727 %}
6728 
6729 // Conditional move
6730 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6731   predicate(VM_Version::supports_cmov() );
6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6733   ins_cost(250);
6734   format %{ "CMOV$cop $dst,$src" %}
6735   opcode(0x0F,0x40);
6736   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6737   ins_pipe( pipe_cmov_mem );
6738 %}
6739 
6740 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6741   predicate(VM_Version::supports_cmov() );
6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6743   ins_cost(250);
6744   expand %{
6745     cmovI_memU(cop, cr, dst, src);
6746   %}
6747 %}
6748 
6749 // Conditional move
6750 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6751   predicate(VM_Version::supports_cmov() );
6752   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6753   ins_cost(200);
6754   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6755   opcode(0x0F,0x40);
6756   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6757   ins_pipe( pipe_cmov_reg );
6758 %}
6759 
6760 // Conditional move (non-P6 version)
6761 // Note:  a CMoveP is generated for  stubs and native wrappers
6762 //        regardless of whether we are on a P6, so we
6763 //        emulate a cmov here
6764 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6765   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6766   ins_cost(300);
6767   format %{ "Jn$cop   skip\n\t"
6768           "MOV    $dst,$src\t# pointer\n"
6769       "skip:" %}
6770   opcode(0x8b);
6771   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6772   ins_pipe( pipe_cmov_reg );
6773 %}
6774 
6775 // Conditional move
6776 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6777   predicate(VM_Version::supports_cmov() );
6778   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6779   ins_cost(200);
6780   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6781   opcode(0x0F,0x40);
6782   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6783   ins_pipe( pipe_cmov_reg );
6784 %}
6785 
6786 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6787   predicate(VM_Version::supports_cmov() );
6788   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6789   ins_cost(200);
6790   expand %{
6791     cmovP_regU(cop, cr, dst, src);
6792   %}
6793 %}
6794 
6795 // DISABLED: Requires the ADLC to emit a bottom_type call that
6796 // correctly meets the two pointer arguments; one is an incoming
6797 // register but the other is a memory operand.  ALSO appears to
6798 // be buggy with implicit null checks.
6799 //
6800 //// Conditional move
6801 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6802 //  predicate(VM_Version::supports_cmov() );
6803 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6804 //  ins_cost(250);
6805 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6806 //  opcode(0x0F,0x40);
6807 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6808 //  ins_pipe( pipe_cmov_mem );
6809 //%}
6810 //
6811 //// Conditional move
6812 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6813 //  predicate(VM_Version::supports_cmov() );
6814 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6815 //  ins_cost(250);
6816 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6817 //  opcode(0x0F,0x40);
6818 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6819 //  ins_pipe( pipe_cmov_mem );
6820 //%}
6821 
6822 // Conditional move
6823 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6824   predicate(UseSSE<=1);
6825   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6826   ins_cost(200);
6827   format %{ "FCMOV$cop $dst,$src\t# double" %}
6828   opcode(0xDA);
6829   ins_encode( enc_cmov_dpr(cop,src) );
6830   ins_pipe( pipe_cmovDPR_reg );
6831 %}
6832 
6833 // Conditional move
6834 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6835   predicate(UseSSE==0);
6836   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6837   ins_cost(200);
6838   format %{ "FCMOV$cop $dst,$src\t# float" %}
6839   opcode(0xDA);
6840   ins_encode( enc_cmov_dpr(cop,src) );
6841   ins_pipe( pipe_cmovDPR_reg );
6842 %}
6843 
6844 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6845 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6846   predicate(UseSSE<=1);
6847   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6848   ins_cost(200);
6849   format %{ "Jn$cop   skip\n\t"
6850             "MOV    $dst,$src\t# double\n"
6851       "skip:" %}
6852   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6853   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6854   ins_pipe( pipe_cmovDPR_reg );
6855 %}
6856 
6857 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6858 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6859   predicate(UseSSE==0);
6860   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6861   ins_cost(200);
6862   format %{ "Jn$cop    skip\n\t"
6863             "MOV    $dst,$src\t# float\n"
6864       "skip:" %}
6865   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6866   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6867   ins_pipe( pipe_cmovDPR_reg );
6868 %}
6869 
6870 // No CMOVE with SSE/SSE2
6871 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6872   predicate (UseSSE>=1);
6873   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6874   ins_cost(200);
6875   format %{ "Jn$cop   skip\n\t"
6876             "MOVSS  $dst,$src\t# float\n"
6877       "skip:" %}
6878   ins_encode %{
6879     Label skip;
6880     // Invert sense of branch from sense of CMOV
6881     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6882     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6883     __ bind(skip);
6884   %}
6885   ins_pipe( pipe_slow );
6886 %}
6887 
6888 // No CMOVE with SSE/SSE2
6889 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6890   predicate (UseSSE>=2);
6891   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6892   ins_cost(200);
6893   format %{ "Jn$cop   skip\n\t"
6894             "MOVSD  $dst,$src\t# float\n"
6895       "skip:" %}
6896   ins_encode %{
6897     Label skip;
6898     // Invert sense of branch from sense of CMOV
6899     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6900     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6901     __ bind(skip);
6902   %}
6903   ins_pipe( pipe_slow );
6904 %}
6905 
6906 // unsigned version
6907 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6908   predicate (UseSSE>=1);
6909   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6910   ins_cost(200);
6911   format %{ "Jn$cop   skip\n\t"
6912             "MOVSS  $dst,$src\t# float\n"
6913       "skip:" %}
6914   ins_encode %{
6915     Label skip;
6916     // Invert sense of branch from sense of CMOV
6917     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6918     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6919     __ bind(skip);
6920   %}
6921   ins_pipe( pipe_slow );
6922 %}
6923 
6924 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6925   predicate (UseSSE>=1);
6926   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6927   ins_cost(200);
6928   expand %{
6929     fcmovF_regU(cop, cr, dst, src);
6930   %}
6931 %}
6932 
6933 // unsigned version
6934 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6935   predicate (UseSSE>=2);
6936   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6937   ins_cost(200);
6938   format %{ "Jn$cop   skip\n\t"
6939             "MOVSD  $dst,$src\t# float\n"
6940       "skip:" %}
6941   ins_encode %{
6942     Label skip;
6943     // Invert sense of branch from sense of CMOV
6944     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6945     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6946     __ bind(skip);
6947   %}
6948   ins_pipe( pipe_slow );
6949 %}
6950 
6951 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6952   predicate (UseSSE>=2);
6953   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6954   ins_cost(200);
6955   expand %{
6956     fcmovD_regU(cop, cr, dst, src);
6957   %}
6958 %}
6959 
6960 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6961   predicate(VM_Version::supports_cmov() );
6962   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6963   ins_cost(200);
6964   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6965             "CMOV$cop $dst.hi,$src.hi" %}
6966   opcode(0x0F,0x40);
6967   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6968   ins_pipe( pipe_cmov_reg_long );
6969 %}
6970 
6971 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6972   predicate(VM_Version::supports_cmov() );
6973   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6974   ins_cost(200);
6975   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6976             "CMOV$cop $dst.hi,$src.hi" %}
6977   opcode(0x0F,0x40);
6978   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6979   ins_pipe( pipe_cmov_reg_long );
6980 %}
6981 
6982 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6983   predicate(VM_Version::supports_cmov() );
6984   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6985   ins_cost(200);
6986   expand %{
6987     cmovL_regU(cop, cr, dst, src);
6988   %}
6989 %}
6990 
6991 //----------Arithmetic Instructions--------------------------------------------
6992 //----------Addition Instructions----------------------------------------------
6993 
6994 // Integer Addition Instructions
6995 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
6996   match(Set dst (AddI dst src));
6997   effect(KILL cr);
6998 
6999   size(2);
7000   format %{ "ADD    $dst,$src" %}
7001   opcode(0x03);
7002   ins_encode( OpcP, RegReg( dst, src) );
7003   ins_pipe( ialu_reg_reg );
7004 %}
7005 
7006 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7007   match(Set dst (AddI dst src));
7008   effect(KILL cr);
7009 
7010   format %{ "ADD    $dst,$src" %}
7011   opcode(0x81, 0x00); /* /0 id */
7012   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7013   ins_pipe( ialu_reg );
7014 %}
7015 
7016 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7017   predicate(UseIncDec);
7018   match(Set dst (AddI dst src));
7019   effect(KILL cr);
7020 
7021   size(1);
7022   format %{ "INC    $dst" %}
7023   opcode(0x40); /* 0x40 + rd => INC r32 */
7024   ins_encode( Opc_plus( primary, dst ) );
7025   ins_pipe( ialu_reg );
7026 %}
7027 
7028 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7029   match(Set dst (AddI src0 src1));
7030   ins_cost(110);
7031 
7032   format %{ "LEA    $dst,[$src0 + $src1]" %}
7033   opcode(0x8D); /* 0x8D /r */
7034   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7035   ins_pipe( ialu_reg_reg );
7036 %}
7037 
7038 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7039   match(Set dst (AddP src0 src1));
7040   ins_cost(110);
7041 
7042   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7043   opcode(0x8D); /* 0x8D /r */
7044   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7045   ins_pipe( ialu_reg_reg );
7046 %}
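
// Unlike the ADD forms above, the two LEA forms have no KILL cr effect:
// LEA performs its addition in the address-generation unit and leaves
// EFLAGS untouched, so the allocator may place it between a compare and
// its branch.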
7047 
7048 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7049   predicate(UseIncDec);
7050   match(Set dst (AddI dst src));
7051   effect(KILL cr);
7052 
7053   size(1);
7054   format %{ "DEC    $dst" %}
7055   opcode(0x48); /* 0x48 + rd => DEC r32 */
7056   ins_encode( Opc_plus( primary, dst ) );
7057   ins_pipe( ialu_reg );
7058 %}
7059 
7060 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7061   match(Set dst (AddP dst src));
7062   effect(KILL cr);
7063 
7064   size(2);
7065   format %{ "ADD    $dst,$src" %}
7066   opcode(0x03);
7067   ins_encode( OpcP, RegReg( dst, src) );
7068   ins_pipe( ialu_reg_reg );
7069 %}
7070 
7071 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7072   match(Set dst (AddP dst src));
7073   effect(KILL cr);
7074 
7075   format %{ "ADD    $dst,$src" %}
7076   opcode(0x81,0x00); /* Opcode 81 /0 id */
7077   // ins_encode( RegImm( dst, src) );
7078   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7079   ins_pipe( ialu_reg );
7080 %}
7081 
7082 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7083   match(Set dst (AddI dst (LoadI src)));
7084   effect(KILL cr);
7085 
7086   ins_cost(125);
7087   format %{ "ADD    $dst,$src" %}
7088   opcode(0x03);
7089   ins_encode( OpcP, RegMem( dst, src) );
7090   ins_pipe( ialu_reg_mem );
7091 %}
7092 
7093 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7094   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7095   effect(KILL cr);
7096 
7097   ins_cost(150);
7098   format %{ "ADD    $dst,$src" %}
7099   opcode(0x01);  /* Opcode 01 /r */
7100   ins_encode( OpcP, RegMem( src, dst ) );
7101   ins_pipe( ialu_mem_reg );
7102 %}
7103 
7104 // Add Memory with Immediate
7105 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7106   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7107   effect(KILL cr);
7108 
7109   ins_cost(125);
7110   format %{ "ADD    $dst,$src" %}
7111   opcode(0x81);               /* Opcode 81 /0 id */
7112   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7113   ins_pipe( ialu_mem_imm );
7114 %}
7115 
7116 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7117   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7118   effect(KILL cr);
7119 
7120   ins_cost(125);
7121   format %{ "INC    $dst" %}
7122   opcode(0xFF);               /* Opcode FF /0 */
7123   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7124   ins_pipe( ialu_mem_imm );
7125 %}
7126 
7127 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7128   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7129   effect(KILL cr);
7130 
7131   ins_cost(125);
7132   format %{ "DEC    $dst" %}
7133   opcode(0xFF);               /* Opcode FF /1 */
7134   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7135   ins_pipe( ialu_mem_imm );
7136 %}
7137 
7138 
7139 instruct checkCastPP( eRegP dst ) %{
7140   match(Set dst (CheckCastPP dst));
7141 
7142   size(0);
7143   format %{ "#checkcastPP of $dst" %}
7144   ins_encode( /*empty encoding*/ );
7145   ins_pipe( empty );
7146 %}
7147 
7148 instruct castPP( eRegP dst ) %{
7149   match(Set dst (CastPP dst));
7150   format %{ "#castPP of $dst" %}
7151   ins_encode( /*empty encoding*/ );
7152   ins_pipe( empty );
7153 %}
7154 
7155 instruct castII( rRegI dst ) %{
7156   match(Set dst (CastII dst));
7157   format %{ "#castII of $dst" %}
7158   ins_encode( /*empty encoding*/ );
7159   ins_cost(0);
7160   ins_pipe( empty );
7161 %}
7162 
7163 
7164 // Load-locked - same as a regular pointer load when used with compare-swap
7165 instruct loadPLocked(eRegP dst, memory mem) %{
7166   match(Set dst (LoadPLocked mem));
7167 
7168   ins_cost(125);
7169   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7170   opcode(0x8B);
7171   ins_encode( OpcP, RegMem(dst,mem));
7172   ins_pipe( ialu_reg_mem );
7173 %}
7174 
7175 // Conditional-store of the updated heap-top.
7176 // Used during allocation of the shared heap.
7177 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7178 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7179   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7180   // EAX is killed if there is contention, but then it's also unused.
7181   // In the common case of no contention, EAX holds the new oop address.
7182   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7183   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7184   ins_pipe( pipe_cmpxchg );
7185 %}
7186 
7187 // Conditional-store of an int value.
7188 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7189 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7190   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7191   effect(KILL oldval);
7192   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7193   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7194   ins_pipe( pipe_cmpxchg );
7195 %}
7196 
7197 // Conditional-store of a long value.
7198 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7199 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7200   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7201   effect(KILL oldval);
7202   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7203             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7204             "XCHG   EBX,ECX"
7205   %}
7206   ins_encode %{
7207     // Note: we need to swap rbx, and rcx before and after the
7208     //       cmpxchg8 instruction because the instruction uses
7209     //       rcx as the high order word of the new value to store but
7210     //       our register encoding uses rbx.
7211     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7212     if( os::is_MP() )
7213       __ lock();
7214     __ cmpxchg8($mem$$Address);
7215     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7216   %}
7217   ins_pipe( pipe_cmpxchg );
7218 %}
7219 
7220 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7221 
7222 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7223   predicate(VM_Version::supports_cx8());
7224   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7225   effect(KILL cr, KILL oldval);
7226   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7227             "MOV    $res,0\n\t"
7228             "JNE,s  fail\n\t"
7229             "MOV    $res,1\n"
7230           "fail:" %}
7231   ins_encode( enc_cmpxchg8(mem_ptr),
7232               enc_flags_ne_to_boolean(res) );
7233   ins_pipe( pipe_cmpxchg );
7234 %}
7235 
7236 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7237   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7238   effect(KILL cr, KILL oldval);
7239   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7240             "MOV    $res,0\n\t"
7241             "JNE,s  fail\n\t"
7242             "MOV    $res,1\n"
7243           "fail:" %}
7244   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7245   ins_pipe( pipe_cmpxchg );
7246 %}
7247 
7248 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7249   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7250   effect(KILL cr, KILL oldval);
7251   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7252             "MOV    $res,0\n\t"
7253             "JNE,s  fail\n\t"
7254             "MOV    $res,1\n"
7255           "fail:" %}
7256   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7257   ins_pipe( pipe_cmpxchg );
7258 %}
7259 
7260 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7261   predicate(n->as_LoadStore()->result_not_used());
7262   match(Set dummy (GetAndAddI mem add));
7263   effect(KILL cr);
7264   format %{ "ADDL  [$mem],$add" %}
7265   ins_encode %{
7266     if (os::is_MP()) { __ lock(); }
7267     __ addl($mem$$Address, $add$$constant);
7268   %}
7269   ins_pipe( pipe_cmpxchg );
7270 %}
7271 
7272 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7273   match(Set newval (GetAndAddI mem newval));
7274   effect(KILL cr);
7275   format %{ "XADDL  [$mem],$newval" %}
7276   ins_encode %{
7277     if (os::is_MP()) { __ lock(); }
7278     __ xaddl($mem$$Address, $newval$$Register);
7279   %}
7280   ins_pipe( pipe_cmpxchg );
7281 %}
7282 
7283 instruct xchgI( memory mem, rRegI newval) %{
7284   match(Set newval (GetAndSetI mem newval));
7285   format %{ "XCHGL  $newval,[$mem]" %}
7286   ins_encode %{
7287     __ xchgl($newval$$Register, $mem$$Address);
7288   %}
7289   ins_pipe( pipe_cmpxchg );
7290 %}
7291 
7292 instruct xchgP( memory mem, pRegP newval) %{
7293   match(Set newval (GetAndSetP mem newval));
7294   format %{ "XCHGL  $newval,[$mem]" %}
7295   ins_encode %{
7296     __ xchgl($newval$$Register, $mem$$Address);
7297   %}
7298   ins_pipe( pipe_cmpxchg );
7299 %}
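
// XCHG with a memory operand asserts LOCK implicitly, so the GetAndSet
// forms above need no explicit lock prefix, unlike the XADD/ADD forms
// which emit lock() when os::is_MP().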
7300 
7301 //----------Subtraction Instructions-------------------------------------------
7302 
7303 // Integer Subtraction Instructions
7304 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7305   match(Set dst (SubI dst src));
7306   effect(KILL cr);
7307 
7308   size(2);
7309   format %{ "SUB    $dst,$src" %}
7310   opcode(0x2B);
7311   ins_encode( OpcP, RegReg( dst, src) );
7312   ins_pipe( ialu_reg_reg );
7313 %}
7314 
7315 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7316   match(Set dst (SubI dst src));
7317   effect(KILL cr);
7318 
7319   format %{ "SUB    $dst,$src" %}
7320   opcode(0x81,0x05);  /* Opcode 81 /5 */
7321   // ins_encode( RegImm( dst, src) );
7322   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7323   ins_pipe( ialu_reg );
7324 %}
7325 
7326 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7327   match(Set dst (SubI dst (LoadI src)));
7328   effect(KILL cr);
7329 
7330   ins_cost(125);
7331   format %{ "SUB    $dst,$src" %}
7332   opcode(0x2B);
7333   ins_encode( OpcP, RegMem( dst, src) );
7334   ins_pipe( ialu_reg_mem );
7335 %}
7336 
7337 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7338   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7339   effect(KILL cr);
7340 
7341   ins_cost(150);
7342   format %{ "SUB    $dst,$src" %}
7343   opcode(0x29);  /* Opcode 29 /r */
7344   ins_encode( OpcP, RegMem( src, dst ) );
7345   ins_pipe( ialu_mem_reg );
7346 %}
7347 
7348 // Subtract from a pointer
7349 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7350   match(Set dst (AddP dst (SubI zero src)));
7351   effect(KILL cr);
7352 
7353   size(2);
7354   format %{ "SUB    $dst,$src" %}
7355   opcode(0x2B);
7356   ins_encode( OpcP, RegReg( dst, src) );
7357   ins_pipe( ialu_reg_reg );
7358 %}
7359 
7360 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7361   match(Set dst (SubI zero dst));
7362   effect(KILL cr);
7363 
7364   size(2);
7365   format %{ "NEG    $dst" %}
7366   opcode(0xF7,0x03);  // Opcode F7 /3
7367   ins_encode( OpcP, RegOpc( dst ) );
7368   ins_pipe( ialu_reg );
7369 %}
7370 
7371 //----------Multiplication/Division Instructions-------------------------------
7372 // Integer Multiplication Instructions
7373 // Multiply Register
7374 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7375   match(Set dst (MulI dst src));
7376   effect(KILL cr);
7377 
7378   size(3);
7379   ins_cost(300);
7380   format %{ "IMUL   $dst,$src" %}
7381   opcode(0xAF, 0x0F);
7382   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7383   ins_pipe( ialu_reg_reg_alu0 );
7384 %}
7385 
7386 // Multiply 32-bit Immediate
7387 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7388   match(Set dst (MulI src imm));
7389   effect(KILL cr);
7390 
7391   ins_cost(300);
7392   format %{ "IMUL   $dst,$src,$imm" %}
7393   opcode(0x69);  /* 69 /r id */
7394   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7395   ins_pipe( ialu_reg_reg_alu0 );
7396 %}
7397 
7398 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7399   match(Set dst src);
7400   effect(KILL cr);
7401 
7402   // Note that this is artificially increased to make it more expensive than loadConL
7403   ins_cost(250);
7404   format %{ "MOV    EAX,$src\t// low word only" %}
7405   opcode(0xB8);
7406   ins_encode( LdImmL_Lo(dst, src) );
7407   ins_pipe( ialu_reg_fat );
7408 %}
7409 
7410 // Multiply by 32-bit Immediate, taking the shifted high order results
7411 //  (special case for shift by 32)
7412 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7413   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7414   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7415              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7416              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7417   effect(USE src1, KILL cr);
7418 
7419   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7420   ins_cost(0*100 + 1*400 - 150);
7421   format %{ "IMUL   EDX:EAX,$src1" %}
7422   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7423   ins_pipe( pipe_slow );
7424 %}
7425 
7426 // Multiply by 32-bit Immediate, taking the shifted high order results
7427 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7428   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7429   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7430              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7431              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7432   effect(USE src1, KILL cr);
7433 
7434   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7435   ins_cost(1*100 + 1*400 - 150);
7436   format %{ "IMUL   EDX:EAX,$src1\n\t"
7437             "SAR    EDX,$cnt-32" %}
7438   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7439   ins_pipe( pipe_slow );
7440 %}
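
// The two rules above read the high half of the 32x32->64 product directly
// out of EDX (plus a SAR of EDX by $cnt-32 when the shift exceeds 32); this
// pattern typically arises from divide-by-constant strength reduction.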
7441 
7442 // Multiply Memory 32-bit Immediate
7443 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7444   match(Set dst (MulI (LoadI src) imm));
7445   effect(KILL cr);
7446 
7447   ins_cost(300);
7448   format %{ "IMUL   $dst,$src,$imm" %}
7449   opcode(0x69);  /* 69 /r id */
7450   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7451   ins_pipe( ialu_reg_mem_alu0 );
7452 %}
7453 
7454 // Multiply Memory
7455 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7456   match(Set dst (MulI dst (LoadI src)));
7457   effect(KILL cr);
7458 
7459   ins_cost(350);
7460   format %{ "IMUL   $dst,$src" %}
7461   opcode(0xAF, 0x0F);
7462   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7463   ins_pipe( ialu_reg_mem_alu0 );
7464 %}
7465 
7466 // Multiply Register Int to Long
7467 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7468   // Basic Idea: long = (long)int * (long)int
7469   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7470   effect(DEF dst, USE src, USE src1, KILL flags);
7471 
7472   ins_cost(300);
7473   format %{ "IMUL   $dst,$src1" %}
7474 
7475   ins_encode( long_int_multiply( dst, src1 ) );
7476   ins_pipe( ialu_reg_reg_alu0 );
7477 %}
7478 
7479 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7480   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7481   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7482   effect(KILL flags);
7483 
7484   ins_cost(300);
7485   format %{ "MUL    $dst,$src1" %}
7486 
7487   ins_encode( long_uint_multiply(dst, src1) );
7488   ins_pipe( ialu_reg_reg_alu0 );
7489 %}
7490 
7491 // Multiply Register Long
7492 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7493   match(Set dst (MulL dst src));
7494   effect(KILL cr, TEMP tmp);
7495   ins_cost(4*100+3*400);
7496 // Basic idea: lo(result) = lo(x_lo * y_lo)
7497 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7498   format %{ "MOV    $tmp,$src.lo\n\t"
7499             "IMUL   $tmp,EDX\n\t"
7500             "MOV    EDX,$src.hi\n\t"
7501             "IMUL   EDX,EAX\n\t"
7502             "ADD    $tmp,EDX\n\t"
7503             "MUL    EDX:EAX,$src.lo\n\t"
7504             "ADD    EDX,$tmp" %}
7505   ins_encode( long_multiply( dst, src, tmp ) );
7506   ins_pipe( pipe_slow );
7507 %}
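
// The decomposition used by mulL_eReg follows from expanding
//   (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)  mod 2^64
//     = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32
// so one widening MUL (x_lo*y_lo) plus two truncating IMULs suffice; the
// x_hi*y_hi term is dropped since it only affects bits 64 and above.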
7508 
7509 // Multiply Register Long where the left operand's high 32 bits are zero
7510 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7511   predicate(is_operand_hi32_zero(n->in(1)));
7512   match(Set dst (MulL dst src));
7513   effect(KILL cr, TEMP tmp);
7514   ins_cost(2*100+2*400);
7515 // Basic idea: lo(result) = lo(x_lo * y_lo)
7516 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7517   format %{ "MOV    $tmp,$src.hi\n\t"
7518             "IMUL   $tmp,EAX\n\t"
7519             "MUL    EDX:EAX,$src.lo\n\t"
7520             "ADD    EDX,$tmp" %}
7521   ins_encode %{
7522     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7523     __ imull($tmp$$Register, rax);
7524     __ mull($src$$Register);
7525     __ addl(rdx, $tmp$$Register);
7526   %}
7527   ins_pipe( pipe_slow );
7528 %}
7529 
7530 // Multiply Register Long where the right operand's high 32 bits are zero
7531 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7532   predicate(is_operand_hi32_zero(n->in(2)));
7533   match(Set dst (MulL dst src));
7534   effect(KILL cr, TEMP tmp);
7535   ins_cost(2*100+2*400);
7536 // Basic idea: lo(result) = lo(x_lo * y_lo)
7537 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7538   format %{ "MOV    $tmp,$src.lo\n\t"
7539             "IMUL   $tmp,EDX\n\t"
7540             "MUL    EDX:EAX,$src.lo\n\t"
7541             "ADD    EDX,$tmp" %}
7542   ins_encode %{
7543     __ movl($tmp$$Register, $src$$Register);
7544     __ imull($tmp$$Register, rdx);
7545     __ mull($src$$Register);
7546     __ addl(rdx, $tmp$$Register);
7547   %}
7548   ins_pipe( pipe_slow );
7549 %}
7550 
7551 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7552 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7553   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7554   match(Set dst (MulL dst src));
7555   effect(KILL cr);
7556   ins_cost(1*400);
7557 // Basic idea: lo(result) = lo(x_lo * y_lo)
7558 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7559   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7560   ins_encode %{
7561     __ mull($src$$Register);
7562   %}
7563   ins_pipe( pipe_slow );
7564 %}
7565 
7566 // Multiply Register Long by small constant
7567 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7568   match(Set dst (MulL dst src));
7569   effect(KILL cr, TEMP tmp);
7570   ins_cost(2*100+2*400);
7571   size(12);
7572 // Basic idea: lo(result) = lo(src * EAX)
7573 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7574   format %{ "IMUL   $tmp,EDX,$src\n\t"
7575             "MOV    EDX,$src\n\t"
7576             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7577             "ADD    EDX,$tmp" %}
7578   ins_encode( long_multiply_con( dst, src, tmp ) );
7579   ins_pipe( pipe_slow );
7580 %}
7581 
7582 // Integer DIV with Register
7583 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7584   match(Set rax (DivI rax div));
7585   effect(KILL rdx, KILL cr);
7586   size(26);
7587   ins_cost(30*100+10*100);
7588   format %{ "CMP    EAX,0x80000000\n\t"
7589             "JNE,s  normal\n\t"
7590             "XOR    EDX,EDX\n\t"
7591             "CMP    ECX,-1\n\t"
7592             "JE,s   done\n"
7593     "normal: CDQ\n\t"
7594             "IDIV   $div\n\t"
7595     "done:"        %}
7596   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7597   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7598   ins_pipe( ialu_reg_reg_alu0 );
7599 %}
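
// The guard above covers the only overflowing IDIV case: min_jint / -1 does
// not fit in 32 bits and would raise #DE, so EAX is compared with 0x80000000
// and the divisor with -1, and in that case the result EAX = min_jint,
// EDX = 0 is produced directly, skipping the CDQ/IDIV.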
7600 
7601 // Divide Register Long
7602 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7603   match(Set dst (DivL src1 src2));
7604   effect( KILL cr, KILL cx, KILL bx );
7605   ins_cost(10000);
7606   format %{ "PUSH   $src1.hi\n\t"
7607             "PUSH   $src1.lo\n\t"
7608             "PUSH   $src2.hi\n\t"
7609             "PUSH   $src2.lo\n\t"
7610             "CALL   SharedRuntime::ldiv\n\t"
7611             "ADD    ESP,16" %}
7612   ins_encode( long_div(src1,src2) );
7613   ins_pipe( pipe_slow );
7614 %}
7615 
7616 // Integer DIVMOD with Register, both quotient and mod results
7617 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7618   match(DivModI rax div);
7619   effect(KILL cr);
7620   size(26);
7621   ins_cost(30*100+10*100);
7622   format %{ "CMP    EAX,0x80000000\n\t"
7623             "JNE,s  normal\n\t"
7624             "XOR    EDX,EDX\n\t"
7625             "CMP    ECX,-1\n\t"
7626             "JE,s   done\n"
7627     "normal: CDQ\n\t"
7628             "IDIV   $div\n\t"
7629     "done:"        %}
7630   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7631   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7632   ins_pipe( pipe_slow );
7633 %}
7634 
7635 // Integer MOD with Register
7636 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7637   match(Set rdx (ModI rax div));
7638   effect(KILL rax, KILL cr);
7639 
7640   size(26);
7641   ins_cost(300);
7642   format %{ "CDQ\n\t"
7643             "IDIV   $div" %}
7644   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7645   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7646   ins_pipe( ialu_reg_reg_alu0 );
7647 %}
7648 
7649 // Remainder Register Long
7650 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7651   match(Set dst (ModL src1 src2));
7652   effect( KILL cr, KILL cx, KILL bx );
7653   ins_cost(10000);
7654   format %{ "PUSH   $src1.hi\n\t"
7655             "PUSH   $src1.lo\n\t"
7656             "PUSH   $src2.hi\n\t"
7657             "PUSH   $src2.lo\n\t"
7658             "CALL   SharedRuntime::lrem\n\t"
7659             "ADD    ESP,16" %}
7660   ins_encode( long_mod(src1,src2) );
7661   ins_pipe( pipe_slow );
7662 %}
7663 
7664 // Divide Register Long (no special case since divisor != -1)
7665 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7666   match(Set dst (DivL dst imm));
7667   effect( TEMP tmp, TEMP tmp2, KILL cr );
7668   ins_cost(1000);
7669   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7670             "XOR    $tmp2,$tmp2\n\t"
7671             "CMP    $tmp,EDX\n\t"
7672             "JA,s   fast\n\t"
7673             "MOV    $tmp2,EAX\n\t"
7674             "MOV    EAX,EDX\n\t"
7675             "MOV    EDX,0\n\t"
7676             "JLE,s  pos\n\t"
7677             "LNEG   EAX : $tmp2\n\t"
7678             "DIV    $tmp # unsigned division\n\t"
7679             "XCHG   EAX,$tmp2\n\t"
7680             "DIV    $tmp\n\t"
7681             "LNEG   $tmp2 : EAX\n\t"
7682             "JMP,s  done\n"
7683     "pos:\n\t"
7684             "DIV    $tmp\n\t"
7685             "XCHG   EAX,$tmp2\n"
7686     "fast:\n\t"
7687             "DIV    $tmp\n"
7688     "done:\n\t"
7689             "MOV    EDX,$tmp2\n\t"
7690             "NEG    EDX:EAX # if $imm < 0" %}
7691   ins_encode %{
7692     int con = (int)$imm$$constant;
7693     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7694     int pcon = (con > 0) ? con : -con;
7695     Label Lfast, Lpos, Ldone;
7696 
7697     __ movl($tmp$$Register, pcon);
7698     __ xorl($tmp2$$Register,$tmp2$$Register);
7699     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7700     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7701 
7702     __ movl($tmp2$$Register, $dst$$Register); // save
7703     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7704     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7705     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7706 
7707     // Negative dividend.
7708     // convert value to positive to use unsigned division
7709     __ lneg($dst$$Register, $tmp2$$Register);
7710     __ divl($tmp$$Register);
7711     __ xchgl($dst$$Register, $tmp2$$Register);
7712     __ divl($tmp$$Register);
7713     // revert result back to negative
7714     __ lneg($tmp2$$Register, $dst$$Register);
7715     __ jmpb(Ldone);
7716 
7717     __ bind(Lpos);
7718     __ divl($tmp$$Register); // Use unsigned division
7719     __ xchgl($dst$$Register, $tmp2$$Register);
7720     // Fall through for final divide, tmp2 has 32 bit hi result
7721 
7722     __ bind(Lfast);
7723     // fast path: src is positive
7724     __ divl($tmp$$Register); // Use unsigned division
7725 
7726     __ bind(Ldone);
7727     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7728     if (con < 0) {
7729       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7730     }
7731   %}
7732   ins_pipe( pipe_slow );
7733 %}
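
// divL_eReg_imm32 does |dividend| / |imm| as schoolbook division by a 32-bit
// digit: if the high word is already below the divisor a single unsigned DIV
// suffices (the "fast" path); otherwise the high word is divided first and
// its remainder, left in EDX, feeds the division of the low word, after
// which the sign of the quotient is patched back in.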
7734 
7735 // Remainder Register Long (remainder fit into 32 bits)
7736 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7737   match(Set dst (ModL dst imm));
7738   effect( TEMP tmp, TEMP tmp2, KILL cr );
7739   ins_cost(1000);
7740   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7741             "CMP    $tmp,EDX\n\t"
7742             "JA,s   fast\n\t"
7743             "MOV    $tmp2,EAX\n\t"
7744             "MOV    EAX,EDX\n\t"
7745             "MOV    EDX,0\n\t"
7746             "JLE,s  pos\n\t"
7747             "LNEG   EAX : $tmp2\n\t"
7748             "DIV    $tmp # unsigned division\n\t"
7749             "MOV    EAX,$tmp2\n\t"
7750             "DIV    $tmp\n\t"
7751             "NEG    EDX\n\t"
7752             "JMP,s  done\n"
7753     "pos:\n\t"
7754             "DIV    $tmp\n\t"
7755             "MOV    EAX,$tmp2\n"
7756     "fast:\n\t"
7757             "DIV    $tmp\n"
7758     "done:\n\t"
7759             "MOV    EAX,EDX\n\t"
7760             "SAR    EDX,31\n\t" %}
7761   ins_encode %{
7762     int con = (int)$imm$$constant;
7763     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7764     int pcon = (con > 0) ? con : -con;
7765     Label  Lfast, Lpos, Ldone;
7766 
7767     __ movl($tmp$$Register, pcon);
7768     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7769     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7770 
7771     __ movl($tmp2$$Register, $dst$$Register); // save
7772     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7773     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7774     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7775 
7776     // Negative dividend.
7777     // convert value to positive to use unsigned division
7778     __ lneg($dst$$Register, $tmp2$$Register);
7779     __ divl($tmp$$Register);
7780     __ movl($dst$$Register, $tmp2$$Register);
7781     __ divl($tmp$$Register);
7782     // revert remainder back to negative
7783     __ negl(HIGH_FROM_LOW($dst$$Register));
7784     __ jmpb(Ldone);
7785 
7786     __ bind(Lpos);
7787     __ divl($tmp$$Register);
7788     __ movl($dst$$Register, $tmp2$$Register);
7789 
7790     __ bind(Lfast);
7791     // fast path: src is positive
7792     __ divl($tmp$$Register);
7793 
7794     __ bind(Ldone);
7795     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7796     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7797 
7798   %}
7799   ins_pipe( pipe_slow );
7800 %}
7801 
7802 // Integer Shift Instructions
7803 // Shift Left by one
7804 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7805   match(Set dst (LShiftI dst shift));
7806   effect(KILL cr);
7807 
7808   size(2);
7809   format %{ "SHL    $dst,$shift" %}
7810   opcode(0xD1, 0x4);  /* D1 /4 */
7811   ins_encode( OpcP, RegOpc( dst ) );
7812   ins_pipe( ialu_reg );
7813 %}
7814 
7815 // Shift Left by 8-bit immediate
7816 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7817   match(Set dst (LShiftI dst shift));
7818   effect(KILL cr);
7819 
7820   size(3);
7821   format %{ "SHL    $dst,$shift" %}
7822   opcode(0xC1, 0x4);  /* C1 /4 ib */
7823   ins_encode( RegOpcImm( dst, shift) );
7824   ins_pipe( ialu_reg );
7825 %}
7826 
7827 // Shift Left by variable
7828 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7829   match(Set dst (LShiftI dst shift));
7830   effect(KILL cr);
7831 
7832   size(2);
7833   format %{ "SHL    $dst,$shift" %}
7834   opcode(0xD3, 0x4);  /* D3 /4 */
7835   ins_encode( OpcP, RegOpc( dst ) );
7836   ins_pipe( ialu_reg_reg );
7837 %}
7838 
7839 // Arithmetic shift right by one
7840 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7841   match(Set dst (RShiftI dst shift));
7842   effect(KILL cr);
7843 
7844   size(2);
7845   format %{ "SAR    $dst,$shift" %}
7846   opcode(0xD1, 0x7);  /* D1 /7 */
7847   ins_encode( OpcP, RegOpc( dst ) );
7848   ins_pipe( ialu_reg );
7849 %}
7850 
7851 // Arithmetic shift right by one
7852 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7853   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7854   effect(KILL cr);
7855   format %{ "SAR    $dst,$shift" %}
7856   opcode(0xD1, 0x7);  /* D1 /7 */
7857   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7858   ins_pipe( ialu_mem_imm );
7859 %}
7860 
7861 // Arithmetic Shift Right by 8-bit immediate
7862 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7863   match(Set dst (RShiftI dst shift));
7864   effect(KILL cr);
7865 
7866   size(3);
7867   format %{ "SAR    $dst,$shift" %}
7868   opcode(0xC1, 0x7);  /* C1 /7 ib */
7869   ins_encode( RegOpcImm( dst, shift ) );
7870   ins_pipe( ialu_mem_imm );
7871 %}
7872 
7873 // Arithmetic Shift Right by 8-bit immediate
7874 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7875   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7876   effect(KILL cr);
7877 
7878   format %{ "SAR    $dst,$shift" %}
7879   opcode(0xC1, 0x7);  /* C1 /7 ib */
7880   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7881   ins_pipe( ialu_mem_imm );
7882 %}
7883 
7884 // Arithmetic Shift Right by variable
7885 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7886   match(Set dst (RShiftI dst shift));
7887   effect(KILL cr);
7888 
7889   size(2);
7890   format %{ "SAR    $dst,$shift" %}
7891   opcode(0xD3, 0x7);  /* D3 /7 */
7892   ins_encode( OpcP, RegOpc( dst ) );
7893   ins_pipe( ialu_reg_reg );
7894 %}
7895 
7896 // Logical shift right by one
7897 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7898   match(Set dst (URShiftI dst shift));
7899   effect(KILL cr);
7900 
7901   size(2);
7902   format %{ "SHR    $dst,$shift" %}
7903   opcode(0xD1, 0x5);  /* D1 /5 */
7904   ins_encode( OpcP, RegOpc( dst ) );
7905   ins_pipe( ialu_reg );
7906 %}
7907 
7908 // Logical Shift Right by 8-bit immediate
7909 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7910   match(Set dst (URShiftI dst shift));
7911   effect(KILL cr);
7912 
7913   size(3);
7914   format %{ "SHR    $dst,$shift" %}
7915   opcode(0xC1, 0x5);  /* C1 /5 ib */
7916   ins_encode( RegOpcImm( dst, shift) );
7917   ins_pipe( ialu_reg );
7918 %}
7919 
7920 
7921 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7922 // This idiom is used by the compiler for the i2b bytecode.
7923 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7924   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7925 
7926   size(3);
7927   format %{ "MOVSX  $dst,$src :8" %}
7928   ins_encode %{
7929     __ movsbl($dst$$Register, $src$$Register);
7930   %}
7931   ins_pipe(ialu_reg_reg);
7932 %}
7933 
7934 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
7936 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7937   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7938 
7939   size(3);
7940   format %{ "MOVSX  $dst,$src :16" %}
7941   ins_encode %{
7942     __ movswl($dst$$Register, $src$$Register);
7943   %}
7944   ins_pipe(ialu_reg_reg);
7945 %}
7946 
7947 
7948 // Logical Shift Right by variable
7949 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7950   match(Set dst (URShiftI dst shift));
7951   effect(KILL cr);
7952 
7953   size(2);
7954   format %{ "SHR    $dst,$shift" %}
7955   opcode(0xD3, 0x5);  /* D3 /5 */
7956   ins_encode( OpcP, RegOpc( dst ) );
7957   ins_pipe( ialu_reg_reg );
7958 %}
7959 
7960 
7961 //----------Logical Instructions-----------------------------------------------
7962 //----------Integer Logical Instructions---------------------------------------
7963 // And Instructions
7964 // And Register with Register
7965 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7966   match(Set dst (AndI dst src));
7967   effect(KILL cr);
7968 
7969   size(2);
7970   format %{ "AND    $dst,$src" %}
7971   opcode(0x23);
7972   ins_encode( OpcP, RegReg( dst, src) );
7973   ins_pipe( ialu_reg_reg );
7974 %}
7975 
7976 // And Register with Immediate
7977 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7978   match(Set dst (AndI dst src));
7979   effect(KILL cr);
7980 
7981   format %{ "AND    $dst,$src" %}
7982   opcode(0x81,0x04);  /* Opcode 81 /4 */
7983   // ins_encode( RegImm( dst, src) );
7984   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7985   ins_pipe( ialu_reg );
7986 %}
7987 
7988 // And Register with Memory
7989 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7990   match(Set dst (AndI dst (LoadI src)));
7991   effect(KILL cr);
7992 
7993   ins_cost(125);
7994   format %{ "AND    $dst,$src" %}
7995   opcode(0x23);
7996   ins_encode( OpcP, RegMem( dst, src) );
7997   ins_pipe( ialu_reg_mem );
7998 %}
7999 
8000 // And Memory with Register
8001 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8002   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8003   effect(KILL cr);
8004 
8005   ins_cost(150);
8006   format %{ "AND    $dst,$src" %}
8007   opcode(0x21);  /* Opcode 21 /r */
8008   ins_encode( OpcP, RegMem( src, dst ) );
8009   ins_pipe( ialu_mem_reg );
8010 %}
8011 
8012 // And Memory with Immediate
8013 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8014   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8015   effect(KILL cr);
8016 
8017   ins_cost(125);
8018   format %{ "AND    $dst,$src" %}
8019   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8020   // ins_encode( MemImm( dst, src) );
8021   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8022   ins_pipe( ialu_mem_imm );
8023 %}
8024 
8025 // BMI1 instructions
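// The BMI1 rules below recognize, when UseBMI1Instructions is on, the bit tricks
// that reach the matcher as plain AND/XOR/ADD/SUB trees.  Illustrative C
// equivalents (hypothetical helper names, sketch only):
//
//   uint32_t andn  (uint32_t x, uint32_t y) { return ~x & y;      }  // ANDN:   (x ^ -1) & y
//   uint32_t blsi  (uint32_t x)             { return x & (0 - x); }  // BLSI:   isolate lowest set bit
//   uint32_t blsmsk(uint32_t x)             { return x ^ (x - 1); }  // BLSMSK: mask up to lowest set bit
//   uint32_t blsr  (uint32_t x)             { return x & (x - 1); }  // BLSR:   clear lowest set bit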
8026 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8027   match(Set dst (AndI (XorI src1 minus_1) src2));
8028   predicate(UseBMI1Instructions);
8029   effect(KILL cr);
8030 
8031   format %{ "ANDNL  $dst, $src1, $src2" %}
8032 
8033   ins_encode %{
8034     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8035   %}
8036   ins_pipe(ialu_reg);
8037 %}
8038 
8039 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8040   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8041   predicate(UseBMI1Instructions);
8042   effect(KILL cr);
8043 
8044   ins_cost(125);
8045   format %{ "ANDNL  $dst, $src1, $src2" %}
8046 
8047   ins_encode %{
8048     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8049   %}
8050   ins_pipe(ialu_reg_mem);
8051 %}
8052 
8053 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8054   match(Set dst (AndI (SubI imm_zero src) src));
8055   predicate(UseBMI1Instructions);
8056   effect(KILL cr);
8057 
8058   format %{ "BLSIL  $dst, $src" %}
8059 
8060   ins_encode %{
8061     __ blsil($dst$$Register, $src$$Register);
8062   %}
8063   ins_pipe(ialu_reg);
8064 %}
8065 
8066 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8067   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8068   predicate(UseBMI1Instructions);
8069   effect(KILL cr);
8070 
8071   ins_cost(125);
8072   format %{ "BLSIL  $dst, $src" %}
8073 
8074   ins_encode %{
8075     __ blsil($dst$$Register, $src$$Address);
8076   %}
8077   ins_pipe(ialu_reg_mem);
8078 %}
8079 
8080 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8081 %{
8082   match(Set dst (XorI (AddI src minus_1) src));
8083   predicate(UseBMI1Instructions);
8084   effect(KILL cr);
8085 
8086   format %{ "BLSMSKL $dst, $src" %}
8087 
8088   ins_encode %{
8089     __ blsmskl($dst$$Register, $src$$Register);
8090   %}
8091 
8092   ins_pipe(ialu_reg);
8093 %}
8094 
8095 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8096 %{
8097   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8098   predicate(UseBMI1Instructions);
8099   effect(KILL cr);
8100 
8101   ins_cost(125);
8102   format %{ "BLSMSKL $dst, $src" %}
8103 
8104   ins_encode %{
8105     __ blsmskl($dst$$Register, $src$$Address);
8106   %}
8107 
8108   ins_pipe(ialu_reg_mem);
8109 %}
8110 
8111 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8112 %{
8113   match(Set dst (AndI (AddI src minus_1) src) );
8114   predicate(UseBMI1Instructions);
8115   effect(KILL cr);
8116 
8117   format %{ "BLSRL  $dst, $src" %}
8118 
8119   ins_encode %{
8120     __ blsrl($dst$$Register, $src$$Register);
8121   %}
8122 
8123   ins_pipe(ialu_reg);
8124 %}
8125 
8126 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8127 %{
8128   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8129   predicate(UseBMI1Instructions);
8130   effect(KILL cr);
8131 
8132   ins_cost(125);
8133   format %{ "BLSRL  $dst, $src" %}
8134 
8135   ins_encode %{
8136     __ blsrl($dst$$Register, $src$$Address);
8137   %}
8138 
8139   ins_pipe(ialu_reg_mem);
8140 %}
8141 
8142 // Or Instructions
8143 // Or Register with Register
8144 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8145   match(Set dst (OrI dst src));
8146   effect(KILL cr);
8147 
8148   size(2);
8149   format %{ "OR     $dst,$src" %}
8150   opcode(0x0B);
8151   ins_encode( OpcP, RegReg( dst, src) );
8152   ins_pipe( ialu_reg_reg );
8153 %}
8154 
8155 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8156   match(Set dst (OrI dst (CastP2X src)));
8157   effect(KILL cr);
8158 
8159   size(2);
8160   format %{ "OR     $dst,$src" %}
8161   opcode(0x0B);
8162   ins_encode( OpcP, RegReg( dst, src) );
8163   ins_pipe( ialu_reg_reg );
8164 %}
8165 
8166 
8167 // Or Register with Immediate
8168 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8169   match(Set dst (OrI dst src));
8170   effect(KILL cr);
8171 
8172   format %{ "OR     $dst,$src" %}
8173   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8174   // ins_encode( RegImm( dst, src) );
8175   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8176   ins_pipe( ialu_reg );
8177 %}
8178 
8179 // Or Register with Memory
8180 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8181   match(Set dst (OrI dst (LoadI src)));
8182   effect(KILL cr);
8183 
8184   ins_cost(125);
8185   format %{ "OR     $dst,$src" %}
8186   opcode(0x0B);
8187   ins_encode( OpcP, RegMem( dst, src) );
8188   ins_pipe( ialu_reg_mem );
8189 %}
8190 
8191 // Or Memory with Register
8192 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8193   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8194   effect(KILL cr);
8195 
8196   ins_cost(150);
8197   format %{ "OR     $dst,$src" %}
8198   opcode(0x09);  /* Opcode 09 /r */
8199   ins_encode( OpcP, RegMem( src, dst ) );
8200   ins_pipe( ialu_mem_reg );
8201 %}
8202 
8203 // Or Memory with Immediate
8204 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8205   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8206   effect(KILL cr);
8207 
8208   ins_cost(125);
8209   format %{ "OR     $dst,$src" %}
8210   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8211   // ins_encode( MemImm( dst, src) );
8212   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8213   ins_pipe( ialu_mem_imm );
8214 %}
8215 
8216 // ROL/ROR
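// Rotates reach the matcher as an OR of two opposite shifts; the expand
// templates and match rules below fold that tree onto ROL/ROR.  Matched shape,
// illustratively (hypothetical helpers, 0 < s < 32):
//
//   uint32_t rotl32(uint32_t x, unsigned s) { return (x << s) | (x >> (32 - s)); }
//   uint32_t rotr32(uint32_t x, unsigned s) { return (x >> s) | (x << (32 - s)); }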
8217 // ROL expand
8218 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8219   effect(USE_DEF dst, USE shift, KILL cr);
8220 
8221   format %{ "ROL    $dst, $shift" %}
8222   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8223   ins_encode( OpcP, RegOpc( dst ));
8224   ins_pipe( ialu_reg );
8225 %}
8226 
8227 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8228   effect(USE_DEF dst, USE shift, KILL cr);
8229 
8230   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8232   ins_encode( RegOpcImm(dst, shift) );
8233   ins_pipe(ialu_reg);
8234 %}
8235 
8236 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8237   effect(USE_DEF dst, USE shift, KILL cr);
8238 
8239   format %{ "ROL    $dst, $shift" %}
8240   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8241   ins_encode(OpcP, RegOpc(dst));
8242   ins_pipe( ialu_reg_reg );
8243 %}
8244 // end of ROL expand
8245 
8246 // ROL 32bit by one once
8247 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8248   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8249 
8250   expand %{
8251     rolI_eReg_imm1(dst, lshift, cr);
8252   %}
8253 %}
8254 
8255 // ROL 32bit var by imm8 once
8256 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8257   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8258   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8259 
8260   expand %{
8261     rolI_eReg_imm8(dst, lshift, cr);
8262   %}
8263 %}
8264 
8265 // ROL 32bit var by var once
8266 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8267   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8268 
8269   expand %{
8270     rolI_eReg_CL(dst, shift, cr);
8271   %}
8272 %}
8273 
8274 // ROL 32bit var by var once
8275 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8276   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8277 
8278   expand %{
8279     rolI_eReg_CL(dst, shift, cr);
8280   %}
8281 %}
8282 
8283 // ROR expand
8284 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8285   effect(USE_DEF dst, USE shift, KILL cr);
8286 
8287   format %{ "ROR    $dst, $shift" %}
8288   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8289   ins_encode( OpcP, RegOpc( dst ) );
8290   ins_pipe( ialu_reg );
8291 %}
8292 
8293 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8294   effect (USE_DEF dst, USE shift, KILL cr);
8295 
8296   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8298   ins_encode( RegOpcImm(dst, shift) );
8299   ins_pipe( ialu_reg );
8300 %}
8301 
8302 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8303   effect(USE_DEF dst, USE shift, KILL cr);
8304 
8305   format %{ "ROR    $dst, $shift" %}
8306   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8307   ins_encode(OpcP, RegOpc(dst));
8308   ins_pipe( ialu_reg_reg );
8309 %}
8310 // end of ROR expand
8311 
// ROR 32bit by one once
8313 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8314   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8315 
8316   expand %{
8317     rorI_eReg_imm1(dst, rshift, cr);
8318   %}
8319 %}
8320 
8321 // ROR 32bit by immI8 once
8322 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8323   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8324   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8325 
8326   expand %{
8327     rorI_eReg_imm8(dst, rshift, cr);
8328   %}
8329 %}
8330 
8331 // ROR 32bit var by var once
8332 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8333   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8334 
8335   expand %{
8336     rorI_eReg_CL(dst, shift, cr);
8337   %}
8338 %}
8339 
8340 // ROR 32bit var by var once
8341 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8342   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8343 
8344   expand %{
8345     rorI_eReg_CL(dst, shift, cr);
8346   %}
8347 %}
8348 
8349 // Xor Instructions
8350 // Xor Register with Register
8351 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8352   match(Set dst (XorI dst src));
8353   effect(KILL cr);
8354 
8355   size(2);
8356   format %{ "XOR    $dst,$src" %}
8357   opcode(0x33);
8358   ins_encode( OpcP, RegReg( dst, src) );
8359   ins_pipe( ialu_reg_reg );
8360 %}
8361 
8362 // Xor Register with Immediate -1
8363 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8364   match(Set dst (XorI dst imm));
8365 
8366   size(2);
8367   format %{ "NOT    $dst" %}
8368   ins_encode %{
8369      __ notl($dst$$Register);
8370   %}
8371   ins_pipe( ialu_reg );
8372 %}
8373 
8374 // Xor Register with Immediate
8375 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8376   match(Set dst (XorI dst src));
8377   effect(KILL cr);
8378 
8379   format %{ "XOR    $dst,$src" %}
8380   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8381   // ins_encode( RegImm( dst, src) );
8382   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8383   ins_pipe( ialu_reg );
8384 %}
8385 
8386 // Xor Register with Memory
8387 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8388   match(Set dst (XorI dst (LoadI src)));
8389   effect(KILL cr);
8390 
8391   ins_cost(125);
8392   format %{ "XOR    $dst,$src" %}
8393   opcode(0x33);
8394   ins_encode( OpcP, RegMem(dst, src) );
8395   ins_pipe( ialu_reg_mem );
8396 %}
8397 
8398 // Xor Memory with Register
8399 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8400   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8401   effect(KILL cr);
8402 
8403   ins_cost(150);
8404   format %{ "XOR    $dst,$src" %}
8405   opcode(0x31);  /* Opcode 31 /r */
8406   ins_encode( OpcP, RegMem( src, dst ) );
8407   ins_pipe( ialu_mem_reg );
8408 %}
8409 
8410 // Xor Memory with Immediate
8411 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8412   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8413   effect(KILL cr);
8414 
8415   ins_cost(125);
8416   format %{ "XOR    $dst,$src" %}
8417   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8418   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8419   ins_pipe( ialu_mem_imm );
8420 %}
8421 
8422 //----------Convert Int to Boolean---------------------------------------------
8423 
8424 instruct movI_nocopy(rRegI dst, rRegI src) %{
8425   effect( DEF dst, USE src );
8426   format %{ "MOV    $dst,$src" %}
8427   ins_encode( enc_Copy( dst, src) );
8428   ins_pipe( ialu_reg_reg );
8429 %}
8430 
8431 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8432   effect( USE_DEF dst, USE src, KILL cr );
8433 
8434   size(4);
8435   format %{ "NEG    $dst\n\t"
8436             "ADC    $dst,$src" %}
8437   ins_encode( neg_reg(dst),
8438               OpcRegReg(0x13,dst,src) );
8439   ins_pipe( ialu_reg_reg_long );
8440 %}
8441 
8442 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8443   match(Set dst (Conv2B src));
8444 
8445   expand %{
8446     movI_nocopy(dst,src);
8447     ci2b(dst,src,cr);
8448   %}
8449 %}
8450 
8451 instruct movP_nocopy(rRegI dst, eRegP src) %{
8452   effect( DEF dst, USE src );
8453   format %{ "MOV    $dst,$src" %}
8454   ins_encode( enc_Copy( dst, src) );
8455   ins_pipe( ialu_reg_reg );
8456 %}
8457 
8458 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8459   effect( USE_DEF dst, USE src, KILL cr );
8460   format %{ "NEG    $dst\n\t"
8461             "ADC    $dst,$src" %}
8462   ins_encode( neg_reg(dst),
8463               OpcRegReg(0x13,dst,src) );
8464   ins_pipe( ialu_reg_reg_long );
8465 %}
8466 
8467 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8468   match(Set dst (Conv2B src));
8469 
8470   expand %{
8471     movP_nocopy(dst,src);
8472     cp2b(dst,src,cr);
8473   %}
8474 %}
8475 
8476 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8477   match(Set dst (CmpLTMask p q));
8478   effect(KILL cr);
8479   ins_cost(400);
8480 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8482   format %{ "XOR    $dst,$dst\n\t"
8483             "CMP    $p,$q\n\t"
8484             "SETlt  $dst\n\t"
8485             "NEG    $dst" %}
8486   ins_encode %{
8487     Register Rp = $p$$Register;
8488     Register Rq = $q$$Register;
8489     Register Rd = $dst$$Register;
8490     Label done;
8491     __ xorl(Rd, Rd);
8492     __ cmpl(Rp, Rq);
8493     __ setb(Assembler::less, Rd);
8494     __ negl(Rd);
8495   %}
8496 
8497   ins_pipe(pipe_slow);
8498 %}
8499 
8500 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8501   match(Set dst (CmpLTMask dst zero));
8502   effect(DEF dst, KILL cr);
8503   ins_cost(100);
8504 
8505   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8506   ins_encode %{
8507   __ sarl($dst$$Register, 31);
8508   %}
8509   ins_pipe(ialu_reg);
8510 %}
8511 
8512 /* better to save a register than avoid a branch */
8513 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8514   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8515   effect(KILL cr);
8516   ins_cost(400);
8517   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8518             "JGE    done\n\t"
8519             "ADD    $p,$y\n"
8520             "done:  " %}
8521   ins_encode %{
8522     Register Rp = $p$$Register;
8523     Register Rq = $q$$Register;
8524     Register Ry = $y$$Register;
8525     Label done;
8526     __ subl(Rp, Rq);
8527     __ jccb(Assembler::greaterEqual, done);
8528     __ addl(Rp, Ry);
8529     __ bind(done);
8530   %}
8531 
8532   ins_pipe(pipe_cmplt);
8533 %}
8534 
8535 /* better to save a register than avoid a branch */
8536 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8537   match(Set y (AndI (CmpLTMask p q) y));
8538   effect(KILL cr);
8539 
8540   ins_cost(300);
8541 
8542   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8543             "JLT      done\n\t"
8544             "XORL     $y, $y\n"
8545             "done:  " %}
8546   ins_encode %{
8547     Register Rp = $p$$Register;
8548     Register Rq = $q$$Register;
8549     Register Ry = $y$$Register;
8550     Label done;
8551     __ cmpl(Rp, Rq);
8552     __ jccb(Assembler::less, done);
8553     __ xorl(Ry, Ry);
8554     __ bind(done);
8555   %}
8556 
8557   ins_pipe(pipe_cmplt);
8558 %}
8559 
8560 /* If I enable this, I encourage spilling in the inner loop of compress.
8561 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8562   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8563 */
8564 //----------Overflow Math Instructions-----------------------------------------
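// These rules only produce condition codes: overflow-checked arithmetic (e.g.
// the Math.addExact family) matches one of the forms below and then branches on
// the resulting OF.  Illustrative check for signed 32-bit addition (sketch only):
//
//   static bool add_overflows(int32_t a, int32_t b) {
//     int32_t r = (int32_t)((uint32_t)a + (uint32_t)b);   // wrapping add
//     return ((a ^ r) & (b ^ r)) < 0;                     // result sign differs from both inputs
//   }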
8565 
8566 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8567 %{
8568   match(Set cr (OverflowAddI op1 op2));
8569   effect(DEF cr, USE_KILL op1, USE op2);
8570 
8571   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8572 
8573   ins_encode %{
8574     __ addl($op1$$Register, $op2$$Register);
8575   %}
8576   ins_pipe(ialu_reg_reg);
8577 %}
8578 
8579 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8580 %{
8581   match(Set cr (OverflowAddI op1 op2));
8582   effect(DEF cr, USE_KILL op1, USE op2);
8583 
8584   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8585 
8586   ins_encode %{
8587     __ addl($op1$$Register, $op2$$constant);
8588   %}
8589   ins_pipe(ialu_reg_reg);
8590 %}
8591 
8592 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8593 %{
8594   match(Set cr (OverflowSubI op1 op2));
8595 
8596   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8597   ins_encode %{
8598     __ cmpl($op1$$Register, $op2$$Register);
8599   %}
8600   ins_pipe(ialu_reg_reg);
8601 %}
8602 
8603 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8604 %{
8605   match(Set cr (OverflowSubI op1 op2));
8606 
8607   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8608   ins_encode %{
8609     __ cmpl($op1$$Register, $op2$$constant);
8610   %}
8611   ins_pipe(ialu_reg_reg);
8612 %}
8613 
8614 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8615 %{
8616   match(Set cr (OverflowSubI zero op2));
8617   effect(DEF cr, USE_KILL op2);
8618 
8619   format %{ "NEG    $op2\t# overflow check int" %}
8620   ins_encode %{
8621     __ negl($op2$$Register);
8622   %}
8623   ins_pipe(ialu_reg_reg);
8624 %}
8625 
8626 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8627 %{
8628   match(Set cr (OverflowMulI op1 op2));
8629   effect(DEF cr, USE_KILL op1, USE op2);
8630 
8631   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8632   ins_encode %{
8633     __ imull($op1$$Register, $op2$$Register);
8634   %}
8635   ins_pipe(ialu_reg_reg_alu0);
8636 %}
8637 
8638 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8639 %{
8640   match(Set cr (OverflowMulI op1 op2));
8641   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8642 
8643   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8644   ins_encode %{
8645     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8646   %}
8647   ins_pipe(ialu_reg_reg_alu0);
8648 %}
8649 
8650 //----------Long Instructions------------------------------------------------
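// On this 32-bit port a long occupies a register pair: the operand names the
// low word and HIGH_FROM_LOW() its high-word partner (".lo"/".hi" in the
// formats).  The rules below chain the carry or borrow through the pair, e.g.
// the ADD/ADC form corresponds to this sketch (hypothetical helper):
//
//   void add64(uint32_t* lo, uint32_t* hi, uint32_t slo, uint32_t shi) {
//     uint32_t old = *lo;
//     *lo += slo;
//     *hi += shi + (*lo < old ? 1u : 0u);   // carry out of the low word
//   }
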
8651 // Add Long Register with Register
8652 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8653   match(Set dst (AddL dst src));
8654   effect(KILL cr);
8655   ins_cost(200);
8656   format %{ "ADD    $dst.lo,$src.lo\n\t"
8657             "ADC    $dst.hi,$src.hi" %}
8658   opcode(0x03, 0x13);
8659   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8660   ins_pipe( ialu_reg_reg_long );
8661 %}
8662 
8663 // Add Long Register with Immediate
8664 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8665   match(Set dst (AddL dst src));
8666   effect(KILL cr);
8667   format %{ "ADD    $dst.lo,$src.lo\n\t"
8668             "ADC    $dst.hi,$src.hi" %}
8669   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8670   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8671   ins_pipe( ialu_reg_long );
8672 %}
8673 
8674 // Add Long Register with Memory
8675 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8676   match(Set dst (AddL dst (LoadL mem)));
8677   effect(KILL cr);
8678   ins_cost(125);
8679   format %{ "ADD    $dst.lo,$mem\n\t"
8680             "ADC    $dst.hi,$mem+4" %}
8681   opcode(0x03, 0x13);
8682   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8683   ins_pipe( ialu_reg_long_mem );
8684 %}
8685 
8686 // Subtract Long Register with Register.
8687 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8688   match(Set dst (SubL dst src));
8689   effect(KILL cr);
8690   ins_cost(200);
8691   format %{ "SUB    $dst.lo,$src.lo\n\t"
8692             "SBB    $dst.hi,$src.hi" %}
8693   opcode(0x2B, 0x1B);
8694   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8695   ins_pipe( ialu_reg_reg_long );
8696 %}
8697 
8698 // Subtract Long Register with Immediate
8699 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8700   match(Set dst (SubL dst src));
8701   effect(KILL cr);
8702   format %{ "SUB    $dst.lo,$src.lo\n\t"
8703             "SBB    $dst.hi,$src.hi" %}
8704   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8705   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8706   ins_pipe( ialu_reg_long );
8707 %}
8708 
8709 // Subtract Long Register with Memory
8710 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8711   match(Set dst (SubL dst (LoadL mem)));
8712   effect(KILL cr);
8713   ins_cost(125);
8714   format %{ "SUB    $dst.lo,$mem\n\t"
8715             "SBB    $dst.hi,$mem+4" %}
8716   opcode(0x2B, 0x1B);
8717   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8718   ins_pipe( ialu_reg_long_mem );
8719 %}
8720 
8721 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8722   match(Set dst (SubL zero dst));
8723   effect(KILL cr);
8724   ins_cost(300);
8725   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8726   ins_encode( neg_long(dst) );
8727   ins_pipe( ialu_reg_reg_long );
8728 %}
8729 
8730 // And Long Register with Register
8731 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8732   match(Set dst (AndL dst src));
8733   effect(KILL cr);
8734   format %{ "AND    $dst.lo,$src.lo\n\t"
8735             "AND    $dst.hi,$src.hi" %}
8736   opcode(0x23,0x23);
8737   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8738   ins_pipe( ialu_reg_reg_long );
8739 %}
8740 
8741 // And Long Register with Immediate
8742 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8743   match(Set dst (AndL dst src));
8744   effect(KILL cr);
8745   format %{ "AND    $dst.lo,$src.lo\n\t"
8746             "AND    $dst.hi,$src.hi" %}
8747   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8748   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8749   ins_pipe( ialu_reg_long );
8750 %}
8751 
8752 // And Long Register with Memory
8753 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8754   match(Set dst (AndL dst (LoadL mem)));
8755   effect(KILL cr);
8756   ins_cost(125);
8757   format %{ "AND    $dst.lo,$mem\n\t"
8758             "AND    $dst.hi,$mem+4" %}
8759   opcode(0x23, 0x23);
8760   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8761   ins_pipe( ialu_reg_long_mem );
8762 %}
8763 
8764 // BMI1 instructions
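// The 64-bit BMI1 forms are synthesized from two 32-bit steps: the low word is
// handled first and the flags it leaves (ZF for BLSI, CF for BLSMSK/BLSR) decide
// whether the high word still needs the same treatment.  Illustrative 64-bit
// meaning (hypothetical helpers, sketch only):
//
//   uint64_t blsi64  (uint64_t x) { return x & (0 - x); }
//   uint64_t blsmsk64(uint64_t x) { return x ^ (x - 1); }
//   uint64_t blsr64  (uint64_t x) { return x & (x - 1); }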
8765 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8766   match(Set dst (AndL (XorL src1 minus_1) src2));
8767   predicate(UseBMI1Instructions);
8768   effect(KILL cr, TEMP dst);
8769 
8770   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8771             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8772          %}
8773 
8774   ins_encode %{
8775     Register Rdst = $dst$$Register;
8776     Register Rsrc1 = $src1$$Register;
8777     Register Rsrc2 = $src2$$Register;
8778     __ andnl(Rdst, Rsrc1, Rsrc2);
8779     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8780   %}
8781   ins_pipe(ialu_reg_reg_long);
8782 %}
8783 
8784 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8785   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8786   predicate(UseBMI1Instructions);
8787   effect(KILL cr, TEMP dst);
8788 
8789   ins_cost(125);
8790   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8791             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8792          %}
8793 
8794   ins_encode %{
8795     Register Rdst = $dst$$Register;
8796     Register Rsrc1 = $src1$$Register;
8797     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8798 
8799     __ andnl(Rdst, Rsrc1, $src2$$Address);
8800     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8801   %}
8802   ins_pipe(ialu_reg_mem);
8803 %}
8804 
8805 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8806   match(Set dst (AndL (SubL imm_zero src) src));
8807   predicate(UseBMI1Instructions);
8808   effect(KILL cr, TEMP dst);
8809 
8810   format %{ "MOVL   $dst.hi, 0\n\t"
8811             "BLSIL  $dst.lo, $src.lo\n\t"
8812             "JNZ    done\n\t"
8813             "BLSIL  $dst.hi, $src.hi\n"
8814             "done:"
8815          %}
8816 
8817   ins_encode %{
8818     Label done;
8819     Register Rdst = $dst$$Register;
8820     Register Rsrc = $src$$Register;
8821     __ movl(HIGH_FROM_LOW(Rdst), 0);
8822     __ blsil(Rdst, Rsrc);
8823     __ jccb(Assembler::notZero, done);
8824     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8825     __ bind(done);
8826   %}
8827   ins_pipe(ialu_reg);
8828 %}
8829 
8830 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8831   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8832   predicate(UseBMI1Instructions);
8833   effect(KILL cr, TEMP dst);
8834 
8835   ins_cost(125);
8836   format %{ "MOVL   $dst.hi, 0\n\t"
8837             "BLSIL  $dst.lo, $src\n\t"
8838             "JNZ    done\n\t"
8839             "BLSIL  $dst.hi, $src+4\n"
8840             "done:"
8841          %}
8842 
8843   ins_encode %{
8844     Label done;
8845     Register Rdst = $dst$$Register;
8846     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8847 
8848     __ movl(HIGH_FROM_LOW(Rdst), 0);
8849     __ blsil(Rdst, $src$$Address);
8850     __ jccb(Assembler::notZero, done);
8851     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8852     __ bind(done);
8853   %}
8854   ins_pipe(ialu_reg_mem);
8855 %}
8856 
8857 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8858 %{
8859   match(Set dst (XorL (AddL src minus_1) src));
8860   predicate(UseBMI1Instructions);
8861   effect(KILL cr, TEMP dst);
8862 
8863   format %{ "MOVL    $dst.hi, 0\n\t"
8864             "BLSMSKL $dst.lo, $src.lo\n\t"
8865             "JNC     done\n\t"
8866             "BLSMSKL $dst.hi, $src.hi\n"
8867             "done:"
8868          %}
8869 
8870   ins_encode %{
8871     Label done;
8872     Register Rdst = $dst$$Register;
8873     Register Rsrc = $src$$Register;
8874     __ movl(HIGH_FROM_LOW(Rdst), 0);
8875     __ blsmskl(Rdst, Rsrc);
8876     __ jccb(Assembler::carryClear, done);
8877     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8878     __ bind(done);
8879   %}
8880 
8881   ins_pipe(ialu_reg);
8882 %}
8883 
8884 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8885 %{
8886   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8887   predicate(UseBMI1Instructions);
8888   effect(KILL cr, TEMP dst);
8889 
8890   ins_cost(125);
8891   format %{ "MOVL    $dst.hi, 0\n\t"
8892             "BLSMSKL $dst.lo, $src\n\t"
8893             "JNC     done\n\t"
8894             "BLSMSKL $dst.hi, $src+4\n"
8895             "done:"
8896          %}
8897 
8898   ins_encode %{
8899     Label done;
8900     Register Rdst = $dst$$Register;
8901     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8902 
8903     __ movl(HIGH_FROM_LOW(Rdst), 0);
8904     __ blsmskl(Rdst, $src$$Address);
8905     __ jccb(Assembler::carryClear, done);
8906     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8907     __ bind(done);
8908   %}
8909 
8910   ins_pipe(ialu_reg_mem);
8911 %}
8912 
8913 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8914 %{
8915   match(Set dst (AndL (AddL src minus_1) src) );
8916   predicate(UseBMI1Instructions);
8917   effect(KILL cr, TEMP dst);
8918 
8919   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8920             "BLSRL  $dst.lo, $src.lo\n\t"
8921             "JNC    done\n\t"
8922             "BLSRL  $dst.hi, $src.hi\n"
8923             "done:"
8924   %}
8925 
8926   ins_encode %{
8927     Label done;
8928     Register Rdst = $dst$$Register;
8929     Register Rsrc = $src$$Register;
8930     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8931     __ blsrl(Rdst, Rsrc);
8932     __ jccb(Assembler::carryClear, done);
8933     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8934     __ bind(done);
8935   %}
8936 
8937   ins_pipe(ialu_reg);
8938 %}
8939 
8940 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8941 %{
8942   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8943   predicate(UseBMI1Instructions);
8944   effect(KILL cr, TEMP dst);
8945 
8946   ins_cost(125);
8947   format %{ "MOVL   $dst.hi, $src+4\n\t"
8948             "BLSRL  $dst.lo, $src\n\t"
8949             "JNC    done\n\t"
8950             "BLSRL  $dst.hi, $src+4\n"
8951             "done:"
8952   %}
8953 
8954   ins_encode %{
8955     Label done;
8956     Register Rdst = $dst$$Register;
8957     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8958     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8959     __ blsrl(Rdst, $src$$Address);
8960     __ jccb(Assembler::carryClear, done);
8961     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8962     __ bind(done);
8963   %}
8964 
8965   ins_pipe(ialu_reg_mem);
8966 %}
8967 
8968 // Or Long Register with Register
8969 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8970   match(Set dst (OrL dst src));
8971   effect(KILL cr);
8972   format %{ "OR     $dst.lo,$src.lo\n\t"
8973             "OR     $dst.hi,$src.hi" %}
8974   opcode(0x0B,0x0B);
8975   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8976   ins_pipe( ialu_reg_reg_long );
8977 %}
8978 
8979 // Or Long Register with Immediate
8980 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8981   match(Set dst (OrL dst src));
8982   effect(KILL cr);
8983   format %{ "OR     $dst.lo,$src.lo\n\t"
8984             "OR     $dst.hi,$src.hi" %}
8985   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8986   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8987   ins_pipe( ialu_reg_long );
8988 %}
8989 
8990 // Or Long Register with Memory
8991 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8992   match(Set dst (OrL dst (LoadL mem)));
8993   effect(KILL cr);
8994   ins_cost(125);
8995   format %{ "OR     $dst.lo,$mem\n\t"
8996             "OR     $dst.hi,$mem+4" %}
8997   opcode(0x0B,0x0B);
8998   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8999   ins_pipe( ialu_reg_long_mem );
9000 %}
9001 
9002 // Xor Long Register with Register
9003 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9004   match(Set dst (XorL dst src));
9005   effect(KILL cr);
9006   format %{ "XOR    $dst.lo,$src.lo\n\t"
9007             "XOR    $dst.hi,$src.hi" %}
9008   opcode(0x33,0x33);
9009   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9010   ins_pipe( ialu_reg_reg_long );
9011 %}
9012 
9013 // Xor Long Register with Immediate -1
9014 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9015   match(Set dst (XorL dst imm));
9016   format %{ "NOT    $dst.lo\n\t"
9017             "NOT    $dst.hi" %}
9018   ins_encode %{
9019      __ notl($dst$$Register);
9020      __ notl(HIGH_FROM_LOW($dst$$Register));
9021   %}
9022   ins_pipe( ialu_reg_long );
9023 %}
9024 
9025 // Xor Long Register with Immediate
9026 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9027   match(Set dst (XorL dst src));
9028   effect(KILL cr);
9029   format %{ "XOR    $dst.lo,$src.lo\n\t"
9030             "XOR    $dst.hi,$src.hi" %}
9031   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9032   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9033   ins_pipe( ialu_reg_long );
9034 %}
9035 
9036 // Xor Long Register with Memory
9037 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9038   match(Set dst (XorL dst (LoadL mem)));
9039   effect(KILL cr);
9040   ins_cost(125);
9041   format %{ "XOR    $dst.lo,$mem\n\t"
9042             "XOR    $dst.hi,$mem+4" %}
9043   opcode(0x33,0x33);
9044   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9045   ins_pipe( ialu_reg_long_mem );
9046 %}
9047 
9048 // Shift Left Long by 1
9049 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9050   predicate(UseNewLongLShift);
9051   match(Set dst (LShiftL dst cnt));
9052   effect(KILL cr);
9053   ins_cost(100);
9054   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9055             "ADC    $dst.hi,$dst.hi" %}
9056   ins_encode %{
9057     __ addl($dst$$Register,$dst$$Register);
9058     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9059   %}
9060   ins_pipe( ialu_reg_long );
9061 %}
9062 
9063 // Shift Left Long by 2
9064 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9065   predicate(UseNewLongLShift);
9066   match(Set dst (LShiftL dst cnt));
9067   effect(KILL cr);
9068   ins_cost(100);
9069   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9070             "ADC    $dst.hi,$dst.hi\n\t"
9071             "ADD    $dst.lo,$dst.lo\n\t"
9072             "ADC    $dst.hi,$dst.hi" %}
9073   ins_encode %{
9074     __ addl($dst$$Register,$dst$$Register);
9075     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9076     __ addl($dst$$Register,$dst$$Register);
9077     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9078   %}
9079   ins_pipe( ialu_reg_long );
9080 %}
9081 
9082 // Shift Left Long by 3
9083 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9084   predicate(UseNewLongLShift);
9085   match(Set dst (LShiftL dst cnt));
9086   effect(KILL cr);
9087   ins_cost(100);
9088   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9089             "ADC    $dst.hi,$dst.hi\n\t"
9090             "ADD    $dst.lo,$dst.lo\n\t"
9091             "ADC    $dst.hi,$dst.hi\n\t"
9092             "ADD    $dst.lo,$dst.lo\n\t"
9093             "ADC    $dst.hi,$dst.hi" %}
9094   ins_encode %{
9095     __ addl($dst$$Register,$dst$$Register);
9096     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9097     __ addl($dst$$Register,$dst$$Register);
9098     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9099     __ addl($dst$$Register,$dst$$Register);
9100     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9101   %}
9102   ins_pipe( ialu_reg_long );
9103 %}
9104 
9105 // Shift Left Long by 1-31
9106 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9107   match(Set dst (LShiftL dst cnt));
9108   effect(KILL cr);
9109   ins_cost(200);
9110   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9111             "SHL    $dst.lo,$cnt" %}
9112   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9113   ins_encode( move_long_small_shift(dst,cnt) );
9114   ins_pipe( ialu_reg_long );
9115 %}
9116 
9117 // Shift Left Long by 32-63
9118 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9119   match(Set dst (LShiftL dst cnt));
9120   effect(KILL cr);
9121   ins_cost(300);
9122   format %{ "MOV    $dst.hi,$dst.lo\n"
9123           "\tSHL    $dst.hi,$cnt-32\n"
9124           "\tXOR    $dst.lo,$dst.lo" %}
9125   opcode(0xC1, 0x4);  /* C1 /4 ib */
9126   ins_encode( move_long_big_shift_clr(dst,cnt) );
9127   ins_pipe( ialu_reg_long );
9128 %}
9129 
9130 // Shift Left Long by variable
9131 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9132   match(Set dst (LShiftL dst shift));
9133   effect(KILL cr);
9134   ins_cost(500+200);
9135   size(17);
9136   format %{ "TEST   $shift,32\n\t"
9137             "JEQ,s  small\n\t"
9138             "MOV    $dst.hi,$dst.lo\n\t"
9139             "XOR    $dst.lo,$dst.lo\n"
9140     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9141             "SHL    $dst.lo,$shift" %}
9142   ins_encode( shift_left_long( dst, shift ) );
9143   ins_pipe( pipe_slow );
9144 %}
9145 
9146 // Shift Right Long by 1-31
9147 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9148   match(Set dst (URShiftL dst cnt));
9149   effect(KILL cr);
9150   ins_cost(200);
9151   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9152             "SHR    $dst.hi,$cnt" %}
9153   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9154   ins_encode( move_long_small_shift(dst,cnt) );
9155   ins_pipe( ialu_reg_long );
9156 %}
9157 
9158 // Shift Right Long by 32-63
9159 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9160   match(Set dst (URShiftL dst cnt));
9161   effect(KILL cr);
9162   ins_cost(300);
9163   format %{ "MOV    $dst.lo,$dst.hi\n"
9164           "\tSHR    $dst.lo,$cnt-32\n"
9165           "\tXOR    $dst.hi,$dst.hi" %}
9166   opcode(0xC1, 0x5);  /* C1 /5 ib */
9167   ins_encode( move_long_big_shift_clr(dst,cnt) );
9168   ins_pipe( ialu_reg_long );
9169 %}
9170 
9171 // Shift Right Long by variable
9172 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9173   match(Set dst (URShiftL dst shift));
9174   effect(KILL cr);
9175   ins_cost(600);
9176   size(17);
9177   format %{ "TEST   $shift,32\n\t"
9178             "JEQ,s  small\n\t"
9179             "MOV    $dst.lo,$dst.hi\n\t"
9180             "XOR    $dst.hi,$dst.hi\n"
9181     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9182             "SHR    $dst.hi,$shift" %}
9183   ins_encode( shift_right_long( dst, shift ) );
9184   ins_pipe( pipe_slow );
9185 %}
9186 
9187 // Shift Right Long by 1-31
9188 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9189   match(Set dst (RShiftL dst cnt));
9190   effect(KILL cr);
9191   ins_cost(200);
9192   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9193             "SAR    $dst.hi,$cnt" %}
9194   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9195   ins_encode( move_long_small_shift(dst,cnt) );
9196   ins_pipe( ialu_reg_long );
9197 %}
9198 
9199 // Shift Right Long by 32-63
9200 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9201   match(Set dst (RShiftL dst cnt));
9202   effect(KILL cr);
9203   ins_cost(300);
9204   format %{ "MOV    $dst.lo,$dst.hi\n"
9205           "\tSAR    $dst.lo,$cnt-32\n"
9206           "\tSAR    $dst.hi,31" %}
9207   opcode(0xC1, 0x7);  /* C1 /7 ib */
9208   ins_encode( move_long_big_shift_sign(dst,cnt) );
9209   ins_pipe( ialu_reg_long );
9210 %}
9211 
9212 // Shift Right arithmetic Long by variable
9213 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9214   match(Set dst (RShiftL dst shift));
9215   effect(KILL cr);
9216   ins_cost(600);
9217   size(18);
9218   format %{ "TEST   $shift,32\n\t"
9219             "JEQ,s  small\n\t"
9220             "MOV    $dst.lo,$dst.hi\n\t"
9221             "SAR    $dst.hi,31\n"
9222     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9223             "SAR    $dst.hi,$shift" %}
9224   ins_encode( shift_right_arith_long( dst, shift ) );
9225   ins_pipe( pipe_slow );
9226 %}
9227 
9228 
9229 //----------Double Instructions------------------------------------------------
9230 // Double Math
9231 
9232 // Compare & branch
9233 
9234 // P6 version of float compare, sets condition codes in EFLAGS
9235 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9236   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9237   match(Set cr (CmpD src1 src2));
9238   effect(KILL rax);
9239   ins_cost(150);
9240   format %{ "FLD    $src1\n\t"
9241             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9242             "JNP    exit\n\t"
9243             "MOV    ah,1       // saw a NaN, set CF\n\t"
9244             "SAHF\n"
9245      "exit:\tNOP               // avoid branch to branch" %}
9246   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9247   ins_encode( Push_Reg_DPR(src1),
9248               OpcP, RegOpc(src2),
9249               cmpF_P6_fixup );
9250   ins_pipe( pipe_slow );
9251 %}
9252 
9253 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9254   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9255   match(Set cr (CmpD src1 src2));
9256   ins_cost(150);
9257   format %{ "FLD    $src1\n\t"
9258             "FUCOMIP ST,$src2  // P6 instruction" %}
9259   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9260   ins_encode( Push_Reg_DPR(src1),
9261               OpcP, RegOpc(src2));
9262   ins_pipe( pipe_slow );
9263 %}
9264 
9265 // Compare & branch
9266 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9267   predicate(UseSSE<=1);
9268   match(Set cr (CmpD src1 src2));
9269   effect(KILL rax);
9270   ins_cost(200);
9271   format %{ "FLD    $src1\n\t"
9272             "FCOMp  $src2\n\t"
9273             "FNSTSW AX\n\t"
9274             "TEST   AX,0x400\n\t"
9275             "JZ,s   flags\n\t"
9276             "MOV    AH,1\t# unordered treat as LT\n"
9277     "flags:\tSAHF" %}
9278   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9279   ins_encode( Push_Reg_DPR(src1),
9280               OpcP, RegOpc(src2),
9281               fpu_flags);
9282   ins_pipe( pipe_slow );
9283 %}
9284 
9285 // Compare vs zero into -1,0,1
9286 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9287   predicate(UseSSE<=1);
9288   match(Set dst (CmpD3 src1 zero));
9289   effect(KILL cr, KILL rax);
9290   ins_cost(280);
9291   format %{ "FTSTD  $dst,$src1" %}
9292   opcode(0xE4, 0xD9);
9293   ins_encode( Push_Reg_DPR(src1),
9294               OpcS, OpcP, PopFPU,
9295               CmpF_Result(dst));
9296   ins_pipe( pipe_slow );
9297 %}
9298 
9299 // Compare into -1,0,1
9300 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9301   predicate(UseSSE<=1);
9302   match(Set dst (CmpD3 src1 src2));
9303   effect(KILL cr, KILL rax);
9304   ins_cost(300);
9305   format %{ "FCMPD  $dst,$src1,$src2" %}
9306   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9307   ins_encode( Push_Reg_DPR(src1),
9308               OpcP, RegOpc(src2),
9309               CmpF_Result(dst));
9310   ins_pipe( pipe_slow );
9311 %}
9312 
9313 // float compare and set condition codes in EFLAGS by XMM regs
9314 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9315   predicate(UseSSE>=2);
9316   match(Set cr (CmpD src1 src2));
9317   ins_cost(145);
9318   format %{ "UCOMISD $src1,$src2\n\t"
9319             "JNP,s   exit\n\t"
9320             "PUSHF\t# saw NaN, set CF\n\t"
9321             "AND     [rsp], #0xffffff2b\n\t"
9322             "POPF\n"
9323     "exit:" %}
9324   ins_encode %{
9325     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9326     emit_cmpfp_fixup(_masm);
9327   %}
9328   ins_pipe( pipe_slow );
9329 %}
9330 
9331 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9332   predicate(UseSSE>=2);
9333   match(Set cr (CmpD src1 src2));
9334   ins_cost(100);
9335   format %{ "UCOMISD $src1,$src2" %}
9336   ins_encode %{
9337     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9338   %}
9339   ins_pipe( pipe_slow );
9340 %}
9341 
9342 // float compare and set condition codes in EFLAGS by XMM regs
9343 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9344   predicate(UseSSE>=2);
9345   match(Set cr (CmpD src1 (LoadD src2)));
9346   ins_cost(145);
9347   format %{ "UCOMISD $src1,$src2\n\t"
9348             "JNP,s   exit\n\t"
9349             "PUSHF\t# saw NaN, set CF\n\t"
9350             "AND     [rsp], #0xffffff2b\n\t"
9351             "POPF\n"
9352     "exit:" %}
9353   ins_encode %{
9354     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9355     emit_cmpfp_fixup(_masm);
9356   %}
9357   ins_pipe( pipe_slow );
9358 %}
9359 
9360 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9361   predicate(UseSSE>=2);
9362   match(Set cr (CmpD src1 (LoadD src2)));
9363   ins_cost(100);
9364   format %{ "UCOMISD $src1,$src2" %}
9365   ins_encode %{
9366     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9367   %}
9368   ins_pipe( pipe_slow );
9369 %}
9370 
9371 // Compare into -1,0,1 in XMM
9372 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9373   predicate(UseSSE>=2);
9374   match(Set dst (CmpD3 src1 src2));
9375   effect(KILL cr);
9376   ins_cost(255);
9377   format %{ "UCOMISD $src1, $src2\n\t"
9378             "MOV     $dst, #-1\n\t"
9379             "JP,s    done\n\t"
9380             "JB,s    done\n\t"
9381             "SETNE   $dst\n\t"
9382             "MOVZB   $dst, $dst\n"
9383     "done:" %}
9384   ins_encode %{
9385     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9386     emit_cmpfp3(_masm, $dst$$Register);
9387   %}
9388   ins_pipe( pipe_slow );
9389 %}
9390 
9391 // Compare into -1,0,1 in XMM and memory
9392 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9393   predicate(UseSSE>=2);
9394   match(Set dst (CmpD3 src1 (LoadD src2)));
9395   effect(KILL cr);
9396   ins_cost(275);
9397   format %{ "UCOMISD $src1, $src2\n\t"
9398             "MOV     $dst, #-1\n\t"
9399             "JP,s    done\n\t"
9400             "JB,s    done\n\t"
9401             "SETNE   $dst\n\t"
9402             "MOVZB   $dst, $dst\n"
9403     "done:" %}
9404   ins_encode %{
9405     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9406     emit_cmpfp3(_masm, $dst$$Register);
9407   %}
9408   ins_pipe( pipe_slow );
9409 %}
9410 
9411 
9412 instruct subDPR_reg(regDPR dst, regDPR src) %{
9413   predicate (UseSSE <=1);
9414   match(Set dst (SubD dst src));
9415 
9416   format %{ "FLD    $src\n\t"
9417             "DSUBp  $dst,ST" %}
9418   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9419   ins_cost(150);
9420   ins_encode( Push_Reg_DPR(src),
9421               OpcP, RegOpc(dst) );
9422   ins_pipe( fpu_reg_reg );
9423 %}
9424 
9425 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9426   predicate (UseSSE <=1);
9427   match(Set dst (RoundDouble (SubD src1 src2)));
9428   ins_cost(250);
9429 
9430   format %{ "FLD    $src2\n\t"
9431             "DSUB   ST,$src1\n\t"
9432             "FSTP_D $dst\t# D-round" %}
9433   opcode(0xD8, 0x5);
9434   ins_encode( Push_Reg_DPR(src2),
9435               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9436   ins_pipe( fpu_mem_reg_reg );
9437 %}
9438 
9439 
9440 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9441   predicate (UseSSE <=1);
9442   match(Set dst (SubD dst (LoadD src)));
9443   ins_cost(150);
9444 
9445   format %{ "FLD    $src\n\t"
9446             "DSUBp  $dst,ST" %}
9447   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9448   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9449               OpcP, RegOpc(dst) );
9450   ins_pipe( fpu_reg_mem );
9451 %}
9452 
9453 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9454   predicate (UseSSE<=1);
9455   match(Set dst (AbsD src));
9456   ins_cost(100);
9457   format %{ "FABS" %}
9458   opcode(0xE1, 0xD9);
9459   ins_encode( OpcS, OpcP );
9460   ins_pipe( fpu_reg_reg );
9461 %}
9462 
9463 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9464   predicate(UseSSE<=1);
9465   match(Set dst (NegD src));
9466   ins_cost(100);
9467   format %{ "FCHS" %}
9468   opcode(0xE0, 0xD9);
9469   ins_encode( OpcS, OpcP );
9470   ins_pipe( fpu_reg_reg );
9471 %}
9472 
9473 instruct addDPR_reg(regDPR dst, regDPR src) %{
9474   predicate(UseSSE<=1);
9475   match(Set dst (AddD dst src));
9476   format %{ "FLD    $src\n\t"
9477             "DADD   $dst,ST" %}
9478   size(4);
9479   ins_cost(150);
9480   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9481   ins_encode( Push_Reg_DPR(src),
9482               OpcP, RegOpc(dst) );
9483   ins_pipe( fpu_reg_reg );
9484 %}
9485 
9486 
9487 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9488   predicate(UseSSE<=1);
9489   match(Set dst (RoundDouble (AddD src1 src2)));
9490   ins_cost(250);
9491 
9492   format %{ "FLD    $src2\n\t"
9493             "DADD   ST,$src1\n\t"
9494             "FSTP_D $dst\t# D-round" %}
9495   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9496   ins_encode( Push_Reg_DPR(src2),
9497               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9498   ins_pipe( fpu_mem_reg_reg );
9499 %}
9500 
9501 
9502 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9503   predicate(UseSSE<=1);
9504   match(Set dst (AddD dst (LoadD src)));
9505   ins_cost(150);
9506 
9507   format %{ "FLD    $src\n\t"
9508             "DADDp  $dst,ST" %}
9509   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9510   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9511               OpcP, RegOpc(dst) );
9512   ins_pipe( fpu_reg_mem );
9513 %}
9514 
9515 // add-to-memory
9516 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9517   predicate(UseSSE<=1);
9518   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9519   ins_cost(150);
9520 
9521   format %{ "FLD_D  $dst\n\t"
9522             "DADD   ST,$src\n\t"
9523             "FST_D  $dst" %}
9524   opcode(0xDD, 0x0);
9525   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9526               Opcode(0xD8), RegOpc(src),
9527               set_instruction_start,
9528               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9529   ins_pipe( fpu_reg_mem );
9530 %}
9531 
9532 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9533   predicate(UseSSE<=1);
9534   match(Set dst (AddD dst con));
9535   ins_cost(125);
9536   format %{ "FLD1\n\t"
9537             "DADDp  $dst,ST" %}
9538   ins_encode %{
9539     __ fld1();
9540     __ faddp($dst$$reg);
9541   %}
9542   ins_pipe(fpu_reg);
9543 %}
9544 
9545 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9546   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9547   match(Set dst (AddD dst con));
9548   ins_cost(200);
9549   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9550             "DADDp  $dst,ST" %}
9551   ins_encode %{
9552     __ fld_d($constantaddress($con));
9553     __ faddp($dst$$reg);
9554   %}
9555   ins_pipe(fpu_reg_mem);
9556 %}
9557 
9558 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9559   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9560   match(Set dst (RoundDouble (AddD src con)));
9561   ins_cost(200);
9562   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9563             "DADD   ST,$src\n\t"
9564             "FSTP_D $dst\t# D-round" %}
9565   ins_encode %{
9566     __ fld_d($constantaddress($con));
9567     __ fadd($src$$reg);
9568     __ fstp_d(Address(rsp, $dst$$disp));
9569   %}
9570   ins_pipe(fpu_mem_reg_con);
9571 %}
9572 
9573 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9574   predicate(UseSSE<=1);
9575   match(Set dst (MulD dst src));
9576   format %{ "FLD    $src\n\t"
9577             "DMULp  $dst,ST" %}
9578   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9579   ins_cost(150);
9580   ins_encode( Push_Reg_DPR(src),
9581               OpcP, RegOpc(dst) );
9582   ins_pipe( fpu_reg_reg );
9583 %}
9584 
9585 // Strict FP instruction biases argument before multiply then
9586 // biases result to avoid double rounding of subnormals.
9587 //
9588 // scale arg1 by multiplying arg1 by 2^(-15360)
9589 // load arg2
9590 // multiply scaled arg1 by arg2
9591 // rescale product by 2^(15360)
9592 //
9593 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9594   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9595   match(Set dst (MulD dst src));
9596   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9597 
9598   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9599             "DMULp  $dst,ST\n\t"
9600             "FLD    $src\n\t"
9601             "DMULp  $dst,ST\n\t"
9602             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9603             "DMULp  $dst,ST\n\t" %}
9604   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9605   ins_encode( strictfp_bias1(dst),
9606               Push_Reg_DPR(src),
9607               OpcP, RegOpc(dst),
9608               strictfp_bias2(dst) );
9609   ins_pipe( fpu_reg_reg );
9610 %}
9611 
9612 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9613   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9614   match(Set dst (MulD dst con));
9615   ins_cost(200);
9616   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9617             "DMULp  $dst,ST" %}
9618   ins_encode %{
9619     __ fld_d($constantaddress($con));
9620     __ fmulp($dst$$reg);
9621   %}
9622   ins_pipe(fpu_reg_mem);
9623 %}
9624 
9625 
9626 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9627   predicate( UseSSE<=1 );
9628   match(Set dst (MulD dst (LoadD src)));
9629   ins_cost(200);
9630   format %{ "FLD_D  $src\n\t"
9631             "DMULp  $dst,ST" %}
9632   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9633   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9634               OpcP, RegOpc(dst) );
9635   ins_pipe( fpu_reg_mem );
9636 %}
9637 
9638 //
9639 // Cisc-alternate to reg-reg multiply
9640 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9641   predicate( UseSSE<=1 );
9642   match(Set dst (MulD src (LoadD mem)));
9643   ins_cost(250);
9644   format %{ "FLD_D  $mem\n\t"
9645             "DMUL   ST,$src\n\t"
9646             "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
9648   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9649               OpcReg_FPR(src),
9650               Pop_Reg_DPR(dst) );
9651   ins_pipe( fpu_reg_reg_mem );
9652 %}
9653 
9654 
9655 // MACRO3 -- addDPR a mulDPR
9656 // This instruction is a '2-address' instruction in that the result goes
9657 // back to src2.  This eliminates a move from the macro; possibly the
9658 // register allocator will have to add it back (and maybe not).
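// Illustratively, the matched tree is src2 = (src0 * src1) + src2; the emitted
// FLD src0 / DMUL ST,src1 / DADDp src2,ST sequence pops the result straight
// into src2, which is why no extra move shows up here.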
9659 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9660   predicate( UseSSE<=1 );
9661   match(Set src2 (AddD (MulD src0 src1) src2));
9662   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9663             "DMUL   ST,$src1\n\t"
9664             "DADDp  $src2,ST" %}
9665   ins_cost(250);
9666   opcode(0xDD); /* LoadD DD /0 */
9667   ins_encode( Push_Reg_FPR(src0),
9668               FMul_ST_reg(src1),
9669               FAddP_reg_ST(src2) );
9670   ins_pipe( fpu_reg_reg_reg );
9671 %}
9672 
9673 
9674 // MACRO3 -- subDPR a mulDPR
9675 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9676   predicate( UseSSE<=1 );
9677   match(Set src2 (SubD (MulD src0 src1) src2));
9678   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9679             "DMUL   ST,$src1\n\t"
9680             "DSUBRp $src2,ST" %}
9681   ins_cost(250);
9682   ins_encode( Push_Reg_FPR(src0),
9683               FMul_ST_reg(src1),
9684               Opcode(0xDE), Opc_plus(0xE0,src2));
9685   ins_pipe( fpu_reg_reg_reg );
9686 %}
9687 
9688 
9689 instruct divDPR_reg(regDPR dst, regDPR src) %{
9690   predicate( UseSSE<=1 );
9691   match(Set dst (DivD dst src));
9692 
9693   format %{ "FLD    $src\n\t"
9694             "FDIVp  $dst,ST" %}
9695   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9696   ins_cost(150);
9697   ins_encode( Push_Reg_DPR(src),
9698               OpcP, RegOpc(dst) );
9699   ins_pipe( fpu_reg_reg );
9700 %}
9701 
9702 // Strict FP instruction biases argument before division then
9703 // biases result, to avoid double rounding of subnormals.
9704 //
9705 // scale dividend by multiplying dividend by 2^(-15360)
9706 // load divisor
9707 // divide scaled dividend by divisor
9708 // rescale quotient by 2^(15360)
9709 //
9710 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9715 
9716   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9717             "DMULp  $dst,ST\n\t"
9718             "FLD    $src\n\t"
9719             "FDIVp  $dst,ST\n\t"
9720             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9721             "DMULp  $dst,ST\n\t" %}
9722   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9723   ins_encode( strictfp_bias1(dst),
9724               Push_Reg_DPR(src),
9725               OpcP, RegOpc(dst),
9726               strictfp_bias2(dst) );
9727   ins_pipe( fpu_reg_reg );
9728 %}
9729 
9730 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9731   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9732   match(Set dst (RoundDouble (DivD src1 src2)));
9733 
9734   format %{ "FLD    $src1\n\t"
9735             "FDIV   ST,$src2\n\t"
9736             "FSTP_D $dst\t# D-round" %}
9737   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9738   ins_encode( Push_Reg_DPR(src1),
9739               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9740   ins_pipe( fpu_mem_reg_reg );
9741 %}
9742 
9743 
9744 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9745   predicate(UseSSE<=1);
9746   match(Set dst (ModD dst src));
9747   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9748 
9749   format %{ "DMOD   $dst,$src" %}
9750   ins_cost(250);
9751   ins_encode(Push_Reg_Mod_DPR(dst, src),
9752               emitModDPR(),
9753               Push_Result_Mod_DPR(src),
9754               Pop_Reg_DPR(dst));
9755   ins_pipe( pipe_slow );
9756 %}
9757 
9758 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9759   predicate(UseSSE>=2);
9760   match(Set dst (ModD src0 src1));
9761   effect(KILL rax, KILL cr);
9762 
9763   format %{ "SUB    ESP,8\t # DMOD\n"
9764           "\tMOVSD  [ESP+0],$src1\n"
9765           "\tFLD_D  [ESP+0]\n"
9766           "\tMOVSD  [ESP+0],$src0\n"
9767           "\tFLD_D  [ESP+0]\n"
9768      "loop:\tFPREM\n"
9769           "\tFWAIT\n"
9770           "\tFNSTSW AX\n"
9771           "\tSAHF\n"
9772           "\tJP     loop\n"
9773           "\tFSTP_D [ESP+0]\n"
9774           "\tMOVSD  $dst,[ESP+0]\n"
9775           "\tADD    ESP,8\n"
9776           "\tFSTP   ST0\t # Restore FPU Stack"
9777     %}
9778   ins_cost(250);
9779   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9780   ins_pipe( pipe_slow );
9781 %}
9782 
9783 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9784   predicate (UseSSE<=1);
9785   match(Set dst (SinD src));
9786   ins_cost(1800);
9787   format %{ "DSIN   $dst" %}
9788   opcode(0xD9, 0xFE);
9789   ins_encode( OpcP, OpcS );
9790   ins_pipe( pipe_slow );
9791 %}
9792 
9793 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9794   predicate (UseSSE>=2);
9795   match(Set dst (SinD dst));
9796   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9797   ins_cost(1800);
9798   format %{ "DSIN   $dst" %}
9799   opcode(0xD9, 0xFE);
9800   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9801   ins_pipe( pipe_slow );
9802 %}
9803 
9804 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9805   predicate (UseSSE<=1);
9806   match(Set dst (CosD src));
9807   ins_cost(1800);
9808   format %{ "DCOS   $dst" %}
9809   opcode(0xD9, 0xFF);
9810   ins_encode( OpcP, OpcS );
9811   ins_pipe( pipe_slow );
9812 %}
9813 
9814 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9815   predicate (UseSSE>=2);
9816   match(Set dst (CosD dst));
9817   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9818   ins_cost(1800);
9819   format %{ "DCOS   $dst" %}
9820   opcode(0xD9, 0xFF);
9821   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9822   ins_pipe( pipe_slow );
9823 %}
9824 
9825 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9826   predicate (UseSSE<=1);
9827   match(Set dst(TanD src));
9828   format %{ "DTAN   $dst" %}
9829   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9830               Opcode(0xDD), Opcode(0xD8));   // fstp st
9831   ins_pipe( pipe_slow );
9832 %}
9833 
9834 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9835   predicate (UseSSE>=2);
9836   match(Set dst(TanD dst));
9837   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9838   format %{ "DTAN   $dst" %}
9839   ins_encode( Push_SrcD(dst),
9840               Opcode(0xD9), Opcode(0xF2),    // fptan
9841               Opcode(0xDD), Opcode(0xD8),   // fstp st
9842               Push_ResultD(dst) );
9843   ins_pipe( pipe_slow );
9844 %}
9845 
9846 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9847   predicate (UseSSE<=1);
9848   match(Set dst(AtanD dst src));
9849   format %{ "DATA   $dst,$src" %}
9850   opcode(0xD9, 0xF3);
9851   ins_encode( Push_Reg_DPR(src),
9852               OpcP, OpcS, RegOpc(dst) );
9853   ins_pipe( pipe_slow );
9854 %}
9855 
9856 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9857   predicate (UseSSE>=2);
9858   match(Set dst(AtanD dst src));
9859   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9860   format %{ "DATA   $dst,$src" %}
9861   opcode(0xD9, 0xF3);
9862   ins_encode( Push_SrcD(src),
9863               OpcP, OpcS, Push_ResultD(dst) );
9864   ins_pipe( pipe_slow );
9865 %}
9866 
9867 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9868   predicate (UseSSE<=1);
9869   match(Set dst (SqrtD src));
9870   format %{ "DSQRT  $dst,$src" %}
9871   opcode(0xFA, 0xD9);
9872   ins_encode( Push_Reg_DPR(src),
9873               OpcS, OpcP, Pop_Reg_DPR(dst) );
9874   ins_pipe( pipe_slow );
9875 %}
9876 
9877 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9878   predicate (UseSSE<=1);
9879   match(Set Y (PowD X Y));  // Raise X to the Yth power
9880   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9881   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9882   ins_encode %{
9883     __ subptr(rsp, 8);
9884     __ fld_s($X$$reg - 1);
9885     __ fast_pow();
9886     __ addptr(rsp, 8);
9887   %}
9888   ins_pipe( pipe_slow );
9889 %}
9890 
9891 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9892   predicate (UseSSE>=2);
9893   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9894   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9895   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9896   ins_encode %{
9897     __ subptr(rsp, 8);
9898     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9899     __ fld_d(Address(rsp, 0));
9900     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9901     __ fld_d(Address(rsp, 0));
9902     __ fast_pow();
9903     __ fstp_d(Address(rsp, 0));
9904     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9905     __ addptr(rsp, 8);
9906   %}
9907   ins_pipe( pipe_slow );
9908 %}
9909 
9910 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9911   predicate (UseSSE<=1);
9912   // The source Double operand on FPU stack
9913   match(Set dst (Log10D src));
9914   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9915   // fxch         ; swap ST(0) with ST(1)
9916   // fyl2x        ; compute log_10(2) * log_2(x)
9917   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9918             "FXCH   \n\t"
9919             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9920          %}
9921   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9922               Opcode(0xD9), Opcode(0xC9),   // fxch
9923               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9924 
9925   ins_pipe( pipe_slow );
9926 %}
9927 
9928 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9929   predicate (UseSSE>=2);
9930   effect(KILL cr);
9931   match(Set dst (Log10D src));
9932   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9933   // fyl2x        ; compute log_10(2) * log_2(x)
9934   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9935             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9936          %}
9937   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9938               Push_SrcD(src),
9939               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9940               Push_ResultD(dst));
9941 
9942   ins_pipe( pipe_slow );
9943 %}
9944 
9945 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9946   predicate (UseSSE<=1);
9947   // The source Double operand on FPU stack
9948   match(Set dst (LogD src));
9949   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9950   // fxch         ; swap ST(0) with ST(1)
9951   // fyl2x        ; compute log_e(2) * log_2(x)
9952   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9953             "FXCH   \n\t"
9954             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9955          %}
9956   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9957               Opcode(0xD9), Opcode(0xC9),   // fxch
9958               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9959 
9960   ins_pipe( pipe_slow );
9961 %}
9962 
9963 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9964   predicate (UseSSE>=2);
9965   effect(KILL cr);
9966   // The source and result Double operands in XMM registers
9967   match(Set dst (LogD src));
9968   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9969   // fyl2x        ; compute log_e(2) * log_2(x)
9970   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9971             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9972          %}
9973   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9974               Push_SrcD(src),
9975               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9976               Push_ResultD(dst));
9977   ins_pipe( pipe_slow );
9978 %}
9979 
9980 //-------------Float Instructions-------------------------------
9981 // Float Math
9982 
9983 // Code for float compare:
9984 //     fcompp();
9985 //     fwait(); fnstsw_ax();
9986 //     sahf();
9987 //     movl(dst, unordered_result);
9988 //     jcc(Assembler::parity, exit);
9989 //     movl(dst, less_result);
9990 //     jcc(Assembler::below, exit);
9991 //     movl(dst, equal_result);
9992 //     jcc(Assembler::equal, exit);
9993 //     movl(dst, greater_result);
9994 //   exit:
9995 
9996 // P6 version of float compare, sets condition codes in EFLAGS
9997 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9998   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9999   match(Set cr (CmpF src1 src2));
10000   effect(KILL rax);
10001   ins_cost(150);
10002   format %{ "FLD    $src1\n\t"
10003             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10004             "JNP    exit\n\t"
10005             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10006             "SAHF\n"
10007      "exit:\tNOP               // avoid branch to branch" %}
10008   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10009   ins_encode( Push_Reg_DPR(src1),
10010               OpcP, RegOpc(src2),
10011               cmpF_P6_fixup );
10012   ins_pipe( pipe_slow );
10013 %}
10014 
10015 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10016   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10017   match(Set cr (CmpF src1 src2));
10018   ins_cost(100);
10019   format %{ "FLD    $src1\n\t"
10020             "FUCOMIP ST,$src2  // P6 instruction" %}
10021   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10022   ins_encode( Push_Reg_DPR(src1),
10023               OpcP, RegOpc(src2));
10024   ins_pipe( pipe_slow );
10025 %}
10026 
10027 
10028 // Compare & branch
10029 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10030   predicate(UseSSE == 0);
10031   match(Set cr (CmpF src1 src2));
10032   effect(KILL rax);
10033   ins_cost(200);
10034   format %{ "FLD    $src1\n\t"
10035             "FCOMp  $src2\n\t"
10036             "FNSTSW AX\n\t"
10037             "TEST   AX,0x400\n\t"
10038             "JZ,s   flags\n\t"
10039             "MOV    AH,1\t# unordered treat as LT\n"
10040     "flags:\tSAHF" %}
10041   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10042   ins_encode( Push_Reg_DPR(src1),
10043               OpcP, RegOpc(src2),
10044               fpu_flags);
10045   ins_pipe( pipe_slow );
10046 %}
10047 
10048 // Compare vs zero into -1,0,1
10049 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10050   predicate(UseSSE == 0);
10051   match(Set dst (CmpF3 src1 zero));
10052   effect(KILL cr, KILL rax);
10053   ins_cost(280);
10054   format %{ "FTSTF  $dst,$src1" %}
10055   opcode(0xE4, 0xD9);
10056   ins_encode( Push_Reg_DPR(src1),
10057               OpcS, OpcP, PopFPU,
10058               CmpF_Result(dst));
10059   ins_pipe( pipe_slow );
10060 %}
10061 
10062 // Compare into -1,0,1
10063 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10064   predicate(UseSSE == 0);
10065   match(Set dst (CmpF3 src1 src2));
10066   effect(KILL cr, KILL rax);
10067   ins_cost(300);
10068   format %{ "FCMPF  $dst,$src1,$src2" %}
10069   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10070   ins_encode( Push_Reg_DPR(src1),
10071               OpcP, RegOpc(src2),
10072               CmpF_Result(dst));
10073   ins_pipe( pipe_slow );
10074 %}
10075 
10076 // float compare and set condition codes in EFLAGS by XMM regs
10077 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10078   predicate(UseSSE>=1);
10079   match(Set cr (CmpF src1 src2));
10080   ins_cost(145);
10081   format %{ "UCOMISS $src1,$src2\n\t"
10082             "JNP,s   exit\n\t"
10083             "PUSHF\t# saw NaN, set CF\n\t"
10084             "AND     [rsp], #0xffffff2b\n\t"
10085             "POPF\n"
10086     "exit:" %}
10087   ins_encode %{
10088     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10089     emit_cmpfp_fixup(_masm);
10090   %}
10091   ins_pipe( pipe_slow );
10092 %}
10093 
10094 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10095   predicate(UseSSE>=1);
10096   match(Set cr (CmpF src1 src2));
10097   ins_cost(100);
10098   format %{ "UCOMISS $src1,$src2" %}
10099   ins_encode %{
10100     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10101   %}
10102   ins_pipe( pipe_slow );
10103 %}
10104 
10105 // float compare and set condition codes in EFLAGS by XMM regs
10106 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10107   predicate(UseSSE>=1);
10108   match(Set cr (CmpF src1 (LoadF src2)));
10109   ins_cost(165);
10110   format %{ "UCOMISS $src1,$src2\n\t"
10111             "JNP,s   exit\n\t"
10112             "PUSHF\t# saw NaN, set CF\n\t"
10113             "AND     [rsp], #0xffffff2b\n\t"
10114             "POPF\n"
10115     "exit:" %}
10116   ins_encode %{
10117     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10118     emit_cmpfp_fixup(_masm);
10119   %}
10120   ins_pipe( pipe_slow );
10121 %}
10122 
10123 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10124   predicate(UseSSE>=1);
10125   match(Set cr (CmpF src1 (LoadF src2)));
10126   ins_cost(100);
10127   format %{ "UCOMISS $src1,$src2" %}
10128   ins_encode %{
10129     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10130   %}
10131   ins_pipe( pipe_slow );
10132 %}
10133 
10134 // Compare into -1,0,1 in XMM
10135 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10136   predicate(UseSSE>=1);
10137   match(Set dst (CmpF3 src1 src2));
10138   effect(KILL cr);
10139   ins_cost(255);
10140   format %{ "UCOMISS $src1, $src2\n\t"
10141             "MOV     $dst, #-1\n\t"
10142             "JP,s    done\n\t"
10143             "JB,s    done\n\t"
10144             "SETNE   $dst\n\t"
10145             "MOVZB   $dst, $dst\n"
10146     "done:" %}
10147   ins_encode %{
10148     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10149     emit_cmpfp3(_masm, $dst$$Register);
10150   %}
10151   ins_pipe( pipe_slow );
10152 %}
10153 
10154 // Compare into -1,0,1 in XMM and memory
10155 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10156   predicate(UseSSE>=1);
10157   match(Set dst (CmpF3 src1 (LoadF src2)));
10158   effect(KILL cr);
10159   ins_cost(275);
10160   format %{ "UCOMISS $src1, $src2\n\t"
10161             "MOV     $dst, #-1\n\t"
10162             "JP,s    done\n\t"
10163             "JB,s    done\n\t"
10164             "SETNE   $dst\n\t"
10165             "MOVZB   $dst, $dst\n"
10166     "done:" %}
10167   ins_encode %{
10168     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10169     emit_cmpfp3(_masm, $dst$$Register);
10170   %}
10171   ins_pipe( pipe_slow );
10172 %}
10173 
10174 // Spill to obtain 24-bit precision
10175 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10176   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10177   match(Set dst (SubF src1 src2));
10178 
10179   format %{ "FSUB   $dst,$src1 - $src2" %}
10180   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10181   ins_encode( Push_Reg_FPR(src1),
10182               OpcReg_FPR(src2),
10183               Pop_Mem_FPR(dst) );
10184   ins_pipe( fpu_mem_reg_reg );
10185 %}
10186 //
10187 // This instruction does not round to 24-bits
10188 instruct subFPR_reg(regFPR dst, regFPR src) %{
10189   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10190   match(Set dst (SubF dst src));
10191 
10192   format %{ "FSUB   $dst,$src" %}
10193   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10194   ins_encode( Push_Reg_FPR(src),
10195               OpcP, RegOpc(dst) );
10196   ins_pipe( fpu_reg_reg );
10197 %}
10198 
10199 // Spill to obtain 24-bit precision
10200 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10201   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10202   match(Set dst (AddF src1 src2));
10203 
10204   format %{ "FADD   $dst,$src1,$src2" %}
10205   opcode(0xD8, 0x0); /* D8 C0+i */
10206   ins_encode( Push_Reg_FPR(src2),
10207               OpcReg_FPR(src1),
10208               Pop_Mem_FPR(dst) );
10209   ins_pipe( fpu_mem_reg_reg );
10210 %}
10211 //
10212 // This instruction does not round to 24-bits
10213 instruct addFPR_reg(regFPR dst, regFPR src) %{
10214   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10215   match(Set dst (AddF dst src));
10216 
10217   format %{ "FLD    $src\n\t"
10218             "FADDp  $dst,ST" %}
10219   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10220   ins_encode( Push_Reg_FPR(src),
10221               OpcP, RegOpc(dst) );
10222   ins_pipe( fpu_reg_reg );
10223 %}
10224 
10225 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10226   predicate(UseSSE==0);
10227   match(Set dst (AbsF src));
10228   ins_cost(100);
10229   format %{ "FABS" %}
10230   opcode(0xE1, 0xD9);
10231   ins_encode( OpcS, OpcP );
10232   ins_pipe( fpu_reg_reg );
10233 %}
10234 
10235 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10236   predicate(UseSSE==0);
10237   match(Set dst (NegF src));
10238   ins_cost(100);
10239   format %{ "FCHS" %}
10240   opcode(0xE0, 0xD9);
10241   ins_encode( OpcS, OpcP );
10242   ins_pipe( fpu_reg_reg );
10243 %}
10244 
10245 // Cisc-alternate to addFPR_reg
10246 // Spill to obtain 24-bit precision
10247 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10248   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10249   match(Set dst (AddF src1 (LoadF src2)));
10250 
10251   format %{ "FLD    $src2\n\t"
10252             "FADD   ST,$src1\n\t"
10253             "FSTP_S $dst" %}
10254   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10255   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10256               OpcReg_FPR(src1),
10257               Pop_Mem_FPR(dst) );
10258   ins_pipe( fpu_mem_reg_mem );
10259 %}
10260 //
10261 // Cisc-alternate to addFPR_reg
10262 // This instruction does not round to 24-bits
10263 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10264   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10265   match(Set dst (AddF dst (LoadF src)));
10266 
10267   format %{ "FADD   $dst,$src" %}
10268   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10269   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10270               OpcP, RegOpc(dst) );
10271   ins_pipe( fpu_reg_mem );
10272 %}
10273 
10274 // // Following two instructions for _222_mpegaudio
10275 // Spill to obtain 24-bit precision
10276 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10277   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10278   match(Set dst (AddF src1 src2));
10279 
10280   format %{ "FADD   $dst,$src1,$src2" %}
10281   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10282   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10283               OpcReg_FPR(src2),
10284               Pop_Mem_FPR(dst) );
10285   ins_pipe( fpu_mem_reg_mem );
10286 %}
10287 
10288 // Cisc-spill variant
10289 // Spill to obtain 24-bit precision
10290 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10291   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10292   match(Set dst (AddF src1 (LoadF src2)));
10293 
10294   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10295   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10296   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10297               set_instruction_start,
10298               OpcP, RMopc_Mem(secondary,src1),
10299               Pop_Mem_FPR(dst) );
10300   ins_pipe( fpu_mem_mem_mem );
10301 %}
10302 
10303 // Spill to obtain 24-bit precision
10304 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10305   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10306   match(Set dst (AddF src1 src2));
10307 
10308   format %{ "FADD   $dst,$src1,$src2" %}
10309   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10310   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10311               set_instruction_start,
10312               OpcP, RMopc_Mem(secondary,src1),
10313               Pop_Mem_FPR(dst) );
10314   ins_pipe( fpu_mem_mem_mem );
10315 %}
10316 
10317 
10318 // Spill to obtain 24-bit precision
10319 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10320   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10321   match(Set dst (AddF src con));
10322   format %{ "FLD    $src\n\t"
10323             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10324             "FSTP_S $dst"  %}
10325   ins_encode %{
10326     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10327     __ fadd_s($constantaddress($con));
10328     __ fstp_s(Address(rsp, $dst$$disp));
10329   %}
10330   ins_pipe(fpu_mem_reg_con);
10331 %}
10332 //
10333 // This instruction does not round to 24-bits
10334 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10335   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10336   match(Set dst (AddF src con));
10337   format %{ "FLD    $src\n\t"
10338             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10339             "FSTP   $dst"  %}
10340   ins_encode %{
10341     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10342     __ fadd_s($constantaddress($con));
10343     __ fstp_d($dst$$reg);
10344   %}
10345   ins_pipe(fpu_reg_reg_con);
10346 %}
10347 
10348 // Spill to obtain 24-bit precision
10349 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10350   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10351   match(Set dst (MulF src1 src2));
10352 
10353   format %{ "FLD    $src1\n\t"
10354             "FMUL   $src2\n\t"
10355             "FSTP_S $dst"  %}
10356   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10357   ins_encode( Push_Reg_FPR(src1),
10358               OpcReg_FPR(src2),
10359               Pop_Mem_FPR(dst) );
10360   ins_pipe( fpu_mem_reg_reg );
10361 %}
10362 //
10363 // This instruction does not round to 24-bits
10364 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10365   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10366   match(Set dst (MulF src1 src2));
10367 
10368   format %{ "FLD    $src1\n\t"
10369             "FMUL   $src2\n\t"
10370             "FSTP_S $dst"  %}
10371   opcode(0xD8, 0x1); /* D8 C8+i */
10372   ins_encode( Push_Reg_FPR(src2),
10373               OpcReg_FPR(src1),
10374               Pop_Reg_FPR(dst) );
10375   ins_pipe( fpu_reg_reg_reg );
10376 %}
10377 
10378 
10379 // Spill to obtain 24-bit precision
10380 // Cisc-alternate to reg-reg multiply
10381 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10382   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10383   match(Set dst (MulF src1 (LoadF src2)));
10384 
10385   format %{ "FLD_S  $src2\n\t"
10386             "FMUL   $src1\n\t"
10387             "FSTP_S $dst"  %}
10388   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10389   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10390               OpcReg_FPR(src1),
10391               Pop_Mem_FPR(dst) );
10392   ins_pipe( fpu_mem_reg_mem );
10393 %}
10394 //
10395 // This instruction does not round to 24-bits
10396 // Cisc-alternate to reg-reg multiply
10397 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10398   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10399   match(Set dst (MulF src1 (LoadF src2)));
10400 
10401   format %{ "FMUL   $dst,$src1,$src2" %}
10402   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10403   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10404               OpcReg_FPR(src1),
10405               Pop_Reg_FPR(dst) );
10406   ins_pipe( fpu_reg_reg_mem );
10407 %}
10408 
10409 // Spill to obtain 24-bit precision
10410 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10411   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10412   match(Set dst (MulF src1 src2));
10413 
10414   format %{ "FMUL   $dst,$src1,$src2" %}
10415   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10416   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10417               set_instruction_start,
10418               OpcP, RMopc_Mem(secondary,src1),
10419               Pop_Mem_FPR(dst) );
10420   ins_pipe( fpu_mem_mem_mem );
10421 %}
10422 
10423 // Spill to obtain 24-bit precision
10424 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10425   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10426   match(Set dst (MulF src con));
10427 
10428   format %{ "FLD    $src\n\t"
10429             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10430             "FSTP_S $dst"  %}
10431   ins_encode %{
10432     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10433     __ fmul_s($constantaddress($con));
10434     __ fstp_s(Address(rsp, $dst$$disp));
10435   %}
10436   ins_pipe(fpu_mem_reg_con);
10437 %}
10438 //
10439 // This instruction does not round to 24-bits
10440 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10441   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10442   match(Set dst (MulF src con));
10443 
10444   format %{ "FLD    $src\n\t"
10445             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10446             "FSTP   $dst"  %}
10447   ins_encode %{
10448     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10449     __ fmul_s($constantaddress($con));
10450     __ fstp_d($dst$$reg);
10451   %}
10452   ins_pipe(fpu_reg_reg_con);
10453 %}
10454 
10455 
10456 //
10457 // MACRO1 -- subsume unshared load into mulFPR
10458 // This instruction does not round to 24-bits
10459 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10460   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10461   match(Set dst (MulF (LoadF mem1) src));
10462 
10463   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10464             "FMUL   ST,$src\n\t"
10465             "FSTP   $dst" %}
10466   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10467   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10468               OpcReg_FPR(src),
10469               Pop_Reg_FPR(dst) );
10470   ins_pipe( fpu_reg_reg_mem );
10471 %}
10472 //
10473 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10474 // This instruction does not round to 24-bits
10475 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10476   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10477   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10478   ins_cost(95);
10479 
10480   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10481             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10482             "FADD   ST,$src2\n\t"
10483             "FSTP   $dst" %}
10484   opcode(0xD9); /* LoadF D9 /0 */
10485   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10486               FMul_ST_reg(src1),
10487               FAdd_ST_reg(src2),
10488               Pop_Reg_FPR(dst) );
10489   ins_pipe( fpu_reg_mem_reg_reg );
10490 %}
10491 
10492 // MACRO3 -- addFPR a mulFPR
10493 // This instruction does not round to 24-bits.  It is a '2-address'
10494 // instruction in that the result goes back to src2.  This eliminates
10495 // a move from the macro; possibly the register allocator will have
10496 // to add it back (and maybe not).
10497 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10498   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10499   match(Set src2 (AddF (MulF src0 src1) src2));
10500 
10501   format %{ "FLD    $src0     ===MACRO3===\n\t"
10502             "FMUL   ST,$src1\n\t"
10503             "FADDP  $src2,ST" %}
10504   opcode(0xD9); /* LoadF D9 /0 */
10505   ins_encode( Push_Reg_FPR(src0),
10506               FMul_ST_reg(src1),
10507               FAddP_reg_ST(src2) );
10508   ins_pipe( fpu_reg_reg_reg );
10509 %}
10510 
10511 // MACRO4 -- divFPR subFPR
10512 // This instruction does not round to 24-bits
10513 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10514   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10515   match(Set dst (DivF (SubF src2 src1) src3));
10516 
10517   format %{ "FLD    $src2   ===MACRO4===\n\t"
10518             "FSUB   ST,$src1\n\t"
10519             "FDIV   ST,$src3\n\t"
10520             "FSTP  $dst" %}
10521   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10522   ins_encode( Push_Reg_FPR(src2),
10523               subFPR_divFPR_encode(src1,src3),
10524               Pop_Reg_FPR(dst) );
10525   ins_pipe( fpu_reg_reg_reg_reg );
10526 %}
10527 
10528 // Spill to obtain 24-bit precision
10529 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10530   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10531   match(Set dst (DivF src1 src2));
10532 
10533   format %{ "FDIV   $dst,$src1,$src2" %}
10534   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10535   ins_encode( Push_Reg_FPR(src1),
10536               OpcReg_FPR(src2),
10537               Pop_Mem_FPR(dst) );
10538   ins_pipe( fpu_mem_reg_reg );
10539 %}
10540 //
10541 // This instruction does not round to 24-bits
10542 instruct divFPR_reg(regFPR dst, regFPR src) %{
10543   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10544   match(Set dst (DivF dst src));
10545 
10546   format %{ "FDIV   $dst,$src" %}
10547   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10548   ins_encode( Push_Reg_FPR(src),
10549               OpcP, RegOpc(dst) );
10550   ins_pipe( fpu_reg_reg );
10551 %}
10552 
10553 
10554 // Spill to obtain 24-bit precision
10555 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10556   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10557   match(Set dst (ModF src1 src2));
10558   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10559 
10560   format %{ "FMOD   $dst,$src1,$src2" %}
10561   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10562               emitModDPR(),
10563               Push_Result_Mod_DPR(src2),
10564               Pop_Mem_FPR(dst));
10565   ins_pipe( pipe_slow );
10566 %}
10567 //
10568 // This instruction does not round to 24-bits
10569 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10570   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10571   match(Set dst (ModF dst src));
10572   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10573 
10574   format %{ "FMOD   $dst,$src" %}
10575   ins_encode(Push_Reg_Mod_DPR(dst, src),
10576               emitModDPR(),
10577               Push_Result_Mod_DPR(src),
10578               Pop_Reg_FPR(dst));
10579   ins_pipe( pipe_slow );
10580 %}
10581 
10582 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10583   predicate(UseSSE>=1);
10584   match(Set dst (ModF src0 src1));
10585   effect(KILL rax, KILL cr);
10586   format %{ "SUB    ESP,4\t # FMOD\n"
10587           "\tMOVSS  [ESP+0],$src1\n"
10588           "\tFLD_S  [ESP+0]\n"
10589           "\tMOVSS  [ESP+0],$src0\n"
10590           "\tFLD_S  [ESP+0]\n"
10591      "loop:\tFPREM\n"
10592           "\tFWAIT\n"
10593           "\tFNSTSW AX\n"
10594           "\tSAHF\n"
10595           "\tJP     loop\n"
10596           "\tFSTP_S [ESP+0]\n"
10597           "\tMOVSS  $dst,[ESP+0]\n"
10598           "\tADD    ESP,4\n"
10599           "\tFSTP   ST0\t # Restore FPU Stack"
10600     %}
10601   ins_cost(250);
10602   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10603   ins_pipe( pipe_slow );
10604 %}
10605 
10606 
10607 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10609 
10610 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10611   predicate(UseSSE==0);
10612   match(Set dst (RoundFloat src));
10613   ins_cost(125);
10614   format %{ "FST_S  $dst,$src\t# F-round" %}
10615   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10616   ins_pipe( fpu_mem_reg );
10617 %}
10618 
10619 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10620   predicate(UseSSE<=1);
10621   match(Set dst (RoundDouble src));
10622   ins_cost(125);
10623   format %{ "FST_D  $dst,$src\t# D-round" %}
10624   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10625   ins_pipe( fpu_mem_reg );
10626 %}
10627 
// Force rounding to 24-bit precision and 8-bit exponent
10629 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10630   predicate(UseSSE==0);
10631   match(Set dst (ConvD2F src));
10632   format %{ "FST_S  $dst,$src\t# F-round" %}
10633   expand %{
10634     roundFloat_mem_reg(dst,src);
10635   %}
10636 %}
10637 
// Force rounding to 24-bit precision and 8-bit exponent
10639 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10640   predicate(UseSSE==1);
10641   match(Set dst (ConvD2F src));
10642   effect( KILL cr );
10643   format %{ "SUB    ESP,4\n\t"
10644             "FST_S  [ESP],$src\t# F-round\n\t"
10645             "MOVSS  $dst,[ESP]\n\t"
10646             "ADD ESP,4" %}
10647   ins_encode %{
10648     __ subptr(rsp, 4);
10649     if ($src$$reg != FPR1L_enc) {
10650       __ fld_s($src$$reg-1);
10651       __ fstp_s(Address(rsp, 0));
10652     } else {
10653       __ fst_s(Address(rsp, 0));
10654     }
10655     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10656     __ addptr(rsp, 4);
10657   %}
10658   ins_pipe( pipe_slow );
10659 %}
10660 
10661 // Force rounding double precision to single precision
10662 instruct convD2F_reg(regF dst, regD src) %{
10663   predicate(UseSSE>=2);
10664   match(Set dst (ConvD2F src));
10665   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10666   ins_encode %{
10667     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10668   %}
10669   ins_pipe( pipe_slow );
10670 %}
10671 
10672 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10673   predicate(UseSSE==0);
10674   match(Set dst (ConvF2D src));
10675   format %{ "FST_S  $dst,$src\t# D-round" %}
10676   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10677   ins_pipe( fpu_reg_reg );
10678 %}
10679 
10680 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10681   predicate(UseSSE==1);
10682   match(Set dst (ConvF2D src));
10683   format %{ "FST_D  $dst,$src\t# D-round" %}
10684   expand %{
10685     roundDouble_mem_reg(dst,src);
10686   %}
10687 %}
10688 
10689 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10690   predicate(UseSSE==1);
10691   match(Set dst (ConvF2D src));
10692   effect( KILL cr );
10693   format %{ "SUB    ESP,4\n\t"
10694             "MOVSS  [ESP] $src\n\t"
10695             "FLD_S  [ESP]\n\t"
10696             "ADD    ESP,4\n\t"
10697             "FSTP   $dst\t# D-round" %}
10698   ins_encode %{
10699     __ subptr(rsp, 4);
10700     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10701     __ fld_s(Address(rsp, 0));
10702     __ addptr(rsp, 4);
10703     __ fstp_d($dst$$reg);
10704   %}
10705   ins_pipe( pipe_slow );
10706 %}
10707 
10708 instruct convF2D_reg(regD dst, regF src) %{
10709   predicate(UseSSE>=2);
10710   match(Set dst (ConvF2D src));
10711   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10712   ins_encode %{
10713     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10719 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10720   predicate(UseSSE<=1);
10721   match(Set dst (ConvD2I src));
10722   effect( KILL tmp, KILL cr );
10723   format %{ "FLD    $src\t# Convert double to int \n\t"
10724             "FLDCW  trunc mode\n\t"
10725             "SUB    ESP,4\n\t"
10726             "FISTp  [ESP + #0]\n\t"
10727             "FLDCW  std/24-bit mode\n\t"
10728             "POP    EAX\n\t"
10729             "CMP    EAX,0x80000000\n\t"
10730             "JNE,s  fast\n\t"
10731             "FLD_D  $src\n\t"
10732             "CALL   d2i_wrapper\n"
10733       "fast:" %}
10734   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10735   ins_pipe( pipe_slow );
10736 %}
10737 
10738 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10739 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10740   predicate(UseSSE>=2);
10741   match(Set dst (ConvD2I src));
10742   effect( KILL tmp, KILL cr );
10743   format %{ "CVTTSD2SI $dst, $src\n\t"
10744             "CMP    $dst,0x80000000\n\t"
10745             "JNE,s  fast\n\t"
10746             "SUB    ESP, 8\n\t"
10747             "MOVSD  [ESP], $src\n\t"
10748             "FLD_D  [ESP]\n\t"
10749             "ADD    ESP, 8\n\t"
10750             "CALL   d2i_wrapper\n"
10751       "fast:" %}
10752   ins_encode %{
10753     Label fast;
10754     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10755     __ cmpl($dst$$Register, 0x80000000);
10756     __ jccb(Assembler::notEqual, fast);
10757     __ subptr(rsp, 8);
10758     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10759     __ fld_d(Address(rsp, 0));
10760     __ addptr(rsp, 8);
10761     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10762     __ bind(fast);
10763   %}
10764   ins_pipe( pipe_slow );
10765 %}
10766 
10767 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10768   predicate(UseSSE<=1);
10769   match(Set dst (ConvD2L src));
10770   effect( KILL cr );
10771   format %{ "FLD    $src\t# Convert double to long\n\t"
10772             "FLDCW  trunc mode\n\t"
10773             "SUB    ESP,8\n\t"
10774             "FISTp  [ESP + #0]\n\t"
10775             "FLDCW  std/24-bit mode\n\t"
10776             "POP    EAX\n\t"
10777             "POP    EDX\n\t"
10778             "CMP    EDX,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "TEST   EAX,EAX\n\t"
10781             "JNE,s  fast\n\t"
10782             "FLD    $src\n\t"
10783             "CALL   d2l_wrapper\n"
10784       "fast:" %}
10785   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10786   ins_pipe( pipe_slow );
10787 %}
10788 
10789 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10790 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10791   predicate (UseSSE>=2);
10792   match(Set dst (ConvD2L src));
10793   effect( KILL cr );
10794   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10795             "MOVSD  [ESP],$src\n\t"
10796             "FLD_D  [ESP]\n\t"
10797             "FLDCW  trunc mode\n\t"
10798             "FISTp  [ESP + #0]\n\t"
10799             "FLDCW  std/24-bit mode\n\t"
10800             "POP    EAX\n\t"
10801             "POP    EDX\n\t"
10802             "CMP    EDX,0x80000000\n\t"
10803             "JNE,s  fast\n\t"
10804             "TEST   EAX,EAX\n\t"
10805             "JNE,s  fast\n\t"
10806             "SUB    ESP,8\n\t"
10807             "MOVSD  [ESP],$src\n\t"
10808             "FLD_D  [ESP]\n\t"
10809             "ADD    ESP,8\n\t"
10810             "CALL   d2l_wrapper\n"
10811       "fast:" %}
10812   ins_encode %{
10813     Label fast;
10814     __ subptr(rsp, 8);
10815     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10816     __ fld_d(Address(rsp, 0));
10817     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10818     __ fistp_d(Address(rsp, 0));
10819     // Restore the rounding mode, mask the exception
10820     if (Compile::current()->in_24_bit_fp_mode()) {
10821       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10822     } else {
10823       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10824     }
10825     // Load the converted long, adjust CPU stack
10826     __ pop(rax);
10827     __ pop(rdx);
10828     __ cmpl(rdx, 0x80000000);
10829     __ jccb(Assembler::notEqual, fast);
10830     __ testl(rax, rax);
10831     __ jccb(Assembler::notEqual, fast);
10832     __ subptr(rsp, 8);
10833     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10834     __ fld_d(Address(rsp, 0));
10835     __ addptr(rsp, 8);
10836     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10837     __ bind(fast);
10838   %}
10839   ins_pipe( pipe_slow );
10840 %}
10841 
10842 // Convert a double to an int.  Java semantics require we do complex
10843 // manglations in the corner cases.  So we set the rounding mode to
10844 // 'zero', store the darned double down as an int, and reset the
10845 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and take the
// slow path if needed.
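//
// For reference, the Java-level narrowing semantics the slow path has to
// reproduce look roughly like this (sketch only, not the stub's actual code):
//
//   #include <limits.h>
//   int d2i(double d) {
//     if (d != d)               return 0;         // NaN -> 0
//     if (d >=  2147483648.0)   return INT_MAX;   // too large: clamp
//     if (d <= -2147483649.0)   return INT_MIN;   // too small: clamp
//     return (int)d;                              // in range: truncate toward zero
//   }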
10848 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10849   predicate(UseSSE==0);
10850   match(Set dst (ConvF2I src));
10851   effect( KILL tmp, KILL cr );
10852   format %{ "FLD    $src\t# Convert float to int \n\t"
10853             "FLDCW  trunc mode\n\t"
10854             "SUB    ESP,4\n\t"
10855             "FISTp  [ESP + #0]\n\t"
10856             "FLDCW  std/24-bit mode\n\t"
10857             "POP    EAX\n\t"
10858             "CMP    EAX,0x80000000\n\t"
10859             "JNE,s  fast\n\t"
10860             "FLD    $src\n\t"
10861             "CALL   d2i_wrapper\n"
10862       "fast:" %}
10863   // DPR2I_encoding works for FPR2I
10864   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10865   ins_pipe( pipe_slow );
10866 %}
10867 
10868 // Convert a float in xmm to an int reg.
10869 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10870   predicate(UseSSE>=1);
10871   match(Set dst (ConvF2I src));
10872   effect( KILL tmp, KILL cr );
10873   format %{ "CVTTSS2SI $dst, $src\n\t"
10874             "CMP    $dst,0x80000000\n\t"
10875             "JNE,s  fast\n\t"
10876             "SUB    ESP, 4\n\t"
10877             "MOVSS  [ESP], $src\n\t"
10878             "FLD    [ESP]\n\t"
10879             "ADD    ESP, 4\n\t"
10880             "CALL   d2i_wrapper\n"
10881       "fast:" %}
10882   ins_encode %{
10883     Label fast;
10884     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10885     __ cmpl($dst$$Register, 0x80000000);
10886     __ jccb(Assembler::notEqual, fast);
10887     __ subptr(rsp, 4);
10888     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10889     __ fld_s(Address(rsp, 0));
10890     __ addptr(rsp, 4);
10891     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10892     __ bind(fast);
10893   %}
10894   ins_pipe( pipe_slow );
10895 %}
10896 
10897 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10898   predicate(UseSSE==0);
10899   match(Set dst (ConvF2L src));
10900   effect( KILL cr );
10901   format %{ "FLD    $src\t# Convert float to long\n\t"
10902             "FLDCW  trunc mode\n\t"
10903             "SUB    ESP,8\n\t"
10904             "FISTp  [ESP + #0]\n\t"
10905             "FLDCW  std/24-bit mode\n\t"
10906             "POP    EAX\n\t"
10907             "POP    EDX\n\t"
10908             "CMP    EDX,0x80000000\n\t"
10909             "JNE,s  fast\n\t"
10910             "TEST   EAX,EAX\n\t"
10911             "JNE,s  fast\n\t"
10912             "FLD    $src\n\t"
10913             "CALL   d2l_wrapper\n"
10914       "fast:" %}
10915   // DPR2L_encoding works for FPR2L
10916   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10917   ins_pipe( pipe_slow );
10918 %}
10919 
10920 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10921 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10922   predicate (UseSSE>=1);
10923   match(Set dst (ConvF2L src));
10924   effect( KILL cr );
10925   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10926             "MOVSS  [ESP],$src\n\t"
10927             "FLD_S  [ESP]\n\t"
10928             "FLDCW  trunc mode\n\t"
10929             "FISTp  [ESP + #0]\n\t"
10930             "FLDCW  std/24-bit mode\n\t"
10931             "POP    EAX\n\t"
10932             "POP    EDX\n\t"
10933             "CMP    EDX,0x80000000\n\t"
10934             "JNE,s  fast\n\t"
10935             "TEST   EAX,EAX\n\t"
10936             "JNE,s  fast\n\t"
10937             "SUB    ESP,4\t# Convert float to long\n\t"
10938             "MOVSS  [ESP],$src\n\t"
10939             "FLD_S  [ESP]\n\t"
10940             "ADD    ESP,4\n\t"
10941             "CALL   d2l_wrapper\n"
10942       "fast:" %}
10943   ins_encode %{
10944     Label fast;
10945     __ subptr(rsp, 8);
10946     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10947     __ fld_s(Address(rsp, 0));
10948     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10949     __ fistp_d(Address(rsp, 0));
10950     // Restore the rounding mode, mask the exception
10951     if (Compile::current()->in_24_bit_fp_mode()) {
10952       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10953     } else {
10954       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10955     }
10956     // Load the converted long, adjust CPU stack
10957     __ pop(rax);
10958     __ pop(rdx);
10959     __ cmpl(rdx, 0x80000000);
10960     __ jccb(Assembler::notEqual, fast);
10961     __ testl(rax, rax);
10962     __ jccb(Assembler::notEqual, fast);
10963     __ subptr(rsp, 4);
10964     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10965     __ fld_s(Address(rsp, 0));
10966     __ addptr(rsp, 4);
10967     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10968     __ bind(fast);
10969   %}
10970   ins_pipe( pipe_slow );
10971 %}
10972 
10973 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10974   predicate( UseSSE<=1 );
10975   match(Set dst (ConvI2D src));
10976   format %{ "FILD   $src\n\t"
10977             "FSTP   $dst" %}
10978   opcode(0xDB, 0x0);  /* DB /0 */
10979   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10980   ins_pipe( fpu_reg_mem );
10981 %}
10982 
10983 instruct convI2D_reg(regD dst, rRegI src) %{
10984   predicate( UseSSE>=2 && !UseXmmI2D );
10985   match(Set dst (ConvI2D src));
10986   format %{ "CVTSI2SD $dst,$src" %}
10987   ins_encode %{
10988     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10989   %}
10990   ins_pipe( pipe_slow );
10991 %}
10992 
10993 instruct convI2D_mem(regD dst, memory mem) %{
10994   predicate( UseSSE>=2 );
10995   match(Set dst (ConvI2D (LoadI mem)));
10996   format %{ "CVTSI2SD $dst,$mem" %}
10997   ins_encode %{
10998     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10999   %}
11000   ins_pipe( pipe_slow );
11001 %}
11002 
11003 instruct convXI2D_reg(regD dst, rRegI src)
11004 %{
11005   predicate( UseSSE>=2 && UseXmmI2D );
11006   match(Set dst (ConvI2D src));
11007 
11008   format %{ "MOVD  $dst,$src\n\t"
11009             "CVTDQ2PD $dst,$dst\t# i2d" %}
11010   ins_encode %{
11011     __ movdl($dst$$XMMRegister, $src$$Register);
11012     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11013   %}
11014   ins_pipe(pipe_slow); // XXX
11015 %}
11016 
11017 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11018   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11019   match(Set dst (ConvI2D (LoadI mem)));
11020   format %{ "FILD   $mem\n\t"
11021             "FSTP   $dst" %}
11022   opcode(0xDB);      /* DB /0 */
11023   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11024               Pop_Reg_DPR(dst));
11025   ins_pipe( fpu_reg_mem );
11026 %}
11027 
11028 // Convert a byte to a float; no rounding step needed.
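// (Values 0..255 fit exactly in a 24-bit significand, so the spill-and-reload
// normally used to force 24-bit rounding can be skipped here.)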
11029 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11030   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11031   match(Set dst (ConvI2F src));
11032   format %{ "FILD   $src\n\t"
11033             "FSTP   $dst" %}
11034 
11035   opcode(0xDB, 0x0);  /* DB /0 */
11036   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11037   ins_pipe( fpu_reg_mem );
11038 %}
11039 
11040 // In 24-bit mode, force exponent rounding by storing back out
11041 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11042   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11043   match(Set dst (ConvI2F src));
11044   ins_cost(200);
11045   format %{ "FILD   $src\n\t"
11046             "FSTP_S $dst" %}
11047   opcode(0xDB, 0x0);  /* DB /0 */
11048   ins_encode( Push_Mem_I(src),
11049               Pop_Mem_FPR(dst));
11050   ins_pipe( fpu_mem_mem );
11051 %}
11052 
11053 // In 24-bit mode, force exponent rounding by storing back out
11054 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11055   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11056   match(Set dst (ConvI2F (LoadI mem)));
11057   ins_cost(200);
11058   format %{ "FILD   $mem\n\t"
11059             "FSTP_S $dst" %}
11060   opcode(0xDB);  /* DB /0 */
11061   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11062               Pop_Mem_FPR(dst));
11063   ins_pipe( fpu_mem_mem );
11064 %}
11065 
11066 // This instruction does not round to 24-bits
11067 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11068   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11069   match(Set dst (ConvI2F src));
11070   format %{ "FILD   $src\n\t"
11071             "FSTP   $dst" %}
11072   opcode(0xDB, 0x0);  /* DB /0 */
11073   ins_encode( Push_Mem_I(src),
11074               Pop_Reg_FPR(dst));
11075   ins_pipe( fpu_reg_mem );
11076 %}
11077 
11078 // This instruction does not round to 24-bits
11079 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11080   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11081   match(Set dst (ConvI2F (LoadI mem)));
11082   format %{ "FILD   $mem\n\t"
11083             "FSTP   $dst" %}
11084   opcode(0xDB);      /* DB /0 */
11085   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11086               Pop_Reg_FPR(dst));
11087   ins_pipe( fpu_reg_mem );
11088 %}
11089 
11090 // Convert an int to a float in xmm; no rounding step needed.
11091 instruct convI2F_reg(regF dst, rRegI src) %{
11092   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11093   match(Set dst (ConvI2F src));
11094   format %{ "CVTSI2SS $dst, $src" %}
11095   ins_encode %{
11096     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11097   %}
11098   ins_pipe( pipe_slow );
11099 %}
11100 
11101 instruct convXI2F_reg(regF dst, rRegI src)
11102 %{
11103   predicate( UseSSE>=2 && UseXmmI2F );
11104   match(Set dst (ConvI2F src));
11105 
11106   format %{ "MOVD  $dst,$src\n\t"
11107             "CVTDQ2PS $dst,$dst\t# i2f" %}
11108   ins_encode %{
11109     __ movdl($dst$$XMMRegister, $src$$Register);
11110     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11111   %}
11112   ins_pipe(pipe_slow); // XXX
11113 %}
11114 
11115 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11116   match(Set dst (ConvI2L src));
11117   effect(KILL cr);
11118   ins_cost(375);
11119   format %{ "MOV    $dst.lo,$src\n\t"
11120             "MOV    $dst.hi,$src\n\t"
11121             "SAR    $dst.hi,31" %}
11122   ins_encode(convert_int_long(dst,src));
11123   ins_pipe( ialu_reg_reg_long );
11124 %}
11125 
11126 // Zero-extend convert int to long
11127 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11128   match(Set dst (AndL (ConvI2L src) mask) );
11129   effect( KILL flags );
11130   ins_cost(250);
11131   format %{ "MOV    $dst.lo,$src\n\t"
11132             "XOR    $dst.hi,$dst.hi" %}
11133   opcode(0x33); // XOR
11134   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11135   ins_pipe( ialu_reg_reg_long );
11136 %}
11137 
11138 // Zero-extend long
11139 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11140   match(Set dst (AndL src mask) );
11141   effect( KILL flags );
11142   ins_cost(250);
11143   format %{ "MOV    $dst.lo,$src.lo\n\t"
11144             "XOR    $dst.hi,$dst.hi\n\t" %}
11145   opcode(0x33); // XOR
11146   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11147   ins_pipe( ialu_reg_reg_long );
11148 %}
11149 
11150 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11151   predicate (UseSSE<=1);
11152   match(Set dst (ConvL2D src));
11153   effect( KILL cr );
11154   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11155             "PUSH   $src.lo\n\t"
11156             "FILD   ST,[ESP + #0]\n\t"
11157             "ADD    ESP,8\n\t"
11158             "FSTP_D $dst\t# D-round" %}
11159   opcode(0xDF, 0x5);  /* DF /5 */
11160   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11161   ins_pipe( pipe_slow );
11162 %}
11163 
11164 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11165   predicate (UseSSE>=2);
11166   match(Set dst (ConvL2D src));
11167   effect( KILL cr );
11168   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11169             "PUSH   $src.lo\n\t"
11170             "FILD_D [ESP]\n\t"
11171             "FSTP_D [ESP]\n\t"
11172             "MOVSD  $dst,[ESP]\n\t"
11173             "ADD    ESP,8" %}
11174   opcode(0xDF, 0x5);  /* DF /5 */
11175   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11176   ins_pipe( pipe_slow );
11177 %}
11178 
11179 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11180   predicate (UseSSE>=1);
11181   match(Set dst (ConvL2F src));
11182   effect( KILL cr );
11183   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11184             "PUSH   $src.lo\n\t"
11185             "FILD_D [ESP]\n\t"
11186             "FSTP_S [ESP]\n\t"
11187             "MOVSS  $dst,[ESP]\n\t"
11188             "ADD    ESP,8" %}
11189   opcode(0xDF, 0x5);  /* DF /5 */
11190   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11191   ins_pipe( pipe_slow );
11192 %}
11193 
11194 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11195   match(Set dst (ConvL2F src));
11196   effect( KILL cr );
11197   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11198             "PUSH   $src.lo\n\t"
11199             "FILD   ST,[ESP + #0]\n\t"
11200             "ADD    ESP,8\n\t"
11201             "FSTP_S $dst\t# F-round" %}
11202   opcode(0xDF, 0x5);  /* DF /5 */
11203   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11204   ins_pipe( pipe_slow );
11205 %}
11206 
11207 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11208   match(Set dst (ConvL2I src));
11209   effect( DEF dst, USE src );
11210   format %{ "MOV    $dst,$src.lo" %}
11211   ins_encode(enc_CopyL_Lo(dst,src));
11212   ins_pipe( ialu_reg_reg );
11213 %}
11214 
11215 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11216   match(Set dst (MoveF2I src));
11217   effect( DEF dst, USE src );
11218   ins_cost(100);
11219   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11220   ins_encode %{
11221     __ movl($dst$$Register, Address(rsp, $src$$disp));
11222   %}
11223   ins_pipe( ialu_reg_mem );
11224 %}
11225 
11226 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11227   predicate(UseSSE==0);
11228   match(Set dst (MoveF2I src));
11229   effect( DEF dst, USE src );
11230 
11231   ins_cost(125);
11232   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11233   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11234   ins_pipe( fpu_mem_reg );
11235 %}
11236 
11237 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11238   predicate(UseSSE>=1);
11239   match(Set dst (MoveF2I src));
11240   effect( DEF dst, USE src );
11241 
11242   ins_cost(95);
11243   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11244   ins_encode %{
11245     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11246   %}
11247   ins_pipe( pipe_slow );
11248 %}
11249 
11250 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11251   predicate(UseSSE>=2);
11252   match(Set dst (MoveF2I src));
11253   effect( DEF dst, USE src );
11254   ins_cost(85);
11255   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11256   ins_encode %{
11257     __ movdl($dst$$Register, $src$$XMMRegister);
11258   %}
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11263   match(Set dst (MoveI2F src));
11264   effect( DEF dst, USE src );
11265 
11266   ins_cost(100);
11267   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11268   ins_encode %{
11269     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11270   %}
11271   ins_pipe( ialu_mem_reg );
11272 %}
11273 
11274 
11275 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11276   predicate(UseSSE==0);
11277   match(Set dst (MoveI2F src));
11278   effect(DEF dst, USE src);
11279 
11280   ins_cost(125);
11281   format %{ "FLD_S  $src\n\t"
11282             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11283   opcode(0xD9);               /* D9 /0, FLD m32real */
11284   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11285               Pop_Reg_FPR(dst) );
11286   ins_pipe( fpu_reg_mem );
11287 %}
11288 
11289 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11290   predicate(UseSSE>=1);
11291   match(Set dst (MoveI2F src));
11292   effect( DEF dst, USE src );
11293 
11294   ins_cost(95);
11295   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11296   ins_encode %{
11297     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11298   %}
11299   ins_pipe( pipe_slow );
11300 %}
11301 
11302 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11303   predicate(UseSSE>=2);
11304   match(Set dst (MoveI2F src));
11305   effect( DEF dst, USE src );
11306 
11307   ins_cost(85);
11308   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11309   ins_encode %{
11310     __ movdl($dst$$XMMRegister, $src$$Register);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11316   match(Set dst (MoveD2L src));
11317   effect(DEF dst, USE src);
11318 
11319   ins_cost(250);
11320   format %{ "MOV    $dst.lo,$src\n\t"
11321             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11322   opcode(0x8B, 0x8B);
11323   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11324   ins_pipe( ialu_mem_long_reg );
11325 %}
11326 
11327 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11328   predicate(UseSSE<=1);
11329   match(Set dst (MoveD2L src));
11330   effect(DEF dst, USE src);
11331 
11332   ins_cost(125);
11333   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11334   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11335   ins_pipe( fpu_mem_reg );
11336 %}
11337 
11338 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11339   predicate(UseSSE>=2);
11340   match(Set dst (MoveD2L src));
11341   effect(DEF dst, USE src);
11342   ins_cost(95);
11343   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11344   ins_encode %{
11345     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11346   %}
11347   ins_pipe( pipe_slow );
11348 %}
11349 
11350 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11351   predicate(UseSSE>=2);
11352   match(Set dst (MoveD2L src));
11353   effect(DEF dst, USE src, TEMP tmp);
11354   ins_cost(85);
11355   format %{ "MOVD   $dst.lo,$src\n\t"
11356             "PSHUFLW $tmp,$src,0x4E\n\t"
11357             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11358   ins_encode %{
11359     __ movdl($dst$$Register, $src$$XMMRegister);
11360     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11361     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11362   %}
11363   ins_pipe( pipe_slow );
11364 %}
11365 
11366 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11367   match(Set dst (MoveL2D src));
11368   effect(DEF dst, USE src);
11369 
11370   ins_cost(200);
11371   format %{ "MOV    $dst,$src.lo\n\t"
11372             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11373   opcode(0x89, 0x89);
11374   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11375   ins_pipe( ialu_mem_long_reg );
11376 %}
11377 
11378 
11379 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11380   predicate(UseSSE<=1);
11381   match(Set dst (MoveL2D src));
11382   effect(DEF dst, USE src);
11383   ins_cost(125);
11384 
11385   format %{ "FLD_D  $src\n\t"
11386             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11387   opcode(0xDD);               /* DD /0, FLD m64real */
11388   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11389               Pop_Reg_DPR(dst) );
11390   ins_pipe( fpu_reg_mem );
11391 %}
11392 
11393 
11394 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11395   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11396   match(Set dst (MoveL2D src));
11397   effect(DEF dst, USE src);
11398 
11399   ins_cost(95);
11400   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11401   ins_encode %{
11402     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11403   %}
11404   ins_pipe( pipe_slow );
11405 %}
11406 
11407 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11408   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11409   match(Set dst (MoveL2D src));
11410   effect(DEF dst, USE src);
11411 
11412   ins_cost(95);
11413   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11414   ins_encode %{
11415     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11416   %}
11417   ins_pipe( pipe_slow );
11418 %}
11419 
11420 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11421   predicate(UseSSE>=2);
11422   match(Set dst (MoveL2D src));
11423   effect(TEMP dst, USE src, TEMP tmp);
11424   ins_cost(85);
11425   format %{ "MOVD   $dst,$src.lo\n\t"
11426             "MOVD   $tmp,$src.hi\n\t"
11427             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11428   ins_encode %{
11429     __ movdl($dst$$XMMRegister, $src$$Register);
11430     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11431     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11432   %}
11433   ins_pipe( pipe_slow );
11434 %}
11435 
11436 
11437 // =======================================================================
11438 // fast clearing of an array
11439 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11440   predicate(!UseFastStosb);
11441   match(Set dummy (ClearArray cnt base));
11442   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11443   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11444             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11445             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11446   ins_encode %{
11447     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11448   %}
11449   ins_pipe( pipe_slow );
11450 %}
11451 
11452 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11453   predicate(UseFastStosb);
11454   match(Set dummy (ClearArray cnt base));
11455   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11456   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11457             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11458             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11459   ins_encode %{
11460     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11461   %}
11462   ins_pipe( pipe_slow );
11463 %}
11464 
11465 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11466                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11467   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11468   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11469 
11470   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11471   ins_encode %{
11472     __ string_compare($str1$$Register, $str2$$Register,
11473                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11474                       $tmp1$$XMMRegister);
11475   %}
11476   ins_pipe( pipe_slow );
11477 %}
11478 
11479 // fast string equals
11480 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11481                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11482   match(Set result (StrEquals (Binary str1 str2) cnt));
11483   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11484 
11485   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11486   ins_encode %{
11487     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11488                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11489                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11490   %}
11491   ins_pipe( pipe_slow );
11492 %}
11493 
11494 // fast search of substring with known size.
11495 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11496                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11497   predicate(UseSSE42Intrinsics);
11498   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11499   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11500 
11501   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11502   ins_encode %{
11503     int icnt2 = (int)$int_cnt2$$constant;
11504     if (icnt2 >= 8) {
11505       // IndexOf for constant substrings with size >= 8 elements,
11506       // which don't need to be loaded through the stack.
11507       __ string_indexofC8($str1$$Register, $str2$$Register,
11508                           $cnt1$$Register, $cnt2$$Register,
11509                           icnt2, $result$$Register,
11510                           $vec$$XMMRegister, $tmp$$Register);
11511     } else {
11512       // Small strings are loaded through the stack if they cross a page boundary.
11513       __ string_indexof($str1$$Register, $str2$$Register,
11514                         $cnt1$$Register, $cnt2$$Register,
11515                         icnt2, $result$$Register,
11516                         $vec$$XMMRegister, $tmp$$Register);
11517     }
11518   %}
11519   ins_pipe( pipe_slow );
11520 %}
11521 
11522 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11523                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11524   predicate(UseSSE42Intrinsics);
11525   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11526   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11527 
11528   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11529   ins_encode %{
11530     __ string_indexof($str1$$Register, $str2$$Register,
11531                       $cnt1$$Register, $cnt2$$Register,
11532                       (-1), $result$$Register,
11533                       $vec$$XMMRegister, $tmp$$Register);
11534   %}
11535   ins_pipe( pipe_slow );
11536 %}
11537 
11538 // fast array equals
11539 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11540                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11541 %{
11542   match(Set result (AryEq ary1 ary2));
11543   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11544   //ins_cost(300);
11545 
11546   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11547   ins_encode %{
11548     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11549                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11550                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11551   %}
11552   ins_pipe( pipe_slow );
11553 %}
11554 
11555 // encode char[] to byte[] in ISO_8859_1
11556 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11557                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11558                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11559   match(Set result (EncodeISOArray src (Binary dst len)));
11560   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11561 
11562   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11563   ins_encode %{
11564     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11565                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11566                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11567   %}
11568   ins_pipe( pipe_slow );
11569 %}
11570 
11571 
11572 //----------Control Flow Instructions------------------------------------------
11573 // Signed compare Instructions
11574 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11575   match(Set cr (CmpI op1 op2));
11576   effect( DEF cr, USE op1, USE op2 );
11577   format %{ "CMP    $op1,$op2" %}
11578   opcode(0x3B);  /* Opcode 3B /r */
11579   ins_encode( OpcP, RegReg( op1, op2) );
11580   ins_pipe( ialu_cr_reg_reg );
11581 %}
11582 
11583 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11584   match(Set cr (CmpI op1 op2));
11585   effect( DEF cr, USE op1 );
11586   format %{ "CMP    $op1,$op2" %}
11587   opcode(0x81,0x07);  /* Opcode 81 /7 */
11588   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11589   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11590   ins_pipe( ialu_cr_reg_imm );
11591 %}
11592 
11593 // Cisc-spilled version of cmpI_eReg
11594 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11595   match(Set cr (CmpI op1 (LoadI op2)));
11596 
11597   format %{ "CMP    $op1,$op2" %}
11598   ins_cost(500);
11599   opcode(0x3B);  /* Opcode 3B /r */
11600   ins_encode( OpcP, RegMem( op1, op2) );
11601   ins_pipe( ialu_cr_reg_mem );
11602 %}
11603 
11604 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11605   match(Set cr (CmpI src zero));
11606   effect( DEF cr, USE src );
11607 
11608   format %{ "TEST   $src,$src" %}
11609   opcode(0x85);
11610   ins_encode( OpcP, RegReg( src, src ) );
11611   ins_pipe( ialu_cr_reg_imm );
11612 %}
11613 
11614 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11615   match(Set cr (CmpI (AndI src con) zero));
11616 
11617   format %{ "TEST   $src,$con" %}
11618   opcode(0xF7,0x00);
11619   ins_encode( OpcP, RegOpc(src), Con32(con) );
11620   ins_pipe( ialu_cr_reg_imm );
11621 %}
11622 
11623 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11624   match(Set cr (CmpI (AndI src mem) zero));
11625 
11626   format %{ "TEST   $src,$mem" %}
11627   opcode(0x85);
11628   ins_encode( OpcP, RegMem( src, mem ) );
11629   ins_pipe( ialu_cr_reg_mem );
11630 %}
11631 
11632 // Unsigned compare Instructions; really, same as signed except they
11633 // produce an eFlagsRegU instead of eFlagsReg.
11634 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11635   match(Set cr (CmpU op1 op2));
11636 
11637   format %{ "CMPu   $op1,$op2" %}
11638   opcode(0x3B);  /* Opcode 3B /r */
11639   ins_encode( OpcP, RegReg( op1, op2) );
11640   ins_pipe( ialu_cr_reg_reg );
11641 %}
11642 
11643 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11644   match(Set cr (CmpU op1 op2));
11645 
11646   format %{ "CMPu   $op1,$op2" %}
11647   opcode(0x81,0x07);  /* Opcode 81 /7 */
11648   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11649   ins_pipe( ialu_cr_reg_imm );
11650 %}
11651 
11652 // Cisc-spilled version of cmpU_eReg
11653 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11654   match(Set cr (CmpU op1 (LoadI op2)));
11655 
11656   format %{ "CMPu   $op1,$op2" %}
11657   ins_cost(500);
11658   opcode(0x3B);  /* Opcode 3B /r */
11659   ins_encode( OpcP, RegMem( op1, op2) );
11660   ins_pipe( ialu_cr_reg_mem );
11661 %}
11662 
11663 // // Cisc-spilled version of cmpU_eReg
11664 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11665 //  match(Set cr (CmpU (LoadI op1) op2));
11666 //
11667 //  format %{ "CMPu   $op1,$op2" %}
11668 //  ins_cost(500);
11669 //  opcode(0x39);  /* Opcode 39 /r */
11670 //  ins_encode( OpcP, RegMem( op1, op2) );
11671 //%}
11672 
11673 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11674   match(Set cr (CmpU src zero));
11675 
11676   format %{ "TESTu  $src,$src" %}
11677   opcode(0x85);
11678   ins_encode( OpcP, RegReg( src, src ) );
11679   ins_pipe( ialu_cr_reg_imm );
11680 %}
11681 
11682 // Unsigned pointer compare Instructions
11683 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11684   match(Set cr (CmpP op1 op2));
11685 
11686   format %{ "CMPu   $op1,$op2" %}
11687   opcode(0x3B);  /* Opcode 3B /r */
11688   ins_encode( OpcP, RegReg( op1, op2) );
11689   ins_pipe( ialu_cr_reg_reg );
11690 %}
11691 
11692 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11693   match(Set cr (CmpP op1 op2));
11694 
11695   format %{ "CMPu   $op1,$op2" %}
11696   opcode(0x81,0x07);  /* Opcode 81 /7 */
11697   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11698   ins_pipe( ialu_cr_reg_imm );
11699 %}
11700 
11701 // Cisc-spilled version of cmpP_eReg
11702 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11703   match(Set cr (CmpP op1 (LoadP op2)));
11704 
11705   format %{ "CMPu   $op1,$op2" %}
11706   ins_cost(500);
11707   opcode(0x3B);  /* Opcode 3B /r */
11708   ins_encode( OpcP, RegMem( op1, op2) );
11709   ins_pipe( ialu_cr_reg_mem );
11710 %}
11711 
11712 // // Cisc-spilled version of cmpP_eReg
11713 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11714 //  match(Set cr (CmpP (LoadP op1) op2));
11715 //
11716 //  format %{ "CMPu   $op1,$op2" %}
11717 //  ins_cost(500);
11718 //  opcode(0x39);  /* Opcode 39 /r */
11719 //  ins_encode( OpcP, RegMem( op1, op2) );
11720 //%}
11721 
11722 // Compare raw pointer (used in out-of-heap check).
11723 // Only works because non-oop pointers must be raw pointers
11724 // and raw pointers have no anti-dependencies.
11725 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11726   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11727   match(Set cr (CmpP op1 (LoadP op2)));
11728 
11729   format %{ "CMPu   $op1,$op2" %}
11730   opcode(0x3B);  /* Opcode 3B /r */
11731   ins_encode( OpcP, RegMem( op1, op2) );
11732   ins_pipe( ialu_cr_reg_mem );
11733 %}
11734 
11735 //
11736 // This will generate a signed flags result.  This should be ok,
11737 // since any compare against zero should be EQ/NE.
11738 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11739   match(Set cr (CmpP src zero));
11740 
11741   format %{ "TEST   $src,$src" %}
11742   opcode(0x85);
11743   ins_encode( OpcP, RegReg( src, src ) );
11744   ins_pipe( ialu_cr_reg_imm );
11745 %}
11746 
11747 // Cisc-spilled version of testP_reg
11748 // This will generate a signed flags result.  This should be ok,
11749 // since any compare against zero should be EQ/NE.
11750 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11751   match(Set cr (CmpP (LoadP op) zero));
11752 
11753   format %{ "TEST   $op,0xFFFFFFFF" %}
11754   ins_cost(500);
11755   opcode(0xF7);               /* Opcode F7 /0 */
11756   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11757   ins_pipe( ialu_cr_reg_imm );
11758 %}
11759 
11760 // Yanked all unsigned pointer compare operations.
11761 // Pointer compares are done with CmpP which is already unsigned.
11762 
11763 //----------Max and Min--------------------------------------------------------
11764 // Min Instructions
11765 ////
11766 //   *** Min and Max using the conditional move are slower than the
11767 //   *** branch version on a Pentium III.
11768 // // Conditional move for min
11769 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11770 //  effect( USE_DEF op2, USE op1, USE cr );
11771 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11772 //  opcode(0x4C,0x0F);
11773 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11774 //  ins_pipe( pipe_cmov_reg );
11775 //%}
11776 //
11777 //// Min Register with Register (P6 version)
11778 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11779 //  predicate(VM_Version::supports_cmov() );
11780 //  match(Set op2 (MinI op1 op2));
11781 //  ins_cost(200);
11782 //  expand %{
11783 //    eFlagsReg cr;
11784 //    compI_eReg(cr,op1,op2);
11785 //    cmovI_reg_lt(op2,op1,cr);
11786 //  %}
11787 //%}
11788 
11789 // Min Register with Register (generic version)
11790 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11791   match(Set dst (MinI dst src));
11792   effect(KILL flags);
11793   ins_cost(300);
11794 
11795   format %{ "MIN    $dst,$src" %}
11796   opcode(0xCC);
11797   ins_encode( min_enc(dst,src) );
11798   ins_pipe( pipe_slow );
11799 %}
11800 
11801 // Max Register with Register
11802 //   *** Min and Max using the conditional move are slower than the
11803 //   *** branch version on a Pentium III.
11804 // // Conditional move for max
11805 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11806 //  effect( USE_DEF op2, USE op1, USE cr );
11807 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11808 //  opcode(0x4F,0x0F);
11809 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11810 //  ins_pipe( pipe_cmov_reg );
11811 //%}
11812 //
11813 // // Max Register with Register (P6 version)
11814 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11815 //  predicate(VM_Version::supports_cmov() );
11816 //  match(Set op2 (MaxI op1 op2));
11817 //  ins_cost(200);
11818 //  expand %{
11819 //    eFlagsReg cr;
11820 //    compI_eReg(cr,op1,op2);
11821 //    cmovI_reg_gt(op2,op1,cr);
11822 //  %}
11823 //%}
11824 
11825 // Max Register with Register (generic version)
11826 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11827   match(Set dst (MaxI dst src));
11828   effect(KILL flags);
11829   ins_cost(300);
11830 
11831   format %{ "MAX    $dst,$src" %}
11832   opcode(0xCC);
11833   ins_encode( max_enc(dst,src) );
11834   ins_pipe( pipe_slow );
11835 %}
11836 
11837 // ============================================================================
11838 // Counted Loop limit node, which represents the exact final iterator value.
11839 // Note: the resulting value should fit into the integer range, since
11840 // counted loops have a limit check for overflow.
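//
// A worked example of the limit computation below (illustrative numbers only,
// not taken from any particular loop): for init = 0, limit = 10, stride = 3
// the exact final iterator value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3)  =  3 * 4  =  12   (integer division),
// i.e. the first value >= limit that the induction variable actually reaches
// (the loop visits 0, 3, 6, 9 and exits with 12).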
11841 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11842   match(Set limit (LoopLimit (Binary init limit) stride));
11843   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11844   ins_cost(300);
11845 
11846   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11847   ins_encode %{
11848     int strd = (int)$stride$$constant;
11849     assert(strd != 1 && strd != -1, "sanity");
11850     int m1 = (strd > 0) ? 1 : -1;
11851     // Convert limit to long (EDX:EAX)
11852     __ cdql();
11853     // Convert init to long (init:tmp)
11854     __ movl($tmp$$Register, $init$$Register);
11855     __ sarl($tmp$$Register, 31);
11856     // $limit - $init
11857     __ subl($limit$$Register, $init$$Register);
11858     __ sbbl($limit_hi$$Register, $tmp$$Register);
11859     // + ($stride - 1)
11860     if (strd > 0) {
11861       __ addl($limit$$Register, (strd - 1));
11862       __ adcl($limit_hi$$Register, 0);
11863       __ movl($tmp$$Register, strd);
11864     } else {
11865       __ addl($limit$$Register, (strd + 1));
11866       __ adcl($limit_hi$$Register, -1);
11867       __ lneg($limit_hi$$Register, $limit$$Register);
11868       __ movl($tmp$$Register, -strd);
11869     }
11870     // signed division: (EDX:EAX) / pos_stride
11871     __ idivl($tmp$$Register);
11872     if (strd < 0) {
11873       // restore sign
11874       __ negl($tmp$$Register);
11875     }
11876     // (EAX) * stride
11877     __ mull($tmp$$Register);
11878     // + init (ignore upper bits)
11879     __ addl($limit$$Register, $init$$Register);
11880   %}
11881   ins_pipe( pipe_slow );
11882 %}
11883 
11884 // ============================================================================
11885 // Branch Instructions
11886 // Jump Table
11887 instruct jumpXtnd(rRegI switch_val) %{
11888   match(Jump switch_val);
11889   ins_cost(350);
11890   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11891   ins_encode %{
11892     // Jump to Address(table_base + switch_reg)
11893     Address index(noreg, $switch_val$$Register, Address::times_1);
11894     __ jump(ArrayAddress($constantaddress, index));
11895   %}
11896   ins_pipe(pipe_jmp);
11897 %}
11898 
11899 // Jump Direct - Label defines a relative address from JMP+1
11900 instruct jmpDir(label labl) %{
11901   match(Goto);
11902   effect(USE labl);
11903 
11904   ins_cost(300);
11905   format %{ "JMP    $labl" %}
11906   size(5);
11907   ins_encode %{
11908     Label* L = $labl$$label;
11909     __ jmp(*L, false); // Always long jump
11910   %}
11911   ins_pipe( pipe_jmp );
11912 %}
11913 
11914 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11915 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11916   match(If cop cr);
11917   effect(USE labl);
11918 
11919   ins_cost(300);
11920   format %{ "J$cop    $labl" %}
11921   size(6);
11922   ins_encode %{
11923     Label* L = $labl$$label;
11924     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11925   %}
11926   ins_pipe( pipe_jcc );
11927 %}
11928 
11929 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11930 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11931   match(CountedLoopEnd cop cr);
11932   effect(USE labl);
11933 
11934   ins_cost(300);
11935   format %{ "J$cop    $labl\t# Loop end" %}
11936   size(6);
11937   ins_encode %{
11938     Label* L = $labl$$label;
11939     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11940   %}
11941   ins_pipe( pipe_jcc );
11942 %}
11943 
11944 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11945 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11946   match(CountedLoopEnd cop cmp);
11947   effect(USE labl);
11948 
11949   ins_cost(300);
11950   format %{ "J$cop,u  $labl\t# Loop end" %}
11951   size(6);
11952   ins_encode %{
11953     Label* L = $labl$$label;
11954     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11955   %}
11956   ins_pipe( pipe_jcc );
11957 %}
11958 
11959 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11960   match(CountedLoopEnd cop cmp);
11961   effect(USE labl);
11962 
11963   ins_cost(200);
11964   format %{ "J$cop,u  $labl\t# Loop end" %}
11965   size(6);
11966   ins_encode %{
11967     Label* L = $labl$$label;
11968     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11969   %}
11970   ins_pipe( pipe_jcc );
11971 %}
11972 
11973 // Jump Direct Conditional - using unsigned comparison
11974 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11975   match(If cop cmp);
11976   effect(USE labl);
11977 
11978   ins_cost(300);
11979   format %{ "J$cop,u  $labl" %}
11980   size(6);
11981   ins_encode %{
11982     Label* L = $labl$$label;
11983     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11984   %}
11985   ins_pipe(pipe_jcc);
11986 %}
11987 
11988 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11989   match(If cop cmp);
11990   effect(USE labl);
11991 
11992   ins_cost(200);
11993   format %{ "J$cop,u  $labl" %}
11994   size(6);
11995   ins_encode %{
11996     Label* L = $labl$$label;
11997     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11998   %}
11999   ins_pipe(pipe_jcc);
12000 %}
12001 
12002 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12003   match(If cop cmp);
12004   effect(USE labl);
12005 
12006   ins_cost(200);
12007   format %{ $$template
12008     if ($cop$$cmpcode == Assembler::notEqual) {
12009       $$emit$$"JP,u   $labl\n\t"
12010       $$emit$$"J$cop,u   $labl"
12011     } else {
12012       $$emit$$"JP,u   done\n\t"
12013       $$emit$$"J$cop,u   $labl\n\t"
12014       $$emit$$"done:"
12015     }
12016   %}
12017   ins_encode %{
12018     Label* l = $labl$$label;
12019     if ($cop$$cmpcode == Assembler::notEqual) {
12020       __ jcc(Assembler::parity, *l, false);
12021       __ jcc(Assembler::notEqual, *l, false);
12022     } else if ($cop$$cmpcode == Assembler::equal) {
12023       Label done;
12024       __ jccb(Assembler::parity, done);
12025       __ jcc(Assembler::equal, *l, false);
12026       __ bind(done);
12027     } else {
12028        ShouldNotReachHere();
12029     }
12030   %}
12031   ins_pipe(pipe_jcc);
12032 %}
12033 
12034 // ============================================================================
12035 // The second (slow) half of a subtype check.  Scan the subklass's secondary
12036 // superklass array for an instance of the superklass.  Set a hidden internal
12037 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12038 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
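//
// Roughly, the scan behaves like this C-style sketch (illustrative only;
// field names follow the format strings below, not the actual headers):
//
//   elem = sub->secondary_supers data;                         // EDI
//   for (n = sub->secondary_supers length; n != 0; n--, elem++) {  // ECX
//     if (*elem == super) {                                    // REPNE SCASD hit
//       sub->secondary_super_cache = super;
//       return 0;                                              // hit: zero
//     }
//   }
//   return non_zero;                                           // miss: NZ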
12039 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12040   match(Set result (PartialSubtypeCheck sub super));
12041   effect( KILL rcx, KILL cr );
12042 
12043   ins_cost(1100);  // slightly larger than the next version
12044   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12045             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12046             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12047             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12048             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12049             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12050             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12051      "miss:\t" %}
12052 
12053   opcode(0x1); // Force a XOR of EDI
12054   ins_encode( enc_PartialSubtypeCheck() );
12055   ins_pipe( pipe_slow );
12056 %}
12057 
12058 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12059   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12060   effect( KILL rcx, KILL result );
12061 
12062   ins_cost(1000);
12063   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12064             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12065             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12066             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12067             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12068             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12069      "miss:\t" %}
12070 
12071   opcode(0x0);  // No need to XOR EDI
12072   ins_encode( enc_PartialSubtypeCheck() );
12073   ins_pipe( pipe_slow );
12074 %}
12075 
12076 // ============================================================================
12077 // Branch Instructions -- short offset versions
12078 //
12079 // These instructions are used to replace jumps of a long offset (the default
12080 // match) with jumps of a shorter offset.  These instructions are all tagged
12081 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12082 // match rules in general matching.  Instead, the ADLC generates a conversion
12083 // method in the MachNode which can be used to do in-place replacement of the
12084 // long variant with the shorter variant.  The compiler decides whether the
12085 // short form can be used via the is_short_branch_offset() predicate in the
12086 // machine-specific code section of this file.
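//
// For reference, the savings come from the x86 encodings: a long JMP is
// E9 rel32 (5 bytes) and a long Jcc is 0F 8x rel32 (6 bytes), while the
// short forms below are EB rel8 and 7x rel8 (2 bytes each) -- matching the
// size() attributes on the long and short variants.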
12087 
12088 // Jump Direct - Label defines a relative address from JMP+1
12089 instruct jmpDir_short(label labl) %{
12090   match(Goto);
12091   effect(USE labl);
12092 
12093   ins_cost(300);
12094   format %{ "JMP,s  $labl" %}
12095   size(2);
12096   ins_encode %{
12097     Label* L = $labl$$label;
12098     __ jmpb(*L);
12099   %}
12100   ins_pipe( pipe_jmp );
12101   ins_short_branch(1);
12102 %}
12103 
12104 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12105 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12106   match(If cop cr);
12107   effect(USE labl);
12108 
12109   ins_cost(300);
12110   format %{ "J$cop,s  $labl" %}
12111   size(2);
12112   ins_encode %{
12113     Label* L = $labl$$label;
12114     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12115   %}
12116   ins_pipe( pipe_jcc );
12117   ins_short_branch(1);
12118 %}
12119 
12120 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12121 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12122   match(CountedLoopEnd cop cr);
12123   effect(USE labl);
12124 
12125   ins_cost(300);
12126   format %{ "J$cop,s  $labl\t# Loop end" %}
12127   size(2);
12128   ins_encode %{
12129     Label* L = $labl$$label;
12130     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12131   %}
12132   ins_pipe( pipe_jcc );
12133   ins_short_branch(1);
12134 %}
12135 
12136 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12137 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12138   match(CountedLoopEnd cop cmp);
12139   effect(USE labl);
12140 
12141   ins_cost(300);
12142   format %{ "J$cop,us $labl\t# Loop end" %}
12143   size(2);
12144   ins_encode %{
12145     Label* L = $labl$$label;
12146     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12147   %}
12148   ins_pipe( pipe_jcc );
12149   ins_short_branch(1);
12150 %}
12151 
12152 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12153   match(CountedLoopEnd cop cmp);
12154   effect(USE labl);
12155 
12156   ins_cost(300);
12157   format %{ "J$cop,us $labl\t# Loop end" %}
12158   size(2);
12159   ins_encode %{
12160     Label* L = $labl$$label;
12161     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12162   %}
12163   ins_pipe( pipe_jcc );
12164   ins_short_branch(1);
12165 %}
12166 
12167 // Jump Direct Conditional - using unsigned comparison
12168 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12169   match(If cop cmp);
12170   effect(USE labl);
12171 
12172   ins_cost(300);
12173   format %{ "J$cop,us $labl" %}
12174   size(2);
12175   ins_encode %{
12176     Label* L = $labl$$label;
12177     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12178   %}
12179   ins_pipe( pipe_jcc );
12180   ins_short_branch(1);
12181 %}
12182 
12183 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12184   match(If cop cmp);
12185   effect(USE labl);
12186 
12187   ins_cost(300);
12188   format %{ "J$cop,us $labl" %}
12189   size(2);
12190   ins_encode %{
12191     Label* L = $labl$$label;
12192     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12193   %}
12194   ins_pipe( pipe_jcc );
12195   ins_short_branch(1);
12196 %}
12197 
12198 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12199   match(If cop cmp);
12200   effect(USE labl);
12201 
12202   ins_cost(300);
12203   format %{ $$template
12204     if ($cop$$cmpcode == Assembler::notEqual) {
12205       $$emit$$"JP,u,s   $labl\n\t"
12206       $$emit$$"J$cop,u,s   $labl"
12207     } else {
12208       $$emit$$"JP,u,s   done\n\t"
12209       $$emit$$"J$cop,u,s  $labl\n\t"
12210       $$emit$$"done:"
12211     }
12212   %}
12213   size(4);
12214   ins_encode %{
12215     Label* l = $labl$$label;
12216     if ($cop$$cmpcode == Assembler::notEqual) {
12217       __ jccb(Assembler::parity, *l);
12218       __ jccb(Assembler::notEqual, *l);
12219     } else if ($cop$$cmpcode == Assembler::equal) {
12220       Label done;
12221       __ jccb(Assembler::parity, done);
12222       __ jccb(Assembler::equal, *l);
12223       __ bind(done);
12224     } else {
12225        ShouldNotReachHere();
12226     }
12227   %}
12228   ins_pipe(pipe_jcc);
12229   ins_short_branch(1);
12230 %}
12231 
12232 // ============================================================================
12233 // Long Compare
12234 //
12235 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12236 // is tricky.  The flavor of compare used depends on whether we are testing
12237 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12238 // The GE test is the negated LT test.  The LE test can be had by commuting
12239 // the operands (yielding a GE test) and then negating; negate again for the
12240 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12241 // NE test is negated from that.
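//
// A minimal C-style sketch of the half-wise tests used below (illustrative
// only; 'lo' is the unsigned low word, 'hi' the signed high word):
//   x == 0   <=>  (x.lo | x.hi) == 0                              // OR the halves
//   x <  y   <=>  x.hi < y.hi ||
//                 (x.hi == y.hi && (uint32_t)x.lo < (uint32_t)y.lo) // hi signed, lo unsigned
// GE is the negation of LT; LE and GT come from commuting the operands first.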
12242 
12243 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12244 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12245 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12246 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12247 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12248 // foo match ends up with the wrong leaf.  One fix is to not match both
12249 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12250 // both forms beat the trinary form of long-compare and both are very useful
12251 // on Intel which has so few registers.
12252 
12253 // Manifest a CmpL result in an integer register.  Very painful.
12254 // This is the test to avoid.
12255 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12256   match(Set dst (CmpL3 src1 src2));
12257   effect( KILL flags );
12258   ins_cost(1000);
12259   format %{ "XOR    $dst,$dst\n\t"
12260             "CMP    $src1.hi,$src2.hi\n\t"
12261             "JLT,s  m_one\n\t"
12262             "JGT,s  p_one\n\t"
12263             "CMP    $src1.lo,$src2.lo\n\t"
12264             "JB,s   m_one\n\t"
12265             "JEQ,s  done\n"
12266     "p_one:\tINC    $dst\n\t"
12267             "JMP,s  done\n"
12268     "m_one:\tDEC    $dst\n"
12269      "done:" %}
12270   ins_encode %{
12271     Label p_one, m_one, done;
12272     __ xorptr($dst$$Register, $dst$$Register);
12273     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12274     __ jccb(Assembler::less,    m_one);
12275     __ jccb(Assembler::greater, p_one);
12276     __ cmpl($src1$$Register, $src2$$Register);
12277     __ jccb(Assembler::below,   m_one);
12278     __ jccb(Assembler::equal,   done);
12279     __ bind(p_one);
12280     __ incrementl($dst$$Register);
12281     __ jmpb(done);
12282     __ bind(m_one);
12283     __ decrementl($dst$$Register);
12284     __ bind(done);
12285   %}
12286   ins_pipe( pipe_slow );
12287 %}
12288 
12289 //======
12290 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12291 // compares.  Can be used for LE or GT compares by reversing arguments.
12292 // NOT GOOD FOR EQ/NE tests.
12293 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12294   match( Set flags (CmpL src zero ));
12295   ins_cost(100);
12296   format %{ "TEST   $src.hi,$src.hi" %}
12297   opcode(0x85);
12298   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12299   ins_pipe( ialu_cr_reg_reg );
12300 %}
12301 
12302 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12303 // compares.  Can be used for LE or GT compares by reversing arguments.
12304 // NOT GOOD FOR EQ/NE tests.
12305 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12306   match( Set flags (CmpL src1 src2 ));
12307   effect( TEMP tmp );
12308   ins_cost(300);
12309   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12310             "MOV    $tmp,$src1.hi\n\t"
12311             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12312   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12313   ins_pipe( ialu_cr_reg_reg );
12314 %}
12315 
12316 // Long compares reg < zero/req OR reg >= zero/req.
12317 // Just a wrapper for a normal branch, plus the predicate test.
12318 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12319   match(If cmp flags);
12320   effect(USE labl);
12321   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12322   expand %{
12323     jmpCon(cmp,flags,labl);    // JLT or JGE...
12324   %}
12325 %}
12326 
12327 // Compare 2 longs and CMOVE longs.
12328 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12329   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12330   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12331   ins_cost(400);
12332   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12333             "CMOV$cmp $dst.hi,$src.hi" %}
12334   opcode(0x0F,0x40);
12335   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12336   ins_pipe( pipe_cmov_reg_long );
12337 %}
12338 
12339 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12340   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12341   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12342   ins_cost(500);
12343   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12344             "CMOV$cmp $dst.hi,$src.hi" %}
12345   opcode(0x0F,0x40);
12346   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12347   ins_pipe( pipe_cmov_reg_long );
12348 %}
12349 
12350 // Compare 2 longs and CMOVE ints.
12351 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12352   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12353   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12354   ins_cost(200);
12355   format %{ "CMOV$cmp $dst,$src" %}
12356   opcode(0x0F,0x40);
12357   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12358   ins_pipe( pipe_cmov_reg );
12359 %}
12360 
12361 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12362   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12363   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12364   ins_cost(250);
12365   format %{ "CMOV$cmp $dst,$src" %}
12366   opcode(0x0F,0x40);
12367   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12368   ins_pipe( pipe_cmov_mem );
12369 %}
12370 
12371 // Compare 2 longs and CMOVE ints.
12372 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12373   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12374   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12375   ins_cost(200);
12376   format %{ "CMOV$cmp $dst,$src" %}
12377   opcode(0x0F,0x40);
12378   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12379   ins_pipe( pipe_cmov_reg );
12380 %}
12381 
12382 // Compare 2 longs and CMOVE doubles
12383 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12384   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12385   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12386   ins_cost(200);
12387   expand %{
12388     fcmovDPR_regS(cmp,flags,dst,src);
12389   %}
12390 %}
12391 
12392 // Compare 2 longs and CMOVE doubles
12393 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12394   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12395   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12396   ins_cost(200);
12397   expand %{
12398     fcmovD_regS(cmp,flags,dst,src);
12399   %}
12400 %}
12401 
12402 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12403   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12404   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12405   ins_cost(200);
12406   expand %{
12407     fcmovFPR_regS(cmp,flags,dst,src);
12408   %}
12409 %}
12410 
12411 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12412   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12413   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12414   ins_cost(200);
12415   expand %{
12416     fcmovF_regS(cmp,flags,dst,src);
12417   %}
12418 %}
12419 
12420 //======
12421 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12422 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12423   match( Set flags (CmpL src zero ));
12424   effect(TEMP tmp);
12425   ins_cost(200);
12426   format %{ "MOV    $tmp,$src.lo\n\t"
12427             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12428   ins_encode( long_cmp_flags0( src, tmp ) );
12429   ins_pipe( ialu_reg_reg_long );
12430 %}
12431 
12432 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12433 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12434   match( Set flags (CmpL src1 src2 ));
12435   ins_cost(200+300);
12436   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12437             "JNE,s  skip\n\t"
12438             "CMP    $src1.hi,$src2.hi\n\t"
12439      "skip:\t" %}
12440   ins_encode( long_cmp_flags1( src1, src2 ) );
12441   ins_pipe( ialu_cr_reg_reg );
12442 %}
12443 
12444 // Long compare reg == zero/reg OR reg != zero/reg
12445 // Just a wrapper for a normal branch, plus the predicate test.
12446 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12447   match(If cmp flags);
12448   effect(USE labl);
12449   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12450   expand %{
12451     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12452   %}
12453 %}
12454 
12455 // Compare 2 longs and CMOVE longs.
12456 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12457   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12458   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12459   ins_cost(400);
12460   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12461             "CMOV$cmp $dst.hi,$src.hi" %}
12462   opcode(0x0F,0x40);
12463   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12464   ins_pipe( pipe_cmov_reg_long );
12465 %}
12466 
12467 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12468   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12469   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12470   ins_cost(500);
12471   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12472             "CMOV$cmp $dst.hi,$src.hi" %}
12473   opcode(0x0F,0x40);
12474   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12475   ins_pipe( pipe_cmov_reg_long );
12476 %}
12477 
12478 // Compare 2 longs and CMOVE ints.
12479 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12480   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12481   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12482   ins_cost(200);
12483   format %{ "CMOV$cmp $dst,$src" %}
12484   opcode(0x0F,0x40);
12485   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12486   ins_pipe( pipe_cmov_reg );
12487 %}
12488 
12489 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12490   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12491   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12492   ins_cost(250);
12493   format %{ "CMOV$cmp $dst,$src" %}
12494   opcode(0x0F,0x40);
12495   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12496   ins_pipe( pipe_cmov_mem );
12497 %}
12498 
12499 // Compare 2 longs and CMOVE ptrs.
12500 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12501   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12502   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12503   ins_cost(200);
12504   format %{ "CMOV$cmp $dst,$src" %}
12505   opcode(0x0F,0x40);
12506   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12507   ins_pipe( pipe_cmov_reg );
12508 %}
12509 
12510 // Compare 2 longs and CMOVE doubles
12511 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12512   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12513   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12514   ins_cost(200);
12515   expand %{
12516     fcmovDPR_regS(cmp,flags,dst,src);
12517   %}
12518 %}
12519 
12520 // Compare 2 longs and CMOVE doubles
12521 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12522   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12523   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12524   ins_cost(200);
12525   expand %{
12526     fcmovD_regS(cmp,flags,dst,src);
12527   %}
12528 %}
12529 
12530 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12531   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12532   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12533   ins_cost(200);
12534   expand %{
12535     fcmovFPR_regS(cmp,flags,dst,src);
12536   %}
12537 %}
12538 
12539 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12540   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12541   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12542   ins_cost(200);
12543   expand %{
12544     fcmovF_regS(cmp,flags,dst,src);
12545   %}
12546 %}
12547 
12548 //======
12549 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12550 // Same idea as cmpL_reg_flags_LEGT below, except the second operand is zero, so src must be negated (0 - src), which requires a commuted test.
12551 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12552   match( Set flags (CmpL src zero ));
12553   effect( TEMP tmp );
12554   ins_cost(300);
12555   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12556             "CMP    $tmp,$src.lo\n\t"
12557             "SBB    $tmp,$src.hi\n\t" %}
12558   ins_encode( long_cmp_flags3(src, tmp) );
12559   ins_pipe( ialu_reg_reg_long );
12560 %}
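
// The XOR/CMP/SBB sequence above computes 0 - src through the 32-bit borrow
// chain, so the resulting flags describe a comparison of zero against src and
// the branch condition has to be commuted: "src <= 0" is taken when
// "0 >= src".  Minimal illustrative sketch only (plain C++, not matcher code):
//
//   bool le_zero64(int64_t src) {
//     return 0 >= src;   // commuted form of "src <= 0" actually tested
//   }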
12561 
12562 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12563 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12564 // requires a commuted test to get the same result.
12565 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12566   match( Set flags (CmpL src1 src2 ));
12567   effect( TEMP tmp );
12568   ins_cost(300);
12569   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12570             "MOV    $tmp,$src2.hi\n\t"
12571             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12572   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12573   ins_pipe( ialu_cr_reg_reg );
12574 %}
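
// With the operands swapped, the CMP lo / SBB hi pair above leaves flags
// describing src2 - src1, so the original "src1 <= src2" is branched on as
// the commuted "src2 >= src1" (a JGE-style sign/overflow test on the swapped
// subtraction); this is the condition the cmpOp_commute operand emits.
// Minimal illustrative sketch only (plain C++, not matcher code):
//
//   bool le64(int64_t src1, int64_t src2) {
//     return src2 >= src1;   // commuted condition actually branched on
//   }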
12575 
12576 // Long compares reg <= zero/reg OR reg > zero/reg.
12577 // Just a wrapper for a normal branch, plus the predicate test
12578 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12579   match(If cmp flags);
12580   effect(USE labl);
12581   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12582   ins_cost(300);
12583   expand %{
12584     jmpCon(cmp,flags,labl);    // JGT or JLE...
12585   %}
12586 %}
12587 
12588 // Compare 2 longs and CMOVE longs.
12589 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12590   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12591   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12592   ins_cost(400);
12593   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12594             "CMOV$cmp $dst.hi,$src.hi" %}
12595   opcode(0x0F,0x40);
12596   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12597   ins_pipe( pipe_cmov_reg_long );
12598 %}
12599 
12600 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12601   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12602   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12603   ins_cost(500);
12604   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12605             "CMOV$cmp $dst.hi,$src.hi+4" %}
12606   opcode(0x0F,0x40);
12607   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12608   ins_pipe( pipe_cmov_reg_long );
12609 %}
12610 
12611 // Compare 2 longs and CMOVE ints.
12612 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12613   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12614   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12615   ins_cost(200);
12616   format %{ "CMOV$cmp $dst,$src" %}
12617   opcode(0x0F,0x40);
12618   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12619   ins_pipe( pipe_cmov_reg );
12620 %}
12621 
12622 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12623   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12624   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12625   ins_cost(250);
12626   format %{ "CMOV$cmp $dst,$src" %}
12627   opcode(0x0F,0x40);
12628   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12629   ins_pipe( pipe_cmov_mem );
12630 %}
12631 
12632 // Compare 2 longs and CMOVE ptrs.
12633 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12634   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12635   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12636   ins_cost(200);
12637   format %{ "CMOV$cmp $dst,$src" %}
12638   opcode(0x0F,0x40);
12639   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12640   ins_pipe( pipe_cmov_reg );
12641 %}
12642 
12643 // Compare 2 longs and CMOVE doubles
12644 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12645   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12646   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12647   ins_cost(200);
12648   expand %{
12649     fcmovDPR_regS(cmp,flags,dst,src);
12650   %}
12651 %}
12652 
12653 // Compare 2 longs and CMOVE doubles
12654 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12655   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12656   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12657   ins_cost(200);
12658   expand %{
12659     fcmovD_regS(cmp,flags,dst,src);
12660   %}
12661 %}
12662 
12663 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12664   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12665   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12666   ins_cost(200);
12667   expand %{
12668     fcmovFPR_regS(cmp,flags,dst,src);
12669   %}
12670 %}
12671 
12672 
12673 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12674   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12675   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12676   ins_cost(200);
12677   expand %{
12678     fcmovF_regS(cmp,flags,dst,src);
12679   %}
12680 %}
12681 
12682 
12683 // ============================================================================
12684 // Procedure Call/Return Instructions
12685 // Call Java Static Instruction
12686 // Note: If this code changes, the corresponding ret_addr_offset() and
12687 //       compute_padding() functions will have to be adjusted.
12688 instruct CallStaticJavaDirect(method meth) %{
12689   match(CallStaticJava);
12690   effect(USE meth);
12691 
12692   ins_cost(300);
12693   format %{ "CALL,static " %}
12694   opcode(0xE8); /* E8 cd */
12695   ins_encode( pre_call_resets,
12696               Java_Static_Call( meth ),
12697               call_epilog,
12698               post_call_FPU );
12699   ins_pipe( pipe_slow );
12700   ins_alignment(4);
12701 %}
12702 
12703 // Call Java Dynamic Instruction
12704 // Note: If this code changes, the corresponding ret_addr_offset() and
12705 //       compute_padding() functions will have to be adjusted.
12706 instruct CallDynamicJavaDirect(method meth) %{
12707   match(CallDynamicJava);
12708   effect(USE meth);
12709 
12710   ins_cost(300);
12711   format %{ "MOV    EAX,(oop)-1\n\t"
12712             "CALL,dynamic" %}
12713   opcode(0xE8); /* E8 cd */
12714   ins_encode( pre_call_resets,
12715               Java_Dynamic_Call( meth ),
12716               call_epilog,
12717               post_call_FPU );
12718   ins_pipe( pipe_slow );
12719   ins_alignment(4);
12720 %}
12721 
12722 // Call Runtime Instruction
12723 instruct CallRuntimeDirect(method meth) %{
12724   match(CallRuntime );
12725   effect(USE meth);
12726 
12727   ins_cost(300);
12728   format %{ "CALL,runtime " %}
12729   opcode(0xE8); /* E8 cd */
12730   // Use FFREEs to clear entries in float stack
12731   ins_encode( pre_call_resets,
12732               FFree_Float_Stack_All,
12733               Java_To_Runtime( meth ),
12734               post_call_FPU );
12735   ins_pipe( pipe_slow );
12736 %}
12737 
12738 // Call runtime without safepoint
12739 instruct CallLeafDirect(method meth) %{
12740   match(CallLeaf);
12741   effect(USE meth);
12742 
12743   ins_cost(300);
12744   format %{ "CALL_LEAF,runtime " %}
12745   opcode(0xE8); /* E8 cd */
12746   ins_encode( pre_call_resets,
12747               FFree_Float_Stack_All,
12748               Java_To_Runtime( meth ),
12749               Verify_FPU_For_Leaf, post_call_FPU );
12750   ins_pipe( pipe_slow );
12751 %}
12752 
12753 instruct CallLeafNoFPDirect(method meth) %{
12754   match(CallLeafNoFP);
12755   effect(USE meth);
12756 
12757   ins_cost(300);
12758   format %{ "CALL_LEAF_NOFP,runtime " %}
12759   opcode(0xE8); /* E8 cd */
12760   ins_encode(Java_To_Runtime(meth));
12761   ins_pipe( pipe_slow );
12762 %}
12763 
12764 
12765 // Return Instruction
12766 // Remove the return address & jump to it.
12767 instruct Ret() %{
12768   match(Return);
12769   format %{ "RET" %}
12770   opcode(0xC3);
12771   ins_encode(OpcP);
12772   ins_pipe( pipe_jmp );
12773 %}
12774 
12775 // Tail Call; Jump from runtime stub to Java code.
12776 // Also known as an 'interprocedural jump'.
12777 // Target of jump will eventually return to caller.
12778 // TailJump below removes the return address.
12779 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12780   match(TailCall jump_target method_oop );
12781   ins_cost(300);
12782   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12783   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12784   ins_encode( OpcP, RegOpc(jump_target) );
12785   ins_pipe( pipe_jmp );
12786 %}
12787 
12788 
12789 // Tail Jump; remove the return address; jump to target.
12790 // TailCall above leaves the return address around.
12791 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12792   match( TailJump jump_target ex_oop );
12793   ins_cost(300);
12794   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12795             "JMP    $jump_target " %}
12796   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12797   ins_encode( enc_pop_rdx,
12798               OpcP, RegOpc(jump_target) );
12799   ins_pipe( pipe_jmp );
12800 %}
12801 
12802 // Create exception oop: created by stack-crawling runtime code.
12803 // Created exception is now available to this handler, and is set up
12804 // just prior to jumping to this handler.  No code emitted.
12805 instruct CreateException( eAXRegP ex_oop )
12806 %{
12807   match(Set ex_oop (CreateEx));
12808 
12809   size(0);
12810   // use the following format syntax
12811   format %{ "# exception oop is in EAX; no code emitted" %}
12812   ins_encode();
12813   ins_pipe( empty );
12814 %}
12815 
12816 
12817 // Rethrow exception:
12818 // The exception oop will come in the first argument position.
12819 // Then JUMP (not call) to the rethrow stub code.
12820 instruct RethrowException()
12821 %{
12822   match(Rethrow);
12823 
12824   // use the following format syntax
12825   format %{ "JMP    rethrow_stub" %}
12826   ins_encode(enc_rethrow);
12827   ins_pipe( pipe_jmp );
12828 %}
12829 
12830 // inlined locking and unlocking
12831 
12832 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12833   predicate(Compile::current()->use_rtm());
12834   match(Set cr (FastLock object box));
12835   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12836   ins_cost(300);
12837   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12838   ins_encode %{
12839     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12840                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12841                  _counters, _rtm_counters, _stack_rtm_counters,
12842                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12843                  true, ra_->C->profile_rtm());
12844   %}
12845   ins_pipe(pipe_slow);
12846 %}
12847 
12848 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12849   predicate(!Compile::current()->use_rtm());
12850   match(Set cr (FastLock object box));
12851   effect(TEMP tmp, TEMP scr, USE_KILL box);
12852   ins_cost(300);
12853   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12854   ins_encode %{
12855     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12856                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12857   %}
12858   ins_pipe(pipe_slow);
12859 %}
12860 
12861 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12862   match(Set cr (FastUnlock object box));
12863   effect(TEMP tmp, USE_KILL box);
12864   ins_cost(300);
12865   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12866   ins_encode %{
12867     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12868   %}
12869   ins_pipe(pipe_slow);
12870 %}
12871 
12872 
12873 
12874 // ============================================================================
12875 // Safepoint Instruction
12876 instruct safePoint_poll(eFlagsReg cr) %{
12877   match(SafePoint);
12878   effect(KILL cr);
12879 
12880   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12881   // On SPARC that might be acceptable as we can generate the address with
12882   // just a sethi, saving an or.  By polling at offset 0 we can end up
12883 // putting additional pressure on index 0 of the D$.  Because of
12884   // alignment (just like the situation at hand) the lower indices tend
12885   // to see more traffic.  It'd be better to change the polling address
12886   // to offset 0 of the last $line in the polling page.
12887 
12888   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12889   ins_cost(125);
12890   size(6);
12891   ins_encode( Safepoint_Poll() );
12892   ins_pipe( ialu_reg_mem );
12893 %}
12894 
12895 
12896 // ============================================================================
12897 // This name is KNOWN by the ADLC and cannot be changed.
12898 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12899 // for this guy.
12900 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12901   match(Set dst (ThreadLocal));
12902   effect(DEF dst, KILL cr);
12903 
12904   format %{ "MOV    $dst, Thread::current()" %}
12905   ins_encode %{
12906     Register dstReg = as_Register($dst$$reg);
12907     __ get_thread(dstReg);
12908   %}
12909   ins_pipe( ialu_reg_fat );
12910 %}
12911 
12912 
12913 
12914 //----------PEEPHOLE RULES-----------------------------------------------------
12915 // These must follow all instruction definitions as they use the names
12916 // defined in the instructions definitions.
12917 //
12918 // peepmatch ( root_instr_name [preceding_instruction]* );
12919 //
12920 // peepconstraint %{
12921 // (instruction_number.operand_name relational_op instruction_number.operand_name
12922 //  [, ...] );
12923 // // instruction numbers are zero-based using left to right order in peepmatch
12924 //
12925 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12926 // // provide an instruction_number.operand_name for each operand that appears
12927 // // in the replacement instruction's match rule
12928 //
12929 // ---------VM FLAGS---------------------------------------------------------
12930 //
12931 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12932 //
12933 // Each peephole rule is given an identifying number starting with zero and
12934 // increasing by one in the order seen by the parser.  An individual peephole
12935 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12936 // on the command-line.
12937 //
12938 // ---------CURRENT LIMITATIONS----------------------------------------------
12939 //
12940 // Only match adjacent instructions in same basic block
12941 // Only equality constraints
12942 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12943 // Only one replacement instruction
12944 //
12945 // ---------EXAMPLE----------------------------------------------------------
12946 //
12947 // // pertinent parts of existing instructions in architecture description
12948 // instruct movI(rRegI dst, rRegI src) %{
12949 //   match(Set dst (CopyI src));
12950 // %}
12951 //
12952 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12953 //   match(Set dst (AddI dst src));
12954 //   effect(KILL cr);
12955 // %}
12956 //
12957 // // Change (inc mov) to lea
12958 // peephole %{
12959 //   // increment preceded by register-register move
12960 //   peepmatch ( incI_eReg movI );
12961 //   // require that the destination register of the increment
12962 //   // match the destination register of the move
12963 //   peepconstraint ( 0.dst == 1.dst );
12964 //   // construct a replacement instruction that sets
12965 //   // the destination to ( move's source register + one )
12966 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12967 // %}
12968 //
12969 // Implementation no longer uses movX instructions since
12970 // machine-independent system no longer uses CopyX nodes.
12971 //
12972 // peephole %{
12973 //   peepmatch ( incI_eReg movI );
12974 //   peepconstraint ( 0.dst == 1.dst );
12975 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12976 // %}
12977 //
12978 // peephole %{
12979 //   peepmatch ( decI_eReg movI );
12980 //   peepconstraint ( 0.dst == 1.dst );
12981 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12982 // %}
12983 //
12984 // peephole %{
12985 //   peepmatch ( addI_eReg_imm movI );
12986 //   peepconstraint ( 0.dst == 1.dst );
12987 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12988 // %}
12989 //
12990 // peephole %{
12991 //   peepmatch ( addP_eReg_imm movP );
12992 //   peepconstraint ( 0.dst == 1.dst );
12993 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
12994 // %}
12995 
12996 // // Change load of spilled value to only a spill
12997 // instruct storeI(memory mem, rRegI src) %{
12998 //   match(Set mem (StoreI mem src));
12999 // %}
13000 //
13001 // instruct loadI(rRegI dst, memory mem) %{
13002 //   match(Set dst (LoadI mem));
13003 // %}
13004 //
13005 peephole %{
13006   peepmatch ( loadI storeI );
13007   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13008   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13009 %}
13010 
13011 //----------SMARTSPILL RULES---------------------------------------------------
13012 // These must follow all instruction definitions as they use the names
13013 // defined in the instructions definitions.