1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
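// As an illustrative reading of one entry from the reg_def list below: ECX is
// save-on-call both for compiled Java code and for the C calling convention,
// it is spilled/reloaded as an integer (Op_RegI), and its hardware encoding in
// ModRM/SIB bytes is 1:
//
//   reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());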
  62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code, but
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode. During the emission the FPU stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering forces the
// instruction encoding to play games with the register encoding to
// correct for this 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
  89 //
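// A concrete instance of those encoding games: while the extra element is on
// the stack, a copy out of FPR3 (allocator encoding 3) is emitted as FLD ST(2),
// i.e. bytes 0xD9, 0xC0 + 3 - 1.  That is the "0xC0-1+encode" arithmetic used
// by impl_fp_store_helper() and the float reg-reg copy code further down.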
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
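// This works because the Long pairs used here (EDX:EAX, EBX:ECX, EDI:EBP; see
// the alloc_class comment above) all have hardware encodings that differ by
// exactly 2: EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7.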
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // of 128-bit operands for SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
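// Illustration: for adr == 0x1008 the mask ~0xF yields 0x1000, i.e. the address
// is rounded down to the enclosing 16-byte (128-bit) boundary.  The pool below
// reserves one extra 128-bit slot precisely so that this rounding down always
// lands inside the buffer.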
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
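// For reference, the 5 bytes are a CALL rel32 (one opcode byte plus a 32-bit
// displacement); the dynamic call is preceded by a 5-byte MOV-immediate that
// pre-loads the inline cache register, giving 10, and any fldcw/vzeroupper
// counted by pre_call_resets_size() comes on top of that.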
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
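// The ModRM byte packs mod (2 bits), reg (3 bits) and r/m (3 bits).  For
// example, emit_rm(cbuf, 0x3, 0x00, ESP_enc) yields 0xC4 -- register-direct
// ESP with a /0 opcode extension -- which is how the "ADD ESP, #framesize"
// sequences in the epilog below are assembled.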
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
 414 
// Encode a register + memory operand (rRegI ereg, memory mem), i.e. emit_reg_mem.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
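// Worked example: [ESP + 8] has to take the SIB path (base == ESP), producing a
// ModRM byte with mod=01/rm=100, the SIB byte 0x24 (scale=0, no index, base=ESP)
// and an 8-bit displacement of 8.  Together with the opcode, that is the
// "3 + offset_size" byte count that impl_helper() below assumes for stack-slot
// accesses.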
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
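// The sequence above materializes a three-way compare result in 'dst':
// "unordered" (parity set) and "below" both keep the initial -1, while
// SETNE/MOVZX leaves 0 for "equal" and 1 for "greater than".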
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users might be
 628     // emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp,
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp,
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 725   // Remove two words for return addr and rbp,
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp,
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
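// The 3 in impl_helper's size accounting is opcode + ModRM + SIB; the
// displacement then adds 0, 1 or 4 bytes depending on whether the stack offset
// is zero, fits in a signed byte, or needs a full 32 bits.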
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single-byte displacement.
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
1350 
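     // Size note (derived from the emit code above): the LEA with a 32-bit
     // displacement is opcode + mod/r-m + SIB + disp32 = 7 bytes; the 8-bit
     // displacement form is opcode + mod/r-m + SIB + disp8 = 4 bytes.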
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1381   /* WARNING these NOPs are critical so that verified entry point is properly
1382      aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
1390 
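     // Size breakdown (from the emit code above): CMP EAX,[ECX+4] is 3 bytes and
     // JNE rel32 is 6 bytes; with 2 or 3 trailing NOPs this totals 11 or 12 bytes,
     // matching the values returned below.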
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
1402 // This query is UltraSparc-specific; returning true just means we have fast l2f conversion.
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1412   // The passed offset is relative to the address of the branch.
1413   // On x86 a branch displacement is calculated relative to the address
1414   // of the next instruction.
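       // Worked example (illustrative): a 2-byte short JCC whose target lies
       // 100 bytes past the branch address arrives here with offset == 100;
       // after subtracting br_size the displacement is 98, which fits the
       // signed 8-bit range, so the short form is usable.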
1415   offset -= br_size;
1416 
1417   // the short version of jmpConUCF2 contains multiple branches,
1418   // making the reach slightly less
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
1425   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1426   return false;
1427 }
1428 
1429 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1430 const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
1438 // Does the CPU require late expand (see block.cpp for description of late expand)?
1439 const bool Matcher::require_postalloc_expand = false;
1440 
1441 // Should the Matcher clone shifts on addressing modes, expecting them to
1442 // be subsumed into complex addressing expressions or compute them into
1443 // registers?  True for Intel but false for most RISCs
1444 const bool Matcher::clone_shift_expressions = true;
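     // For example (illustrative): a scaled index such as (LShiftI idx 2) feeding an
     // address can be subsumed into an x86 addressing mode like [base + idx*4 + disp]
     // rather than being computed into a separate register first.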
1445 
1446 // Do we need to mask the count passed to shift instructions or does
1447 // the cpu only look at the lower 5/6 bits anyway?
1448 const bool Matcher::need_masked_shift_count = false;
1449 
1450 bool Matcher::narrow_oop_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 bool Matcher::narrow_klass_use_complex_address() {
1456   ShouldNotCallThis();
1457   return true;
1458 }
1459 
1460 bool Matcher::const_oop_prefer_decode() {
1461   ShouldNotCallThis();
1462   return true;
1463 }
1464 
1465 bool Matcher::const_klass_prefer_decode() {
1466   ShouldNotCallThis();
1467   return true;
1468 }
1469 
1470 // Is it better to copy float constants, or load them directly from memory?
1471 // Intel can load a float constant from a direct address, requiring no
1472 // extra registers.  Most RISCs will have to materialize an address into a
1473 // register first, so they would do better to copy the constant from stack.
1474 const bool Matcher::rematerialize_float_constants = true;
1475 
1476 // If CPU can load and store mis-aligned doubles directly then no fixup is
1477 // needed.  Else we split the double into 2 integer pieces and move it
1478 // piece-by-piece.  Only happens when passing doubles into C code as the
1479 // Java calling convention forces doubles to be aligned.
1480 const bool Matcher::misaligned_doubles_ok = true;
1481 
1482 
1483 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1484   // Get the memory operand from the node
1485   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1486   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1487   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1488   uint opcnt     = 1;                 // First operand
1489   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1490   while( idx >= skipped+num_edges ) {
1491     skipped += num_edges;
1492     opcnt++;                          // Bump operand count
1493     assert( opcnt < numopnds, "Accessing non-existent operand" );
1494     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1495   }
1496 
1497   MachOper *memory = node->_opnds[opcnt];
1498   MachOper *new_memory = NULL;
1499   switch (memory->opcode()) {
1500   case DIRECT:
1501   case INDOFFSET32X:
1502     // No transformation necessary.
1503     return;
1504   case INDIRECT:
1505     new_memory = new indirect_win95_safeOper( );
1506     break;
1507   case INDOFFSET8:
1508     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1509     break;
1510   case INDOFFSET32:
1511     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1512     break;
1513   case INDINDEXOFFSET:
1514     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1515     break;
1516   case INDINDEXSCALE:
1517     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1518     break;
1519   case INDINDEXSCALEOFFSET:
1520     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1521     break;
1522   case LOAD_LONG_INDIRECT:
1523   case LOAD_LONG_INDOFFSET32:
1524     // Does not use EBP as the address register; uses { EDX, EBX, EDI, ESI }
1525     return;
1526   default:
1527     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1528     return;
1529   }
1530   node->_opnds[opcnt] = new_memory;
1531 }
1532 
1533 // Advertise here if the CPU requires explicit rounding operations
1534 // to implement the UseStrictFP mode.
1535 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1536 
1537 // Are floats converted to doubles when stored to the stack during deoptimization?
1538 // On x86_32 they are stored with conversion only when the FPU is used for floats.
1539 bool Matcher::float_in_double() { return (UseSSE == 0); }
1540 
1541 // Do ints take an entire long register or just half?
1542 const bool Matcher::int_in_long = false;
1543 
1544 // Return whether or not this register is ever used as an argument.  This
1545 // function is used on startup to build the trampoline stubs in generateOptoStub.
1546 // Registers not mentioned will be killed by the VM call in the trampoline, and
1547 // arguments in those registers will not be available to the callee.
1548 bool Matcher::can_be_java_arg( int reg ) {
1549   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1550   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1551   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1552   return false;
1553 }
1554 
1555 bool Matcher::is_spillable_arg( int reg ) {
1556   return can_be_java_arg(reg);
1557 }
1558 
1559 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1560   // Use the hardware integer DIV instruction when
1561   // it is faster than code which uses multiply.
1562   // Only when the constant divisor fits into 32 bits
1563   // (min_jint is excluded so that only correct positive
1564   // 32-bit values are obtained from negative divisors).
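       // E.g. (illustrative): a long division by 1000 satisfies the check below, so
       // on CPUs with fast IDIV the hardware divide is used instead of the
       // multiply-by-reciprocal expansion; a divisor of 0x100000000L does not qualify.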
1565   return VM_Version::has_fast_idiv() &&
1566          (divisor == (int)divisor && divisor != min_jint);
1567 }
1568 
1569 // Register for DIVI projection of divmodI
1570 RegMask Matcher::divI_proj_mask() {
1571   return EAX_REG_mask();
1572 }
1573 
1574 // Register for MODI projection of divmodI
1575 RegMask Matcher::modI_proj_mask() {
1576   return EDX_REG_mask();
1577 }
1578 
1579 // Register for DIVL projection of divmodL
1580 RegMask Matcher::divL_proj_mask() {
1581   ShouldNotReachHere();
1582   return RegMask();
1583 }
1584 
1585 // Register for MODL projection of divmodL
1586 RegMask Matcher::modL_proj_mask() {
1587   ShouldNotReachHere();
1588   return RegMask();
1589 }
1590 
1591 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1592   return NO_REG_mask();
1593 }
1594 
1595 // Returns true if the high 32 bits of the value are known to be zero.
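     // E.g. (illustrative): (AndL x 0x00000000FFFFFFFF) and ConL 5 both qualify,
     // while (AndL x 0x1FFFFFFFF) does not, since bit 32 may still be set.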
1596 bool is_operand_hi32_zero(Node* n) {
1597   int opc = n->Opcode();
1598   if (opc == Op_AndL) {
1599     Node* o2 = n->in(2);
1600     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1601       return true;
1602     }
1603   }
1604   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1605     return true;
1606   }
1607   return false;
1608 }
1609 
1610 %}
1611 
1612 //----------ENCODING BLOCK-----------------------------------------------------
1613 // This block specifies the encoding classes used by the compiler to output
1614 // byte streams.  Encoding classes generate functions which are called by
1615 // Machine Instruction Nodes in order to generate the bit encoding of the
1616 // instruction.  Operands specify their base encoding interface with the
1617 // interface keyword.  There are currently four supported interfaces,
1618 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1619 // operand to generate a function which returns its register number when
1620 // queried.   CONST_INTER causes an operand to generate a function which
1621 // returns the value of the constant when queried.  MEMORY_INTER causes an
1622 // operand to generate four functions which return the Base Register, the
1623 // Index Register, the Scale Value, and the Offset Value of the operand when
1624 // queried.  COND_INTER causes an operand to generate six functions which
1625 // return the encoding code (i.e., the encoding bits for the instruction)
1626 // associated with each basic boolean condition for a conditional instruction.
1627 // Instructions specify two basic values for encoding.  They use the
1628 // ins_encode keyword to specify their encoding class (which must be one of
1629 // the class names specified in the encoding block), and they use the
1630 // opcode keyword to specify, in order, their primary, secondary, and
1631 // tertiary opcode.  Only the opcode sections which a particular instruction
1632 // needs for encoding need to be specified.
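     //
     // An illustrative (not verbatim) instruction body using these keywords:
     //   opcode(0x03);                           // primary opcode: ADD r32, r/m32
     //   ins_encode( OpcP, RegReg( dst, src ) );
     // which emits the primary opcode followed by a mod/r-m byte naming both registers.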
1633 encode %{
1634   // Build emit functions for each basic byte or larger field in the Intel
1635   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1636   // code in the enc_class source block.  Emit functions will live in the
1637   // main source block for now.  In future, we can generalize this by
1638   // adding a syntax that specifies the sizes of fields in an order,
1639   // so that the adlc can build the emit functions automagically
1640 
1641   // Emit primary opcode
1642   enc_class OpcP %{
1643     emit_opcode(cbuf, $primary);
1644   %}
1645 
1646   // Emit secondary opcode
1647   enc_class OpcS %{
1648     emit_opcode(cbuf, $secondary);
1649   %}
1650 
1651   // Emit opcode directly
1652   enc_class Opcode(immI d8) %{
1653     emit_opcode(cbuf, $d8$$constant);
1654   %}
1655 
1656   enc_class SizePrefix %{
1657     emit_opcode(cbuf,0x66);
1658   %}
1659 
1660   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1661     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1662   %}
1663 
1664   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1665     emit_opcode(cbuf,$opcode$$constant);
1666     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1667   %}
1668 
1669   enc_class mov_r32_imm0( rRegI dst ) %{
1670     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1671     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1672   %}
1673 
1674   enc_class cdq_enc %{
1675     // Full implementation of Java idiv and irem; checks for
1676     // special case as described in JVM spec., p.243 & p.271.
1677     //
1678     //         normal case                           special case
1679     //
1680     // input : rax: dividend                          min_int
1681     //         reg: divisor                           -1
1682     //
1683     // output: rax: quotient  (= rax idiv reg)        min_int
1684     //         rdx: remainder (= rax irem reg)        0
1685     //
1686     //  Code sequence:
1687     //
1688     //  81 F8 00 00 00 80    cmp         rax,80000000h
1689     //  0F 85 0B 00 00 00    jne         normal_case
1690     //  33 D2                xor         rdx,rdx
1691     //  83 F9 FF             cmp         rcx,-1
1692     //  0F 84 03 00 00 00    je          done
1693     //                  normal_case:
1694     //  99                   cdq
1695     //  F7 F9                idiv        rcx
1696     //                  done:
1697     //
1698     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1699     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1700     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1701     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1702     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1703     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1704     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1705     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1706     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1707     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1708     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1709     // normal_case:
1710     emit_opcode(cbuf,0x99);                                         // cdq
1711     // idiv (note: must be emitted by the user of this rule)
1712     // normal:
1713   %}
1714 
1715   // Dense encoding for older common ops
1716   enc_class Opc_plus(immI opcode, rRegI reg) %{
1717     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1718   %}
1719 
1720 
1721   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1722   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1723     // Check for 8-bit immediate, and set sign extend bit in opcode
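         // E.g. (illustrative, for a group-1 instruction): primary 0x81 (op r/m32,imm32)
         // becomes 0x83 (op r/m32,imm8 sign-extended) when the immediate fits in 8 bits.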
1724     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1725       emit_opcode(cbuf, $primary | 0x02);
1726     }
1727     else {                          // If 32-bit immediate
1728       emit_opcode(cbuf, $primary);
1729     }
1730   %}
1731 
1732   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1733     // Emit primary opcode and set sign-extend bit
1734     // Check for 8-bit immediate, and set sign extend bit in opcode
1735     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1736       emit_opcode(cbuf, $primary | 0x02);
         }
1737     else {                          // If 32-bit immediate
1738       emit_opcode(cbuf, $primary);
1739     }
1740     // Emit r/m byte with secondary opcode, after primary opcode.
1741     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1742   %}
1743 
1744   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1745     // Check for 8-bit immediate, and set sign extend bit in opcode
1746     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1747       $$$emit8$imm$$constant;
1748     }
1749     else {                          // If 32-bit immediate
1750       // Output immediate
1751       $$$emit32$imm$$constant;
1752     }
1753   %}
1754 
1755   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1756     // Emit primary opcode and set sign-extend bit
1757     // Check for 8-bit immediate, and set sign extend bit in opcode
1758     int con = (int)$imm$$constant; // Throw away top bits
1759     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1760     // Emit r/m byte with secondary opcode, after primary opcode.
1761     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1762     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1763     else                               emit_d32(cbuf,con);
1764   %}
1765 
1766   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1767     // Emit primary opcode and set sign-extend bit
1768     // Check for 8-bit immediate, and set sign extend bit in opcode
1769     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1770     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1771     // Emit r/m byte with tertiary opcode, after primary opcode.
1772     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1773     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1774     else                               emit_d32(cbuf,con);
1775   %}
1776 
1777   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1778     emit_cc(cbuf, $secondary, $dst$$reg );
1779   %}
1780 
1781   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1782     int destlo = $dst$$reg;
1783     int desthi = HIGH_FROM_LOW(destlo);
1784     // bswap lo
1785     emit_opcode(cbuf, 0x0F);
1786     emit_cc(cbuf, 0xC8, destlo);
1787     // bswap hi
1788     emit_opcode(cbuf, 0x0F);
1789     emit_cc(cbuf, 0xC8, desthi);
1790     // xchg lo and hi
1791     emit_opcode(cbuf, 0x87);
1792     emit_rm(cbuf, 0x3, destlo, desthi);
1793   %}
1794 
1795   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1796     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1797   %}
1798 
1799   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1800     $$$emit8$primary;
1801     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1802   %}
1803 
1804   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1805     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1806     emit_d8(cbuf, op >> 8 );
1807     emit_d8(cbuf, op & 255);
1808   %}
1809 
1810   // emulate a CMOV with a conditional branch around a MOV
1811   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1812     // Invert sense of branch from sense of CMOV
1813     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1814     emit_d8( cbuf, $brOffs$$constant );
1815   %}
1816 
1817   enc_class enc_PartialSubtypeCheck( ) %{
1818     Register Redi = as_Register(EDI_enc); // result register
1819     Register Reax = as_Register(EAX_enc); // super class
1820     Register Recx = as_Register(ECX_enc); // killed
1821     Register Resi = as_Register(ESI_enc); // sub class
1822     Label miss;
1823 
1824     MacroAssembler _masm(&cbuf);
1825     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1826                                      NULL, &miss,
1827                                      /*set_cond_codes:*/ true);
1828     if ($primary) {
1829       __ xorptr(Redi, Redi);
1830     }
1831     __ bind(miss);
1832   %}
1833 
1834   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1835     MacroAssembler masm(&cbuf);
1836     int start = masm.offset();
1837     if (UseSSE >= 2) {
1838       if (VerifyFPU) {
1839         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1840       }
1841     } else {
1842       // External c_calling_convention expects the FPU stack to be 'clean'.
1843       // Compiled code leaves it dirty.  Do cleanup now.
1844       masm.empty_FPU_stack();
1845     }
1846     if (sizeof_FFree_Float_Stack_All == -1) {
1847       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1848     } else {
1849       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1850     }
1851   %}
1852 
1853   enc_class Verify_FPU_For_Leaf %{
1854     if( VerifyFPU ) {
1855       MacroAssembler masm(&cbuf);
1856       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1857     }
1858   %}
1859 
1860   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1861     // This is the instruction starting address for relocation info.
1862     cbuf.set_insts_mark();
1863     $$$emit8$primary;
1864     // CALL directly to the runtime
1865     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1866                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1867 
1868     if (UseSSE >= 2) {
1869       MacroAssembler _masm(&cbuf);
1870       BasicType rt = tf()->return_type();
1871 
1872       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1873         // A C runtime call where the return value is unused.  In SSE2+
1874         // mode the result needs to be removed from the FPU stack.  It's
1875         // likely that this function call could be removed by the
1876         // optimizer if the C function is a pure function.
1877         __ ffree(0);
1878       } else if (rt == T_FLOAT) {
1879         __ lea(rsp, Address(rsp, -4));
1880         __ fstp_s(Address(rsp, 0));
1881         __ movflt(xmm0, Address(rsp, 0));
1882         __ lea(rsp, Address(rsp,  4));
1883       } else if (rt == T_DOUBLE) {
1884         __ lea(rsp, Address(rsp, -8));
1885         __ fstp_d(Address(rsp, 0));
1886         __ movdbl(xmm0, Address(rsp, 0));
1887         __ lea(rsp, Address(rsp,  8));
1888       }
1889     }
1890   %}
1891 
1892 
1893   enc_class pre_call_resets %{
1894     // If method sets FPU control word restore it here
1895     debug_only(int off0 = cbuf.insts_size());
1896     if (ra_->C->in_24_bit_fp_mode()) {
1897       MacroAssembler _masm(&cbuf);
1898       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1899     }
1900     if (ra_->C->max_vector_size() > 16) {
1901       // Clear upper bits of YMM registers when current compiled code uses
1902       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1903       MacroAssembler _masm(&cbuf);
1904       __ vzeroupper();
1905     }
1906     debug_only(int off1 = cbuf.insts_size());
1907     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1908   %}
1909 
1910   enc_class post_call_FPU %{
1911     // If method sets FPU control word do it here also
1912     if (Compile::current()->in_24_bit_fp_mode()) {
1913       MacroAssembler masm(&cbuf);
1914       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1915     }
1916   %}
1917 
1918   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1919     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1920     // who we intended to call.
1921     cbuf.set_insts_mark();
1922     $$$emit8$primary;
1923 
1924     if (!_method) {
1925       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1926                      runtime_call_Relocation::spec(),
1927                      RELOC_IMM32);
1928     } else {
1929       int method_index = resolved_method_index(cbuf);
1930       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1931                                                   : static_call_Relocation::spec(method_index);
1932       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1933                      rspec, RELOC_DISP32);
1934       // Emit stubs for static call.
1935       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1936       if (stub == NULL) {
1937         ciEnv::current()->record_failure("CodeCache is full");
1938         return;
1939       }
1940     }
1941   %}
1942 
1943   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1944     MacroAssembler _masm(&cbuf);
1945     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1946   %}
1947 
1948   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1949     int disp = in_bytes(Method::from_compiled_offset());
1950     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1951 
1952     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1953     cbuf.set_insts_mark();
1954     $$$emit8$primary;
1955     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1956     emit_d8(cbuf, disp);             // Displacement
1957 
1958   %}
1959 
1960 //   Following encoding is no longer used, but may be restored if calling
1961 //   convention changes significantly.
1962 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1963 //
1964 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1965 //     // int ic_reg     = Matcher::inline_cache_reg();
1966 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1967 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1968 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1969 //
1970 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1971 //     // // so we load it immediately before the call
1972 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1973 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1974 //
1975 //     // xor rbp,ebp
1976 //     emit_opcode(cbuf, 0x33);
1977 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1978 //
1979 //     // CALL to interpreter.
1980 //     cbuf.set_insts_mark();
1981 //     $$$emit8$primary;
1982 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1983 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1984 //   %}
1985 
1986   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1987     $$$emit8$primary;
1988     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1989     $$$emit8$shift$$constant;
1990   %}
1991 
1992   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1996     $$$emit32$src$$constant;
1997   %}
1998 
1999   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
2000     // Load immediate does not have a zero or sign extended version
2001     // for 8-bit immediates
2002     emit_opcode(cbuf, $primary + $dst$$reg);
2003     $$$emit32$src$$constant;
2004   %}
2005 
2006   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
2007     // Load immediate does not have a zero or sign extended version
2008     // for 8-bit immediates
2009     int dst_enc = $dst$$reg;
2010     int src_con = $src$$constant & 0x0FFFFFFFFL;
2011     if (src_con == 0) {
2012       // xor dst, dst
2013       emit_opcode(cbuf, 0x33);
2014       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2015     } else {
2016       emit_opcode(cbuf, $primary + dst_enc);
2017       emit_d32(cbuf, src_con);
2018     }
2019   %}
2020 
2021   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2022     // Load immediate does not have a zero or sign extended version
2023     // for 8-bit immediates
2024     int dst_enc = $dst$$reg + 2;
2025     int src_con = ((julong)($src$$constant)) >> 32;
2026     if (src_con == 0) {
2027       // xor dst, dst
2028       emit_opcode(cbuf, 0x33);
2029       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2030     } else {
2031       emit_opcode(cbuf, $primary + dst_enc);
2032       emit_d32(cbuf, src_con);
2033     }
2034   %}
2035 
2036 
2037   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2038   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2039     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2040   %}
2041 
2042   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2043     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2044   %}
2045 
2046   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2047     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2048   %}
2049 
2050   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2051     $$$emit8$primary;
2052     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2053   %}
2054 
2055   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2056     $$$emit8$secondary;
2057     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2058   %}
2059 
2060   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2061     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2062   %}
2063 
2064   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2065     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2066   %}
2067 
2068   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2069     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2070   %}
2071 
2072   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2073     // Output immediate
2074     $$$emit32$src$$constant;
2075   %}
2076 
2077   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2078     // Output Float immediate bits
2079     jfloat jf = $src$$constant;
2080     int    jf_as_bits = jint_cast( jf );
2081     emit_d32(cbuf, jf_as_bits);
2082   %}
2083 
2084   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2085     // Output Float immediate bits
2086     jfloat jf = $src$$constant;
2087     int    jf_as_bits = jint_cast( jf );
2088     emit_d32(cbuf, jf_as_bits);
2089   %}
2090 
2091   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2092     // Output immediate
2093     $$$emit16$src$$constant;
2094   %}
2095 
2096   enc_class Con_d32(immI src) %{
2097     emit_d32(cbuf,$src$$constant);
2098   %}
2099 
2100   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2101     // Output immediate memory reference
2102     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2103     emit_d32(cbuf, 0x00);
2104   %}
2105 
2106   enc_class lock_prefix( ) %{
2107     if( os::is_MP() )
2108       emit_opcode(cbuf,0xF0);         // [Lock]
2109   %}
2110 
2111   // Cmp-xchg long value.
2112   // Note: we need to swap rbx and rcx before and after the
2113   //       cmpxchg8 instruction because the instruction uses
2114   //       rcx as the high-order word of the new value to store but
2115   //       our register encoding uses rbx.
2116   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2117 
2118     // XCHG  rbx,ecx
2119     emit_opcode(cbuf,0x87);
2120     emit_opcode(cbuf,0xD9);
2121     // [Lock]
2122     if( os::is_MP() )
2123       emit_opcode(cbuf,0xF0);
2124     // CMPXCHG8 [Eptr]
2125     emit_opcode(cbuf,0x0F);
2126     emit_opcode(cbuf,0xC7);
2127     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2128     // XCHG  rbx,ecx
2129     emit_opcode(cbuf,0x87);
2130     emit_opcode(cbuf,0xD9);
2131   %}
2132 
2133   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2134     // [Lock]
2135     if( os::is_MP() )
2136       emit_opcode(cbuf,0xF0);
2137 
2138     // CMPXCHG [Eptr]
2139     emit_opcode(cbuf,0x0F);
2140     emit_opcode(cbuf,0xB1);
2141     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2142   %}
2143 
2144   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2145     int res_encoding = $res$$reg;
2146 
2147     // MOV  res,0
2148     emit_opcode( cbuf, 0xB8 + res_encoding);
2149     emit_d32( cbuf, 0 );
2150     // JNE,s  fail
2151     emit_opcode(cbuf,0x75);
2152     emit_d8(cbuf, 5 );
2153     // MOV  res,1
2154     emit_opcode( cbuf, 0xB8 + res_encoding);
2155     emit_d32( cbuf, 1 );
2156     // fail:
2157   %}
2158 
2159   enc_class set_instruction_start( ) %{
2160     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2161   %}
2162 
2163   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2164     int reg_encoding = $ereg$$reg;
2165     int base  = $mem$$base;
2166     int index = $mem$$index;
2167     int scale = $mem$$scale;
2168     int displace = $mem$$disp;
2169     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2170     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2171   %}
2172 
2173   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2174     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2175     int base  = $mem$$base;
2176     int index = $mem$$index;
2177     int scale = $mem$$scale;
2178     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2179     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2180     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2181   %}
2182 
2183   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2184     int r1, r2;
2185     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2186     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2187     emit_opcode(cbuf,0x0F);
2188     emit_opcode(cbuf,$tertiary);
2189     emit_rm(cbuf, 0x3, r1, r2);
2190     emit_d8(cbuf,$cnt$$constant);
2191     emit_d8(cbuf,$primary);
2192     emit_rm(cbuf, 0x3, $secondary, r1);
2193     emit_d8(cbuf,$cnt$$constant);
2194   %}
2195 
2196   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2197     emit_opcode( cbuf, 0x8B ); // Move
2198     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2199     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2200       emit_d8(cbuf,$primary);
2201       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2202       emit_d8(cbuf,$cnt$$constant-32);
2203     }
2204     emit_d8(cbuf,$primary);
2205     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2206     emit_d8(cbuf,31);
2207   %}
2208 
2209   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2210     int r1, r2;
2211     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2212     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2213 
2214     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2215     emit_rm(cbuf, 0x3, r1, r2);
2216     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2217       emit_opcode(cbuf,$primary);
2218       emit_rm(cbuf, 0x3, $secondary, r1);
2219       emit_d8(cbuf,$cnt$$constant-32);
2220     }
2221     emit_opcode(cbuf,0x33);  // XOR r2,r2
2222     emit_rm(cbuf, 0x3, r2, r2);
2223   %}
2224 
2225   // Clone of RegMem but accepts an extra parameter to access each
2226   // half of a double in memory; it never needs relocation info.
2227   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2228     emit_opcode(cbuf,$opcode$$constant);
2229     int reg_encoding = $rm_reg$$reg;
2230     int base     = $mem$$base;
2231     int index    = $mem$$index;
2232     int scale    = $mem$$scale;
2233     int displace = $mem$$disp + $disp_for_half$$constant;
2234     relocInfo::relocType disp_reloc = relocInfo::none;
2235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2236   %}
2237 
2238   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2239   //
2240   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2241   // and it never needs relocation information.
2242   // Frequently used to move data between FPU's Stack Top and memory.
2243   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2244     int rm_byte_opcode = $rm_opcode$$constant;
2245     int base     = $mem$$base;
2246     int index    = $mem$$index;
2247     int scale    = $mem$$scale;
2248     int displace = $mem$$disp;
2249     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2250     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2251   %}
2252 
2253   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2254     int rm_byte_opcode = $rm_opcode$$constant;
2255     int base     = $mem$$base;
2256     int index    = $mem$$index;
2257     int scale    = $mem$$scale;
2258     int displace = $mem$$disp;
2259     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2260     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2261   %}
2262 
2263   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2264     int reg_encoding = $dst$$reg;
2265     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2266     int index        = 0x04;            // 0x04 indicates no index
2267     int scale        = 0x00;            // 0x00 indicates no scale
2268     int displace     = $src1$$constant; // 0x00 indicates no displacement
2269     relocInfo::relocType disp_reloc = relocInfo::none;
2270     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2271   %}
2272 
2273   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2274     // Compare dst,src
2275     emit_opcode(cbuf,0x3B);
2276     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2277     // jmp dst < src around move
2278     emit_opcode(cbuf,0x7C);
2279     emit_d8(cbuf,2);
2280     // move dst,src
2281     emit_opcode(cbuf,0x8B);
2282     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2283   %}
2284 
2285   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2286     // Compare dst,src
2287     emit_opcode(cbuf,0x3B);
2288     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2289     // jmp dst > src around move
2290     emit_opcode(cbuf,0x7F);
2291     emit_d8(cbuf,2);
2292     // move dst,src
2293     emit_opcode(cbuf,0x8B);
2294     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2295   %}
2296 
2297   enc_class enc_FPR_store(memory mem, regDPR src) %{
2298     // If src is FPR1, we can just FST to store it.
2299     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2300     int reg_encoding = 0x2; // Just store
2301     int base  = $mem$$base;
2302     int index = $mem$$index;
2303     int scale = $mem$$scale;
2304     int displace = $mem$$disp;
2305     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2306     if( $src$$reg != FPR1L_enc ) {
2307       reg_encoding = 0x3;  // Store & pop
2308       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2309       emit_d8( cbuf, 0xC0-1+$src$$reg );
2310     }
2311     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2312     emit_opcode(cbuf,$primary);
2313     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2314   %}
2315 
2316   enc_class neg_reg(rRegI dst) %{
2317     // NEG $dst
2318     emit_opcode(cbuf,0xF7);
2319     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2320   %}
2321 
2322   enc_class setLT_reg(eCXRegI dst) %{
2323     // SETLT $dst
2324     emit_opcode(cbuf,0x0F);
2325     emit_opcode(cbuf,0x9C);
2326     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2327   %}
2328 
2329   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2330     int tmpReg = $tmp$$reg;
2331 
2332     // SUB $p,$q
2333     emit_opcode(cbuf,0x2B);
2334     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2335     // SBB $tmp,$tmp
2336     emit_opcode(cbuf,0x1B);
2337     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2338     // AND $tmp,$y
2339     emit_opcode(cbuf,0x23);
2340     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2341     // ADD $p,$tmp
2342     emit_opcode(cbuf,0x03);
2343     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2344   %}
2345 
2346   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2347     // TEST shift,32
2348     emit_opcode(cbuf,0xF7);
2349     emit_rm(cbuf, 0x3, 0, ECX_enc);
2350     emit_d32(cbuf,0x20);
2351     // JEQ,s small
2352     emit_opcode(cbuf, 0x74);
2353     emit_d8(cbuf, 0x04);
2354     // MOV    $dst.hi,$dst.lo
2355     emit_opcode( cbuf, 0x8B );
2356     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2357     // CLR    $dst.lo
2358     emit_opcode(cbuf, 0x33);
2359     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2360 // small:
2361     // SHLD   $dst.hi,$dst.lo,$shift
2362     emit_opcode(cbuf,0x0F);
2363     emit_opcode(cbuf,0xA5);
2364     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2365     // SHL    $dst.lo,$shift
2366     emit_opcode(cbuf,0xD3);
2367     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2368   %}
2369 
2370   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2371     // TEST shift,32
2372     emit_opcode(cbuf,0xF7);
2373     emit_rm(cbuf, 0x3, 0, ECX_enc);
2374     emit_d32(cbuf,0x20);
2375     // JEQ,s small
2376     emit_opcode(cbuf, 0x74);
2377     emit_d8(cbuf, 0x04);
2378     // MOV    $dst.lo,$dst.hi
2379     emit_opcode( cbuf, 0x8B );
2380     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2381     // CLR    $dst.hi
2382     emit_opcode(cbuf, 0x33);
2383     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2384 // small:
2385     // SHRD   $dst.lo,$dst.hi,$shift
2386     emit_opcode(cbuf,0x0F);
2387     emit_opcode(cbuf,0xAD);
2388     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2389     // SHR    $dst.hi,$shift
2390     emit_opcode(cbuf,0xD3);
2391     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2392   %}
2393 
2394   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2395     // TEST shift,32
2396     emit_opcode(cbuf,0xF7);
2397     emit_rm(cbuf, 0x3, 0, ECX_enc);
2398     emit_d32(cbuf,0x20);
2399     // JEQ,s small
2400     emit_opcode(cbuf, 0x74);
2401     emit_d8(cbuf, 0x05);
2402     // MOV    $dst.lo,$dst.hi
2403     emit_opcode( cbuf, 0x8B );
2404     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2405     // SAR    $dst.hi,31
2406     emit_opcode(cbuf, 0xC1);
2407     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2408     emit_d8(cbuf, 0x1F );
2409 // small:
2410     // SHRD   $dst.lo,$dst.hi,$shift
2411     emit_opcode(cbuf,0x0F);
2412     emit_opcode(cbuf,0xAD);
2413     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2414     // SAR    $dst.hi,$shift
2415     emit_opcode(cbuf,0xD3);
2416     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2417   %}
2418 
2419 
2420   // ----------------- Encodings for floating point unit -----------------
2421   // May leave result in FPU-TOS or FPU reg depending on opcodes
2422   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2423     $$$emit8$primary;
2424     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2425   %}
2426 
2427   // Pop argument in FPR0 with FSTP ST(0)
2428   enc_class PopFPU() %{
2429     emit_opcode( cbuf, 0xDD );
2430     emit_d8( cbuf, 0xD8 );
2431   %}
2432 
2433   // !!!!! equivalent to Pop_Reg_F
2434   enc_class Pop_Reg_DPR( regDPR dst ) %{
2435     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2436     emit_d8( cbuf, 0xD8+$dst$$reg );
2437   %}
2438 
2439   enc_class Push_Reg_DPR( regDPR dst ) %{
2440     emit_opcode( cbuf, 0xD9 );
2441     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2442   %}
2443 
2444   enc_class strictfp_bias1( regDPR dst ) %{
2445     emit_opcode( cbuf, 0xDB );           // FLD m80real
2446     emit_opcode( cbuf, 0x2D );
2447     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2448     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2449     emit_opcode( cbuf, 0xC8+$dst$$reg );
2450   %}
2451 
2452   enc_class strictfp_bias2( regDPR dst ) %{
2453     emit_opcode( cbuf, 0xDB );           // FLD m80real
2454     emit_opcode( cbuf, 0x2D );
2455     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2456     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2457     emit_opcode( cbuf, 0xC8+$dst$$reg );
2458   %}
2459 
2460   // Special case for moving an integer register to a stack slot.
2461   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2462     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2463   %}
2464 
2465   // Special case for moving a register to a stack slot.
2466   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2467     // Opcode already emitted
2468     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2469     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2470     emit_d32(cbuf, $dst$$disp);   // Displacement
2471   %}
2472 
2473   // Push the integer in stackSlot 'src' onto FP-stack
2474   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2475     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2476   %}
2477 
2478   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2479   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2480     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2481   %}
2482 
2483   // Same as Pop_Mem_F except for opcode
2484   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2485   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2486     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2487   %}
2488 
2489   enc_class Pop_Reg_FPR( regFPR dst ) %{
2490     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2491     emit_d8( cbuf, 0xD8+$dst$$reg );
2492   %}
2493 
2494   enc_class Push_Reg_FPR( regFPR dst ) %{
2495     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2496     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2497   %}
2498 
2499   // Push FPU's float to a stack-slot, and pop FPU-stack
2500   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2501     int pop = 0x02;
2502     if ($src$$reg != FPR1L_enc) {
2503       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2504       emit_d8( cbuf, 0xC0-1+$src$$reg );
2505       pop = 0x03;
2506     }
2507     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2508   %}
2509 
2510   // Push FPU's double to a stack-slot, and pop FPU-stack
2511   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2512     int pop = 0x02;
2513     if ($src$$reg != FPR1L_enc) {
2514       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2515       emit_d8( cbuf, 0xC0-1+$src$$reg );
2516       pop = 0x03;
2517     }
2518     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2519   %}
2520 
2521   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2522   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2523     int pop = 0xD0 - 1; // -1 since we skip FLD
2524     if ($src$$reg != FPR1L_enc) {
2525       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2526       emit_d8( cbuf, 0xC0-1+$src$$reg );
2527       pop = 0xD8;
2528     }
2529     emit_opcode( cbuf, 0xDD );
2530     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2531   %}
2532 
2533 
2534   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2535     // load dst in FPR0
2536     emit_opcode( cbuf, 0xD9 );
2537     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2538     if ($src$$reg != FPR1L_enc) {
2539       // fincstp
2540       emit_opcode (cbuf, 0xD9);
2541       emit_opcode (cbuf, 0xF7);
2542       // swap src with FPR1:
2543       // FXCH FPR1 with src
2544       emit_opcode(cbuf, 0xD9);
2545       emit_d8(cbuf, 0xC8-1+$src$$reg );
2546       // fdecstp
2547       emit_opcode (cbuf, 0xD9);
2548       emit_opcode (cbuf, 0xF6);
2549     }
2550   %}
2551 
2552   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2553     MacroAssembler _masm(&cbuf);
2554     __ subptr(rsp, 8);
2555     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2556     __ fld_d(Address(rsp, 0));
2557     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2558     __ fld_d(Address(rsp, 0));
2559   %}
2560 
2561   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2562     MacroAssembler _masm(&cbuf);
2563     __ subptr(rsp, 4);
2564     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2565     __ fld_s(Address(rsp, 0));
2566     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2567     __ fld_s(Address(rsp, 0));
2568   %}
2569 
2570   enc_class Push_ResultD(regD dst) %{
2571     MacroAssembler _masm(&cbuf);
2572     __ fstp_d(Address(rsp, 0));
2573     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2574     __ addptr(rsp, 8);
2575   %}
2576 
2577   enc_class Push_ResultF(regF dst, immI d8) %{
2578     MacroAssembler _masm(&cbuf);
2579     __ fstp_s(Address(rsp, 0));
2580     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2581     __ addptr(rsp, $d8$$constant);
2582   %}
2583 
2584   enc_class Push_SrcD(regD src) %{
2585     MacroAssembler _masm(&cbuf);
2586     __ subptr(rsp, 8);
2587     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2588     __ fld_d(Address(rsp, 0));
2589   %}
2590 
2591   enc_class push_stack_temp_qword() %{
2592     MacroAssembler _masm(&cbuf);
2593     __ subptr(rsp, 8);
2594   %}
2595 
2596   enc_class pop_stack_temp_qword() %{
2597     MacroAssembler _masm(&cbuf);
2598     __ addptr(rsp, 8);
2599   %}
2600 
2601   enc_class push_xmm_to_fpr1(regD src) %{
2602     MacroAssembler _masm(&cbuf);
2603     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2604     __ fld_d(Address(rsp, 0));
2605   %}
2606 
2607   enc_class Push_Result_Mod_DPR( regDPR src) %{
2608     if ($src$$reg != FPR1L_enc) {
2609       // fincstp
2610       emit_opcode (cbuf, 0xD9);
2611       emit_opcode (cbuf, 0xF7);
2612       // FXCH FPR1 with src
2613       emit_opcode(cbuf, 0xD9);
2614       emit_d8(cbuf, 0xC8-1+$src$$reg );
2615       // fdecstp
2616       emit_opcode (cbuf, 0xD9);
2617       emit_opcode (cbuf, 0xF6);
2618     }
2619     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2620     // // FSTP   FPR$dst$$reg
2621     // emit_opcode( cbuf, 0xDD );
2622     // emit_d8( cbuf, 0xD8+$dst$$reg );
2623   %}
2624 
2625   enc_class fnstsw_sahf_skip_parity() %{
2626     // fnstsw ax
2627     emit_opcode( cbuf, 0xDF );
2628     emit_opcode( cbuf, 0xE0 );
2629     // sahf
2630     emit_opcode( cbuf, 0x9E );
2631     // jnp  ::skip
2632     emit_opcode( cbuf, 0x7B );
2633     emit_opcode( cbuf, 0x05 );
2634   %}
2635 
2636   enc_class emitModDPR() %{
2637     // fprem must be iterative
2638     // :: loop
2639     // fprem
2640     emit_opcode( cbuf, 0xD9 );
2641     emit_opcode( cbuf, 0xF8 );
2642     // wait
2643     emit_opcode( cbuf, 0x9b );
2644     // fnstsw ax
2645     emit_opcode( cbuf, 0xDF );
2646     emit_opcode( cbuf, 0xE0 );
2647     // sahf
2648     emit_opcode( cbuf, 0x9E );
2649     // jp  ::loop
2650     emit_opcode( cbuf, 0x0F );
2651     emit_opcode( cbuf, 0x8A );
2652     emit_opcode( cbuf, 0xF4 );
2653     emit_opcode( cbuf, 0xFF );
2654     emit_opcode( cbuf, 0xFF );
2655     emit_opcode( cbuf, 0xFF );
2656   %}
2657 
2658   enc_class fpu_flags() %{
2659     // fnstsw_ax
2660     emit_opcode( cbuf, 0xDF);
2661     emit_opcode( cbuf, 0xE0);
2662     // test ax,0x0400
2663     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2664     emit_opcode( cbuf, 0xA9 );
2665     emit_d16   ( cbuf, 0x0400 );
2666     // // // This sequence works, but stalls for 12-16 cycles on PPro
2667     // // test rax,0x0400
2668     // emit_opcode( cbuf, 0xA9 );
2669     // emit_d32   ( cbuf, 0x00000400 );
2670     //
2671     // jz exit (no unordered comparison)
2672     emit_opcode( cbuf, 0x74 );
2673     emit_d8    ( cbuf, 0x02 );
2674     // mov ah,1 - treat as LT case (set carry flag)
2675     emit_opcode( cbuf, 0xB4 );
2676     emit_d8    ( cbuf, 0x01 );
2677     // sahf
2678     emit_opcode( cbuf, 0x9E);
2679   %}
2680 
2681   enc_class cmpF_P6_fixup() %{
2682     // Fixup the integer flags in case comparison involved a NaN
2683     //
2684     // JNP exit (no unordered comparison, P-flag is set by NaN)
2685     emit_opcode( cbuf, 0x7B );
2686     emit_d8    ( cbuf, 0x03 );
2687     // MOV AH,1 - treat as LT case (set carry flag)
2688     emit_opcode( cbuf, 0xB4 );
2689     emit_d8    ( cbuf, 0x01 );
2690     // SAHF
2691     emit_opcode( cbuf, 0x9E);
2692     // NOP     // target for branch to avoid branch to branch
2693     emit_opcode( cbuf, 0x90);
2694   %}
2695 
2696 //     fnstsw_ax();
2697 //     sahf();
2698 //     movl(dst, nan_result);
2699 //     jcc(Assembler::parity, exit);
2700 //     movl(dst, less_result);
2701 //     jcc(Assembler::below, exit);
2702 //     movl(dst, equal_result);
2703 //     jcc(Assembler::equal, exit);
2704 //     movl(dst, greater_result);
2705 
2706 // less_result     =  1;
2707 // greater_result  = -1;
2708 // equal_result    = 0;
2709 // nan_result      = -1;
2710 
2711   enc_class CmpF_Result(rRegI dst) %{
2712     // fnstsw_ax();
2713     emit_opcode( cbuf, 0xDF);
2714     emit_opcode( cbuf, 0xE0);
2715     // sahf
2716     emit_opcode( cbuf, 0x9E);
2717     // movl(dst, nan_result);
2718     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2719     emit_d32( cbuf, -1 );
2720     // jcc(Assembler::parity, exit);
2721     emit_opcode( cbuf, 0x7A );
2722     emit_d8    ( cbuf, 0x13 );
2723     // movl(dst, less_result);
2724     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2725     emit_d32( cbuf, -1 );
2726     // jcc(Assembler::below, exit);
2727     emit_opcode( cbuf, 0x72 );
2728     emit_d8    ( cbuf, 0x0C );
2729     // movl(dst, equal_result);
2730     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2731     emit_d32( cbuf, 0 );
2732     // jcc(Assembler::equal, exit);
2733     emit_opcode( cbuf, 0x74 );
2734     emit_d8    ( cbuf, 0x05 );
2735     // movl(dst, greater_result);
2736     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2737     emit_d32( cbuf, 1 );
2738   %}
2739 
2740 
2741   // Compare the longs and set flags
2742   // BROKEN!  Do Not use as-is
2743   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2744     // CMP    $src1.hi,$src2.hi
2745     emit_opcode( cbuf, 0x3B );
2746     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2747     // JNE,s  done
2748     emit_opcode(cbuf,0x75);
2749     emit_d8(cbuf, 2 );
2750     // CMP    $src1.lo,$src2.lo
2751     emit_opcode( cbuf, 0x3B );
2752     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2753 // done:
2754   %}
2755 
2756   enc_class convert_int_long( regL dst, rRegI src ) %{
2757     // mov $dst.lo,$src
2758     int dst_encoding = $dst$$reg;
2759     int src_encoding = $src$$reg;
2760     encode_Copy( cbuf, dst_encoding  , src_encoding );
2761     // mov $dst.hi,$src
2762     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2763     // sar $dst.hi,31
2764     emit_opcode( cbuf, 0xC1 );
2765     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2766     emit_d8(cbuf, 0x1F );
2767   %}
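
  // For reference, the sequence above is the usual 32-bit sign extension; as a
  // C sketch (illustrative only):
  //
  //   int32_t lo = src;
  //   int32_t hi = src >> 31;   // arithmetic shift copies the sign bit into all 32 bits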
2768 
2769   enc_class convert_long_double( eRegL src ) %{
2770     // push $src.hi
2771     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2772     // push $src.lo
2773     emit_opcode(cbuf, 0x50+$src$$reg  );
2774     // fild 64-bits at [SP]
2775     emit_opcode(cbuf,0xdf);
2776     emit_d8(cbuf, 0x6C);
2777     emit_d8(cbuf, 0x24);
2778     emit_d8(cbuf, 0x00);
2779     // pop stack
2780     emit_opcode(cbuf, 0x83); // add  SP, #8
2781     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2782     emit_d8(cbuf, 0x8);
2783   %}
2784 
2785   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2786     // IMUL   EDX:EAX,$src1
2787     emit_opcode( cbuf, 0xF7 );
2788     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2789     // SAR    EDX,$cnt-32
2790     int shift_count = ((int)$cnt$$constant) - 32;
2791     if (shift_count > 0) {
2792       emit_opcode(cbuf, 0xC1);
2793       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2794       emit_d8(cbuf, shift_count);
2795     }
2796   %}
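
  // Rough C equivalent of the encoding above, assuming EAX already holds the
  // low word of $src2 (a hedged sketch with illustrative names, not VM code):
  //
  //   int64_t p      = (int64_t)eax * (int64_t)src1;  // IMUL EDX:EAX,$src1
  //   int32_t hi     = (int32_t)(p >> 32);            // high half lands in EDX
  //   int32_t result = hi >> (cnt - 32);              // SAR EDX,$cnt-32  ==  p >> cnt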
2797 
2798   // Same as convert_long_double above, but without the trailing ADD ESP,8
2799   enc_class convert_long_double2( eRegL src ) %{
2800     // push $src.hi
2801     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2802     // push $src.lo
2803     emit_opcode(cbuf, 0x50+$src$$reg  );
2804     // fild 64-bits at [SP]
2805     emit_opcode(cbuf,0xdf);
2806     emit_d8(cbuf, 0x6C);
2807     emit_d8(cbuf, 0x24);
2808     emit_d8(cbuf, 0x00);
2809   %}
2810 
2811   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2812     // Basic idea: long = (long)int * (long)int
2813     // IMUL EDX:EAX, src
2814     emit_opcode( cbuf, 0xF7 );
2815     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2816   %}
2817 
2818   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2819     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2820     // MUL EDX:EAX, src
2821     emit_opcode( cbuf, 0xF7 );
2822     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2823   %}
2824 
2825   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2826     // Basic idea: lo(result) = lo(x_lo * y_lo)
2827     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2828     // MOV    $tmp,$src.lo
2829     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2830     // IMUL   $tmp,EDX
2831     emit_opcode( cbuf, 0x0F );
2832     emit_opcode( cbuf, 0xAF );
2833     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2834     // MOV    EDX,$src.hi
2835     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2836     // IMUL   EDX,EAX
2837     emit_opcode( cbuf, 0x0F );
2838     emit_opcode( cbuf, 0xAF );
2839     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2840     // ADD    $tmp,EDX
2841     emit_opcode( cbuf, 0x03 );
2842     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2843     // MUL   EDX:EAX,$src.lo
2844     emit_opcode( cbuf, 0xF7 );
2845     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2846     // ADD    EDX,ESI
2847     emit_opcode( cbuf, 0x03 );
2848     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2849   %}
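
  // The decomposition above, written out as a C sketch (illustrative names;
  // only the low 64 bits of the product are kept):
  //
  //   uint32_t x_lo = (uint32_t)x,         y_lo = (uint32_t)y;
  //   uint32_t x_hi = (uint32_t)(x >> 32), y_hi = (uint32_t)(y >> 32);
  //   uint64_t p  = (uint64_t)x_lo * y_lo;                         // MUL EDX:EAX,$src.lo
  //   uint32_t lo = (uint32_t)p;
  //   uint32_t hi = (uint32_t)(p >> 32) + x_hi*y_lo + x_lo*y_hi;   // two IMULs + ADDs
  //   // result = ((uint64_t)hi << 32) | lo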
2850 
2851   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2852     // Basic idea: lo(result) = lo(src * y_lo)
2853     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2854     // IMUL   $tmp,EDX,$src
2855     emit_opcode( cbuf, 0x6B );
2856     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2857     emit_d8( cbuf, (int)$src$$constant );
2858     // MOV    EDX,$src
2859     emit_opcode(cbuf, 0xB8 + EDX_enc);
2860     emit_d32( cbuf, (int)$src$$constant );
2861     // MUL   EDX:EAX,EDX
2862     emit_opcode( cbuf, 0xF7 );
2863     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2864     // ADD    EDX,ESI
2865     emit_opcode( cbuf, 0x03 );
2866     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2867   %}
2868 
2869   enc_class long_div( eRegL src1, eRegL src2 ) %{
2870     // PUSH src1.hi
2871     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2872     // PUSH src1.lo
2873     emit_opcode(cbuf,               0x50+$src1$$reg  );
2874     // PUSH src2.hi
2875     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2876     // PUSH src2.lo
2877     emit_opcode(cbuf,               0x50+$src2$$reg  );
2878     // CALL directly to the runtime
2879     cbuf.set_insts_mark();
2880     emit_opcode(cbuf,0xE8);       // Call into runtime
2881     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2882     // Restore stack
2883     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2884     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2885     emit_d8(cbuf, 4*4);
2886   %}
2887 
2888   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2889     // PUSH src1.hi
2890     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2891     // PUSH src1.lo
2892     emit_opcode(cbuf,               0x50+$src1$$reg  );
2893     // PUSH src2.hi
2894     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2895     // PUSH src2.lo
2896     emit_opcode(cbuf,               0x50+$src2$$reg  );
2897     // CALL directly to the runtime
2898     cbuf.set_insts_mark();
2899     emit_opcode(cbuf,0xE8);       // Call into runtime
2900     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2901     // Restore stack
2902     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2903     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2904     emit_d8(cbuf, 4*4);
2905   %}
2906 
2907   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2908     // MOV   $tmp,$src.lo
2909     emit_opcode(cbuf, 0x8B);
2910     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2911     // OR    $tmp,$src.hi
2912     emit_opcode(cbuf, 0x0B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2914   %}
2915 
2916   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2917     // CMP    $src1.lo,$src2.lo
2918     emit_opcode( cbuf, 0x3B );
2919     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2920     // JNE,s  skip
2921     emit_cc(cbuf, 0x70, 0x5);
2922     emit_d8(cbuf,2);
2923     // CMP    $src1.hi,$src2.hi
2924     emit_opcode( cbuf, 0x3B );
2925     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2926   %}
2927 
2928   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2929     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2930     emit_opcode( cbuf, 0x3B );
2931     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2932     // MOV    $tmp,$src1.hi
2933     emit_opcode( cbuf, 0x8B );
2934     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2935     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2936     emit_opcode( cbuf, 0x1B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2938   %}
2939 
2940   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2941     // XOR    $tmp,$tmp
2942     emit_opcode(cbuf,0x33);  // XOR
2943     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2944     // CMP    $tmp,$src.lo
2945     emit_opcode( cbuf, 0x3B );
2946     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2947     // SBB    $tmp,$src.hi
2948     emit_opcode( cbuf, 0x1B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2950   %}
2951 
2952  // Sniff, sniff... smells like Gnu Superoptimizer
2953   enc_class neg_long( eRegL dst ) %{
2954     emit_opcode(cbuf,0xF7);    // NEG hi
2955     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2956     emit_opcode(cbuf,0xF7);    // NEG lo
2957     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2958     emit_opcode(cbuf,0x83);    // SBB hi,0
2959     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2960     emit_d8    (cbuf,0 );
2961   %}
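
  // What the three instructions above compute, as a C sketch (illustrative):
  //
  //   hi = -hi;              // NEG hi
  //   lo = -lo;              // NEG lo; CF is set iff the original lo was non-zero
  //   hi = hi - (lo != 0);   // SBB hi,0 subtracts that borrow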
2962 
2963   enc_class enc_pop_rdx() %{
2964     emit_opcode(cbuf,0x5A);
2965   %}
2966 
2967   enc_class enc_rethrow() %{
2968     cbuf.set_insts_mark();
2969     emit_opcode(cbuf, 0xE9);        // jmp    entry
2970     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2971                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2972   %}
2973 
2974 
2975   // Convert a double to an int.  Java semantics require we do complex
2976   // manipulations in the corner cases.  So we set the rounding mode to
2977   // 'zero', store the darned double down as an int, and reset the
2978   // rounding mode to 'nearest'.  If the hardware instead stores its "integer
2979   // indefinite" value (0x80000000), we call a runtime stub to fix the result.
2980   enc_class DPR2I_encoding( regDPR src ) %{
2981     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2982     // exceptions here, so that a NaN or other corner-case value would
2983     // throw an exception (but normal values get converted at full speed).
2984     // However, I2C adapters and other float-stack manglers leave pending
2985     // invalid-op exceptions hanging.  We would have to clear them before
2986     // enabling them and that is more expensive than just testing for the
2987     // invalid value Intel stores down in the corner cases.
2988     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2989     emit_opcode(cbuf,0x2D);
2990     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2991     // Allocate a word
2992     emit_opcode(cbuf,0x83);            // SUB ESP,4
2993     emit_opcode(cbuf,0xEC);
2994     emit_d8(cbuf,0x04);
2995     // Encoding assumes a double has been pushed into FPR0.
2996     // Store down the double as an int, popping the FPU stack
2997     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2998     emit_opcode(cbuf,0x1C);
2999     emit_d8(cbuf,0x24);
3000     // Restore the rounding mode; mask the exception
3001     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3002     emit_opcode(cbuf,0x2D);
3003     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3004         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3005         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3006 
3007     // Load the converted int; adjust CPU stack
3008     emit_opcode(cbuf,0x58);       // POP EAX
3009     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3010     emit_d32   (cbuf,0x80000000); //         0x80000000
3011     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3012     emit_d8    (cbuf,0x07);       // Size of slow_call
3013     // Push src onto stack slow-path
3014     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3015     emit_d8    (cbuf,0xC0-1+$src$$reg );
3016     // CALL directly to the runtime
3017     cbuf.set_insts_mark();
3018     emit_opcode(cbuf,0xE8);       // Call into runtime
3019     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3020     // Carry on here...
3021   %}
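
  // The slow path exists because the hardware returns 0x80000000 for NaN and
  // out-of-range inputs, while Java defines d2i as below (a hedged sketch of
  // the semantics the runtime stub implements, not the stub's actual code):
  //
  //   int d2i(double d) {
  //     if (d != d)             return 0;                 // NaN -> 0
  //     if (d >=  2147483648.0) return 0x7FFFFFFF;        // saturate to max int
  //     if (d <= -2147483649.0) return (int)0x80000000;   // saturate to min int
  //     return (int)d;                                    // truncate toward zero
  //   }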
3022 
3023   enc_class DPR2L_encoding( regDPR src ) %{
3024     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3025     emit_opcode(cbuf,0x2D);
3026     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3027     // Allocate a word
3028     emit_opcode(cbuf,0x83);            // SUB ESP,8
3029     emit_opcode(cbuf,0xEC);
3030     emit_d8(cbuf,0x08);
3031     // Encoding assumes a double has been pushed into FPR0.
3032     // Store down the double as a long, popping the FPU stack
3033     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3034     emit_opcode(cbuf,0x3C);
3035     emit_d8(cbuf,0x24);
3036     // Restore the rounding mode; mask the exception
3037     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3038     emit_opcode(cbuf,0x2D);
3039     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3040         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3041         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3042 
3043     // Load the converted long; adjust CPU stack
3044     emit_opcode(cbuf,0x58);       // POP EAX
3045     emit_opcode(cbuf,0x5A);       // POP EDX
3046     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3047     emit_d8    (cbuf,0xFA);       // rdx
3048     emit_d32   (cbuf,0x80000000); //         0x80000000
3049     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3050     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3051     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3052     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3053     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3054     emit_d8    (cbuf,0x07);       // Size of slow_call
3055     // Push src onto stack slow-path
3056     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3057     emit_d8    (cbuf,0xC0-1+$src$$reg );
3058     // CALL directly to the runtime
3059     cbuf.set_insts_mark();
3060     emit_opcode(cbuf,0xE8);       // Call into runtime
3061     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3062     // Carry on here...
3063   %}
3064 
3065   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3066     // Operand was loaded from memory into fp ST (stack top)
3067     // FMUL   ST,$src  /* D8 C8+i */
3068     emit_opcode(cbuf, 0xD8);
3069     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3070   %}
3071 
3072   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3073     // FADDP  ST,src2  /* D8 C0+i */
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3076     //could use FADDP  src2,fpST  /* DE C0+i */
3077   %}
3078 
3079   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3080     // FADDP  src2,ST  /* DE C0+i */
3081     emit_opcode(cbuf, 0xDE);
3082     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3083   %}
3084 
3085   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3086     // Operand has been loaded into fp ST (stack top)
3087       // FSUB   ST,$src1
3088       emit_opcode(cbuf, 0xD8);
3089       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3090 
3091       // FDIV
3092       emit_opcode(cbuf, 0xD8);
3093       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3094   %}
3095 
3096   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3097     // Operand was loaded from memory into fp ST (stack top)
3098     // FADD   ST,$src  /* D8 C0+i */
3099     emit_opcode(cbuf, 0xD8);
3100     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3101 
3102     // FMUL  ST,src2  /* D8 C*+i */
3103     emit_opcode(cbuf, 0xD8);
3104     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3105   %}
3106 
3107 
3108   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3109     // Operand was loaded from memory into fp ST (stack top)
3110     // FADD   ST,$src  /* D8 C0+i */
3111     emit_opcode(cbuf, 0xD8);
3112     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3113 
3114     // FMULP  src2,ST  /* DE C8+i */
3115     emit_opcode(cbuf, 0xDE);
3116     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3117   %}
3118 
3119   // Atomically load the volatile long
3120   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3121     emit_opcode(cbuf,0xDF);
3122     int rm_byte_opcode = 0x05;
3123     int base     = $mem$$base;
3124     int index    = $mem$$index;
3125     int scale    = $mem$$scale;
3126     int displace = $mem$$disp;
3127     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3128     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3129     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3130   %}
3131 
3132   // Volatile Store Long.  Must be atomic, so move it into
3133   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3134   // target address before the store (for null-ptr checks)
3135   // so the memory operand is used twice in the encoding.
3136   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3137     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3138     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3139     emit_opcode(cbuf,0xDF);
3140     int rm_byte_opcode = 0x07;
3141     int base     = $mem$$base;
3142     int index    = $mem$$index;
3143     int scale    = $mem$$scale;
3144     int displace = $mem$$disp;
3145     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3146     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3147   %}
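
  // Why the x87 detour in the two encodings above: a pair of 32-bit integer
  // moves (MOV EAX,[mem]; MOV EDX,[mem+4]) is not atomic and could observe a
  // half-written value.  FILD m64 (DF /5) and FISTP m64 (DF /7) each transfer
  // all 64 bits in one memory access, which gives the atomicity that volatile
  // longs require on 32-bit x86.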
3148 
3149   // Safepoint Poll.  This polls the safepoint page, and causes an
3150   // exception if it is not readable.  Unfortunately, it kills the condition code
3151   // in the process.
3152   // We currently use TESTL [spp],EDI.
3153   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3154 
3155   enc_class Safepoint_Poll() %{
3156     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3157     emit_opcode(cbuf,0x85);
3158     emit_rm (cbuf, 0x0, 0x7, 0x5);
3159     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3160   %}
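
  // A hedged sketch of the mechanism: the poll above assembles to roughly
  //
  //   TESTL  [polling_page],EDI
  //
  // When the VM wants a safepoint it protects the polling page, so the next
  // poll faults and the fault handler brings the thread to the safepoint.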
3161 %}
3162 
3163 
3164 //----------FRAME--------------------------------------------------------------
3165 // Definition of frame structure and management information.
3166 //
3167 //  S T A C K   L A Y O U T    Allocators stack-slot number
3168 //                             |   (to get allocators register number
3169 //  G  Owned by    |        |  v    add OptoReg::stack0())
3170 //  r   CALLER     |        |
3171 //  o     |        +--------+      pad to even-align allocators stack-slot
3172 //  w     V        |  pad0  |        numbers; owned by CALLER
3173 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3174 //  h     ^        |   in   |  5
3175 //        |        |  args  |  4   Holes in incoming args owned by SELF
3176 //  |     |        |        |  3
3177 //  |     |        +--------+
3178 //  V     |        | old out|      Empty on Intel, window on Sparc
3179 //        |    old |preserve|      Must be even aligned.
3180 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3181 //        |        |   in   |  3   area for Intel ret address
3182 //     Owned by    |preserve|      Empty on Sparc.
3183 //       SELF      +--------+
3184 //        |        |  pad2  |  2   pad to align old SP
3185 //        |        +--------+  1
3186 //        |        | locks  |  0
3187 //        |        +--------+----> OptoReg::stack0(), even aligned
3188 //        |        |  pad1  | 11   pad to align new SP
3189 //        |        +--------+
3190 //        |        |        | 10
3191 //        |        | spills |  9   spills
3192 //        V        |        |  8   (pad0 slot for callee)
3193 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3194 //        ^        |  out   |  7
3195 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3196 //     Owned by    +--------+
3197 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3198 //        |    new |preserve|      Must be even-aligned.
3199 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3200 //        |        |        |
3201 //
3202 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3203 //         known from SELF's arguments and the Java calling convention.
3204 //         Region 6-7 is determined per call site.
3205 // Note 2: If the calling convention leaves holes in the incoming argument
3206 //         area, those holes are owned by SELF.  Holes in the outgoing area
3207 //         are owned by the CALLEE.  Holes should not be necessary in the
3208 //         incoming area, as the Java calling convention is completely under
3209 //         the control of the AD file.  Doubles can be sorted and packed to
3210 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3211 //         varargs C calling conventions.
3212 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3213 //         even aligned with pad0 as needed.
3214 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3215 //         region 6-11 is even aligned; it may be padded out more so that
3216 //         the region from SP to FP meets the minimum stack alignment.
3217 
3218 frame %{
3219   // What direction does stack grow in (assumed to be same for C & Java)
3220   stack_direction(TOWARDS_LOW);
3221 
3222   // These registers define part of the calling convention
3223   // between compiled code and the interpreter.
3224   inline_cache_reg(EAX);                // Inline Cache Register
3225   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3226 
3227   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3228   cisc_spilling_operand_name(indOffset32);
3229 
3230   // Number of stack slots consumed by locking an object
3231   sync_stack_slots(1);
3232 
3233   // Compiled code's Frame Pointer
3234   frame_pointer(ESP);
3235   // Interpreter stores its frame pointer in a register which is
3236   // stored to the stack by I2CAdaptors.
3237   // I2CAdaptors convert from interpreted java to compiled java.
3238   interpreter_frame_pointer(EBP);
3239 
3240   // Stack alignment requirement
3241   // Alignment size in bytes (128-bit -> 16 bytes)
3242   stack_alignment(StackAlignmentInBytes);
3243 
3244   // Number of stack slots between incoming argument block and the start of
3245   // a new frame.  The PROLOG must add this many slots to the stack.  The
3246   // EPILOG must remove this many slots.  Intel needs one slot for
3247   // return address and one for EBP (must save EBP)
3248   in_preserve_stack_slots(2+VerifyStackAtCalls);
3249 
3250   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3251   // for calls to C.  Supports the var-args backing area for register parms.
3252   varargs_C_out_slots_killed(0);
3253 
3254   // The after-PROLOG location of the return address.  Location of
3255   // return address specifies a type (REG or STACK) and a number
3256   // representing the register number (i.e. - use a register name) or
3257   // stack slot.
3258   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3259   // Otherwise, it is above the locks and verification slot and alignment word
3260   return_addr(STACK - 1 +
3261               round_to((Compile::current()->in_preserve_stack_slots() +
3262                         Compile::current()->fixed_slots()),
3263                        stack_alignment_in_slots()));
3264 
3265   // Body of function which returns an integer array locating
3266   // arguments either in registers or in stack slots.  Passed an array
3267   // of ideal registers called "sig" and a "length" count.  Stack-slot
3268   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3269   // arguments for a CALLEE.  Incoming stack arguments are
3270   // automatically biased by the preserve_stack_slots field above.
3271   calling_convention %{
3272     // No difference between incoming/outgoing, so just pass false
3273     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3274   %}
3275 
3276 
3277   // Body of function which returns an integer array locating
3278   // arguments either in registers or in stack slots.  Passed an array
3279   // of ideal registers called "sig" and a "length" count.  Stack-slot
3280   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3281   // arguments for a CALLEE.  Incoming stack arguments are
3282   // automatically biased by the preserve_stack_slots field above.
3283   c_calling_convention %{
3284     // This is obviously always outgoing
3285     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3286   %}
3287 
3288   // Location of C & interpreter return values
3289   c_return_value %{
3290     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3291     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3292     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3293 
3294     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3295     // that C functions return float and double results in XMM0.
3296     if( ideal_reg == Op_RegD && UseSSE>=2 )
3297       return OptoRegPair(XMM0b_num,XMM0_num);
3298     if( ideal_reg == Op_RegF && UseSSE>=2 )
3299       return OptoRegPair(OptoReg::Bad,XMM0_num);
3300 
3301     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3302   %}
3303 
3304   // Location of return values
3305   return_value %{
3306     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3307     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3308     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3309     if( ideal_reg == Op_RegD && UseSSE>=2 )
3310       return OptoRegPair(XMM0b_num,XMM0_num);
3311     if( ideal_reg == Op_RegF && UseSSE>=1 )
3312       return OptoRegPair(OptoReg::Bad,XMM0_num);
3313     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3314   %}
3315 
3316 %}
3317 
3318 //----------ATTRIBUTES---------------------------------------------------------
3319 //----------Operand Attributes-------------------------------------------------
3320 op_attrib op_cost(0);        // Required cost attribute
3321 
3322 //----------Instruction Attributes---------------------------------------------
3323 ins_attrib ins_cost(100);       // Required cost attribute
3324 ins_attrib ins_size(8);         // Required size attribute (in bits)
3325 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3326                                 // non-matching short branch variant of some
3327                                 // long branch?
3328 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3329                                 // specifies the alignment that some part of the instruction (not
3330                                 // necessarily the start) requires.  If > 1, a compute_padding()
3331                                 // function must be provided for the instruction
3332 
3333 //----------OPERANDS-----------------------------------------------------------
3334 // Operand definitions must precede instruction definitions for correct parsing
3335 // in the ADLC because operands constitute user defined types which are used in
3336 // instruction definitions.
3337 
3338 //----------Simple Operands----------------------------------------------------
3339 // Immediate Operands
3340 // Integer Immediate
3341 operand immI() %{
3342   match(ConI);
3343 
3344   op_cost(10);
3345   format %{ %}
3346   interface(CONST_INTER);
3347 %}
3348 
3349 // Constant for test vs zero
3350 operand immI0() %{
3351   predicate(n->get_int() == 0);
3352   match(ConI);
3353 
3354   op_cost(0);
3355   format %{ %}
3356   interface(CONST_INTER);
3357 %}
3358 
3359 // Constant for increment
3360 operand immI1() %{
3361   predicate(n->get_int() == 1);
3362   match(ConI);
3363 
3364   op_cost(0);
3365   format %{ %}
3366   interface(CONST_INTER);
3367 %}
3368 
3369 // Constant for decrement
3370 operand immI_M1() %{
3371   predicate(n->get_int() == -1);
3372   match(ConI);
3373 
3374   op_cost(0);
3375   format %{ %}
3376   interface(CONST_INTER);
3377 %}
3378 
3379 // Valid scale values for addressing modes
3380 operand immI2() %{
3381   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3382   match(ConI);
3383 
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 operand immI8() %{
3389   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3390   match(ConI);
3391 
3392   op_cost(5);
3393   format %{ %}
3394   interface(CONST_INTER);
3395 %}
3396 
3397 operand immI16() %{
3398   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3399   match(ConI);
3400 
3401   op_cost(10);
3402   format %{ %}
3403   interface(CONST_INTER);
3404 %}
3405 
3406 // Int Immediate non-negative
3407 operand immU31()
3408 %{
3409   predicate(n->get_int() >= 0);
3410   match(ConI);
3411 
3412   op_cost(0);
3413   format %{ %}
3414   interface(CONST_INTER);
3415 %}
3416 
3417 // Constant for long shifts
3418 operand immI_32() %{
3419   predicate( n->get_int() == 32 );
3420   match(ConI);
3421 
3422   op_cost(0);
3423   format %{ %}
3424   interface(CONST_INTER);
3425 %}
3426 
3427 operand immI_1_31() %{
3428   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3429   match(ConI);
3430 
3431   op_cost(0);
3432   format %{ %}
3433   interface(CONST_INTER);
3434 %}
3435 
3436 operand immI_32_63() %{
3437   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3438   match(ConI);
3439   op_cost(0);
3440 
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 operand immI_1() %{
3446   predicate( n->get_int() == 1 );
3447   match(ConI);
3448 
3449   op_cost(0);
3450   format %{ %}
3451   interface(CONST_INTER);
3452 %}
3453 
3454 operand immI_2() %{
3455   predicate( n->get_int() == 2 );
3456   match(ConI);
3457 
3458   op_cost(0);
3459   format %{ %}
3460   interface(CONST_INTER);
3461 %}
3462 
3463 operand immI_3() %{
3464   predicate( n->get_int() == 3 );
3465   match(ConI);
3466 
3467   op_cost(0);
3468   format %{ %}
3469   interface(CONST_INTER);
3470 %}
3471 
3472 // Pointer Immediate
3473 operand immP() %{
3474   match(ConP);
3475 
3476   op_cost(10);
3477   format %{ %}
3478   interface(CONST_INTER);
3479 %}
3480 
3481 // NULL Pointer Immediate
3482 operand immP0() %{
3483   predicate( n->get_ptr() == 0 );
3484   match(ConP);
3485   op_cost(0);
3486 
3487   format %{ %}
3488   interface(CONST_INTER);
3489 %}
3490 
3491 // Long Immediate
3492 operand immL() %{
3493   match(ConL);
3494 
3495   op_cost(20);
3496   format %{ %}
3497   interface(CONST_INTER);
3498 %}
3499 
3500 // Long Immediate zero
3501 operand immL0() %{
3502   predicate( n->get_long() == 0L );
3503   match(ConL);
3504   op_cost(0);
3505 
3506   format %{ %}
3507   interface(CONST_INTER);
3508 %}
3509 
3510 // Long Immediate: minus one
3511 operand immL_M1() %{
3512   predicate( n->get_long() == -1L );
3513   match(ConL);
3514   op_cost(0);
3515 
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
3520 // Long immediate from 0 to 127.
3521 // Used for a shorter form of long mul by 10.
3522 operand immL_127() %{
3523   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3524   match(ConL);
3525   op_cost(0);
3526 
3527   format %{ %}
3528   interface(CONST_INTER);
3529 %}
3530 
3531 // Long Immediate: low 32-bit mask
3532 operand immL_32bits() %{
3533   predicate(n->get_long() == 0xFFFFFFFFL);
3534   match(ConL);
3535   op_cost(0);
3536 
3537   format %{ %}
3538   interface(CONST_INTER);
3539 %}
3540 
3541 // Long Immediate: a long that fits in 32 signed bits
3542 operand immL32() %{
3543   predicate(n->get_long() == (int)(n->get_long()));
3544   match(ConL);
3545   op_cost(20);
3546 
3547   format %{ %}
3548   interface(CONST_INTER);
3549 %}
3550 
3551 //Double Immediate zero
3552 operand immDPR0() %{
3553   // Do an additional (and counter-intuitive) test against NaN to work around a VC++
3554   // bug that generates code such that NaNs compare equal to 0.0
3555   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3556   match(ConD);
3557 
3558   op_cost(5);
3559   format %{ %}
3560   interface(CONST_INTER);
3561 %}
3562 
3563 // Double Immediate one
3564 operand immDPR1() %{
3565   predicate( UseSSE<=1 && n->getd() == 1.0 );
3566   match(ConD);
3567 
3568   op_cost(5);
3569   format %{ %}
3570   interface(CONST_INTER);
3571 %}
3572 
3573 // Double Immediate
3574 operand immDPR() %{
3575   predicate(UseSSE<=1);
3576   match(ConD);
3577 
3578   op_cost(5);
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 operand immD() %{
3584   predicate(UseSSE>=2);
3585   match(ConD);
3586 
3587   op_cost(5);
3588   format %{ %}
3589   interface(CONST_INTER);
3590 %}
3591 
3592 // Double Immediate zero
3593 operand immD0() %{
3594   // Do an additional (and counter-intuitive) test against NaN to work around a VC++
3595   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3596   // compare equal to -0.0.
3597   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3598   match(ConD);
3599 
3600   format %{ %}
3601   interface(CONST_INTER);
3602 %}
3603 
3604 // Float Immediate zero
3605 operand immFPR0() %{
3606   predicate(UseSSE == 0 && n->getf() == 0.0F);
3607   match(ConF);
3608 
3609   op_cost(5);
3610   format %{ %}
3611   interface(CONST_INTER);
3612 %}
3613 
3614 // Float Immediate one
3615 operand immFPR1() %{
3616   predicate(UseSSE == 0 && n->getf() == 1.0F);
3617   match(ConF);
3618 
3619   op_cost(5);
3620   format %{ %}
3621   interface(CONST_INTER);
3622 %}
3623 
3624 // Float Immediate
3625 operand immFPR() %{
3626   predicate( UseSSE == 0 );
3627   match(ConF);
3628 
3629   op_cost(5);
3630   format %{ %}
3631   interface(CONST_INTER);
3632 %}
3633 
3634 // Float Immediate
3635 operand immF() %{
3636   predicate(UseSSE >= 1);
3637   match(ConF);
3638 
3639   op_cost(5);
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 // Float Immediate zero.  Zero and not -0.0
3645 operand immF0() %{
3646   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3647   match(ConF);
3648 
3649   op_cost(5);
3650   format %{ %}
3651   interface(CONST_INTER);
3652 %}
3653 
3654 // Immediates for special shifts (sign extend)
3655 
3656 // Constants for sign-extending shifts
3657 operand immI_16() %{
3658   predicate( n->get_int() == 16 );
3659   match(ConI);
3660 
3661   format %{ %}
3662   interface(CONST_INTER);
3663 %}
3664 
3665 operand immI_24() %{
3666   predicate( n->get_int() == 24 );
3667   match(ConI);
3668 
3669   format %{ %}
3670   interface(CONST_INTER);
3671 %}
3672 
3673 // Constant for byte-wide masking
3674 operand immI_255() %{
3675   predicate( n->get_int() == 255 );
3676   match(ConI);
3677 
3678   format %{ %}
3679   interface(CONST_INTER);
3680 %}
3681 
3682 // Constant for short-wide masking
3683 operand immI_65535() %{
3684   predicate(n->get_int() == 65535);
3685   match(ConI);
3686 
3687   format %{ %}
3688   interface(CONST_INTER);
3689 %}
3690 
3691 // Register Operands
3692 // Integer Register
3693 operand rRegI() %{
3694   constraint(ALLOC_IN_RC(int_reg));
3695   match(RegI);
3696   match(xRegI);
3697   match(eAXRegI);
3698   match(eBXRegI);
3699   match(eCXRegI);
3700   match(eDXRegI);
3701   match(eDIRegI);
3702   match(eSIRegI);
3703 
3704   format %{ %}
3705   interface(REG_INTER);
3706 %}
3707 
3708 // Subset of Integer Register
3709 operand xRegI(rRegI reg) %{
3710   constraint(ALLOC_IN_RC(int_x_reg));
3711   match(reg);
3712   match(eAXRegI);
3713   match(eBXRegI);
3714   match(eCXRegI);
3715   match(eDXRegI);
3716 
3717   format %{ %}
3718   interface(REG_INTER);
3719 %}
3720 
3721 // Special Registers
3722 operand eAXRegI(xRegI reg) %{
3723   constraint(ALLOC_IN_RC(eax_reg));
3724   match(reg);
3725   match(rRegI);
3726 
3727   format %{ "EAX" %}
3728   interface(REG_INTER);
3729 %}
3730 
3731 // Special Registers
3732 operand eBXRegI(xRegI reg) %{
3733   constraint(ALLOC_IN_RC(ebx_reg));
3734   match(reg);
3735   match(rRegI);
3736 
3737   format %{ "EBX" %}
3738   interface(REG_INTER);
3739 %}
3740 
3741 operand eCXRegI(xRegI reg) %{
3742   constraint(ALLOC_IN_RC(ecx_reg));
3743   match(reg);
3744   match(rRegI);
3745 
3746   format %{ "ECX" %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 operand eDXRegI(xRegI reg) %{
3751   constraint(ALLOC_IN_RC(edx_reg));
3752   match(reg);
3753   match(rRegI);
3754 
3755   format %{ "EDX" %}
3756   interface(REG_INTER);
3757 %}
3758 
3759 operand eDIRegI(xRegI reg) %{
3760   constraint(ALLOC_IN_RC(edi_reg));
3761   match(reg);
3762   match(rRegI);
3763 
3764   format %{ "EDI" %}
3765   interface(REG_INTER);
3766 %}
3767 
3768 operand naxRegI() %{
3769   constraint(ALLOC_IN_RC(nax_reg));
3770   match(RegI);
3771   match(eCXRegI);
3772   match(eDXRegI);
3773   match(eSIRegI);
3774   match(eDIRegI);
3775 
3776   format %{ %}
3777   interface(REG_INTER);
3778 %}
3779 
3780 operand nadxRegI() %{
3781   constraint(ALLOC_IN_RC(nadx_reg));
3782   match(RegI);
3783   match(eBXRegI);
3784   match(eCXRegI);
3785   match(eSIRegI);
3786   match(eDIRegI);
3787 
3788   format %{ %}
3789   interface(REG_INTER);
3790 %}
3791 
3792 operand ncxRegI() %{
3793   constraint(ALLOC_IN_RC(ncx_reg));
3794   match(RegI);
3795   match(eAXRegI);
3796   match(eDXRegI);
3797   match(eSIRegI);
3798   match(eDIRegI);
3799 
3800   format %{ %}
3801   interface(REG_INTER);
3802 %}
3803 
3804 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3805 // //
3806 operand eSIRegI(xRegI reg) %{
3807    constraint(ALLOC_IN_RC(esi_reg));
3808    match(reg);
3809    match(rRegI);
3810 
3811    format %{ "ESI" %}
3812    interface(REG_INTER);
3813 %}
3814 
3815 // Pointer Register
3816 operand anyRegP() %{
3817   constraint(ALLOC_IN_RC(any_reg));
3818   match(RegP);
3819   match(eAXRegP);
3820   match(eBXRegP);
3821   match(eCXRegP);
3822   match(eDIRegP);
3823   match(eRegP);
3824 
3825   format %{ %}
3826   interface(REG_INTER);
3827 %}
3828 
3829 operand eRegP() %{
3830   constraint(ALLOC_IN_RC(int_reg));
3831   match(RegP);
3832   match(eAXRegP);
3833   match(eBXRegP);
3834   match(eCXRegP);
3835   match(eDIRegP);
3836 
3837   format %{ %}
3838   interface(REG_INTER);
3839 %}
3840 
3841 // On Windows 95, EBP is not safe to use for implicit null tests.
3842 operand eRegP_no_EBP() %{
3843   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3844   match(RegP);
3845   match(eAXRegP);
3846   match(eBXRegP);
3847   match(eCXRegP);
3848   match(eDIRegP);
3849 
3850   op_cost(100);
3851   format %{ %}
3852   interface(REG_INTER);
3853 %}
3854 
3855 operand naxRegP() %{
3856   constraint(ALLOC_IN_RC(nax_reg));
3857   match(RegP);
3858   match(eBXRegP);
3859   match(eDXRegP);
3860   match(eCXRegP);
3861   match(eSIRegP);
3862   match(eDIRegP);
3863 
3864   format %{ %}
3865   interface(REG_INTER);
3866 %}
3867 
3868 operand nabxRegP() %{
3869   constraint(ALLOC_IN_RC(nabx_reg));
3870   match(RegP);
3871   match(eCXRegP);
3872   match(eDXRegP);
3873   match(eSIRegP);
3874   match(eDIRegP);
3875 
3876   format %{ %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 operand pRegP() %{
3881   constraint(ALLOC_IN_RC(p_reg));
3882   match(RegP);
3883   match(eBXRegP);
3884   match(eDXRegP);
3885   match(eSIRegP);
3886   match(eDIRegP);
3887 
3888   format %{ %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 // Special Registers
3893 // Return a pointer value
3894 operand eAXRegP(eRegP reg) %{
3895   constraint(ALLOC_IN_RC(eax_reg));
3896   match(reg);
3897   format %{ "EAX" %}
3898   interface(REG_INTER);
3899 %}
3900 
3901 // Used in AtomicAdd
3902 operand eBXRegP(eRegP reg) %{
3903   constraint(ALLOC_IN_RC(ebx_reg));
3904   match(reg);
3905   format %{ "EBX" %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Tail-call (interprocedural jump) to interpreter
3910 operand eCXRegP(eRegP reg) %{
3911   constraint(ALLOC_IN_RC(ecx_reg));
3912   match(reg);
3913   format %{ "ECX" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 operand eSIRegP(eRegP reg) %{
3918   constraint(ALLOC_IN_RC(esi_reg));
3919   match(reg);
3920   format %{ "ESI" %}
3921   interface(REG_INTER);
3922 %}
3923 
3924 // Used in rep stosw
3925 operand eDIRegP(eRegP reg) %{
3926   constraint(ALLOC_IN_RC(edi_reg));
3927   match(reg);
3928   format %{ "EDI" %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 operand eRegL() %{
3933   constraint(ALLOC_IN_RC(long_reg));
3934   match(RegL);
3935   match(eADXRegL);
3936 
3937   format %{ %}
3938   interface(REG_INTER);
3939 %}
3940 
3941 operand eADXRegL( eRegL reg ) %{
3942   constraint(ALLOC_IN_RC(eadx_reg));
3943   match(reg);
3944 
3945   format %{ "EDX:EAX" %}
3946   interface(REG_INTER);
3947 %}
3948 
3949 operand eBCXRegL( eRegL reg ) %{
3950   constraint(ALLOC_IN_RC(ebcx_reg));
3951   match(reg);
3952 
3953   format %{ "EBX:ECX" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Special case for integer high multiply
3958 operand eADXRegL_low_only() %{
3959   constraint(ALLOC_IN_RC(eadx_reg));
3960   match(RegL);
3961 
3962   format %{ "EAX" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 // Flags register, used as output of compare instructions
3967 operand eFlagsReg() %{
3968   constraint(ALLOC_IN_RC(int_flags));
3969   match(RegFlags);
3970 
3971   format %{ "EFLAGS" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 // Flags register, used as output of FLOATING POINT compare instructions
3976 operand eFlagsRegU() %{
3977   constraint(ALLOC_IN_RC(int_flags));
3978   match(RegFlags);
3979 
3980   format %{ "EFLAGS_U" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 operand eFlagsRegUCF() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987   predicate(false);
3988 
3989   format %{ "EFLAGS_U_CF" %}
3990   interface(REG_INTER);
3991 %}
3992 
3993 // Condition Code Register used by long compare
3994 operand flagsReg_long_LTGE() %{
3995   constraint(ALLOC_IN_RC(int_flags));
3996   match(RegFlags);
3997   format %{ "FLAGS_LTGE" %}
3998   interface(REG_INTER);
3999 %}
4000 operand flagsReg_long_EQNE() %{
4001   constraint(ALLOC_IN_RC(int_flags));
4002   match(RegFlags);
4003   format %{ "FLAGS_EQNE" %}
4004   interface(REG_INTER);
4005 %}
4006 operand flagsReg_long_LEGT() %{
4007   constraint(ALLOC_IN_RC(int_flags));
4008   match(RegFlags);
4009   format %{ "FLAGS_LEGT" %}
4010   interface(REG_INTER);
4011 %}
4012 
4013 // Float register operands
4014 operand regDPR() %{
4015   predicate( UseSSE < 2 );
4016   constraint(ALLOC_IN_RC(fp_dbl_reg));
4017   match(RegD);
4018   match(regDPR1);
4019   match(regDPR2);
4020   format %{ %}
4021   interface(REG_INTER);
4022 %}
4023 
4024 operand regDPR1(regDPR reg) %{
4025   predicate( UseSSE < 2 );
4026   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4027   match(reg);
4028   format %{ "FPR1" %}
4029   interface(REG_INTER);
4030 %}
4031 
4032 operand regDPR2(regDPR reg) %{
4033   predicate( UseSSE < 2 );
4034   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4035   match(reg);
4036   format %{ "FPR2" %}
4037   interface(REG_INTER);
4038 %}
4039 
4040 operand regnotDPR1(regDPR reg) %{
4041   predicate( UseSSE < 2 );
4042   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4043   match(reg);
4044   format %{ %}
4045   interface(REG_INTER);
4046 %}
4047 
4048 // Float register operands
4049 operand regFPR() %{
4050   predicate( UseSSE < 2 );
4051   constraint(ALLOC_IN_RC(fp_flt_reg));
4052   match(RegF);
4053   match(regFPR1);
4054   format %{ %}
4055   interface(REG_INTER);
4056 %}
4057 
4058 // Float register operands
4059 operand regFPR1(regFPR reg) %{
4060   predicate( UseSSE < 2 );
4061   constraint(ALLOC_IN_RC(fp_flt_reg0));
4062   match(reg);
4063   format %{ "FPR1" %}
4064   interface(REG_INTER);
4065 %}
4066 
4067 // XMM Float register operands
4068 operand regF() %{
4069   predicate( UseSSE>=1 );
4070   constraint(ALLOC_IN_RC(float_reg_legacy));
4071   match(RegF);
4072   format %{ %}
4073   interface(REG_INTER);
4074 %}
4075 
4076 // XMM Double register operands
4077 operand regD() %{
4078   predicate( UseSSE>=2 );
4079   constraint(ALLOC_IN_RC(double_reg_legacy));
4080   match(RegD);
4081   format %{ %}
4082   interface(REG_INTER);
4083 %}
4084 
4085 // Vectors: note that we use legacy registers to avoid extra (unneeded in the
4086 // 32-bit VM) runtime code generation via reg_class_dynamic.
4087 operand vecS() %{
4088   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4089   match(VecS);
4090 
4091   format %{ %}
4092   interface(REG_INTER);
4093 %}
4094 
4095 operand vecD() %{
4096   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4097   match(VecD);
4098 
4099   format %{ %}
4100   interface(REG_INTER);
4101 %}
4102 
4103 operand vecX() %{
4104   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4105   match(VecX);
4106 
4107   format %{ %}
4108   interface(REG_INTER);
4109 %}
4110 
4111 operand vecY() %{
4112   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4113   match(VecY);
4114 
4115   format %{ %}
4116   interface(REG_INTER);
4117 %}
4118 
4119 //----------Memory Operands----------------------------------------------------
4120 // Direct Memory Operand
4121 operand direct(immP addr) %{
4122   match(addr);
4123 
4124   format %{ "[$addr]" %}
4125   interface(MEMORY_INTER) %{
4126     base(0xFFFFFFFF);
4127     index(0x4);
4128     scale(0x0);
4129     disp($addr);
4130   %}
4131 %}
4132 
4133 // Indirect Memory Operand
4134 operand indirect(eRegP reg) %{
4135   constraint(ALLOC_IN_RC(int_reg));
4136   match(reg);
4137 
4138   format %{ "[$reg]" %}
4139   interface(MEMORY_INTER) %{
4140     base($reg);
4141     index(0x4);
4142     scale(0x0);
4143     disp(0x0);
4144   %}
4145 %}
4146 
4147 // Indirect Memory Plus Short Offset Operand
4148 operand indOffset8(eRegP reg, immI8 off) %{
4149   match(AddP reg off);
4150 
4151   format %{ "[$reg + $off]" %}
4152   interface(MEMORY_INTER) %{
4153     base($reg);
4154     index(0x4);
4155     scale(0x0);
4156     disp($off);
4157   %}
4158 %}
4159 
4160 // Indirect Memory Plus Long Offset Operand
4161 operand indOffset32(eRegP reg, immI off) %{
4162   match(AddP reg off);
4163 
4164   format %{ "[$reg + $off]" %}
4165   interface(MEMORY_INTER) %{
4166     base($reg);
4167     index(0x4);
4168     scale(0x0);
4169     disp($off);
4170   %}
4171 %}
4172 
4173 // Indirect Memory Plus Long Offset Operand
4174 operand indOffset32X(rRegI reg, immP off) %{
4175   match(AddP off reg);
4176 
4177   format %{ "[$reg + $off]" %}
4178   interface(MEMORY_INTER) %{
4179     base($reg);
4180     index(0x4);
4181     scale(0x0);
4182     disp($off);
4183   %}
4184 %}
4185 
4186 // Indirect Memory Plus Index Register Plus Offset Operand
4187 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4188   match(AddP (AddP reg ireg) off);
4189 
4190   op_cost(10);
4191   format %{"[$reg + $off + $ireg]" %}
4192   interface(MEMORY_INTER) %{
4193     base($reg);
4194     index($ireg);
4195     scale(0x0);
4196     disp($off);
4197   %}
4198 %}
4199 
4200 // Indirect Memory Plus Index Register Plus Offset Operand
4201 operand indIndex(eRegP reg, rRegI ireg) %{
4202   match(AddP reg ireg);
4203 
4204   op_cost(10);
4205   format %{"[$reg + $ireg]" %}
4206   interface(MEMORY_INTER) %{
4207     base($reg);
4208     index($ireg);
4209     scale(0x0);
4210     disp(0x0);
4211   %}
4212 %}
4213 
4214 // // -------------------------------------------------------------------------
4215 // // 486 architecture doesn't support "scale * index + offset" without a base
4216 // // -------------------------------------------------------------------------
4217 // // Scaled Memory Operands
4218 // // Indirect Memory Times Scale Plus Offset Operand
4219 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4220 //   match(AddP off (LShiftI ireg scale));
4221 //
4222 //   op_cost(10);
4223 //   format %{"[$off + $ireg << $scale]" %}
4224 //   interface(MEMORY_INTER) %{
4225 //     base(0x4);
4226 //     index($ireg);
4227 //     scale($scale);
4228 //     disp($off);
4229 //   %}
4230 // %}
4231 
4232 // Indirect Memory Times Scale Plus Index Register
4233 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4234   match(AddP reg (LShiftI ireg scale));
4235 
4236   op_cost(10);
4237   format %{"[$reg + $ireg << $scale]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index($ireg);
4241     scale($scale);
4242     disp(0x0);
4243   %}
4244 %}
4245 
4246 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4247 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4248   match(AddP (AddP reg (LShiftI ireg scale)) off);
4249 
4250   op_cost(10);
4251   format %{"[$reg + $off + $ireg << $scale]" %}
4252   interface(MEMORY_INTER) %{
4253     base($reg);
4254     index($ireg);
4255     scale($scale);
4256     disp($off);
4257   %}
4258 %}
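
// For example (illustrative only), a 4-byte array element access naturally
// matches this operand:
//
//   address = $reg + $off + ($ireg << 2)
//
// i.e. base = array pointer, off = element-0 offset, ireg = index, scale = 2,
// which is exactly the (AddP (AddP reg (LShiftI ireg scale)) off) shape above.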
4259 
4260 //----------Load Long Memory Operands------------------------------------------
4261 // The load-long idiom will use its address expression again after loading
4262 // the first word of the long.  If the load-long destination overlaps with
4263 // registers used in the addressing expression, the 2nd half will be loaded
4264 // from a clobbered address.  Fix this by requiring that load-long use
4265 // address registers that do not overlap with the load-long target.
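
// For example (illustrative registers), matching a long load naively as
//
//   MOV ECX,[ECX]       // first half overwrites the base register
//   MOV EBX,[ECX+4]     // second half now reads through a clobbered address
//
// is wrong whenever the destination pair overlaps the base; the operands below
// therefore pin the load-long base to ESI so it stays disjoint from the target.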
4266 
4267 // load-long support
4268 operand load_long_RegP() %{
4269   constraint(ALLOC_IN_RC(esi_reg));
4270   match(RegP);
4271   match(eSIRegP);
4272   op_cost(100);
4273   format %{  %}
4274   interface(REG_INTER);
4275 %}
4276 
4277 // Indirect Memory Operand Long
4278 operand load_long_indirect(load_long_RegP reg) %{
4279   constraint(ALLOC_IN_RC(esi_reg));
4280   match(reg);
4281 
4282   format %{ "[$reg]" %}
4283   interface(MEMORY_INTER) %{
4284     base($reg);
4285     index(0x4);
4286     scale(0x0);
4287     disp(0x0);
4288   %}
4289 %}
4290 
4291 // Indirect Memory Plus Long Offset Operand
4292 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4293   match(AddP reg off);
4294 
4295   format %{ "[$reg + $off]" %}
4296   interface(MEMORY_INTER) %{
4297     base($reg);
4298     index(0x4);
4299     scale(0x0);
4300     disp($off);
4301   %}
4302 %}
4303 
4304 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4305 
4306 
4307 //----------Special Memory Operands--------------------------------------------
4308 // Stack Slot Operand - This operand is used for loading and storing temporary
4309 //                      values on the stack where a match requires a value to
4310 //                      flow through memory.
4311 operand stackSlotP(sRegP reg) %{
4312   constraint(ALLOC_IN_RC(stack_slots));
4313   // No match rule because this operand is only generated in matching
4314   format %{ "[$reg]" %}
4315   interface(MEMORY_INTER) %{
4316     base(0x4);   // ESP
4317     index(0x4);  // No Index
4318     scale(0x0);  // No Scale
4319     disp($reg);  // Stack Offset
4320   %}
4321 %}
4322 
4323 operand stackSlotI(sRegI reg) %{
4324   constraint(ALLOC_IN_RC(stack_slots));
4325   // No match rule because this operand is only generated in matching
4326   format %{ "[$reg]" %}
4327   interface(MEMORY_INTER) %{
4328     base(0x4);   // ESP
4329     index(0x4);  // No Index
4330     scale(0x0);  // No Scale
4331     disp($reg);  // Stack Offset
4332   %}
4333 %}
4334 
4335 operand stackSlotF(sRegF reg) %{
4336   constraint(ALLOC_IN_RC(stack_slots));
4337   // No match rule because this operand is only generated in matching
4338   format %{ "[$reg]" %}
4339   interface(MEMORY_INTER) %{
4340     base(0x4);   // ESP
4341     index(0x4);  // No Index
4342     scale(0x0);  // No Scale
4343     disp($reg);  // Stack Offset
4344   %}
4345 %}
4346 
4347 operand stackSlotD(sRegD reg) %{
4348   constraint(ALLOC_IN_RC(stack_slots));
4349   // No match rule because this operand is only generated in matching
4350   format %{ "[$reg]" %}
4351   interface(MEMORY_INTER) %{
4352     base(0x4);   // ESP
4353     index(0x4);  // No Index
4354     scale(0x0);  // No Scale
4355     disp($reg);  // Stack Offset
4356   %}
4357 %}
4358 
4359 operand stackSlotL(sRegL reg) %{
4360   constraint(ALLOC_IN_RC(stack_slots));
4361   // No match rule because this operand is only generated in matching
4362   format %{ "[$reg]" %}
4363   interface(MEMORY_INTER) %{
4364     base(0x4);   // ESP
4365     index(0x4);  // No Index
4366     scale(0x0);  // No Scale
4367     disp($reg);  // Stack Offset
4368   %}
4369 %}
4370 
4371 //----------Memory Operands - Win95 Implicit Null Variants----------------
4372 // Indirect Memory Operand
4373 operand indirect_win95_safe(eRegP_no_EBP reg)
4374 %{
4375   constraint(ALLOC_IN_RC(int_reg));
4376   match(reg);
4377 
4378   op_cost(100);
4379   format %{ "[$reg]" %}
4380   interface(MEMORY_INTER) %{
4381     base($reg);
4382     index(0x4);
4383     scale(0x0);
4384     disp(0x0);
4385   %}
4386 %}
4387 
4388 // Indirect Memory Plus Short Offset Operand
4389 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4390 %{
4391   match(AddP reg off);
4392 
4393   op_cost(100);
4394   format %{ "[$reg + $off]" %}
4395   interface(MEMORY_INTER) %{
4396     base($reg);
4397     index(0x4);
4398     scale(0x0);
4399     disp($off);
4400   %}
4401 %}
4402 
4403 // Indirect Memory Plus Long Offset Operand
4404 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4405 %{
4406   match(AddP reg off);
4407 
4408   op_cost(100);
4409   format %{ "[$reg + $off]" %}
4410   interface(MEMORY_INTER) %{
4411     base($reg);
4412     index(0x4);
4413     scale(0x0);
4414     disp($off);
4415   %}
4416 %}
4417 
4418 // Indirect Memory Plus Index Register Plus Offset Operand
4419 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4420 %{
4421   match(AddP (AddP reg ireg) off);
4422 
4423   op_cost(100);
4424   format %{"[$reg + $off + $ireg]" %}
4425   interface(MEMORY_INTER) %{
4426     base($reg);
4427     index($ireg);
4428     scale(0x0);
4429     disp($off);
4430   %}
4431 %}
4432 
4433 // Indirect Memory Times Scale Plus Index Register
4434 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4435 %{
4436   match(AddP reg (LShiftI ireg scale));
4437 
4438   op_cost(100);
4439   format %{"[$reg + $ireg << $scale]" %}
4440   interface(MEMORY_INTER) %{
4441     base($reg);
4442     index($ireg);
4443     scale($scale);
4444     disp(0x0);
4445   %}
4446 %}
4447 
4448 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4449 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4450 %{
4451   match(AddP (AddP reg (LShiftI ireg scale)) off);
4452 
4453   op_cost(100);
4454   format %{"[$reg + $off + $ireg << $scale]" %}
4455   interface(MEMORY_INTER) %{
4456     base($reg);
4457     index($ireg);
4458     scale($scale);
4459     disp($off);
4460   %}
4461 %}
4462 
4463 //----------Conditional Branch Operands----------------------------------------
4464 // Comparison Op  - This is the operation of the comparison, and is limited to
4465 //                  the following set of codes:
4466 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4467 //
4468 // Other attributes of the comparison, such as unsignedness, are specified
4469 // by the comparison instruction that sets a condition code flags register.
4470 // That result is represented by a flags operand whose subtype is appropriate
4471 // to the unsignedness (etc.) of the comparison.
4472 //
4473 // Later, the instruction which matches both the Comparison Op (a Bool) and
4474 // the flags (produced by the Cmp) specifies the coding of the comparison op
4475 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4476 
4477 // Comparison Code
4478 operand cmpOp() %{
4479   match(Bool);
4480 
4481   format %{ "" %}
4482   interface(COND_INTER) %{
4483     equal(0x4, "e");
4484     not_equal(0x5, "ne");
4485     less(0xC, "l");
4486     greater_equal(0xD, "ge");
4487     less_equal(0xE, "le");
4488     greater(0xF, "g");
4489     overflow(0x0, "o");
4490     no_overflow(0x1, "no");
4491   %}
4492 %}
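
// The nibble supplied by each relation above is OR'd into the x86 Jcc opcodes
// by the branch encodings in this file: 0x70|cc for a short branch and
// 0x0F,0x80|cc for a long one.  For example (illustrative), not_equal (0x5)
// yields JNE,s = 0x75, which is what the emit_cc(cbuf, 0x70, 0x5) calls emit.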
4493 
4494 // Comparison Code, unsigned compare.  Used by FP also, with
4495 // C2 (unordered) turned into GT or LT already.  The other bits
4496 // C0 and C3 are turned into Carry & Zero flags.
4497 operand cmpOpU() %{
4498   match(Bool);
4499 
4500   format %{ "" %}
4501   interface(COND_INTER) %{
4502     equal(0x4, "e");
4503     not_equal(0x5, "ne");
4504     less(0x2, "b");
4505     greater_equal(0x3, "nb");
4506     less_equal(0x6, "be");
4507     greater(0x7, "nbe");
4508     overflow(0x0, "o");
4509     no_overflow(0x1, "no");
4510   %}
4511 %}
4512 
4513 // Floating comparisons that don't require any fixup for the unordered case
4514 operand cmpOpUCF() %{
4515   match(Bool);
4516   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4517             n->as_Bool()->_test._test == BoolTest::ge ||
4518             n->as_Bool()->_test._test == BoolTest::le ||
4519             n->as_Bool()->_test._test == BoolTest::gt);
4520   format %{ "" %}
4521   interface(COND_INTER) %{
4522     equal(0x4, "e");
4523     not_equal(0x5, "ne");
4524     less(0x2, "b");
4525     greater_equal(0x3, "nb");
4526     less_equal(0x6, "be");
4527     greater(0x7, "nbe");
4528     overflow(0x0, "o");
4529     no_overflow(0x1, "no");
4530   %}
4531 %}
4532 
4533 
4534 // Floating comparisons that can be fixed up with extra conditional jumps
4535 operand cmpOpUCF2() %{
4536   match(Bool);
4537   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4538             n->as_Bool()->_test._test == BoolTest::eq);
4539   format %{ "" %}
4540   interface(COND_INTER) %{
4541     equal(0x4, "e");
4542     not_equal(0x5, "ne");
4543     less(0x2, "b");
4544     greater_equal(0x3, "nb");
4545     less_equal(0x6, "be");
4546     greater(0x7, "nbe");
4547     overflow(0x0, "o");
4548     no_overflow(0x1, "no");
4549   %}
4550 %}
4551 
4552 // Comparison Code for FP conditional move
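// (The encodings below appear to be FCMOVcc ModRM bytes: the low byte picks
// the C0/C8/D0 row -- the B, E and BE condition families -- while the 0x100
// bit selects the 0xDA vs. 0xDB opcode, i.e. the plain vs. negated condition.)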
4553 operand cmpOp_fcmov() %{
4554   match(Bool);
4555 
4556   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4557             n->as_Bool()->_test._test != BoolTest::no_overflow);
4558   format %{ "" %}
4559   interface(COND_INTER) %{
4560     equal        (0x0C8);
4561     not_equal    (0x1C8);
4562     less         (0x0C0);
4563     greater_equal(0x1C0);
4564     less_equal   (0x0D0);
4565     greater      (0x1D0);
4566     overflow(0x0, "o"); // not really supported by the instruction
4567     no_overflow(0x1, "no"); // not really supported by the instruction
4568   %}
4569 %}
4570 
4571 // Comparison Code used in long compares
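// (Each condition below carries the encoding of the operand-swapped test --
// "less" emits the 'g' condition, "greater" emits 'l', and so on -- presumably
// because the long-compare expansions that use it reverse the operand order.)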
4572 operand cmpOp_commute() %{
4573   match(Bool);
4574 
4575   format %{ "" %}
4576   interface(COND_INTER) %{
4577     equal(0x4, "e");
4578     not_equal(0x5, "ne");
4579     less(0xF, "g");
4580     greater_equal(0xE, "le");
4581     less_equal(0xD, "ge");
4582     greater(0xC, "l");
4583     overflow(0x0, "o");
4584     no_overflow(0x1, "no");
4585   %}
4586 %}
4587 
4588 //----------OPERAND CLASSES----------------------------------------------------
4589 // Operand Classes are groups of operands that are used to simplify
4590 // instruction definitions by not requiring the AD writer to specify separate
4591 // instructions for every form of operand when the instruction accepts
4592 // multiple operand types with the same basic encoding and format.  The classic
4593 // case of this is memory operands.
4594 
4595 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4596                indIndex, indIndexScale, indIndexScaleOffset);
4597 
4598 // Long memory operations are encoded as 2 instructions, the second using a
4599 // +4 displacement.  A plain integer offset is therefore always required, so
4600 // an oop cannot be used as the offset (as is done when addressing static globals).
4601 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4602                     indIndex, indIndexScale, indIndexScaleOffset);
4603 
4604 
4605 //----------PIPELINE-----------------------------------------------------------
4606 // Rules which define the behavior of the target architecture's pipeline.
4607 pipeline %{
4608 
4609 //----------ATTRIBUTES---------------------------------------------------------
4610 attributes %{
4611   variable_size_instructions;        // Variable-sized instructions
4612   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4613   instruction_unit_size = 1;         // Instruction granularity is 1 byte
4614   instruction_fetch_unit_size = 16;  // The processor fetches one line
4615   instruction_fetch_units = 1;       // of 16 bytes
4616 
4617   // List of nop instructions
4618   nops( MachNop );
4619 %}
4620 
4621 //----------RESOURCES----------------------------------------------------------
4622 // Resources are the functional units available to the machine
4623 
4624 // Generic P2/P3 pipeline
4625 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4626 // 3 instructions decoded per cycle.
4627 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4628 // 2 ALU op, only ALU0 handles mul/div instructions.
4629 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4630            MS0, MS1, MEM = MS0 | MS1,
4631            BR, FPU,
4632            ALU0, ALU1, ALU = ALU0 | ALU1 );
4633 
4634 //----------PIPELINE DESCRIPTION-----------------------------------------------
4635 // Pipeline Description specifies the stages in the machine's pipeline
4636 
4637 // Generic P2/P3 pipeline
4638 pipe_desc(S0, S1, S2, S3, S4, S5);
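// (In the pipe classes below, the decode resources DECODE/D0 are claimed in the
// early stages S0/S1, while ALU/FPU/MEM resources and the operand reads/writes
// they serve are modeled around stages S3-S5.)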
4639 
4640 //----------PIPELINE CLASSES---------------------------------------------------
4641 // Pipeline Classes describe the stages in which input and output are
4642 // referenced by the hardware pipeline.
4643 
4644 // Naming convention: ialu or fpu
4645 // Then: _reg
4646 // Then: _reg if there is a 2nd register
4647 // Then: _long if it's a pair of instructions implementing a long
4648 // Then: _fat if it requires the big decoder
4649 //   Or: _mem if it requires the big decoder and a memory unit.
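// For example, ialu_reg_mem below is an integer ALU op with a register
// destination and a memory source, so it claims the big decoder (D0) and a
// memory unit (MEM) in addition to an ALU.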
4650 
4651 // Integer ALU reg operation
4652 pipe_class ialu_reg(rRegI dst) %{
4653     single_instruction;
4654     dst    : S4(write);
4655     dst    : S3(read);
4656     DECODE : S0;        // any decoder
4657     ALU    : S3;        // any alu
4658 %}
4659 
4660 // Long ALU reg operation
4661 pipe_class ialu_reg_long(eRegL dst) %{
4662     instruction_count(2);
4663     dst    : S4(write);
4664     dst    : S3(read);
4665     DECODE : S0(2);     // any 2 decoders
4666     ALU    : S3(2);     // both alus
4667 %}
4668 
4669 // Integer ALU reg operation using big decoder
4670 pipe_class ialu_reg_fat(rRegI dst) %{
4671     single_instruction;
4672     dst    : S4(write);
4673     dst    : S3(read);
4674     D0     : S0;        // big decoder only
4675     ALU    : S3;        // any alu
4676 %}
4677 
4678 // Long ALU reg operation using big decoder
4679 pipe_class ialu_reg_long_fat(eRegL dst) %{
4680     instruction_count(2);
4681     dst    : S4(write);
4682     dst    : S3(read);
4683     D0     : S0(2);     // big decoder only; twice
4684     ALU    : S3(2);     // any 2 alus
4685 %}
4686 
4687 // Integer ALU reg-reg operation
4688 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4689     single_instruction;
4690     dst    : S4(write);
4691     src    : S3(read);
4692     DECODE : S0;        // any decoder
4693     ALU    : S3;        // any alu
4694 %}
4695 
4696 // Long ALU reg-reg operation
4697 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4698     instruction_count(2);
4699     dst    : S4(write);
4700     src    : S3(read);
4701     DECODE : S0(2);     // any 2 decoders
4702     ALU    : S3(2);     // both alus
4703 %}
4704 
4705 // Integer ALU reg-reg operation
4706 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4707     single_instruction;
4708     dst    : S4(write);
4709     src    : S3(read);
4710     D0     : S0;        // big decoder only
4711     ALU    : S3;        // any alu
4712 %}
4713 
4714 // Long ALU reg-reg operation
4715 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4716     instruction_count(2);
4717     dst    : S4(write);
4718     src    : S3(read);
4719     D0     : S0(2);     // big decoder only; twice
4720     ALU    : S3(2);     // both alus
4721 %}
4722 
4723 // Integer ALU reg-mem operation
4724 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4725     single_instruction;
4726     dst    : S5(write);
4727     mem    : S3(read);
4728     D0     : S0;        // big decoder only
4729     ALU    : S4;        // any alu
4730     MEM    : S3;        // any mem
4731 %}
4732 
4733 // Long ALU reg-mem operation
4734 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4735     instruction_count(2);
4736     dst    : S5(write);
4737     mem    : S3(read);
4738     D0     : S0(2);     // big decoder only; twice
4739     ALU    : S4(2);     // any 2 alus
4740     MEM    : S3(2);     // both mems
4741 %}
4742 
4743 // Integer mem operation (prefetch)
4744 pipe_class ialu_mem(memory mem)
4745 %{
4746     single_instruction;
4747     mem    : S3(read);
4748     D0     : S0;        // big decoder only
4749     MEM    : S3;        // any mem
4750 %}
4751 
4752 // Integer Store to Memory
4753 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4754     single_instruction;
4755     mem    : S3(read);
4756     src    : S5(read);
4757     D0     : S0;        // big decoder only
4758     ALU    : S4;        // any alu
4759     MEM    : S3;
4760 %}
4761 
4762 // Long Store to Memory
4763 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4764     instruction_count(2);
4765     mem    : S3(read);
4766     src    : S5(read);
4767     D0     : S0(2);     // big decoder only; twice
4768     ALU    : S4(2);     // any 2 alus
4769     MEM    : S3(2);     // Both mems
4770 %}
4771 
4772 // Integer Store to Memory
4773 pipe_class ialu_mem_imm(memory mem) %{
4774     single_instruction;
4775     mem    : S3(read);
4776     D0     : S0;        // big decoder only
4777     ALU    : S4;        // any alu
4778     MEM    : S3;
4779 %}
4780 
4781 // Integer ALU0 reg-reg operation
4782 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4783     single_instruction;
4784     dst    : S4(write);
4785     src    : S3(read);
4786     D0     : S0;        // Big decoder only
4787     ALU0   : S3;        // only alu0
4788 %}
4789 
4790 // Integer ALU0 reg-mem operation
4791 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4792     single_instruction;
4793     dst    : S5(write);
4794     mem    : S3(read);
4795     D0     : S0;        // big decoder only
4796     ALU0   : S4;        // ALU0 only
4797     MEM    : S3;        // any mem
4798 %}
4799 
4800 // Integer ALU reg-reg operation
4801 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4802     single_instruction;
4803     cr     : S4(write);
4804     src1   : S3(read);
4805     src2   : S3(read);
4806     DECODE : S0;        // any decoder
4807     ALU    : S3;        // any alu
4808 %}
4809 
4810 // Integer ALU reg-imm operation
4811 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4812     single_instruction;
4813     cr     : S4(write);
4814     src1   : S3(read);
4815     DECODE : S0;        // any decoder
4816     ALU    : S3;        // any alu
4817 %}
4818 
4819 // Integer ALU reg-mem operation
4820 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4821     single_instruction;
4822     cr     : S4(write);
4823     src1   : S3(read);
4824     src2   : S3(read);
4825     D0     : S0;        // big decoder only
4826     ALU    : S4;        // any alu
4827     MEM    : S3;
4828 %}
4829 
4830 // Conditional move reg-reg
4831 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4832     instruction_count(4);
4833     y      : S4(read);
4834     q      : S3(read);
4835     p      : S3(read);
4836     DECODE : S0(4);     // any decoder
4837 %}
4838 
4839 // Conditional move reg-reg
4840 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4841     single_instruction;
4842     dst    : S4(write);
4843     src    : S3(read);
4844     cr     : S3(read);
4845     DECODE : S0;        // any decoder
4846 %}
4847 
4848 // Conditional move reg-mem
4849 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4850     single_instruction;
4851     dst    : S4(write);
4852     src    : S3(read);
4853     cr     : S3(read);
4854     DECODE : S0;        // any decoder
4855     MEM    : S3;
4856 %}
4857 
4858 // Conditional move reg-reg long
4859 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4860     single_instruction;
4861     dst    : S4(write);
4862     src    : S3(read);
4863     cr     : S3(read);
4864     DECODE : S0(2);     // any 2 decoders
4865 %}
4866 
4867 // Conditional move double reg-reg
4868 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4869     single_instruction;
4870     dst    : S4(write);
4871     src    : S3(read);
4872     cr     : S3(read);
4873     DECODE : S0;        // any decoder
4874 %}
4875 
4876 // Float reg-reg operation
4877 pipe_class fpu_reg(regDPR dst) %{
4878     instruction_count(2);
4879     dst    : S3(read);
4880     DECODE : S0(2);     // any 2 decoders
4881     FPU    : S3;
4882 %}
4883 
4884 // Float reg-reg operation
4885 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4886     instruction_count(2);
4887     dst    : S4(write);
4888     src    : S3(read);
4889     DECODE : S0(2);     // any 2 decoders
4890     FPU    : S3;
4891 %}
4892 
4893 // Float reg-reg operation
4894 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4895     instruction_count(3);
4896     dst    : S4(write);
4897     src1   : S3(read);
4898     src2   : S3(read);
4899     DECODE : S0(3);     // any 3 decoders
4900     FPU    : S3(2);
4901 %}
4902 
4903 // Float reg-reg operation
4904 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4905     instruction_count(4);
4906     dst    : S4(write);
4907     src1   : S3(read);
4908     src2   : S3(read);
4909     src3   : S3(read);
4910     DECODE : S0(4);     // any decoders, 4 decode slots needed
4911     FPU    : S3(2);
4912 %}
4913 
4914 // Float reg-reg operation
4915 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4916     instruction_count(4);
4917     dst    : S4(write);
4918     src1   : S3(read);
4919     src2   : S3(read);
4920     src3   : S3(read);
4921     DECODE : S1(3);     // any 3 decoders
4922     D0     : S0;        // Big decoder only
4923     FPU    : S3(2);
4924     MEM    : S3;
4925 %}
4926 
4927 // Float reg-mem operation
4928 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4929     instruction_count(2);
4930     dst    : S5(write);
4931     mem    : S3(read);
4932     D0     : S0;        // big decoder only
4933     DECODE : S1;        // any decoder for FPU POP
4934     FPU    : S4;
4935     MEM    : S3;        // any mem
4936 %}
4937 
4938 // Float reg-mem operation
4939 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4940     instruction_count(3);
4941     dst    : S5(write);
4942     src1   : S3(read);
4943     mem    : S3(read);
4944     D0     : S0;        // big decoder only
4945     DECODE : S1(2);     // any decoder for FPU POP
4946     FPU    : S4;
4947     MEM    : S3;        // any mem
4948 %}
4949 
4950 // Float mem-reg operation
4951 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4952     instruction_count(2);
4953     src    : S5(read);
4954     mem    : S3(read);
4955     DECODE : S0;        // any decoder for FPU PUSH
4956     D0     : S1;        // big decoder only
4957     FPU    : S4;
4958     MEM    : S3;        // any mem
4959 %}
4960 
4961 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4962     instruction_count(3);
4963     src1   : S3(read);
4964     src2   : S3(read);
4965     mem    : S3(read);
4966     DECODE : S0(2);     // any decoder for FPU PUSH
4967     D0     : S1;        // big decoder only
4968     FPU    : S4;
4969     MEM    : S3;        // any mem
4970 %}
4971 
4972 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4973     instruction_count(3);
4974     src1   : S3(read);
4975     src2   : S3(read);
4976     mem    : S4(read);
4977     DECODE : S0;        // any decoder for FPU PUSH
4978     D0     : S0(2);     // big decoder only
4979     FPU    : S4;
4980     MEM    : S3(2);     // any mem
4981 %}
4982 
4983 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4984     instruction_count(2);
4985     src1   : S3(read);
4986     dst    : S4(read);
4987     D0     : S0(2);     // big decoder only
4988     MEM    : S3(2);     // any mem
4989 %}
4990 
4991 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4992     instruction_count(3);
4993     src1   : S3(read);
4994     src2   : S3(read);
4995     dst    : S4(read);
4996     D0     : S0(3);     // big decoder only
4997     FPU    : S4;
4998     MEM    : S3(3);     // any mem
4999 %}
5000 
5001 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5002     instruction_count(3);
5003     src1   : S4(read);
5004     mem    : S4(read);
5005     DECODE : S0;        // any decoder for FPU PUSH
5006     D0     : S0(2);     // big decoder only
5007     FPU    : S4;
5008     MEM    : S3(2);     // any mem
5009 %}
5010 
5011 // Float load constant
5012 pipe_class fpu_reg_con(regDPR dst) %{
5013     instruction_count(2);
5014     dst    : S5(write);
5015     D0     : S0;        // big decoder only for the load
5016     DECODE : S1;        // any decoder for FPU POP
5017     FPU    : S4;
5018     MEM    : S3;        // any mem
5019 %}
5020 
5021 // Float load constant
5022 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5023     instruction_count(3);
5024     dst    : S5(write);
5025     src    : S3(read);
5026     D0     : S0;        // big decoder only for the load
5027     DECODE : S1(2);     // any decoder for FPU POP
5028     FPU    : S4;
5029     MEM    : S3;        // any mem
5030 %}
5031 
5032 // UnConditional branch
5033 pipe_class pipe_jmp( label labl ) %{
5034     single_instruction;
5035     BR   : S3;
5036 %}
5037 
5038 // Conditional branch
5039 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5040     single_instruction;
5041     cr    : S1(read);
5042     BR    : S3;
5043 %}
5044 
5045 // Allocation idiom
5046 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5047     instruction_count(1); force_serialization;
5048     fixed_latency(6);
5049     heap_ptr : S3(read);
5050     DECODE   : S0(3);
5051     D0       : S2;
5052     MEM      : S3;
5053     ALU      : S3(2);
5054     dst      : S5(write);
5055     BR       : S5;
5056 %}
5057 
5058 // Generic big/slow expanded idiom
5059 pipe_class pipe_slow(  ) %{
5060     instruction_count(10); multiple_bundles; force_serialization;
5061     fixed_latency(100);
5062     D0  : S0(2);
5063     MEM : S3(2);
5064 %}
5065 
5066 // The real do-nothing guy
5067 pipe_class empty( ) %{
5068     instruction_count(0);
5069 %}
5070 
5071 // Define the class for the Nop node
5072 define %{
5073    MachNop = empty;
5074 %}
5075 
5076 %}
5077 
5078 //----------INSTRUCTIONS-------------------------------------------------------
5079 //
5080 // match      -- States which machine-independent subtree may be replaced
5081 //               by this instruction.
5082 // ins_cost   -- The estimated cost of this instruction is used by instruction
5083 //               selection to identify a minimum cost tree of machine
5084 //               instructions that matches a tree of machine-independent
5085 //               instructions.
5086 // format     -- A string providing the disassembly for this instruction.
5087 //               The value of an instruction's operand may be inserted
5088 //               by referring to it with a '$' prefix.
5089 // opcode     -- Three instruction opcodes may be provided.  These are referred
5090 //               to within an encode class as $primary, $secondary, and $tertiary
5091 //               respectively.  The primary opcode is commonly used to
5092 //               indicate the type of machine instruction, while secondary
5093 //               and tertiary are often used for prefix options or addressing
5094 //               modes.
5095 // ins_encode -- A list of encode classes with parameters. The encode class
5096 //               name must have been defined in an 'enc_class' specification
5097 //               in the encode section of the architecture description.
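//
// For example, bytes_reverse_int below matches the ReverseBytesI ideal node,
// supplies the two BSWAP opcode bytes (0x0F, 0xC8) as $primary/$secondary,
// emits them via the OpcP and OpcSReg encode classes, and schedules in the
// ialu_reg pipeline class.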
5098 
5099 //----------BSWAP-Instruction--------------------------------------------------
5100 instruct bytes_reverse_int(rRegI dst) %{
5101   match(Set dst (ReverseBytesI dst));
5102 
5103   format %{ "BSWAP  $dst" %}
5104   opcode(0x0F, 0xC8);
5105   ins_encode( OpcP, OpcSReg(dst) );
5106   ins_pipe( ialu_reg );
5107 %}
5108 
5109 instruct bytes_reverse_long(eRegL dst) %{
5110   match(Set dst (ReverseBytesL dst));
5111 
5112   format %{ "BSWAP  $dst.lo\n\t"
5113             "BSWAP  $dst.hi\n\t"
5114             "XCHG   $dst.lo $dst.hi" %}
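  // Reversing all eight bytes of a long also swaps which 32-bit half is the
  // high one, hence the XCHG of the two halves after the per-word BSWAPs.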
5115 
5116   ins_cost(125);
5117   ins_encode( bswap_long_bytes(dst) );
5118   ins_pipe( ialu_reg_reg);
5119 %}
5120 
5121 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5122   match(Set dst (ReverseBytesUS dst));
5123   effect(KILL cr);
5124 
5125   format %{ "BSWAP  $dst\n\t"
5126             "SHR    $dst,16\n\t" %}
5127   ins_encode %{
5128     __ bswapl($dst$$Register);
5129     __ shrl($dst$$Register, 16);
5130   %}
5131   ins_pipe( ialu_reg );
5132 %}
5133 
5134 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5135   match(Set dst (ReverseBytesS dst));
5136   effect(KILL cr);
5137 
5138   format %{ "BSWAP  $dst\n\t"
5139             "SAR    $dst,16\n\t" %}
5140   ins_encode %{
5141     __ bswapl($dst$$Register);
5142     __ sarl($dst$$Register, 16);
5143   %}
5144   ins_pipe( ialu_reg );
5145 %}
5146 
5147 
5148 //---------- Zeros Count Instructions ------------------------------------------
5149 
5150 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5151   predicate(UseCountLeadingZerosInstruction);
5152   match(Set dst (CountLeadingZerosI src));
5153   effect(KILL cr);
5154 
5155   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5156   ins_encode %{
5157     __ lzcntl($dst$$Register, $src$$Register);
5158   %}
5159   ins_pipe(ialu_reg);
5160 %}
5161 
5162 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5163   predicate(!UseCountLeadingZerosInstruction);
5164   match(Set dst (CountLeadingZerosI src));
5165   effect(KILL cr);
5166 
5167   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5168             "JNZ    skip\n\t"
5169             "MOV    $dst, -1\n"
5170       "skip:\n\t"
5171             "NEG    $dst\n\t"
5172             "ADD    $dst, 31" %}
5173   ins_encode %{
5174     Register Rdst = $dst$$Register;
5175     Register Rsrc = $src$$Register;
5176     Label skip;
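    // BSR returns the index of the highest set bit, so for a non-zero source
    // the leading-zero count is 31 - index, which the NEG/ADD below compute.
    // A zero source sets ZF and leaves dst undefined; forcing dst to -1 makes
    // the same arithmetic yield 32.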
5177     __ bsrl(Rdst, Rsrc);
5178     __ jccb(Assembler::notZero, skip);
5179     __ movl(Rdst, -1);
5180     __ bind(skip);
5181     __ negl(Rdst);
5182     __ addl(Rdst, BitsPerInt - 1);
5183   %}
5184   ins_pipe(ialu_reg);
5185 %}
5186 
5187 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5188   predicate(UseCountLeadingZerosInstruction);
5189   match(Set dst (CountLeadingZerosL src));
5190   effect(TEMP dst, KILL cr);
5191 
5192   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5193             "JNC    done\n\t"
5194             "LZCNT  $dst, $src.lo\n\t"
5195             "ADD    $dst, 32\n"
5196       "done:" %}
5197   ins_encode %{
5198     Register Rdst = $dst$$Register;
5199     Register Rsrc = $src$$Register;
5200     Label done;
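    // HIGH_FROM_LOW(Rsrc) names the register holding the upper 32 bits of the
    // long pair.  LZCNT sets CF when its source is zero, so the carry-clear
    // branch means the high word already produced the final count; otherwise
    // the low word is counted and 32 is added.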
5201     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5202     __ jccb(Assembler::carryClear, done);
5203     __ lzcntl(Rdst, Rsrc);
5204     __ addl(Rdst, BitsPerInt);
5205     __ bind(done);
5206   %}
5207   ins_pipe(ialu_reg);
5208 %}
5209 
5210 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5211   predicate(!UseCountLeadingZerosInstruction);
5212   match(Set dst (CountLeadingZerosL src));
5213   effect(TEMP dst, KILL cr);
5214 
5215   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5216             "JZ     msw_is_zero\n\t"
5217             "ADD    $dst, 32\n\t"
5218             "JMP    not_zero\n"
5219       "msw_is_zero:\n\t"
5220             "BSR    $dst, $src.lo\n\t"
5221             "JNZ    not_zero\n\t"
5222             "MOV    $dst, -1\n"
5223       "not_zero:\n\t"
5224             "NEG    $dst\n\t"
5225             "ADD    $dst, 63\n" %}
5226  ins_encode %{
5227     Register Rdst = $dst$$Register;
5228     Register Rsrc = $src$$Register;
5229     Label msw_is_zero;
5230     Label not_zero;
5231     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5232     __ jccb(Assembler::zero, msw_is_zero);
5233     __ addl(Rdst, BitsPerInt);
5234     __ jmpb(not_zero);
5235     __ bind(msw_is_zero);
5236     __ bsrl(Rdst, Rsrc);
5237     __ jccb(Assembler::notZero, not_zero);
5238     __ movl(Rdst, -1);
5239     __ bind(not_zero);
5240     __ negl(Rdst);
5241     __ addl(Rdst, BitsPerLong - 1);
5242   %}
5243   ins_pipe(ialu_reg);
5244 %}
5245 
5246 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5247   predicate(UseCountTrailingZerosInstruction);
5248   match(Set dst (CountTrailingZerosI src));
5249   effect(KILL cr);
5250 
5251   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5252   ins_encode %{
5253     __ tzcntl($dst$$Register, $src$$Register);
5254   %}
5255   ins_pipe(ialu_reg);
5256 %}
5257 
5258 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5259   predicate(!UseCountTrailingZerosInstruction);
5260   match(Set dst (CountTrailingZerosI src));
5261   effect(KILL cr);
5262 
5263   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5264             "JNZ    done\n\t"
5265             "MOV    $dst, 32\n"
5266       "done:" %}
5267   ins_encode %{
5268     Register Rdst = $dst$$Register;
5269     Label done;
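    // BSF returns the index of the lowest set bit, which is already the
    // trailing-zero count; only the zero-source case needs the fix-up to 32.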
5270     __ bsfl(Rdst, $src$$Register);
5271     __ jccb(Assembler::notZero, done);
5272     __ movl(Rdst, BitsPerInt);
5273     __ bind(done);
5274   %}
5275   ins_pipe(ialu_reg);
5276 %}
5277 
5278 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5279   predicate(UseCountTrailingZerosInstruction);
5280   match(Set dst (CountTrailingZerosL src));
5281   effect(TEMP dst, KILL cr);
5282 
5283   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5284             "JNC    done\n\t"
5285             "TZCNT  $dst, $src.hi\n\t"
5286             "ADD    $dst, 32\n"
5287       "done:" %}
5288   ins_encode %{
5289     Register Rdst = $dst$$Register;
5290     Register Rsrc = $src$$Register;
5291     Label done;
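    // TZCNT sets CF when its source is zero, so the carry-clear branch means
    // the low word already produced the final count; otherwise the high word
    // is counted and 32 is added.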
5292     __ tzcntl(Rdst, Rsrc);
5293     __ jccb(Assembler::carryClear, done);
5294     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5295     __ addl(Rdst, BitsPerInt);
5296     __ bind(done);
5297   %}
5298   ins_pipe(ialu_reg);
5299 %}
5300 
5301 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5302   predicate(!UseCountTrailingZerosInstruction);
5303   match(Set dst (CountTrailingZerosL src));
5304   effect(TEMP dst, KILL cr);
5305 
5306   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5307             "JNZ    done\n\t"
5308             "BSF    $dst, $src.hi\n\t"
5309             "JNZ    msw_not_zero\n\t"
5310             "MOV    $dst, 32\n"
5311       "msw_not_zero:\n\t"
5312             "ADD    $dst, 32\n"
5313       "done:" %}
5314   ins_encode %{
5315     Register Rdst = $dst$$Register;
5316     Register Rsrc = $src$$Register;
5317     Label msw_not_zero;
5318     Label done;
5319     __ bsfl(Rdst, Rsrc);
5320     __ jccb(Assembler::notZero, done);
5321     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5322     __ jccb(Assembler::notZero, msw_not_zero);
5323     __ movl(Rdst, BitsPerInt);
5324     __ bind(msw_not_zero);
5325     __ addl(Rdst, BitsPerInt);
5326     __ bind(done);
5327   %}
5328   ins_pipe(ialu_reg);
5329 %}
5330 
5331 
5332 //---------- Population Count Instructions -------------------------------------
5333 
5334 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5335   predicate(UsePopCountInstruction);
5336   match(Set dst (PopCountI src));
5337   effect(KILL cr);
5338 
5339   format %{ "POPCNT $dst, $src" %}
5340   ins_encode %{
5341     __ popcntl($dst$$Register, $src$$Register);
5342   %}
5343   ins_pipe(ialu_reg);
5344 %}
5345 
5346 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5347   predicate(UsePopCountInstruction);
5348   match(Set dst (PopCountI (LoadI mem)));
5349   effect(KILL cr);
5350 
5351   format %{ "POPCNT $dst, $mem" %}
5352   ins_encode %{
5353     __ popcntl($dst$$Register, $mem$$Address);
5354   %}
5355   ins_pipe(ialu_reg);
5356 %}
5357 
5358 // Note: Long.bitCount(long) returns an int.
5359 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5360   predicate(UsePopCountInstruction);
5361   match(Set dst (PopCountL src));
5362   effect(KILL cr, TEMP tmp, TEMP dst);
5363 
5364   format %{ "POPCNT $dst, $src.lo\n\t"
5365             "POPCNT $tmp, $src.hi\n\t"
5366             "ADD    $dst, $tmp" %}
5367   ins_encode %{
5368     __ popcntl($dst$$Register, $src$$Register);
5369     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5370     __ addl($dst$$Register, $tmp$$Register);
5371   %}
5372   ins_pipe(ialu_reg);
5373 %}
5374 
5375 // Note: Long.bitCount(long) returns an int.
5376 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5377   predicate(UsePopCountInstruction);
5378   match(Set dst (PopCountL (LoadL mem)));
5379   effect(KILL cr, TEMP tmp, TEMP dst);
5380 
5381   format %{ "POPCNT $dst, $mem\n\t"
5382             "POPCNT $tmp, $mem+4\n\t"
5383             "ADD    $dst, $tmp" %}
5384   ins_encode %{
5385     //__ popcntl($dst$$Register, $mem$$Address$$first);
5386     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5387     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5388     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5389     __ addl($dst$$Register, $tmp$$Register);
5390   %}
5391   ins_pipe(ialu_reg);
5392 %}
5393 
5394 
5395 //----------Load/Store/Move Instructions---------------------------------------
5396 //----------Load Instructions--------------------------------------------------
5397 // Load Byte (8bit signed)
5398 instruct loadB(xRegI dst, memory mem) %{
5399   match(Set dst (LoadB mem));
5400 
5401   ins_cost(125);
5402   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5403 
5404   ins_encode %{
5405     __ movsbl($dst$$Register, $mem$$Address);
5406   %}
5407 
5408   ins_pipe(ialu_reg_mem);
5409 %}
5410 
5411 // Load Byte (8bit signed) into Long Register
5412 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5413   match(Set dst (ConvI2L (LoadB mem)));
5414   effect(KILL cr);
5415 
5416   ins_cost(375);
5417   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5418             "MOV    $dst.hi,$dst.lo\n\t"
5419             "SAR    $dst.hi,7" %}
5420 
5421   ins_encode %{
5422     __ movsbl($dst$$Register, $mem$$Address);
5423     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5424     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5425   %}
5426 
5427   ins_pipe(ialu_reg_mem);
5428 %}
5429 
5430 // Load Unsigned Byte (8bit UNsigned)
5431 instruct loadUB(xRegI dst, memory mem) %{
5432   match(Set dst (LoadUB mem));
5433 
5434   ins_cost(125);
5435   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5436 
5437   ins_encode %{
5438     __ movzbl($dst$$Register, $mem$$Address);
5439   %}
5440 
5441   ins_pipe(ialu_reg_mem);
5442 %}
5443 
5444 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5445 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5446   match(Set dst (ConvI2L (LoadUB mem)));
5447   effect(KILL cr);
5448 
5449   ins_cost(250);
5450   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5451             "XOR    $dst.hi,$dst.hi" %}
5452 
5453   ins_encode %{
5454     Register Rdst = $dst$$Register;
5455     __ movzbl(Rdst, $mem$$Address);
5456     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5457   %}
5458 
5459   ins_pipe(ialu_reg_mem);
5460 %}
5461 
5462 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5463 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5464   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5465   effect(KILL cr);
5466 
5467   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5468             "XOR    $dst.hi,$dst.hi\n\t"
5469             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5470   ins_encode %{
5471     Register Rdst = $dst$$Register;
5472     __ movzbl(Rdst, $mem$$Address);
5473     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5474     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5475   %}
5476   ins_pipe(ialu_reg_mem);
5477 %}
5478 
5479 // Load Short (16bit signed)
5480 instruct loadS(rRegI dst, memory mem) %{
5481   match(Set dst (LoadS mem));
5482 
5483   ins_cost(125);
5484   format %{ "MOVSX  $dst,$mem\t# short" %}
5485 
5486   ins_encode %{
5487     __ movswl($dst$$Register, $mem$$Address);
5488   %}
5489 
5490   ins_pipe(ialu_reg_mem);
5491 %}
5492 
5493 // Load Short (16 bit signed) to Byte (8 bit signed)
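// (The (x << 24) >> 24 shift pair is how the ideal graph expresses a narrowing
// to a signed byte, so it can be folded into a single MOVSX load; loadUS2B,
// loadI2B and loadI2S below fold the analogous patterns.)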
5494 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5495   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5496 
5497   ins_cost(125);
5498   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5499   ins_encode %{
5500     __ movsbl($dst$$Register, $mem$$Address);
5501   %}
5502   ins_pipe(ialu_reg_mem);
5503 %}
5504 
5505 // Load Short (16bit signed) into Long Register
5506 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5507   match(Set dst (ConvI2L (LoadS mem)));
5508   effect(KILL cr);
5509 
5510   ins_cost(375);
5511   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5512             "MOV    $dst.hi,$dst.lo\n\t"
5513             "SAR    $dst.hi,15" %}
5514 
5515   ins_encode %{
5516     __ movswl($dst$$Register, $mem$$Address);
5517     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5518     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5519   %}
5520 
5521   ins_pipe(ialu_reg_mem);
5522 %}
5523 
5524 // Load Unsigned Short/Char (16bit unsigned)
5525 instruct loadUS(rRegI dst, memory mem) %{
5526   match(Set dst (LoadUS mem));
5527 
5528   ins_cost(125);
5529   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5530 
5531   ins_encode %{
5532     __ movzwl($dst$$Register, $mem$$Address);
5533   %}
5534 
5535   ins_pipe(ialu_reg_mem);
5536 %}
5537 
5538 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5539 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5540   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5541 
5542   ins_cost(125);
5543   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5544   ins_encode %{
5545     __ movsbl($dst$$Register, $mem$$Address);
5546   %}
5547   ins_pipe(ialu_reg_mem);
5548 %}
5549 
5550 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5551 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5552   match(Set dst (ConvI2L (LoadUS mem)));
5553   effect(KILL cr);
5554 
5555   ins_cost(250);
5556   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5557             "XOR    $dst.hi,$dst.hi" %}
5558 
5559   ins_encode %{
5560     __ movzwl($dst$$Register, $mem$$Address);
5561     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5562   %}
5563 
5564   ins_pipe(ialu_reg_mem);
5565 %}
5566 
5567 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5568 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5569   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5570   effect(KILL cr);
5571 
5572   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5573             "XOR    $dst.hi,$dst.hi" %}
5574   ins_encode %{
5575     Register Rdst = $dst$$Register;
5576     __ movzbl(Rdst, $mem$$Address);
5577     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5578   %}
5579   ins_pipe(ialu_reg_mem);
5580 %}
5581 
5582 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5583 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5584   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5585   effect(KILL cr);
5586 
5587   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5588             "XOR    $dst.hi,$dst.hi\n\t"
5589             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5590   ins_encode %{
5591     Register Rdst = $dst$$Register;
5592     __ movzwl(Rdst, $mem$$Address);
5593     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5594     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5595   %}
5596   ins_pipe(ialu_reg_mem);
5597 %}
5598 
5599 // Load Integer
5600 instruct loadI(rRegI dst, memory mem) %{
5601   match(Set dst (LoadI mem));
5602 
5603   ins_cost(125);
5604   format %{ "MOV    $dst,$mem\t# int" %}
5605 
5606   ins_encode %{
5607     __ movl($dst$$Register, $mem$$Address);
5608   %}
5609 
5610   ins_pipe(ialu_reg_mem);
5611 %}
5612 
5613 // Load Integer (32 bit signed) to Byte (8 bit signed)
5614 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5615   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5616 
5617   ins_cost(125);
5618   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5619   ins_encode %{
5620     __ movsbl($dst$$Register, $mem$$Address);
5621   %}
5622   ins_pipe(ialu_reg_mem);
5623 %}
5624 
5625 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5626 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5627   match(Set dst (AndI (LoadI mem) mask));
5628 
5629   ins_cost(125);
5630   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5631   ins_encode %{
5632     __ movzbl($dst$$Register, $mem$$Address);
5633   %}
5634   ins_pipe(ialu_reg_mem);
5635 %}
5636 
5637 // Load Integer (32 bit signed) to Short (16 bit signed)
5638 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5639   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5640 
5641   ins_cost(125);
5642   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5643   ins_encode %{
5644     __ movswl($dst$$Register, $mem$$Address);
5645   %}
5646   ins_pipe(ialu_reg_mem);
5647 %}
5648 
5649 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5650 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5651   match(Set dst (AndI (LoadI mem) mask));
5652 
5653   ins_cost(125);
5654   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5655   ins_encode %{
5656     __ movzwl($dst$$Register, $mem$$Address);
5657   %}
5658   ins_pipe(ialu_reg_mem);
5659 %}
5660 
5661 // Load Integer into Long Register
5662 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5663   match(Set dst (ConvI2L (LoadI mem)));
5664   effect(KILL cr);
5665 
5666   ins_cost(375);
5667   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5668             "MOV    $dst.hi,$dst.lo\n\t"
5669             "SAR    $dst.hi,31" %}
5670 
5671   ins_encode %{
5672     __ movl($dst$$Register, $mem$$Address);
5673     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5674     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5675   %}
5676 
5677   ins_pipe(ialu_reg_mem);
5678 %}
5679 
5680 // Load Integer with mask 0xFF into Long Register
5681 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5682   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5683   effect(KILL cr);
5684 
5685   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5686             "XOR    $dst.hi,$dst.hi" %}
5687   ins_encode %{
5688     Register Rdst = $dst$$Register;
5689     __ movzbl(Rdst, $mem$$Address);
5690     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5691   %}
5692   ins_pipe(ialu_reg_mem);
5693 %}
5694 
5695 // Load Integer with mask 0xFFFF into Long Register
5696 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5697   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5698   effect(KILL cr);
5699 
5700   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5701             "XOR    $dst.hi,$dst.hi" %}
5702   ins_encode %{
5703     Register Rdst = $dst$$Register;
5704     __ movzwl(Rdst, $mem$$Address);
5705     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5706   %}
5707   ins_pipe(ialu_reg_mem);
5708 %}
5709 
5710 // Load Integer with 31-bit mask into Long Register
5711 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5712   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5713   effect(KILL cr);
5714 
5715   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5716             "XOR    $dst.hi,$dst.hi\n\t"
5717             "AND    $dst.lo,$mask" %}
5718   ins_encode %{
5719     Register Rdst = $dst$$Register;
5720     __ movl(Rdst, $mem$$Address);
5721     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5722     __ andl(Rdst, $mask$$constant);
5723   %}
5724   ins_pipe(ialu_reg_mem);
5725 %}
5726 
5727 // Load Unsigned Integer into Long Register
5728 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5729   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5730   effect(KILL cr);
5731 
5732   ins_cost(250);
5733   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5734             "XOR    $dst.hi,$dst.hi" %}
5735 
5736   ins_encode %{
5737     __ movl($dst$$Register, $mem$$Address);
5738     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5739   %}
5740 
5741   ins_pipe(ialu_reg_mem);
5742 %}
5743 
5744 // Load Long.  Cannot clobber address while loading, so restrict address
5745 // register to ESI
5746 instruct loadL(eRegL dst, load_long_memory mem) %{
5747   predicate(!((LoadLNode*)n)->require_atomic_access());
5748   match(Set dst (LoadL mem));
5749 
5750   ins_cost(250);
5751   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5752             "MOV    $dst.hi,$mem+4" %}
5753 
5754   ins_encode %{
5755     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5756     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5757     __ movl($dst$$Register, Amemlo);
5758     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5759   %}
5760 
5761   ins_pipe(ialu_reg_long_mem);
5762 %}
5763 
5764 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5765 // then store it down to the stack and reload on the int
5766 // side.
5767 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5768   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5769   match(Set dst (LoadL mem));
5770 
5771   ins_cost(200);
5772   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5773             "FISTp  $dst" %}
5774   ins_encode(enc_loadL_volatile(mem,dst));
5775   ins_pipe( fpu_reg_mem );
5776 %}
5777 
5778 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5779   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5780   match(Set dst (LoadL mem));
5781   effect(TEMP tmp);
5782   ins_cost(180);
5783   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5784             "MOVSD  $dst,$tmp" %}
5785   ins_encode %{
5786     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5787     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5788   %}
5789   ins_pipe( pipe_slow );
5790 %}
5791 
5792 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5793   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5794   match(Set dst (LoadL mem));
5795   effect(TEMP tmp);
5796   ins_cost(160);
5797   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5798             "MOVD   $dst.lo,$tmp\n\t"
5799             "PSRLQ  $tmp,32\n\t"
5800             "MOVD   $dst.hi,$tmp" %}
5801   ins_encode %{
5802     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5803     __ movdl($dst$$Register, $tmp$$XMMRegister);
5804     __ psrlq($tmp$$XMMRegister, 32);
5805     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5806   %}
5807   ins_pipe( pipe_slow );
5808 %}
5809 
5810 // Load Range
5811 instruct loadRange(rRegI dst, memory mem) %{
5812   match(Set dst (LoadRange mem));
5813 
5814   ins_cost(125);
5815   format %{ "MOV    $dst,$mem" %}
5816   opcode(0x8B);
5817   ins_encode( OpcP, RegMem(dst,mem));
5818   ins_pipe( ialu_reg_mem );
5819 %}
5820 
5821 
5822 // Load Pointer
5823 instruct loadP(eRegP dst, memory mem) %{
5824   match(Set dst (LoadP mem));
5825 
5826   ins_cost(125);
5827   format %{ "MOV    $dst,$mem" %}
5828   opcode(0x8B);
5829   ins_encode( OpcP, RegMem(dst,mem));
5830   ins_pipe( ialu_reg_mem );
5831 %}
5832 
5833 // Load Klass Pointer
5834 instruct loadKlass(eRegP dst, memory mem) %{
5835   match(Set dst (LoadKlass mem));
5836 
5837   ins_cost(125);
5838   format %{ "MOV    $dst,$mem" %}
5839   opcode(0x8B);
5840   ins_encode( OpcP, RegMem(dst,mem));
5841   ins_pipe( ialu_reg_mem );
5842 %}
5843 
5844 // Load Double
5845 instruct loadDPR(regDPR dst, memory mem) %{
5846   predicate(UseSSE<=1);
5847   match(Set dst (LoadD mem));
5848 
5849   ins_cost(150);
5850   format %{ "FLD_D  ST,$mem\n\t"
5851             "FSTP   $dst" %}
5852   opcode(0xDD);               /* DD /0 */
5853   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5854               Pop_Reg_DPR(dst) );
5855   ins_pipe( fpu_reg_mem );
5856 %}
5857 
5858 // Load Double to XMM
5859 instruct loadD(regD dst, memory mem) %{
5860   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5861   match(Set dst (LoadD mem));
5862   ins_cost(145);
5863   format %{ "MOVSD  $dst,$mem" %}
5864   ins_encode %{
5865     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5866   %}
5867   ins_pipe( pipe_slow );
5868 %}
5869 
5870 instruct loadD_partial(regD dst, memory mem) %{
5871   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5872   match(Set dst (LoadD mem));
5873   ins_cost(145);
5874   format %{ "MOVLPD $dst,$mem" %}
5875   ins_encode %{
5876     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5877   %}
5878   ins_pipe( pipe_slow );
5879 %}
5880 
5881 // Load to XMM register (single-precision floating point)
5882 // MOVSS instruction
5883 instruct loadF(regF dst, memory mem) %{
5884   predicate(UseSSE>=1);
5885   match(Set dst (LoadF mem));
5886   ins_cost(145);
5887   format %{ "MOVSS  $dst,$mem" %}
5888   ins_encode %{
5889     __ movflt ($dst$$XMMRegister, $mem$$Address);
5890   %}
5891   ins_pipe( pipe_slow );
5892 %}
5893 
5894 // Load Float
5895 instruct loadFPR(regFPR dst, memory mem) %{
5896   predicate(UseSSE==0);
5897   match(Set dst (LoadF mem));
5898 
5899   ins_cost(150);
5900   format %{ "FLD_S  ST,$mem\n\t"
5901             "FSTP   $dst" %}
5902   opcode(0xD9);               /* D9 /0 */
5903   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5904               Pop_Reg_FPR(dst) );
5905   ins_pipe( fpu_reg_mem );
5906 %}
5907 
5908 // Load Effective Address
5909 instruct leaP8(eRegP dst, indOffset8 mem) %{
5910   match(Set dst mem);
5911 
5912   ins_cost(110);
5913   format %{ "LEA    $dst,$mem" %}
5914   opcode(0x8D);
5915   ins_encode( OpcP, RegMem(dst,mem));
5916   ins_pipe( ialu_reg_reg_fat );
5917 %}
5918 
5919 instruct leaP32(eRegP dst, indOffset32 mem) %{
5920   match(Set dst mem);
5921 
5922   ins_cost(110);
5923   format %{ "LEA    $dst,$mem" %}
5924   opcode(0x8D);
5925   ins_encode( OpcP, RegMem(dst,mem));
5926   ins_pipe( ialu_reg_reg_fat );
5927 %}
5928 
5929 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5930   match(Set dst mem);
5931 
5932   ins_cost(110);
5933   format %{ "LEA    $dst,$mem" %}
5934   opcode(0x8D);
5935   ins_encode( OpcP, RegMem(dst,mem));
5936   ins_pipe( ialu_reg_reg_fat );
5937 %}
5938 
5939 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5940   match(Set dst mem);
5941 
5942   ins_cost(110);
5943   format %{ "LEA    $dst,$mem" %}
5944   opcode(0x8D);
5945   ins_encode( OpcP, RegMem(dst,mem));
5946   ins_pipe( ialu_reg_reg_fat );
5947 %}
5948 
5949 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5950   match(Set dst mem);
5951 
5952   ins_cost(110);
5953   format %{ "LEA    $dst,$mem" %}
5954   opcode(0x8D);
5955   ins_encode( OpcP, RegMem(dst,mem));
5956   ins_pipe( ialu_reg_reg_fat );
5957 %}
5958 
5959 // Load Constant
5960 instruct loadConI(rRegI dst, immI src) %{
5961   match(Set dst src);
5962 
5963   format %{ "MOV    $dst,$src" %}
5964   ins_encode( LdImmI(dst, src) );
5965   ins_pipe( ialu_reg_fat );
5966 %}
5967 
5968 // Load Constant zero
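// (XOR of a register with itself is the short zeroing idiom, but it writes the
// flags, hence the KILL cr effect below.)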
5969 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5970   match(Set dst src);
5971   effect(KILL cr);
5972 
5973   ins_cost(50);
5974   format %{ "XOR    $dst,$dst" %}
5975   opcode(0x33);  /* + rd */
5976   ins_encode( OpcP, RegReg( dst, dst ) );
5977   ins_pipe( ialu_reg );
5978 %}
5979 
5980 instruct loadConP(eRegP dst, immP src) %{
5981   match(Set dst src);
5982 
5983   format %{ "MOV    $dst,$src" %}
5984   opcode(0xB8);  /* + rd */
5985   ins_encode( LdImmP(dst, src) );
5986   ins_pipe( ialu_reg_fat );
5987 %}
5988 
5989 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5990   match(Set dst src);
5991   effect(KILL cr);
5992   ins_cost(200);
5993   format %{ "MOV    $dst.lo,$src.lo\n\t"
5994             "MOV    $dst.hi,$src.hi" %}
5995   opcode(0xB8);
5996   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5997   ins_pipe( ialu_reg_long_fat );
5998 %}
5999 
6000 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6001   match(Set dst src);
6002   effect(KILL cr);
6003   ins_cost(150);
6004   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6005             "XOR    $dst.hi,$dst.hi" %}
6006   opcode(0x33,0x33);
6007   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6008   ins_pipe( ialu_reg_long );
6009 %}
6010 
6011 // The instruction usage is guarded by predicate in operand immFPR().
6012 instruct loadConFPR(regFPR dst, immFPR con) %{
6013   match(Set dst con);
6014   ins_cost(125);
6015   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6016             "FSTP   $dst" %}
6017   ins_encode %{
6018     __ fld_s($constantaddress($con));
6019     __ fstp_d($dst$$reg);
6020   %}
6021   ins_pipe(fpu_reg_con);
6022 %}
6023 
6024 // The instruction usage is guarded by predicate in operand immFPR0().
6025 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6026   match(Set dst con);
6027   ins_cost(125);
6028   format %{ "FLDZ   ST\n\t"
6029             "FSTP   $dst" %}
6030   ins_encode %{
6031     __ fldz();
6032     __ fstp_d($dst$$reg);
6033   %}
6034   ins_pipe(fpu_reg_con);
6035 %}
6036 
6037 // The instruction usage is guarded by predicate in operand immFPR1().
6038 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6039   match(Set dst con);
6040   ins_cost(125);
6041   format %{ "FLD1   ST\n\t"
6042             "FSTP   $dst" %}
6043   ins_encode %{
6044     __ fld1();
6045     __ fstp_d($dst$$reg);
6046   %}
6047   ins_pipe(fpu_reg_con);
6048 %}
6049 
6050 // The instruction usage is guarded by predicate in operand immF().
6051 instruct loadConF(regF dst, immF con) %{
6052   match(Set dst con);
6053   ins_cost(125);
6054   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6055   ins_encode %{
6056     __ movflt($dst$$XMMRegister, $constantaddress($con));
6057   %}
6058   ins_pipe(pipe_slow);
6059 %}
6060 
6061 // The instruction usage is guarded by predicate in operand immF0().
6062 instruct loadConF0(regF dst, immF0 src) %{
6063   match(Set dst src);
6064   ins_cost(100);
6065   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6066   ins_encode %{
6067     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6068   %}
6069   ins_pipe(pipe_slow);
6070 %}
6071 
6072 // The instruction usage is guarded by predicate in operand immDPR().
6073 instruct loadConDPR(regDPR dst, immDPR con) %{
6074   match(Set dst con);
6075   ins_cost(125);
6076 
6077   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6078             "FSTP   $dst" %}
6079   ins_encode %{
6080     __ fld_d($constantaddress($con));
6081     __ fstp_d($dst$$reg);
6082   %}
6083   ins_pipe(fpu_reg_con);
6084 %}
6085 
6086 // The instruction usage is guarded by predicate in operand immDPR0().
6087 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6088   match(Set dst con);
6089   ins_cost(125);
6090 
6091   format %{ "FLDZ   ST\n\t"
6092             "FSTP   $dst" %}
6093   ins_encode %{
6094     __ fldz();
6095     __ fstp_d($dst$$reg);
6096   %}
6097   ins_pipe(fpu_reg_con);
6098 %}
6099 
6100 // The instruction usage is guarded by predicate in operand immDPR1().
6101 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6102   match(Set dst con);
6103   ins_cost(125);
6104 
6105   format %{ "FLD1   ST\n\t"
6106             "FSTP   $dst" %}
6107   ins_encode %{
6108     __ fld1();
6109     __ fstp_d($dst$$reg);
6110   %}
6111   ins_pipe(fpu_reg_con);
6112 %}
6113 
6114 // The instruction usage is guarded by predicate in operand immD().
6115 instruct loadConD(regD dst, immD con) %{
6116   match(Set dst con);
6117   ins_cost(125);
6118   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6119   ins_encode %{
6120     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6121   %}
6122   ins_pipe(pipe_slow);
6123 %}
6124 
6125 // The instruction usage is guarded by predicate in operand immD0().
6126 instruct loadConD0(regD dst, immD0 src) %{
6127   match(Set dst src);
6128   ins_cost(100);
6129   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6130   ins_encode %{
6131     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6132   %}
6133   ins_pipe( pipe_slow );
6134 %}
6135 
6136 // Load Stack Slot
6137 instruct loadSSI(rRegI dst, stackSlotI src) %{
6138   match(Set dst src);
6139   ins_cost(125);
6140 
6141   format %{ "MOV    $dst,$src" %}
6142   opcode(0x8B);
6143   ins_encode( OpcP, RegMem(dst,src));
6144   ins_pipe( ialu_reg_mem );
6145 %}
6146 
6147 instruct loadSSL(eRegL dst, stackSlotL src) %{
6148   match(Set dst src);
6149 
6150   ins_cost(200);
6151   format %{ "MOV    $dst,$src.lo\n\t"
6152             "MOV    $dst+4,$src.hi" %}
6153   opcode(0x8B, 0x8B);
6154   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6155   ins_pipe( ialu_mem_long_reg );
6156 %}
6157 
6158 // Load Stack Slot
6159 instruct loadSSP(eRegP dst, stackSlotP src) %{
6160   match(Set dst src);
6161   ins_cost(125);
6162 
6163   format %{ "MOV    $dst,$src" %}
6164   opcode(0x8B);
6165   ins_encode( OpcP, RegMem(dst,src));
6166   ins_pipe( ialu_reg_mem );
6167 %}
6168 
6169 // Load Stack Slot
6170 instruct loadSSF(regFPR dst, stackSlotF src) %{
6171   match(Set dst src);
6172   ins_cost(125);
6173 
6174   format %{ "FLD_S  $src\n\t"
6175             "FSTP   $dst" %}
6176   opcode(0xD9);               /* D9 /0, FLD m32real */
6177   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6178               Pop_Reg_FPR(dst) );
6179   ins_pipe( fpu_reg_mem );
6180 %}
6181 
6182 // Load Stack Slot
6183 instruct loadSSD(regDPR dst, stackSlotD src) %{
6184   match(Set dst src);
6185   ins_cost(125);
6186 
6187   format %{ "FLD_D  $src\n\t"
6188             "FSTP   $dst" %}
6189   opcode(0xDD);               /* DD /0, FLD m64real */
6190   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6191               Pop_Reg_DPR(dst) );
6192   ins_pipe( fpu_reg_mem );
6193 %}
6194 
6195 // Prefetch instructions for allocation.
6196 // Must be safe to execute with invalid address (cannot fault).
6197 
6198 instruct prefetchAlloc0( memory mem ) %{
6199   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6200   match(PrefetchAllocation mem);
6201   ins_cost(0);
6202   size(0);
6203   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6204   ins_encode();
6205   ins_pipe(empty);
6206 %}
6207 
6208 instruct prefetchAlloc( memory mem ) %{
6209   predicate(AllocatePrefetchInstr==3);
6210   match( PrefetchAllocation mem );
6211   ins_cost(100);
6212 
6213   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6214   ins_encode %{
6215     __ prefetchw($mem$$Address);
6216   %}
6217   ins_pipe(ialu_mem);
6218 %}
6219 
6220 instruct prefetchAllocNTA( memory mem ) %{
6221   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6222   match(PrefetchAllocation mem);
6223   ins_cost(100);
6224 
6225   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6226   ins_encode %{
6227     __ prefetchnta($mem$$Address);
6228   %}
6229   ins_pipe(ialu_mem);
6230 %}
6231 
6232 instruct prefetchAllocT0( memory mem ) %{
6233   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6234   match(PrefetchAllocation mem);
6235   ins_cost(100);
6236 
6237   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6238   ins_encode %{
6239     __ prefetcht0($mem$$Address);
6240   %}
6241   ins_pipe(ialu_mem);
6242 %}
6243 
6244 instruct prefetchAllocT2( memory mem ) %{
6245   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6246   match(PrefetchAllocation mem);
6247   ins_cost(100);
6248 
6249   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6250   ins_encode %{
6251     __ prefetcht2($mem$$Address);
6252   %}
6253   ins_pipe(ialu_mem);
6254 %}
6255 
6256 //----------Store Instructions-------------------------------------------------
6257 
6258 // Store Byte
6259 instruct storeB(memory mem, xRegI src) %{
6260   match(Set mem (StoreB mem src));
6261 
6262   ins_cost(125);
6263   format %{ "MOV8   $mem,$src" %}
6264   opcode(0x88);
6265   ins_encode( OpcP, RegMem( src, mem ) );
6266   ins_pipe( ialu_mem_reg );
6267 %}
6268 
6269 // Store Char/Short
6270 instruct storeC(memory mem, rRegI src) %{
6271   match(Set mem (StoreC mem src));
6272 
6273   ins_cost(125);
6274   format %{ "MOV16  $mem,$src" %}
6275   opcode(0x89, 0x66);
6276   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6277   ins_pipe( ialu_mem_reg );
6278 %}
6279 
6280 // Store Integer
6281 instruct storeI(memory mem, rRegI src) %{
6282   match(Set mem (StoreI mem src));
6283 
6284   ins_cost(125);
6285   format %{ "MOV    $mem,$src" %}
6286   opcode(0x89);
6287   ins_encode( OpcP, RegMem( src, mem ) );
6288   ins_pipe( ialu_mem_reg );
6289 %}
6290 
6291 // Store Long
6292 instruct storeL(long_memory mem, eRegL src) %{
6293   predicate(!((StoreLNode*)n)->require_atomic_access());
6294   match(Set mem (StoreL mem src));
6295 
6296   ins_cost(200);
6297   format %{ "MOV    $mem,$src.lo\n\t"
6298             "MOV    $mem+4,$src.hi" %}
6299   opcode(0x89, 0x89);
6300   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6301   ins_pipe( ialu_mem_long_reg );
6302 %}
6303 
6304 // Store Long to Integer
6305 instruct storeL2I(memory mem, eRegL src) %{
6306   match(Set mem (StoreI mem (ConvL2I src)));
6307 
6308   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6309   ins_encode %{
6310     __ movl($mem$$Address, $src$$Register);
6311   %}
6312   ins_pipe(ialu_mem_reg);
6313 %}
6314 
6315 // Volatile Store Long.  Must be atomic, so move it into
6316 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6317 // target address before the store (for null-ptr checks)
6318 // so the memory operand is used twice in the encoding.
6319 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6320   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6321   match(Set mem (StoreL mem src));
6322   effect( KILL cr );
6323   ins_cost(400);
6324   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6325             "FILD   $src\n\t"
6326             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6327   opcode(0x3B);
6328   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6329   ins_pipe( fpu_reg_mem );
6330 %}
6331 
6332 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6333   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6334   match(Set mem (StoreL mem src));
6335   effect( TEMP tmp, KILL cr );
6336   ins_cost(380);
6337   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6338             "MOVSD  $tmp,$src\n\t"
6339             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6340   ins_encode %{
6341     __ cmpl(rax, $mem$$Address);
6342     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6343     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6344   %}
6345   ins_pipe( pipe_slow );
6346 %}
6347 
6348 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6349   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6350   match(Set mem (StoreL mem src));
6351   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6352   ins_cost(360);
6353   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6354             "MOVD   $tmp,$src.lo\n\t"
6355             "MOVD   $tmp2,$src.hi\n\t"
6356             "PUNPCKLDQ $tmp,$tmp2\n\t"
6357             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6358   ins_encode %{
6359     __ cmpl(rax, $mem$$Address);
6360     __ movdl($tmp$$XMMRegister, $src$$Register);
6361     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6362     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6363     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6364   %}
6365   ins_pipe( pipe_slow );
6366 %}
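
// Illustrative aside (commented out; not used by the matcher): why the
// volatile forms above funnel the long through a single 8-byte FPU or XMM
// access.  The plain storeL splits the value into two 4-byte MOVs, so a
// concurrent reader may observe a torn value; a single aligned 64-bit
// access cannot tear.  A minimal C++ sketch of the hazard, with
// hypothetical names:
//
//   #include <cstdint>
//   // NOT atomic on IA-32: a racing reader can see the new low word
//   // paired with the stale high word.
//   static void store_long_split(volatile int32_t* p, int64_t v) {
//     p[0] = (int32_t)v;           // MOV [mem],   src.lo
//     p[1] = (int32_t)(v >> 32);   // MOV [mem+4], src.hi
//   }
//
// The volatile encodings instead issue one 8-byte access (FILD/FISTP or
// MOVSD), which the hardware performs atomically for a naturally aligned
// operand.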
6367 
6368 // Store Pointer; for storing unknown oops and raw pointers
6369 instruct storeP(memory mem, anyRegP src) %{
6370   match(Set mem (StoreP mem src));
6371 
6372   ins_cost(125);
6373   format %{ "MOV    $mem,$src" %}
6374   opcode(0x89);
6375   ins_encode( OpcP, RegMem( src, mem ) );
6376   ins_pipe( ialu_mem_reg );
6377 %}
6378 
6379 // Store Integer Immediate
6380 instruct storeImmI(memory mem, immI src) %{
6381   match(Set mem (StoreI mem src));
6382 
6383   ins_cost(150);
6384   format %{ "MOV    $mem,$src" %}
6385   opcode(0xC7);               /* C7 /0 */
6386   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6387   ins_pipe( ialu_mem_imm );
6388 %}
6389 
6390 // Store Short/Char Immediate
6391 instruct storeImmI16(memory mem, immI16 src) %{
6392   predicate(UseStoreImmI16);
6393   match(Set mem (StoreC mem src));
6394 
6395   ins_cost(150);
6396   format %{ "MOV16  $mem,$src" %}
6397   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6398   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6399   ins_pipe( ialu_mem_imm );
6400 %}
6401 
6402 // Store Pointer Immediate; null pointers or constant oops that do not
6403 // need card-mark barriers.
6404 instruct storeImmP(memory mem, immP src) %{
6405   match(Set mem (StoreP mem src));
6406 
6407   ins_cost(150);
6408   format %{ "MOV    $mem,$src" %}
6409   opcode(0xC7);               /* C7 /0 */
6410   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6411   ins_pipe( ialu_mem_imm );
6412 %}
6413 
6414 // Store Byte Immediate
6415 instruct storeImmB(memory mem, immI8 src) %{
6416   match(Set mem (StoreB mem src));
6417 
6418   ins_cost(150);
6419   format %{ "MOV8   $mem,$src" %}
6420   opcode(0xC6);               /* C6 /0 */
6421   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6422   ins_pipe( ialu_mem_imm );
6423 %}
6424 
6425 // Store CMS card-mark Immediate
6426 instruct storeImmCM(memory mem, immI8 src) %{
6427   match(Set mem (StoreCM mem src));
6428 
6429   ins_cost(150);
6430   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6431   opcode(0xC6);               /* C6 /0 */
6432   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6433   ins_pipe( ialu_mem_imm );
6434 %}
6435 
6436 // Store Double
6437 instruct storeDPR( memory mem, regDPR1 src) %{
6438   predicate(UseSSE<=1);
6439   match(Set mem (StoreD mem src));
6440 
6441   ins_cost(100);
6442   format %{ "FST_D  $mem,$src" %}
6443   opcode(0xDD);       /* DD /2 */
6444   ins_encode( enc_FPR_store(mem,src) );
6445   ins_pipe( fpu_mem_reg );
6446 %}
6447 
6448 // Store double does rounding on x86
6449 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6450   predicate(UseSSE<=1);
6451   match(Set mem (StoreD mem (RoundDouble src)));
6452 
6453   ins_cost(100);
6454   format %{ "FST_D  $mem,$src\t# round" %}
6455   opcode(0xDD);       /* DD /2 */
6456   ins_encode( enc_FPR_store(mem,src) );
6457   ins_pipe( fpu_mem_reg );
6458 %}
6459 
6460 // Store XMM register to memory (double-precision floating point)
6461 // MOVSD instruction
6462 instruct storeD(memory mem, regD src) %{
6463   predicate(UseSSE>=2);
6464   match(Set mem (StoreD mem src));
6465   ins_cost(95);
6466   format %{ "MOVSD  $mem,$src" %}
6467   ins_encode %{
6468     __ movdbl($mem$$Address, $src$$XMMRegister);
6469   %}
6470   ins_pipe( pipe_slow );
6471 %}
6472 
6473 // Store XMM register to memory (single-precision floating point)
6474 // MOVSS instruction
6475 instruct storeF(memory mem, regF src) %{
6476   predicate(UseSSE>=1);
6477   match(Set mem (StoreF mem src));
6478   ins_cost(95);
6479   format %{ "MOVSS  $mem,$src" %}
6480   ins_encode %{
6481     __ movflt($mem$$Address, $src$$XMMRegister);
6482   %}
6483   ins_pipe( pipe_slow );
6484 %}
6485 
6486 // Store Float
6487 instruct storeFPR( memory mem, regFPR1 src) %{
6488   predicate(UseSSE==0);
6489   match(Set mem (StoreF mem src));
6490 
6491   ins_cost(100);
6492   format %{ "FST_S  $mem,$src" %}
6493   opcode(0xD9);       /* D9 /2 */
6494   ins_encode( enc_FPR_store(mem,src) );
6495   ins_pipe( fpu_mem_reg );
6496 %}
6497 
6498 // Store Float does rounding on x86
6499 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6500   predicate(UseSSE==0);
6501   match(Set mem (StoreF mem (RoundFloat src)));
6502 
6503   ins_cost(100);
6504   format %{ "FST_S  $mem,$src\t# round" %}
6505   opcode(0xD9);       /* D9 /2 */
6506   ins_encode( enc_FPR_store(mem,src) );
6507   ins_pipe( fpu_mem_reg );
6508 %}
6509 
6510 // Store Double as rounded Float (ConvD2F); the single-precision store does the rounding on x86
6511 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6512   predicate(UseSSE<=1);
6513   match(Set mem (StoreF mem (ConvD2F src)));
6514 
6515   ins_cost(100);
6516   format %{ "FST_S  $mem,$src\t# D-round" %}
6517   opcode(0xD9);       /* D9 /2 */
6518   ins_encode( enc_FPR_store(mem,src) );
6519   ins_pipe( fpu_mem_reg );
6520 %}
6521 
6522 // Store immediate Float value (it is faster than store from FPU register)
6523 // Use of this instruction is guarded by the predicate in operand immFPR().
6524 instruct storeFPR_imm( memory mem, immFPR src) %{
6525   match(Set mem (StoreF mem src));
6526 
6527   ins_cost(50);
6528   format %{ "MOV    $mem,$src\t# store float" %}
6529   opcode(0xC7);               /* C7 /0 */
6530   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6531   ins_pipe( ialu_mem_imm );
6532 %}
6533 
6534 // Store immediate Float value (it is faster than store from XMM register)
6535 // Use of this instruction is guarded by the predicate in operand immF().
6536 instruct storeF_imm( memory mem, immF src) %{
6537   match(Set mem (StoreF mem src));
6538 
6539   ins_cost(50);
6540   format %{ "MOV    $mem,$src\t# store float" %}
6541   opcode(0xC7);               /* C7 /0 */
6542   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6543   ins_pipe( ialu_mem_imm );
6544 %}
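
// Aside: the two immediate-store forms above bypass the FPU/XMM unit by
// writing the raw IEEE-754 bit pattern of the constant with a plain 32-bit
// integer MOV.  Roughly, in C++ terms (illustrative sketch only,
// hypothetical helper name):
//
//   #include <cstdint>
//   #include <cstring>
//   static void store_float_bits(void* mem, float f) {
//     int32_t bits;
//     std::memcpy(&bits, &f, sizeof(bits));    // Con32F_as_bits(src)
//     std::memcpy(mem, &bits, sizeof(bits));   // MOV dword ptr [mem], imm32
//   }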
6545 
6546 // Store Integer to stack slot
6547 instruct storeSSI(stackSlotI dst, rRegI src) %{
6548   match(Set dst src);
6549 
6550   ins_cost(100);
6551   format %{ "MOV    $dst,$src" %}
6552   opcode(0x89);
6553   ins_encode( OpcPRegSS( dst, src ) );
6554   ins_pipe( ialu_mem_reg );
6555 %}
6556 
6557 // Store Pointer to stack slot
6558 instruct storeSSP(stackSlotP dst, eRegP src) %{
6559   match(Set dst src);
6560 
6561   ins_cost(100);
6562   format %{ "MOV    $dst,$src" %}
6563   opcode(0x89);
6564   ins_encode( OpcPRegSS( dst, src ) );
6565   ins_pipe( ialu_mem_reg );
6566 %}
6567 
6568 // Store Long to stack slot
6569 instruct storeSSL(stackSlotL dst, eRegL src) %{
6570   match(Set dst src);
6571 
6572   ins_cost(200);
6573   format %{ "MOV    $dst,$src.lo\n\t"
6574             "MOV    $dst+4,$src.hi" %}
6575   opcode(0x89, 0x89);
6576   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6577   ins_pipe( ialu_mem_long_reg );
6578 %}
6579 
6580 //----------MemBar Instructions-----------------------------------------------
6581 // Memory barrier flavors
6582 
6583 instruct membar_acquire() %{
6584   match(MemBarAcquire);
6585   match(LoadFence);
6586   ins_cost(400);
6587 
6588   size(0);
6589   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6590   ins_encode();
6591   ins_pipe(empty);
6592 %}
6593 
6594 instruct membar_acquire_lock() %{
6595   match(MemBarAcquireLock);
6596   ins_cost(0);
6597 
6598   size(0);
6599   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6600   ins_encode( );
6601   ins_pipe(empty);
6602 %}
6603 
6604 instruct membar_release() %{
6605   match(MemBarRelease);
6606   match(StoreFence);
6607   ins_cost(400);
6608 
6609   size(0);
6610   format %{ "MEMBAR-release ! (empty encoding)" %}
6611   ins_encode( );
6612   ins_pipe(empty);
6613 %}
6614 
6615 instruct membar_release_lock() %{
6616   match(MemBarReleaseLock);
6617   ins_cost(0);
6618 
6619   size(0);
6620   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6621   ins_encode( );
6622   ins_pipe(empty);
6623 %}
6624 
6625 instruct membar_volatile(eFlagsReg cr) %{
6626   match(MemBarVolatile);
6627   effect(KILL cr);
6628   ins_cost(400);
6629 
6630   format %{
6631     $$template
6632     if (os::is_MP()) {
6633       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6634     } else {
6635       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6636     }
6637   %}
6638   ins_encode %{
6639     __ membar(Assembler::StoreLoad);
6640   %}
6641   ins_pipe(pipe_slow);
6642 %}
6643 
6644 instruct unnecessary_membar_volatile() %{
6645   match(MemBarVolatile);
6646   predicate(Matcher::post_store_load_barrier(n));
6647   ins_cost(0);
6648 
6649   size(0);
6650   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6651   ins_encode( );
6652   ins_pipe(empty);
6653 %}
6654 
6655 instruct membar_storestore() %{
6656   match(MemBarStoreStore);
6657   ins_cost(0);
6658 
6659   size(0);
6660   format %{ "MEMBAR-storestore (empty encoding)" %}
6661   ins_encode( );
6662   ins_pipe(empty);
6663 %}
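
// Aside on the barrier flavors above: IA-32 already keeps loads ordered
// with loads, stores ordered with stores, and stores ordered after earlier
// loads, so the acquire, release and storestore barriers only constrain
// the compiler and emit no code (size(0), empty encodings).  Only
// StoreLoad needs a real fence; membar_volatile emits a locked add of zero
// to the top of stack, which is commonly cheaper than MFENCE.  The
// portable C++11 equivalent of that fence, as an illustrative sketch:
//
//   #include <atomic>
//   static void store_load_fence() {
//     // compiles down to MFENCE or a locked read-modify-write on x86
//     std::atomic_thread_fence(std::memory_order_seq_cst);
//   }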
6664 
6665 //----------Move Instructions--------------------------------------------------
6666 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6667   match(Set dst (CastX2P src));
6668   format %{ "# X2P  $dst, $src" %}
6669   ins_encode( /*empty encoding*/ );
6670   ins_cost(0);
6671   ins_pipe(empty);
6672 %}
6673 
6674 instruct castP2X(rRegI dst, eRegP src ) %{
6675   match(Set dst (CastP2X src));
6676   ins_cost(50);
6677   format %{ "MOV    $dst, $src\t# CastP2X" %}
6678   ins_encode( enc_Copy( dst, src) );
6679   ins_pipe( ialu_reg_reg );
6680 %}
6681 
6682 //----------Conditional Move---------------------------------------------------
6683 // Conditional move
6684 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6685   predicate(!VM_Version::supports_cmov() );
6686   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6687   ins_cost(200);
6688   format %{ "J$cop,us skip\t# signed cmove\n\t"
6689             "MOV    $dst,$src\n"
6690       "skip:" %}
6691   ins_encode %{
6692     Label Lskip;
6693     // Invert sense of branch from sense of CMOV
6694     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6695     __ movl($dst$$Register, $src$$Register);
6696     __ bind(Lskip);
6697   %}
6698   ins_pipe( pipe_cmov_reg );
6699 %}
6700 
6701 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6702   predicate(!VM_Version::supports_cmov() );
6703   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6704   ins_cost(200);
6705   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6706             "MOV    $dst,$src\n"
6707       "skip:" %}
6708   ins_encode %{
6709     Label Lskip;
6710     // Invert sense of branch from sense of CMOV
6711     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6712     __ movl($dst$$Register, $src$$Register);
6713     __ bind(Lskip);
6714   %}
6715   ins_pipe( pipe_cmov_reg );
6716 %}
6717 
6718 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6719   predicate(VM_Version::supports_cmov() );
6720   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6721   ins_cost(200);
6722   format %{ "CMOV$cop $dst,$src" %}
6723   opcode(0x0F,0x40);
6724   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6725   ins_pipe( pipe_cmov_reg );
6726 %}
6727 
6728 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6729   predicate(VM_Version::supports_cmov() );
6730   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6731   ins_cost(200);
6732   format %{ "CMOV$cop $dst,$src" %}
6733   opcode(0x0F,0x40);
6734   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6735   ins_pipe( pipe_cmov_reg );
6736 %}
6737 
6738 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6739   predicate(VM_Version::supports_cmov() );
6740   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6741   ins_cost(200);
6742   expand %{
6743     cmovI_regU(cop, cr, dst, src);
6744   %}
6745 %}
6746 
6747 // Conditional move
6748 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6749   predicate(VM_Version::supports_cmov() );
6750   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6751   ins_cost(250);
6752   format %{ "CMOV$cop $dst,$src" %}
6753   opcode(0x0F,0x40);
6754   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6755   ins_pipe( pipe_cmov_mem );
6756 %}
6757 
6758 // Conditional move
6759 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6760   predicate(VM_Version::supports_cmov() );
6761   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6762   ins_cost(250);
6763   format %{ "CMOV$cop $dst,$src" %}
6764   opcode(0x0F,0x40);
6765   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6766   ins_pipe( pipe_cmov_mem );
6767 %}
6768 
6769 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6770   predicate(VM_Version::supports_cmov() );
6771   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6772   ins_cost(250);
6773   expand %{
6774     cmovI_memU(cop, cr, dst, src);
6775   %}
6776 %}
6777 
6778 // Conditional move
6779 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6780   predicate(VM_Version::supports_cmov() );
6781   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6782   ins_cost(200);
6783   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6784   opcode(0x0F,0x40);
6785   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6786   ins_pipe( pipe_cmov_reg );
6787 %}
6788 
6789 // Conditional move (non-P6 version)
6790 // Note:  a CMoveP is generated for  stubs and native wrappers
6791 //        regardless of whether we are on a P6, so we
6792 //        emulate a cmov here
6793 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6794   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6795   ins_cost(300);
6796   format %{ "Jn$cop   skip\n\t"
6797           "MOV    $dst,$src\t# pointer\n"
6798       "skip:" %}
6799   opcode(0x8b);
6800   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6801   ins_pipe( pipe_cmov_reg );
6802 %}
6803 
6804 // Conditional move
6805 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6806   predicate(VM_Version::supports_cmov() );
6807   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6808   ins_cost(200);
6809   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6810   opcode(0x0F,0x40);
6811   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6812   ins_pipe( pipe_cmov_reg );
6813 %}
6814 
6815 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6816   predicate(VM_Version::supports_cmov() );
6817   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6818   ins_cost(200);
6819   expand %{
6820     cmovP_regU(cop, cr, dst, src);
6821   %}
6822 %}
6823 
6824 // DISABLED: Requires the ADLC to emit a bottom_type call that
6825 // correctly meets the two pointer arguments; one is an incoming
6826 // register but the other is a memory operand.  ALSO appears to
6827 // be buggy with implicit null checks.
6828 //
6829 //// Conditional move
6830 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6831 //  predicate(VM_Version::supports_cmov() );
6832 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6833 //  ins_cost(250);
6834 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6835 //  opcode(0x0F,0x40);
6836 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6837 //  ins_pipe( pipe_cmov_mem );
6838 //%}
6839 //
6840 //// Conditional move
6841 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6842 //  predicate(VM_Version::supports_cmov() );
6843 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6844 //  ins_cost(250);
6845 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6846 //  opcode(0x0F,0x40);
6847 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6848 //  ins_pipe( pipe_cmov_mem );
6849 //%}
6850 
6851 // Conditional move
6852 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6853   predicate(UseSSE<=1);
6854   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6855   ins_cost(200);
6856   format %{ "FCMOV$cop $dst,$src\t# double" %}
6857   opcode(0xDA);
6858   ins_encode( enc_cmov_dpr(cop,src) );
6859   ins_pipe( pipe_cmovDPR_reg );
6860 %}
6861 
6862 // Conditional move
6863 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6864   predicate(UseSSE==0);
6865   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6866   ins_cost(200);
6867   format %{ "FCMOV$cop $dst,$src\t# float" %}
6868   opcode(0xDA);
6869   ins_encode( enc_cmov_dpr(cop,src) );
6870   ins_pipe( pipe_cmovDPR_reg );
6871 %}
6872 
6873 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6874 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6875   predicate(UseSSE<=1);
6876   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6877   ins_cost(200);
6878   format %{ "Jn$cop   skip\n\t"
6879             "MOV    $dst,$src\t# double\n"
6880       "skip:" %}
6881   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6882   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6883   ins_pipe( pipe_cmovDPR_reg );
6884 %}
6885 
6886 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6887 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6888   predicate(UseSSE==0);
6889   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6890   ins_cost(200);
6891   format %{ "Jn$cop    skip\n\t"
6892             "MOV    $dst,$src\t# float\n"
6893       "skip:" %}
6894   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6895   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6896   ins_pipe( pipe_cmovDPR_reg );
6897 %}
6898 
6899 // No CMOVE exists for XMM registers with SSE/SSE2; emulate with a branch around the move
6900 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6901   predicate (UseSSE>=1);
6902   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6903   ins_cost(200);
6904   format %{ "Jn$cop   skip\n\t"
6905             "MOVSS  $dst,$src\t# float\n"
6906       "skip:" %}
6907   ins_encode %{
6908     Label skip;
6909     // Invert sense of branch from sense of CMOV
6910     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6911     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6912     __ bind(skip);
6913   %}
6914   ins_pipe( pipe_slow );
6915 %}
6916 
6917 // No CMOVE exists for XMM registers with SSE/SSE2; emulate with a branch around the move
6918 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6919   predicate (UseSSE>=2);
6920   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6921   ins_cost(200);
6922   format %{ "Jn$cop   skip\n\t"
6923             "MOVSD  $dst,$src\t# double\n"
6924       "skip:" %}
6925   ins_encode %{
6926     Label skip;
6927     // Invert sense of branch from sense of CMOV
6928     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6929     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6930     __ bind(skip);
6931   %}
6932   ins_pipe( pipe_slow );
6933 %}
6934 
6935 // unsigned version
6936 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6937   predicate (UseSSE>=1);
6938   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6939   ins_cost(200);
6940   format %{ "Jn$cop   skip\n\t"
6941             "MOVSS  $dst,$src\t# float\n"
6942       "skip:" %}
6943   ins_encode %{
6944     Label skip;
6945     // Invert sense of branch from sense of CMOV
6946     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6947     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6948     __ bind(skip);
6949   %}
6950   ins_pipe( pipe_slow );
6951 %}
6952 
6953 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6954   predicate (UseSSE>=1);
6955   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6956   ins_cost(200);
6957   expand %{
6958     fcmovF_regU(cop, cr, dst, src);
6959   %}
6960 %}
6961 
6962 // unsigned version
6963 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6964   predicate (UseSSE>=2);
6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6966   ins_cost(200);
6967   format %{ "Jn$cop   skip\n\t"
6968             "MOVSD  $dst,$src\t# double\n"
6969       "skip:" %}
6970   ins_encode %{
6971     Label skip;
6972     // Invert sense of branch from sense of CMOV
6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6974     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6975     __ bind(skip);
6976   %}
6977   ins_pipe( pipe_slow );
6978 %}
6979 
6980 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6981   predicate (UseSSE>=2);
6982   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6983   ins_cost(200);
6984   expand %{
6985     fcmovD_regU(cop, cr, dst, src);
6986   %}
6987 %}
6988 
6989 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6990   predicate(VM_Version::supports_cmov() );
6991   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6994             "CMOV$cop $dst.hi,$src.hi" %}
6995   opcode(0x0F,0x40);
6996   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6997   ins_pipe( pipe_cmov_reg_long );
6998 %}
6999 
7000 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7001   predicate(VM_Version::supports_cmov() );
7002   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7005             "CMOV$cop $dst.hi,$src.hi" %}
7006   opcode(0x0F,0x40);
7007   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7008   ins_pipe( pipe_cmov_reg_long );
7009 %}
7010 
7011 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7012   predicate(VM_Version::supports_cmov() );
7013   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7014   ins_cost(200);
7015   expand %{
7016     cmovL_regU(cop, cr, dst, src);
7017   %}
7018 %}
7019 
7020 //----------Arithmetic Instructions--------------------------------------------
7021 //----------Addition Instructions----------------------------------------------
7022 
7023 // Integer Addition Instructions
7024 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7025   match(Set dst (AddI dst src));
7026   effect(KILL cr);
7027 
7028   size(2);
7029   format %{ "ADD    $dst,$src" %}
7030   opcode(0x03);
7031   ins_encode( OpcP, RegReg( dst, src) );
7032   ins_pipe( ialu_reg_reg );
7033 %}
7034 
7035 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7036   match(Set dst (AddI dst src));
7037   effect(KILL cr);
7038 
7039   format %{ "ADD    $dst,$src" %}
7040   opcode(0x81, 0x00); /* /0 id */
7041   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7042   ins_pipe( ialu_reg );
7043 %}
7044 
7045 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7046   predicate(UseIncDec);
7047   match(Set dst (AddI dst src));
7048   effect(KILL cr);
7049 
7050   size(1);
7051   format %{ "INC    $dst" %}
7052   opcode(0x40); /*  */
7053   ins_encode( Opc_plus( primary, dst ) );
7054   ins_pipe( ialu_reg );
7055 %}
7056 
7057 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7058   match(Set dst (AddI src0 src1));
7059   ins_cost(110);
7060 
7061   format %{ "LEA    $dst,[$src0 + $src1]" %}
7062   opcode(0x8D); /* 0x8D /r */
7063   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7064   ins_pipe( ialu_reg_reg );
7065 %}
7066 
7067 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7068   match(Set dst (AddP src0 src1));
7069   ins_cost(110);
7070 
7071   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7072   opcode(0x8D); /* 0x8D /r */
7073   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7074   ins_pipe( ialu_reg_reg );
7075 %}
7076 
7077 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7078   predicate(UseIncDec);
7079   match(Set dst (AddI dst src));
7080   effect(KILL cr);
7081 
7082   size(1);
7083   format %{ "DEC    $dst" %}
7084   opcode(0x48); /*  */
7085   ins_encode( Opc_plus( primary, dst ) );
7086   ins_pipe( ialu_reg );
7087 %}
7088 
7089 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7090   match(Set dst (AddP dst src));
7091   effect(KILL cr);
7092 
7093   size(2);
7094   format %{ "ADD    $dst,$src" %}
7095   opcode(0x03);
7096   ins_encode( OpcP, RegReg( dst, src) );
7097   ins_pipe( ialu_reg_reg );
7098 %}
7099 
7100 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7101   match(Set dst (AddP dst src));
7102   effect(KILL cr);
7103 
7104   format %{ "ADD    $dst,$src" %}
7105   opcode(0x81,0x00); /* Opcode 81 /0 id */
7106   // ins_encode( RegImm( dst, src) );
7107   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7108   ins_pipe( ialu_reg );
7109 %}
7110 
7111 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7112   match(Set dst (AddI dst (LoadI src)));
7113   effect(KILL cr);
7114 
7115   ins_cost(125);
7116   format %{ "ADD    $dst,$src" %}
7117   opcode(0x03);
7118   ins_encode( OpcP, RegMem( dst, src) );
7119   ins_pipe( ialu_reg_mem );
7120 %}
7121 
7122 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7123   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7124   effect(KILL cr);
7125 
7126   ins_cost(150);
7127   format %{ "ADD    $dst,$src" %}
7128   opcode(0x01);  /* Opcode 01 /r */
7129   ins_encode( OpcP, RegMem( src, dst ) );
7130   ins_pipe( ialu_mem_reg );
7131 %}
7132 
7133 // Add Memory with Immediate
7134 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7135   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7136   effect(KILL cr);
7137 
7138   ins_cost(125);
7139   format %{ "ADD    $dst,$src" %}
7140   opcode(0x81);               /* Opcode 81 /0 id */
7141   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7142   ins_pipe( ialu_mem_imm );
7143 %}
7144 
7145 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7146   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7147   effect(KILL cr);
7148 
7149   ins_cost(125);
7150   format %{ "INC    $dst" %}
7151   opcode(0xFF);               /* Opcode FF /0 */
7152   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7153   ins_pipe( ialu_mem_imm );
7154 %}
7155 
7156 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7158   effect(KILL cr);
7159 
7160   ins_cost(125);
7161   format %{ "DEC    $dst" %}
7162   opcode(0xFF);               /* Opcode FF /1 */
7163   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7164   ins_pipe( ialu_mem_imm );
7165 %}
7166 
7167 
7168 instruct checkCastPP( eRegP dst ) %{
7169   match(Set dst (CheckCastPP dst));
7170 
7171   size(0);
7172   format %{ "#checkcastPP of $dst" %}
7173   ins_encode( /*empty encoding*/ );
7174   ins_pipe( empty );
7175 %}
7176 
7177 instruct castPP( eRegP dst ) %{
7178   match(Set dst (CastPP dst));
7179   format %{ "#castPP of $dst" %}
7180   ins_encode( /*empty encoding*/ );
7181   ins_pipe( empty );
7182 %}
7183 
7184 instruct castII( rRegI dst ) %{
7185   match(Set dst (CastII dst));
7186   format %{ "#castII of $dst" %}
7187   ins_encode( /*empty encoding*/ );
7188   ins_cost(0);
7189   ins_pipe( empty );
7190 %}
7191 
7192 
7193 // Load-locked - same as a regular pointer load when used with compare-swap
7194 instruct loadPLocked(eRegP dst, memory mem) %{
7195   match(Set dst (LoadPLocked mem));
7196 
7197   ins_cost(125);
7198   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7199   opcode(0x8B);
7200   ins_encode( OpcP, RegMem(dst,mem));
7201   ins_pipe( ialu_reg_mem );
7202 %}
7203 
7204 // Conditional-store of the updated heap-top.
7205 // Used during allocation of the shared heap.
7206 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7207 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7208   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7209   // EAX is killed if there is contention, but then it's also unused.
7210   // In the common case of no contention, EAX holds the new oop address.
7211   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7212   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7213   ins_pipe( pipe_cmpxchg );
7214 %}
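
// Aside: LoadPLocked/StorePConditional together form the CAS step of
// bump-the-pointer allocation of the shared heap.  A minimal C++ sketch
// of the idea, with hypothetical names and the retry loop omitted:
//
//   #include <atomic>
//   #include <cstddef>
//   static void* try_allocate(std::atomic<char*>& heap_top, size_t size) {
//     char* obj     = heap_top.load(std::memory_order_relaxed);  // LoadPLocked
//     char* new_top = obj + size;
//     // CMPXCHG heap_top,new_top with EAX == obj; flags EQ on success
//     return heap_top.compare_exchange_strong(obj, new_top) ? (void*)obj : NULL;
//   }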
7215 
7216 // Conditional-store of an int value.
7217 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7218 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7219   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7220   effect(KILL oldval);
7221   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7222   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7223   ins_pipe( pipe_cmpxchg );
7224 %}
7225 
7226 // Conditional-store of a long value.
7227 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7228 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7229   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7230   effect(KILL oldval);
7231   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7232             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7233             "XCHG   EBX,ECX"
7234   %}
7235   ins_encode %{
7236     // Note: we need to swap rbx and rcx before and after the
7237     //       cmpxchg8 instruction because the instruction uses
7238     //       rcx as the high order word of the new value to store but
7239     //       our register encoding uses rbx.
7240     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7241     if( os::is_MP() )
7242       __ lock();
7243     __ cmpxchg8($mem$$Address);
7244     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7245   %}
7246   ins_pipe( pipe_cmpxchg );
7247 %}
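
// For readers unfamiliar with CMPXCHG8B, a commented pseudo-C++ sketch of
// its semantics (illustrative only; the hardware performs the whole thing
// as one atomic read-modify-write under the LOCK prefix):
//
//   #include <cstdint>
//   // expected arrives in EDX:EAX, the new value in ECX:EBX
//   static bool cmpxchg8b(volatile int64_t* mem, int64_t& expected, int64_t newval) {
//     if (*mem == expected) { *mem = newval;   return true;  }   // ZF = 1
//     else                  { expected = *mem; return false; }   // ZF = 0
//   }
//
// The XCHG EBX,ECX pair above is needed only because this port's long
// register pairing holds the new value's words in the opposite order from
// what CMPXCHG8B expects.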
7248 
7249 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7250 
7251 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7252   predicate(VM_Version::supports_cx8());
7253   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7254   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7255   effect(KILL cr, KILL oldval);
7256   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7257             "MOV    $res,0\n\t"
7258             "JNE,s  fail\n\t"
7259             "MOV    $res,1\n"
7260           "fail:" %}
7261   ins_encode( enc_cmpxchg8(mem_ptr),
7262               enc_flags_ne_to_boolean(res) );
7263   ins_pipe( pipe_cmpxchg );
7264 %}
7265 
7266 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7267   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7268   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7269   effect(KILL cr, KILL oldval);
7270   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7271             "MOV    $res,0\n\t"
7272             "JNE,s  fail\n\t"
7273             "MOV    $res,1\n"
7274           "fail:" %}
7275   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7276   ins_pipe( pipe_cmpxchg );
7277 %}
7278 
7279 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7280   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7281   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7282   effect(KILL cr, KILL oldval);
7283   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7284             "MOV    $res,0\n\t"
7285             "JNE,s  fail\n\t"
7286             "MOV    $res,1\n"
7287           "fail:" %}
7288   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7289   ins_pipe( pipe_cmpxchg );
7290 %}
7291 
7292 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7293   predicate(VM_Version::supports_cx8());
7294   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7295   effect(KILL cr);
7296   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7297   ins_encode( enc_cmpxchg8(mem_ptr) );
7298   ins_pipe( pipe_cmpxchg );
7299 %}
7300 
7301 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7302   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7303   effect(KILL cr);
7304   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7305   ins_encode( enc_cmpxchg(mem_ptr) );
7306   ins_pipe( pipe_cmpxchg );
7307 %}
7308 
7309 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7310   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7311   effect(KILL cr);
7312   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7313   ins_encode( enc_cmpxchg(mem_ptr) );
7314   ins_pipe( pipe_cmpxchg );
7315 %}
7316 
7317 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7318   predicate(n->as_LoadStore()->result_not_used());
7319   match(Set dummy (GetAndAddI mem add));
7320   effect(KILL cr);
7321   format %{ "ADDL  [$mem],$add" %}
7322   ins_encode %{
7323     if (os::is_MP()) { __ lock(); }
7324     __ addl($mem$$Address, $add$$constant);
7325   %}
7326   ins_pipe( pipe_cmpxchg );
7327 %}
7328 
7329 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7330   match(Set newval (GetAndAddI mem newval));
7331   effect(KILL cr);
7332   format %{ "XADDL  [$mem],$newval" %}
7333   ins_encode %{
7334     if (os::is_MP()) { __ lock(); }
7335     __ xaddl($mem$$Address, $newval$$Register);
7336   %}
7337   ins_pipe( pipe_cmpxchg );
7338 %}
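
// GetAndAddI maps onto LOCK XADD, an atomic fetch-and-add that leaves the
// old value in the source register.  A commented C++ sketch of the
// semantics (illustrative only; the hardware does it as one locked
// read-modify-write):
//
//   #include <cstdint>
//   static int32_t lock_xadd(volatile int32_t* mem, int32_t v) {
//     int32_t old = *mem;    // read and write happen atomically together
//     *mem = old + v;
//     return old;            // XADD returns the previous value
//   }
//
// When the result is unused (xaddI_no_res above), a plain LOCK ADD is
// emitted instead, since the old value need not be produced.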
7339 
7340 instruct xchgI( memory mem, rRegI newval) %{
7341   match(Set newval (GetAndSetI mem newval));
7342   format %{ "XCHGL  $newval,[$mem]" %}
7343   ins_encode %{
7344     __ xchgl($newval$$Register, $mem$$Address);
7345   %}
7346   ins_pipe( pipe_cmpxchg );
7347 %}
7348 
7349 instruct xchgP( memory mem, pRegP newval) %{
7350   match(Set newval (GetAndSetP mem newval));
7351   format %{ "XCHGL  $newval,[$mem]" %}
7352   ins_encode %{
7353     __ xchgl($newval$$Register, $mem$$Address);
7354   %}
7355   ins_pipe( pipe_cmpxchg );
7356 %}
7357 
7358 //----------Subtraction Instructions-------------------------------------------
7359 
7360 // Integer Subtraction Instructions
7361 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7362   match(Set dst (SubI dst src));
7363   effect(KILL cr);
7364 
7365   size(2);
7366   format %{ "SUB    $dst,$src" %}
7367   opcode(0x2B);
7368   ins_encode( OpcP, RegReg( dst, src) );
7369   ins_pipe( ialu_reg_reg );
7370 %}
7371 
7372 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7373   match(Set dst (SubI dst src));
7374   effect(KILL cr);
7375 
7376   format %{ "SUB    $dst,$src" %}
7377   opcode(0x81,0x05);  /* Opcode 81 /5 */
7378   // ins_encode( RegImm( dst, src) );
7379   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7380   ins_pipe( ialu_reg );
7381 %}
7382 
7383 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7384   match(Set dst (SubI dst (LoadI src)));
7385   effect(KILL cr);
7386 
7387   ins_cost(125);
7388   format %{ "SUB    $dst,$src" %}
7389   opcode(0x2B);
7390   ins_encode( OpcP, RegMem( dst, src) );
7391   ins_pipe( ialu_reg_mem );
7392 %}
7393 
7394 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7395   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7396   effect(KILL cr);
7397 
7398   ins_cost(150);
7399   format %{ "SUB    $dst,$src" %}
7400   opcode(0x29);  /* Opcode 29 /r */
7401   ins_encode( OpcP, RegMem( src, dst ) );
7402   ins_pipe( ialu_mem_reg );
7403 %}
7404 
7405 // Subtract from a pointer
7406 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7407   match(Set dst (AddP dst (SubI zero src)));
7408   effect(KILL cr);
7409 
7410   size(2);
7411   format %{ "SUB    $dst,$src" %}
7412   opcode(0x2B);
7413   ins_encode( OpcP, RegReg( dst, src) );
7414   ins_pipe( ialu_reg_reg );
7415 %}
7416 
7417 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7418   match(Set dst (SubI zero dst));
7419   effect(KILL cr);
7420 
7421   size(2);
7422   format %{ "NEG    $dst" %}
7423   opcode(0xF7,0x03);  // Opcode F7 /3
7424   ins_encode( OpcP, RegOpc( dst ) );
7425   ins_pipe( ialu_reg );
7426 %}
7427 
7428 //----------Multiplication/Division Instructions-------------------------------
7429 // Integer Multiplication Instructions
7430 // Multiply Register
7431 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7432   match(Set dst (MulI dst src));
7433   effect(KILL cr);
7434 
7435   size(3);
7436   ins_cost(300);
7437   format %{ "IMUL   $dst,$src" %}
7438   opcode(0xAF, 0x0F);
7439   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7440   ins_pipe( ialu_reg_reg_alu0 );
7441 %}
7442 
7443 // Multiply 32-bit Immediate
7444 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7445   match(Set dst (MulI src imm));
7446   effect(KILL cr);
7447 
7448   ins_cost(300);
7449   format %{ "IMUL   $dst,$src,$imm" %}
7450   opcode(0x69);  /* 69 /r id */
7451   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7452   ins_pipe( ialu_reg_reg_alu0 );
7453 %}
7454 
7455 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7456   match(Set dst src);
7457   effect(KILL cr);
7458 
7459   // Note that this is artificially increased to make it more expensive than loadConL
7460   ins_cost(250);
7461   format %{ "MOV    EAX,$src\t// low word only" %}
7462   opcode(0xB8);
7463   ins_encode( LdImmL_Lo(dst, src) );
7464   ins_pipe( ialu_reg_fat );
7465 %}
7466 
7467 // Multiply by 32-bit Immediate, taking the shifted high order results
7468 //  (special case for shift by 32)
7469 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7470   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7471   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7472              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7473              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7474   effect(USE src1, KILL cr);
7475 
7476   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7477   ins_cost(0*100 + 1*400 - 150);
7478   format %{ "IMUL   EDX:EAX,$src1" %}
7479   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7480   ins_pipe( pipe_slow );
7481 %}
7482 
7483 // Multiply by 32-bit Immediate, taking the shifted high order results
7484 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7485   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7486   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7487              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7488              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7489   effect(USE src1, KILL cr);
7490 
7491   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7492   ins_cost(1*100 + 1*400 - 150);
7493   format %{ "IMUL   EDX:EAX,$src1\n\t"
7494             "SAR    EDX,$cnt-32" %}
7495   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7496   ins_pipe( pipe_slow );
7497 %}
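
// In C terms the two multiply-high patterns above compute (illustrative
// sketch only, hypothetical helper name):
//
//   #include <cstdint>
//   static int32_t mul_con_shift_high(int32_t x, int32_t con, int cnt /* 32..63 */) {
//     return (int32_t)(((int64_t)x * con) >> cnt);
//   }
//
// IMUL leaves the full 64-bit product in EDX:EAX; because the shift count
// is at least 32 the result lives entirely in EDX, followed by a SAR of
// cnt-32 when cnt > 32.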
7498 
7499 // Multiply Memory 32-bit Immediate
7500 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7501   match(Set dst (MulI (LoadI src) imm));
7502   effect(KILL cr);
7503 
7504   ins_cost(300);
7505   format %{ "IMUL   $dst,$src,$imm" %}
7506   opcode(0x69);  /* 69 /r id */
7507   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7508   ins_pipe( ialu_reg_mem_alu0 );
7509 %}
7510 
7511 // Multiply Memory
7512 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7513   match(Set dst (MulI dst (LoadI src)));
7514   effect(KILL cr);
7515 
7516   ins_cost(350);
7517   format %{ "IMUL   $dst,$src" %}
7518   opcode(0xAF, 0x0F);
7519   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7520   ins_pipe( ialu_reg_mem_alu0 );
7521 %}
7522 
7523 // Multiply Register Int to Long
7524 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7525   // Basic Idea: long = (long)int * (long)int
7526   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7527   effect(DEF dst, USE src, USE src1, KILL flags);
7528 
7529   ins_cost(300);
7530   format %{ "IMUL   $dst,$src1" %}
7531 
7532   ins_encode( long_int_multiply( dst, src1 ) );
7533   ins_pipe( ialu_reg_reg_alu0 );
7534 %}
7535 
7536 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7537   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7538   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7539   effect(KILL flags);
7540 
7541   ins_cost(300);
7542   format %{ "MUL    $dst,$src1" %}
7543 
7544   ins_encode( long_uint_multiply(dst, src1) );
7545   ins_pipe( ialu_reg_reg_alu0 );
7546 %}
7547 
7548 // Multiply Register Long
7549 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7550   match(Set dst (MulL dst src));
7551   effect(KILL cr, TEMP tmp);
7552   ins_cost(4*100+3*400);
7553 // Basic idea: lo(result) = lo(x_lo * y_lo)
7554 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7555   format %{ "MOV    $tmp,$src.lo\n\t"
7556             "IMUL   $tmp,EDX\n\t"
7557             "MOV    EDX,$src.hi\n\t"
7558             "IMUL   EDX,EAX\n\t"
7559             "ADD    $tmp,EDX\n\t"
7560             "MUL    EDX:EAX,$src.lo\n\t"
7561             "ADD    EDX,$tmp" %}
7562   ins_encode( long_multiply( dst, src, tmp ) );
7563   ins_pipe( pipe_slow );
7564 %}
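
// The decomposition in the basic-idea comment above, written out as a
// commented C++ sketch (illustrative only; x_hi * y_hi never contributes
// to the low 64 bits of the product):
//
//   #include <cstdint>
//   static int64_t mull64(int64_t x, int64_t y) {
//     uint32_t xl = (uint32_t)x, xh = (uint32_t)(x >> 32);
//     uint32_t yl = (uint32_t)y, yh = (uint32_t)(y >> 32);
//     uint64_t lo = (uint64_t)xl * yl;                       // MUL  EDX:EAX,src.lo
//     uint32_t hi = (uint32_t)(lo >> 32) + xh*yl + xl*yh;    // two IMULs + ADDs
//     return (int64_t)(((uint64_t)hi << 32) | (uint32_t)lo);
//   }
//
// The _lhi0/_rhi0/_hi0 variants below simply drop the IMUL terms whose
// operand is known to have a zero high word.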
7565 
7566 // Multiply Register Long where the left operand's high 32 bits are zero
7567 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7568   predicate(is_operand_hi32_zero(n->in(1)));
7569   match(Set dst (MulL dst src));
7570   effect(KILL cr, TEMP tmp);
7571   ins_cost(2*100+2*400);
7572 // Basic idea: lo(result) = lo(x_lo * y_lo)
7573 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7574   format %{ "MOV    $tmp,$src.hi\n\t"
7575             "IMUL   $tmp,EAX\n\t"
7576             "MUL    EDX:EAX,$src.lo\n\t"
7577             "ADD    EDX,$tmp" %}
7578   ins_encode %{
7579     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7580     __ imull($tmp$$Register, rax);
7581     __ mull($src$$Register);
7582     __ addl(rdx, $tmp$$Register);
7583   %}
7584   ins_pipe( pipe_slow );
7585 %}
7586 
7587 // Multiply Register Long where the right operand's high 32 bits are zero
7588 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7589   predicate(is_operand_hi32_zero(n->in(2)));
7590   match(Set dst (MulL dst src));
7591   effect(KILL cr, TEMP tmp);
7592   ins_cost(2*100+2*400);
7593 // Basic idea: lo(result) = lo(x_lo * y_lo)
7594 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7595   format %{ "MOV    $tmp,$src.lo\n\t"
7596             "IMUL   $tmp,EDX\n\t"
7597             "MUL    EDX:EAX,$src.lo\n\t"
7598             "ADD    EDX,$tmp" %}
7599   ins_encode %{
7600     __ movl($tmp$$Register, $src$$Register);
7601     __ imull($tmp$$Register, rdx);
7602     __ mull($src$$Register);
7603     __ addl(rdx, $tmp$$Register);
7604   %}
7605   ins_pipe( pipe_slow );
7606 %}
7607 
7608 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7609 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7610   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7611   match(Set dst (MulL dst src));
7612   effect(KILL cr);
7613   ins_cost(1*400);
7614 // Basic idea: lo(result) = lo(x_lo * y_lo)
7615 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7616   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7617   ins_encode %{
7618     __ mull($src$$Register);
7619   %}
7620   ins_pipe( pipe_slow );
7621 %}
7622 
7623 // Multiply Register Long by small constant
7624 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7625   match(Set dst (MulL dst src));
7626   effect(KILL cr, TEMP tmp);
7627   ins_cost(2*100+2*400);
7628   size(12);
7629 // Basic idea: lo(result) = lo(src * EAX)
7630 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7631   format %{ "IMUL   $tmp,EDX,$src\n\t"
7632             "MOV    EDX,$src\n\t"
7633             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7634             "ADD    EDX,$tmp" %}
7635   ins_encode( long_multiply_con( dst, src, tmp ) );
7636   ins_pipe( pipe_slow );
7637 %}
7638 
7639 // Integer DIV with Register
7640 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7641   match(Set rax (DivI rax div));
7642   effect(KILL rdx, KILL cr);
7643   size(26);
7644   ins_cost(30*100+10*100);
7645   format %{ "CMP    EAX,0x80000000\n\t"
7646             "JNE,s  normal\n\t"
7647             "XOR    EDX,EDX\n\t"
7648             "CMP    ECX,-1\n\t"
7649             "JE,s   done\n"
7650     "normal: CDQ\n\t"
7651             "IDIV   $div\n\t"
7652     "done:"        %}
7653   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7654   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7655   ins_pipe( ialu_reg_reg_alu0 );
7656 %}
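
// The guard around IDIV above handles the one case where the hardware
// would fault: dividing min_jint by -1 overflows and raises #DE, while
// Java requires the result to wrap back to min_jint.  A commented C++
// sketch of the required semantics (illustrative only; a zero divisor is
// dealt with separately):
//
//   #include <cstdint>
//   static int32_t java_idiv(int32_t x, int32_t y) {    // assumes y != 0
//     if (x == INT32_MIN && y == -1) return x;           // IDIV would raise #DE
//     return x / y;                                      // CDQ ; IDIV
//   }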
7657 
7658 // Divide Register Long
7659 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7660   match(Set dst (DivL src1 src2));
7661   effect( KILL cr, KILL cx, KILL bx );
7662   ins_cost(10000);
7663   format %{ "PUSH   $src1.hi\n\t"
7664             "PUSH   $src1.lo\n\t"
7665             "PUSH   $src2.hi\n\t"
7666             "PUSH   $src2.lo\n\t"
7667             "CALL   SharedRuntime::ldiv\n\t"
7668             "ADD    ESP,16" %}
7669   ins_encode( long_div(src1,src2) );
7670   ins_pipe( pipe_slow );
7671 %}
7672 
7673 // Integer DIVMOD with Register, both quotient and mod results
7674 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7675   match(DivModI rax div);
7676   effect(KILL cr);
7677   size(26);
7678   ins_cost(30*100+10*100);
7679   format %{ "CMP    EAX,0x80000000\n\t"
7680             "JNE,s  normal\n\t"
7681             "XOR    EDX,EDX\n\t"
7682             "CMP    ECX,-1\n\t"
7683             "JE,s   done\n"
7684     "normal: CDQ\n\t"
7685             "IDIV   $div\n\t"
7686     "done:"        %}
7687   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7688   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7689   ins_pipe( pipe_slow );
7690 %}
7691 
7692 // Integer MOD with Register
7693 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7694   match(Set rdx (ModI rax div));
7695   effect(KILL rax, KILL cr);
7696 
7697   size(26);
7698   ins_cost(300);
7699   format %{ "CDQ\n\t"
7700             "IDIV   $div" %}
7701   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7702   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7703   ins_pipe( ialu_reg_reg_alu0 );
7704 %}
7705 
7706 // Remainder Register Long
7707 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7708   match(Set dst (ModL src1 src2));
7709   effect( KILL cr, KILL cx, KILL bx );
7710   ins_cost(10000);
7711   format %{ "PUSH   $src1.hi\n\t"
7712             "PUSH   $src1.lo\n\t"
7713             "PUSH   $src2.hi\n\t"
7714             "PUSH   $src2.lo\n\t"
7715             "CALL   SharedRuntime::lrem\n\t"
7716             "ADD    ESP,16" %}
7717   ins_encode( long_mod(src1,src2) );
7718   ins_pipe( pipe_slow );
7719 %}
7720 
7721 // Divide Register Long (no special case since divisor != -1)
7722 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7723   match(Set dst (DivL dst imm));
7724   effect( TEMP tmp, TEMP tmp2, KILL cr );
7725   ins_cost(1000);
7726   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7727             "XOR    $tmp2,$tmp2\n\t"
7728             "CMP    $tmp,EDX\n\t"
7729             "JA,s   fast\n\t"
7730             "MOV    $tmp2,EAX\n\t"
7731             "MOV    EAX,EDX\n\t"
7732             "MOV    EDX,0\n\t"
7733             "JLE,s  pos\n\t"
7734             "LNEG   EAX : $tmp2\n\t"
7735             "DIV    $tmp # unsigned division\n\t"
7736             "XCHG   EAX,$tmp2\n\t"
7737             "DIV    $tmp\n\t"
7738             "LNEG   $tmp2 : EAX\n\t"
7739             "JMP,s  done\n"
7740     "pos:\n\t"
7741             "DIV    $tmp\n\t"
7742             "XCHG   EAX,$tmp2\n"
7743     "fast:\n\t"
7744             "DIV    $tmp\n"
7745     "done:\n\t"
7746             "MOV    EDX,$tmp2\n\t"
7747             "NEG    EDX:EAX # if $imm < 0" %}
7748   ins_encode %{
7749     int con = (int)$imm$$constant;
7750     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7751     int pcon = (con > 0) ? con : -con;
7752     Label Lfast, Lpos, Ldone;
7753 
7754     __ movl($tmp$$Register, pcon);
7755     __ xorl($tmp2$$Register,$tmp2$$Register);
7756     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7757     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7758 
7759     __ movl($tmp2$$Register, $dst$$Register); // save
7760     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7761     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7762     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7763 
7764     // Negative dividend.
7765     // convert value to positive to use unsigned division
7766     __ lneg($dst$$Register, $tmp2$$Register);
7767     __ divl($tmp$$Register);
7768     __ xchgl($dst$$Register, $tmp2$$Register);
7769     __ divl($tmp$$Register);
7770     // revert result back to negative
7771     __ lneg($tmp2$$Register, $dst$$Register);
7772     __ jmpb(Ldone);
7773 
7774     __ bind(Lpos);
7775     __ divl($tmp$$Register); // Use unsigned division
7776     __ xchgl($dst$$Register, $tmp2$$Register);
7777     // Fall through to the final divide; tmp2 has the 32-bit hi result
7778 
7779     __ bind(Lfast);
7780     // fast path: src is positive
7781     __ divl($tmp$$Register); // Use unsigned division
7782 
7783     __ bind(Ldone);
7784     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7785     if (con < 0) {
7786       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7787     }
7788   %}
7789   ins_pipe( pipe_slow );
7790 %}
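
// The unsigned core of the encoding above is the classic two-step long
// division: when the dividend's high word is not already smaller than the
// divisor, divide the high word first, then divide remainder:low.  A
// commented C++ sketch of that core (illustrative only; the LNEG/NEG sign
// fix-ups wrapped around it are omitted):
//
//   #include <cstdint>
//   static uint64_t udiv64_by_32(uint64_t n, uint32_t d) {
//     uint32_t nhi = (uint32_t)(n >> 32), nlo = (uint32_t)n;
//     uint32_t qhi = nhi / d;                                // first  DIV
//     uint64_t rest = ((uint64_t)(nhi % d) << 32) | nlo;
//     uint32_t qlo = (uint32_t)(rest / d);                   // second DIV, cannot overflow
//     return ((uint64_t)qhi << 32) | qlo;
//   }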
7791 
7792 // Remainder Register Long (remainder fits into 32 bits)
7793 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7794   match(Set dst (ModL dst imm));
7795   effect( TEMP tmp, TEMP tmp2, KILL cr );
7796   ins_cost(1000);
7797   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7798             "CMP    $tmp,EDX\n\t"
7799             "JA,s   fast\n\t"
7800             "MOV    $tmp2,EAX\n\t"
7801             "MOV    EAX,EDX\n\t"
7802             "MOV    EDX,0\n\t"
7803             "JLE,s  pos\n\t"
7804             "LNEG   EAX : $tmp2\n\t"
7805             "DIV    $tmp # unsigned division\n\t"
7806             "MOV    EAX,$tmp2\n\t"
7807             "DIV    $tmp\n\t"
7808             "NEG    EDX\n\t"
7809             "JMP,s  done\n"
7810     "pos:\n\t"
7811             "DIV    $tmp\n\t"
7812             "MOV    EAX,$tmp2\n"
7813     "fast:\n\t"
7814             "DIV    $tmp\n"
7815     "done:\n\t"
7816             "MOV    EAX,EDX\n\t"
7817             "SAR    EDX,31\n\t" %}
7818   ins_encode %{
7819     int con = (int)$imm$$constant;
7820     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7821     int pcon = (con > 0) ? con : -con;
7822     Label  Lfast, Lpos, Ldone;
7823 
7824     __ movl($tmp$$Register, pcon);
7825     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7826     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
7827 
7828     __ movl($tmp2$$Register, $dst$$Register); // save
7829     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7830     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7831     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7832 
7833     // Negative dividend.
7834     // convert value to positive to use unsigned division
7835     __ lneg($dst$$Register, $tmp2$$Register);
7836     __ divl($tmp$$Register);
7837     __ movl($dst$$Register, $tmp2$$Register);
7838     __ divl($tmp$$Register);
7839     // revert remainder back to negative
7840     __ negl(HIGH_FROM_LOW($dst$$Register));
7841     __ jmpb(Ldone);
7842 
7843     __ bind(Lpos);
7844     __ divl($tmp$$Register);
7845     __ movl($dst$$Register, $tmp2$$Register);
7846 
7847     __ bind(Lfast);
7848     // fast path: src is positive
7849     __ divl($tmp$$Register);
7850 
7851     __ bind(Ldone);
7852     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7853     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7854 
7855   %}
7856   ins_pipe( pipe_slow );
7857 %}
7858 
7859 // Integer Shift Instructions
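     // Shift counts of one use the short D1 /r encodings, 8-bit immediates use
     // C1 /r ib, and variable counts must sit in CL (hence the eCXRegI operand),
     // since x86 only shifts by an immediate or by CL.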
7860 // Shift Left by one
7861 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7862   match(Set dst (LShiftI dst shift));
7863   effect(KILL cr);
7864 
7865   size(2);
7866   format %{ "SHL    $dst,$shift" %}
7867   opcode(0xD1, 0x4);  /* D1 /4 */
7868   ins_encode( OpcP, RegOpc( dst ) );
7869   ins_pipe( ialu_reg );
7870 %}
7871 
7872 // Shift Left by 8-bit immediate
7873 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7874   match(Set dst (LShiftI dst shift));
7875   effect(KILL cr);
7876 
7877   size(3);
7878   format %{ "SHL    $dst,$shift" %}
7879   opcode(0xC1, 0x4);  /* C1 /4 ib */
7880   ins_encode( RegOpcImm( dst, shift) );
7881   ins_pipe( ialu_reg );
7882 %}
7883 
7884 // Shift Left by variable
7885 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7886   match(Set dst (LShiftI dst shift));
7887   effect(KILL cr);
7888 
7889   size(2);
7890   format %{ "SHL    $dst,$shift" %}
7891   opcode(0xD3, 0x4);  /* D3 /4 */
7892   ins_encode( OpcP, RegOpc( dst ) );
7893   ins_pipe( ialu_reg_reg );
7894 %}
7895 
7896 // Arithmetic shift right by one
7897 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7898   match(Set dst (RShiftI dst shift));
7899   effect(KILL cr);
7900 
7901   size(2);
7902   format %{ "SAR    $dst,$shift" %}
7903   opcode(0xD1, 0x7);  /* D1 /7 */
7904   ins_encode( OpcP, RegOpc( dst ) );
7905   ins_pipe( ialu_reg );
7906 %}
7907 
7908 // Arithmetic shift right by one, memory operand
7909 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7910   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7911   effect(KILL cr);
7912   format %{ "SAR    $dst,$shift" %}
7913   opcode(0xD1, 0x7);  /* D1 /7 */
7914   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7915   ins_pipe( ialu_mem_imm );
7916 %}
7917 
7918 // Arithmetic Shift Right by 8-bit immediate
7919 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7920   match(Set dst (RShiftI dst shift));
7921   effect(KILL cr);
7922 
7923   size(3);
7924   format %{ "SAR    $dst,$shift" %}
7925   opcode(0xC1, 0x7);  /* C1 /7 ib */
7926   ins_encode( RegOpcImm( dst, shift ) );
7927   ins_pipe( ialu_reg );
7928 %}
7929 
7930 // Arithmetic Shift Right by 8-bit immediate, memory operand
7931 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7932   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7933   effect(KILL cr);
7934 
7935   format %{ "SAR    $dst,$shift" %}
7936   opcode(0xC1, 0x7);  /* C1 /7 ib */
7937   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7938   ins_pipe( ialu_mem_imm );
7939 %}
7940 
7941 // Arithmetic Shift Right by variable
7942 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7943   match(Set dst (RShiftI dst shift));
7944   effect(KILL cr);
7945 
7946   size(2);
7947   format %{ "SAR    $dst,$shift" %}
7948   opcode(0xD3, 0x7);  /* D3 /7 */
7949   ins_encode( OpcP, RegOpc( dst ) );
7950   ins_pipe( ialu_reg_reg );
7951 %}
7952 
7953 // Logical shift right by one
7954 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7955   match(Set dst (URShiftI dst shift));
7956   effect(KILL cr);
7957 
7958   size(2);
7959   format %{ "SHR    $dst,$shift" %}
7960   opcode(0xD1, 0x5);  /* D1 /5 */
7961   ins_encode( OpcP, RegOpc( dst ) );
7962   ins_pipe( ialu_reg );
7963 %}
7964 
7965 // Logical Shift Right by 8-bit immediate
7966 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7967   match(Set dst (URShiftI dst shift));
7968   effect(KILL cr);
7969 
7970   size(3);
7971   format %{ "SHR    $dst,$shift" %}
7972   opcode(0xC1, 0x5);  /* C1 /5 ib */
7973   ins_encode( RegOpcImm( dst, shift) );
7974   ins_pipe( ialu_reg );
7975 %}
7976 
7977 
7978 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7979 // This idiom is used by the compiler for the i2b bytecode.
7980 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7981   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7982 
7983   size(3);
7984   format %{ "MOVSX  $dst,$src :8" %}
7985   ins_encode %{
7986     __ movsbl($dst$$Register, $src$$Register);
7987   %}
7988   ins_pipe(ialu_reg_reg);
7989 %}
7990 
7991 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7992 // This idiom is used by the compiler for the i2s bytecode.
7993 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7994   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7995 
7996   size(3);
7997   format %{ "MOVSX  $dst,$src :16" %}
7998   ins_encode %{
7999     __ movswl($dst$$Register, $src$$Register);
8000   %}
8001   ins_pipe(ialu_reg_reg);
8002 %}
8003 
8004 
8005 // Logical Shift Right by variable
8006 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8007   match(Set dst (URShiftI dst shift));
8008   effect(KILL cr);
8009 
8010   size(2);
8011   format %{ "SHR    $dst,$shift" %}
8012   opcode(0xD3, 0x5);  /* D3 /5 */
8013   ins_encode( OpcP, RegOpc( dst ) );
8014   ins_pipe( ialu_reg_reg );
8015 %}
8016 
8017 
8018 //----------Logical Instructions-----------------------------------------------
8019 //----------Integer Logical Instructions---------------------------------------
8020 // And Instructions
8021 // And Register with Register
8022 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8023   match(Set dst (AndI dst src));
8024   effect(KILL cr);
8025 
8026   size(2);
8027   format %{ "AND    $dst,$src" %}
8028   opcode(0x23);
8029   ins_encode( OpcP, RegReg( dst, src) );
8030   ins_pipe( ialu_reg_reg );
8031 %}
8032 
8033 // And Register with Immediate
8034 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8035   match(Set dst (AndI dst src));
8036   effect(KILL cr);
8037 
8038   format %{ "AND    $dst,$src" %}
8039   opcode(0x81,0x04);  /* Opcode 81 /4 */
8040   // ins_encode( RegImm( dst, src) );
8041   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8042   ins_pipe( ialu_reg );
8043 %}
8044 
8045 // And Register with Memory
8046 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8047   match(Set dst (AndI dst (LoadI src)));
8048   effect(KILL cr);
8049 
8050   ins_cost(125);
8051   format %{ "AND    $dst,$src" %}
8052   opcode(0x23);
8053   ins_encode( OpcP, RegMem( dst, src) );
8054   ins_pipe( ialu_reg_mem );
8055 %}
8056 
8057 // And Memory with Register
8058 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8059   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8060   effect(KILL cr);
8061 
8062   ins_cost(150);
8063   format %{ "AND    $dst,$src" %}
8064   opcode(0x21);  /* Opcode 21 /r */
8065   ins_encode( OpcP, RegMem( src, dst ) );
8066   ins_pipe( ialu_mem_reg );
8067 %}
8068 
8069 // And Memory with Immediate
8070 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8071   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8072   effect(KILL cr);
8073 
8074   ins_cost(125);
8075   format %{ "AND    $dst,$src" %}
8076   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8077   // ins_encode( MemImm( dst, src) );
8078   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8079   ins_pipe( ialu_mem_imm );
8080 %}
8081 
8082 // BMI1 instructions
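     // Each rule below recognizes a bit-manipulation idiom and rewrites it to a
     // single BMI1 instruction when UseBMI1Instructions is enabled:
     //   ~x & y -> ANDN,  x & -x -> BLSI,  x ^ (x-1) -> BLSMSK,  x & (x-1) -> BLSR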
8083 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8084   match(Set dst (AndI (XorI src1 minus_1) src2));
8085   predicate(UseBMI1Instructions);
8086   effect(KILL cr);
8087 
8088   format %{ "ANDNL  $dst, $src1, $src2" %}
8089 
8090   ins_encode %{
8091     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8092   %}
8093   ins_pipe(ialu_reg);
8094 %}
8095 
8096 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8097   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8098   predicate(UseBMI1Instructions);
8099   effect(KILL cr);
8100 
8101   ins_cost(125);
8102   format %{ "ANDNL  $dst, $src1, $src2" %}
8103 
8104   ins_encode %{
8105     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8106   %}
8107   ins_pipe(ialu_reg_mem);
8108 %}
8109 
8110 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8111   match(Set dst (AndI (SubI imm_zero src) src));
8112   predicate(UseBMI1Instructions);
8113   effect(KILL cr);
8114 
8115   format %{ "BLSIL  $dst, $src" %}
8116 
8117   ins_encode %{
8118     __ blsil($dst$$Register, $src$$Register);
8119   %}
8120   ins_pipe(ialu_reg);
8121 %}
8122 
8123 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8124   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8125   predicate(UseBMI1Instructions);
8126   effect(KILL cr);
8127 
8128   ins_cost(125);
8129   format %{ "BLSIL  $dst, $src" %}
8130 
8131   ins_encode %{
8132     __ blsil($dst$$Register, $src$$Address);
8133   %}
8134   ins_pipe(ialu_reg_mem);
8135 %}
8136 
8137 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8138 %{
8139   match(Set dst (XorI (AddI src minus_1) src));
8140   predicate(UseBMI1Instructions);
8141   effect(KILL cr);
8142 
8143   format %{ "BLSMSKL $dst, $src" %}
8144 
8145   ins_encode %{
8146     __ blsmskl($dst$$Register, $src$$Register);
8147   %}
8148 
8149   ins_pipe(ialu_reg);
8150 %}
8151 
8152 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8153 %{
8154   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8155   predicate(UseBMI1Instructions);
8156   effect(KILL cr);
8157 
8158   ins_cost(125);
8159   format %{ "BLSMSKL $dst, $src" %}
8160 
8161   ins_encode %{
8162     __ blsmskl($dst$$Register, $src$$Address);
8163   %}
8164 
8165   ins_pipe(ialu_reg_mem);
8166 %}
8167 
8168 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8169 %{
8170   match(Set dst (AndI (AddI src minus_1) src) );
8171   predicate(UseBMI1Instructions);
8172   effect(KILL cr);
8173 
8174   format %{ "BLSRL  $dst, $src" %}
8175 
8176   ins_encode %{
8177     __ blsrl($dst$$Register, $src$$Register);
8178   %}
8179 
8180   ins_pipe(ialu_reg);
8181 %}
8182 
8183 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8184 %{
8185   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8186   predicate(UseBMI1Instructions);
8187   effect(KILL cr);
8188 
8189   ins_cost(125);
8190   format %{ "BLSRL  $dst, $src" %}
8191 
8192   ins_encode %{
8193     __ blsrl($dst$$Register, $src$$Address);
8194   %}
8195 
8196   ins_pipe(ialu_reg_mem);
8197 %}
8198 
8199 // Or Instructions
8200 // Or Register with Register
8201 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8202   match(Set dst (OrI dst src));
8203   effect(KILL cr);
8204 
8205   size(2);
8206   format %{ "OR     $dst,$src" %}
8207   opcode(0x0B);
8208   ins_encode( OpcP, RegReg( dst, src) );
8209   ins_pipe( ialu_reg_reg );
8210 %}
8211 
8212 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8213   match(Set dst (OrI dst (CastP2X src)));
8214   effect(KILL cr);
8215 
8216   size(2);
8217   format %{ "OR     $dst,$src" %}
8218   opcode(0x0B);
8219   ins_encode( OpcP, RegReg( dst, src) );
8220   ins_pipe( ialu_reg_reg );
8221 %}
8222 
8223 
8224 // Or Register with Immediate
8225 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8226   match(Set dst (OrI dst src));
8227   effect(KILL cr);
8228 
8229   format %{ "OR     $dst,$src" %}
8230   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8231   // ins_encode( RegImm( dst, src) );
8232   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8233   ins_pipe( ialu_reg );
8234 %}
8235 
8236 // Or Register with Memory
8237 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8238   match(Set dst (OrI dst (LoadI src)));
8239   effect(KILL cr);
8240 
8241   ins_cost(125);
8242   format %{ "OR     $dst,$src" %}
8243   opcode(0x0B);
8244   ins_encode( OpcP, RegMem( dst, src) );
8245   ins_pipe( ialu_reg_mem );
8246 %}
8247 
8248 // Or Memory with Register
8249 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8250   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8251   effect(KILL cr);
8252 
8253   ins_cost(150);
8254   format %{ "OR     $dst,$src" %}
8255   opcode(0x09);  /* Opcode 09 /r */
8256   ins_encode( OpcP, RegMem( src, dst ) );
8257   ins_pipe( ialu_mem_reg );
8258 %}
8259 
8260 // Or Memory with Immediate
8261 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8262   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8263   effect(KILL cr);
8264 
8265   ins_cost(125);
8266   format %{ "OR     $dst,$src" %}
8267   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8268   // ins_encode( MemImm( dst, src) );
8269   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8270   ins_pipe( ialu_mem_imm );
8271 %}
8272 
8273 // ROL/ROR
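     // The expand rules below let the matcher turn the shift-and-or rotate
     // idioms, e.g. (x << s) | (x >>> (32-s)), into single ROL/ROR instructions.
     // For the 8-bit immediate forms the predicate requires the two shift
     // counts to sum to a multiple of 32.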
8274 // ROL expand
8275 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8276   effect(USE_DEF dst, USE shift, KILL cr);
8277 
8278   format %{ "ROL    $dst, $shift" %}
8279   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8280   ins_encode( OpcP, RegOpc( dst ));
8281   ins_pipe( ialu_reg );
8282 %}
8283 
8284 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8285   effect(USE_DEF dst, USE shift, KILL cr);
8286 
8287   format %{ "ROL    $dst, $shift" %}
8288   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8289   ins_encode( RegOpcImm(dst, shift) );
8290   ins_pipe(ialu_reg);
8291 %}
8292 
8293 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8294   effect(USE_DEF dst, USE shift, KILL cr);
8295 
8296   format %{ "ROL    $dst, $shift" %}
8297   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8298   ins_encode(OpcP, RegOpc(dst));
8299   ins_pipe( ialu_reg_reg );
8300 %}
8301 // end of ROL expand
8302 
8303 // ROL 32bit by one once
8304 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8305   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8306 
8307   expand %{
8308     rolI_eReg_imm1(dst, lshift, cr);
8309   %}
8310 %}
8311 
8312 // ROL 32bit var by imm8 once
8313 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8314   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8315   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8316 
8317   expand %{
8318     rolI_eReg_imm8(dst, lshift, cr);
8319   %}
8320 %}
8321 
8322 // ROL 32bit var by var once
8323 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8324   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8325 
8326   expand %{
8327     rolI_eReg_CL(dst, shift, cr);
8328   %}
8329 %}
8330 
8331 // ROL 32bit var by var once
8332 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8333   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8334 
8335   expand %{
8336     rolI_eReg_CL(dst, shift, cr);
8337   %}
8338 %}
8339 
8340 // ROR expand
8341 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8342   effect(USE_DEF dst, USE shift, KILL cr);
8343 
8344   format %{ "ROR    $dst, $shift" %}
8345   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8346   ins_encode( OpcP, RegOpc( dst ) );
8347   ins_pipe( ialu_reg );
8348 %}
8349 
8350 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8351   effect (USE_DEF dst, USE shift, KILL cr);
8352 
8353   format %{ "ROR    $dst, $shift" %}
8354   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8355   ins_encode( RegOpcImm(dst, shift) );
8356   ins_pipe( ialu_reg );
8357 %}
8358 
8359 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8360   effect(USE_DEF dst, USE shift, KILL cr);
8361 
8362   format %{ "ROR    $dst, $shift" %}
8363   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8364   ins_encode(OpcP, RegOpc(dst));
8365   ins_pipe( ialu_reg_reg );
8366 %}
8367 // end of ROR expand
8368 
8369 // ROR 32bit by one once
8370 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8371   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8372 
8373   expand %{
8374     rorI_eReg_imm1(dst, rshift, cr);
8375   %}
8376 %}
8377 
8378 // ROR 32bit by immI8 once
8379 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8380   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8381   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8382 
8383   expand %{
8384     rorI_eReg_imm8(dst, rshift, cr);
8385   %}
8386 %}
8387 
8388 // ROR 32bit var by var once
8389 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8390   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8391 
8392   expand %{
8393     rorI_eReg_CL(dst, shift, cr);
8394   %}
8395 %}
8396 
8397 // ROR 32bit var by var once
8398 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8399   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8400 
8401   expand %{
8402     rorI_eReg_CL(dst, shift, cr);
8403   %}
8404 %}
8405 
8406 // Xor Instructions
8407 // Xor Register with Register
8408 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8409   match(Set dst (XorI dst src));
8410   effect(KILL cr);
8411 
8412   size(2);
8413   format %{ "XOR    $dst,$src" %}
8414   opcode(0x33);
8415   ins_encode( OpcP, RegReg( dst, src) );
8416   ins_pipe( ialu_reg_reg );
8417 %}
8418 
8419 // Xor Register with Immediate -1
8420 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8421   match(Set dst (XorI dst imm));
8422 
8423   size(2);
8424   format %{ "NOT    $dst" %}
8425   ins_encode %{
8426      __ notl($dst$$Register);
8427   %}
8428   ins_pipe( ialu_reg );
8429 %}
8430 
8431 // Xor Register with Immediate
8432 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8433   match(Set dst (XorI dst src));
8434   effect(KILL cr);
8435 
8436   format %{ "XOR    $dst,$src" %}
8437   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8438   // ins_encode( RegImm( dst, src) );
8439   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8440   ins_pipe( ialu_reg );
8441 %}
8442 
8443 // Xor Register with Memory
8444 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8445   match(Set dst (XorI dst (LoadI src)));
8446   effect(KILL cr);
8447 
8448   ins_cost(125);
8449   format %{ "XOR    $dst,$src" %}
8450   opcode(0x33);
8451   ins_encode( OpcP, RegMem(dst, src) );
8452   ins_pipe( ialu_reg_mem );
8453 %}
8454 
8455 // Xor Memory with Register
8456 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8457   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8458   effect(KILL cr);
8459 
8460   ins_cost(150);
8461   format %{ "XOR    $dst,$src" %}
8462   opcode(0x31);  /* Opcode 31 /r */
8463   ins_encode( OpcP, RegMem( src, dst ) );
8464   ins_pipe( ialu_mem_reg );
8465 %}
8466 
8467 // Xor Memory with Immediate
8468 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8469   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8470   effect(KILL cr);
8471 
8472   ins_cost(125);
8473   format %{ "XOR    $dst,$src" %}
8474   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8475   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8476   ins_pipe( ialu_mem_imm );
8477 %}
8478 
8479 //----------Convert Int to Boolean---------------------------------------------
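     // Conv2B maps zero to 0 and any non-zero value to 1.  After copying src
     // into dst, NEG sets the carry flag exactly when the value is non-zero,
     // and ADC dst,src then computes -src + src + CF = CF.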
8480 
8481 instruct movI_nocopy(rRegI dst, rRegI src) %{
8482   effect( DEF dst, USE src );
8483   format %{ "MOV    $dst,$src" %}
8484   ins_encode( enc_Copy( dst, src) );
8485   ins_pipe( ialu_reg_reg );
8486 %}
8487 
8488 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8489   effect( USE_DEF dst, USE src, KILL cr );
8490 
8491   size(4);
8492   format %{ "NEG    $dst\n\t"
8493             "ADC    $dst,$src" %}
8494   ins_encode( neg_reg(dst),
8495               OpcRegReg(0x13,dst,src) );
8496   ins_pipe( ialu_reg_reg_long );
8497 %}
8498 
8499 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8500   match(Set dst (Conv2B src));
8501 
8502   expand %{
8503     movI_nocopy(dst,src);
8504     ci2b(dst,src,cr);
8505   %}
8506 %}
8507 
8508 instruct movP_nocopy(rRegI dst, eRegP src) %{
8509   effect( DEF dst, USE src );
8510   format %{ "MOV    $dst,$src" %}
8511   ins_encode( enc_Copy( dst, src) );
8512   ins_pipe( ialu_reg_reg );
8513 %}
8514 
8515 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8516   effect( USE_DEF dst, USE src, KILL cr );
8517   format %{ "NEG    $dst\n\t"
8518             "ADC    $dst,$src" %}
8519   ins_encode( neg_reg(dst),
8520               OpcRegReg(0x13,dst,src) );
8521   ins_pipe( ialu_reg_reg_long );
8522 %}
8523 
8524 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8525   match(Set dst (Conv2B src));
8526 
8527   expand %{
8528     movP_nocopy(dst,src);
8529     cp2b(dst,src,cr);
8530   %}
8531 %}
8532 
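     // CmpLTMask produces an all-ones mask (-1) when p < q (signed compare)
     // and zero otherwise; SETlt writes 0 or 1 into the low byte and NEG
     // turns 1 into -1.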
8533 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8534   match(Set dst (CmpLTMask p q));
8535   effect(KILL cr);
8536   ins_cost(400);
8537 
8538   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8539   format %{ "XOR    $dst,$dst\n\t"
8540             "CMP    $p,$q\n\t"
8541             "SETlt  $dst\n\t"
8542             "NEG    $dst" %}
8543   ins_encode %{
8544     Register Rp = $p$$Register;
8545     Register Rq = $q$$Register;
8546     Register Rd = $dst$$Register;
8547     Label done;
8548     __ xorl(Rd, Rd);
8549     __ cmpl(Rp, Rq);
8550     __ setb(Assembler::less, Rd);
8551     __ negl(Rd);
8552   %}
8553 
8554   ins_pipe(pipe_slow);
8555 %}
8556 
8557 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8558   match(Set dst (CmpLTMask dst zero));
8559   effect(DEF dst, KILL cr);
8560   ins_cost(100);
8561 
8562   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8563   ins_encode %{
8564   __ sarl($dst$$Register, 31);
8565   %}
8566   ins_pipe(ialu_reg);
8567 %}
8568 
8569 /* better to save a register than avoid a branch */
8570 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8571   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8572   effect(KILL cr);
8573   ins_cost(400);
8574   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8575             "JGE    done\n\t"
8576             "ADD    $p,$y\n"
8577             "done:  " %}
8578   ins_encode %{
8579     Register Rp = $p$$Register;
8580     Register Rq = $q$$Register;
8581     Register Ry = $y$$Register;
8582     Label done;
8583     __ subl(Rp, Rq);
8584     __ jccb(Assembler::greaterEqual, done);
8585     __ addl(Rp, Ry);
8586     __ bind(done);
8587   %}
8588 
8589   ins_pipe(pipe_cmplt);
8590 %}
8591 
8592 /* better to save a register than avoid a branch */
8593 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8594   match(Set y (AndI (CmpLTMask p q) y));
8595   effect(KILL cr);
8596 
8597   ins_cost(300);
8598 
8599   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8600             "JLT      done\n\t"
8601             "XORL     $y, $y\n"
8602             "done:  " %}
8603   ins_encode %{
8604     Register Rp = $p$$Register;
8605     Register Rq = $q$$Register;
8606     Register Ry = $y$$Register;
8607     Label done;
8608     __ cmpl(Rp, Rq);
8609     __ jccb(Assembler::less, done);
8610     __ xorl(Ry, Ry);
8611     __ bind(done);
8612   %}
8613 
8614   ins_pipe(pipe_cmplt);
8615 %}
8616 
8617 /* If I enable this, I encourage spilling in the inner loop of compress.
8618 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8619   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8620 */
8621 //----------Overflow Math Instructions-----------------------------------------
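     // These rules produce only a flags result: the ADD/CMP/IMUL below is
     // emitted purely for its effect on the overflow flag, which a following
     // branch then tests (used, for example, by the Math.*Exact intrinsics).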
8622 
8623 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8624 %{
8625   match(Set cr (OverflowAddI op1 op2));
8626   effect(DEF cr, USE_KILL op1, USE op2);
8627 
8628   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8629 
8630   ins_encode %{
8631     __ addl($op1$$Register, $op2$$Register);
8632   %}
8633   ins_pipe(ialu_reg_reg);
8634 %}
8635 
8636 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8637 %{
8638   match(Set cr (OverflowAddI op1 op2));
8639   effect(DEF cr, USE_KILL op1, USE op2);
8640 
8641   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8642 
8643   ins_encode %{
8644     __ addl($op1$$Register, $op2$$constant);
8645   %}
8646   ins_pipe(ialu_reg_reg);
8647 %}
8648 
8649 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8650 %{
8651   match(Set cr (OverflowSubI op1 op2));
8652 
8653   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8654   ins_encode %{
8655     __ cmpl($op1$$Register, $op2$$Register);
8656   %}
8657   ins_pipe(ialu_reg_reg);
8658 %}
8659 
8660 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8661 %{
8662   match(Set cr (OverflowSubI op1 op2));
8663 
8664   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8665   ins_encode %{
8666     __ cmpl($op1$$Register, $op2$$constant);
8667   %}
8668   ins_pipe(ialu_reg_reg);
8669 %}
8670 
8671 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8672 %{
8673   match(Set cr (OverflowSubI zero op2));
8674   effect(DEF cr, USE_KILL op2);
8675 
8676   format %{ "NEG    $op2\t# overflow check int" %}
8677   ins_encode %{
8678     __ negl($op2$$Register);
8679   %}
8680   ins_pipe(ialu_reg_reg);
8681 %}
8682 
8683 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8684 %{
8685   match(Set cr (OverflowMulI op1 op2));
8686   effect(DEF cr, USE_KILL op1, USE op2);
8687 
8688   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8689   ins_encode %{
8690     __ imull($op1$$Register, $op2$$Register);
8691   %}
8692   ins_pipe(ialu_reg_reg_alu0);
8693 %}
8694 
8695 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8696 %{
8697   match(Set cr (OverflowMulI op1 op2));
8698   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8699 
8700   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8701   ins_encode %{
8702     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8703   %}
8704   ins_pipe(ialu_reg_reg_alu0);
8705 %}
8706 
8707 //----------Long Instructions------------------------------------------------
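     // A long lives in a register pair: the named register holds the low word
     // and HIGH_FROM_LOW() its high word.  Each long ALU rule therefore expands
     // into a lo/hi pair of 32-bit instructions, chaining the carry or borrow
     // where needed (ADD/ADC, SUB/SBB).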
8708 // Add Long Register with Register
8709 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8710   match(Set dst (AddL dst src));
8711   effect(KILL cr);
8712   ins_cost(200);
8713   format %{ "ADD    $dst.lo,$src.lo\n\t"
8714             "ADC    $dst.hi,$src.hi" %}
8715   opcode(0x03, 0x13);
8716   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8717   ins_pipe( ialu_reg_reg_long );
8718 %}
8719 
8720 // Add Long Register with Immediate
8721 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8722   match(Set dst (AddL dst src));
8723   effect(KILL cr);
8724   format %{ "ADD    $dst.lo,$src.lo\n\t"
8725             "ADC    $dst.hi,$src.hi" %}
8726   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8727   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8728   ins_pipe( ialu_reg_long );
8729 %}
8730 
8731 // Add Long Register with Memory
8732 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8733   match(Set dst (AddL dst (LoadL mem)));
8734   effect(KILL cr);
8735   ins_cost(125);
8736   format %{ "ADD    $dst.lo,$mem\n\t"
8737             "ADC    $dst.hi,$mem+4" %}
8738   opcode(0x03, 0x13);
8739   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8740   ins_pipe( ialu_reg_long_mem );
8741 %}
8742 
8743 // Subtract Long Register with Register.
8744 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8745   match(Set dst (SubL dst src));
8746   effect(KILL cr);
8747   ins_cost(200);
8748   format %{ "SUB    $dst.lo,$src.lo\n\t"
8749             "SBB    $dst.hi,$src.hi" %}
8750   opcode(0x2B, 0x1B);
8751   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8752   ins_pipe( ialu_reg_reg_long );
8753 %}
8754 
8755 // Subtract Long Register with Immediate
8756 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8757   match(Set dst (SubL dst src));
8758   effect(KILL cr);
8759   format %{ "SUB    $dst.lo,$src.lo\n\t"
8760             "SBB    $dst.hi,$src.hi" %}
8761   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8762   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8763   ins_pipe( ialu_reg_long );
8764 %}
8765 
8766 // Subtract Long Register with Memory
8767 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8768   match(Set dst (SubL dst (LoadL mem)));
8769   effect(KILL cr);
8770   ins_cost(125);
8771   format %{ "SUB    $dst.lo,$mem\n\t"
8772             "SBB    $dst.hi,$mem+4" %}
8773   opcode(0x2B, 0x1B);
8774   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8775   ins_pipe( ialu_reg_long_mem );
8776 %}
8777 
8778 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8779   match(Set dst (SubL zero dst));
8780   effect(KILL cr);
8781   ins_cost(300);
8782   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8783   ins_encode( neg_long(dst) );
8784   ins_pipe( ialu_reg_reg_long );
8785 %}
8786 
8787 // And Long Register with Register
8788 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8789   match(Set dst (AndL dst src));
8790   effect(KILL cr);
8791   format %{ "AND    $dst.lo,$src.lo\n\t"
8792             "AND    $dst.hi,$src.hi" %}
8793   opcode(0x23,0x23);
8794   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8795   ins_pipe( ialu_reg_reg_long );
8796 %}
8797 
8798 // And Long Register with Immediate
8799 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8800   match(Set dst (AndL dst src));
8801   effect(KILL cr);
8802   format %{ "AND    $dst.lo,$src.lo\n\t"
8803             "AND    $dst.hi,$src.hi" %}
8804   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8805   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8806   ins_pipe( ialu_reg_long );
8807 %}
8808 
8809 // And Long Register with Memory
8810 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8811   match(Set dst (AndL dst (LoadL mem)));
8812   effect(KILL cr);
8813   ins_cost(125);
8814   format %{ "AND    $dst.lo,$mem\n\t"
8815             "AND    $dst.hi,$mem+4" %}
8816   opcode(0x23, 0x23);
8817   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8818   ins_pipe( ialu_reg_long_mem );
8819 %}
8820 
8821 // BMI1 instructions
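     // ANDN is simply applied to both halves.  For BLSI/BLSMSK/BLSR the 32-bit
     // instruction is applied to the low word first; the flag it sets (ZF for
     // BLSI, CF for BLSMSK/BLSR) says whether the lowest set bit was already
     // found there, and only if it was not is the high word processed too.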
8822 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8823   match(Set dst (AndL (XorL src1 minus_1) src2));
8824   predicate(UseBMI1Instructions);
8825   effect(KILL cr, TEMP dst);
8826 
8827   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8828             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8829          %}
8830 
8831   ins_encode %{
8832     Register Rdst = $dst$$Register;
8833     Register Rsrc1 = $src1$$Register;
8834     Register Rsrc2 = $src2$$Register;
8835     __ andnl(Rdst, Rsrc1, Rsrc2);
8836     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8837   %}
8838   ins_pipe(ialu_reg_reg_long);
8839 %}
8840 
8841 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8842   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8843   predicate(UseBMI1Instructions);
8844   effect(KILL cr, TEMP dst);
8845 
8846   ins_cost(125);
8847   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8848             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8849          %}
8850 
8851   ins_encode %{
8852     Register Rdst = $dst$$Register;
8853     Register Rsrc1 = $src1$$Register;
8854     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8855 
8856     __ andnl(Rdst, Rsrc1, $src2$$Address);
8857     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8858   %}
8859   ins_pipe(ialu_reg_mem);
8860 %}
8861 
8862 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8863   match(Set dst (AndL (SubL imm_zero src) src));
8864   predicate(UseBMI1Instructions);
8865   effect(KILL cr, TEMP dst);
8866 
8867   format %{ "MOVL   $dst.hi, 0\n\t"
8868             "BLSIL  $dst.lo, $src.lo\n\t"
8869             "JNZ    done\n\t"
8870             "BLSIL  $dst.hi, $src.hi\n"
8871             "done:"
8872          %}
8873 
8874   ins_encode %{
8875     Label done;
8876     Register Rdst = $dst$$Register;
8877     Register Rsrc = $src$$Register;
8878     __ movl(HIGH_FROM_LOW(Rdst), 0);
8879     __ blsil(Rdst, Rsrc);
8880     __ jccb(Assembler::notZero, done);
8881     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8882     __ bind(done);
8883   %}
8884   ins_pipe(ialu_reg);
8885 %}
8886 
8887 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8888   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8889   predicate(UseBMI1Instructions);
8890   effect(KILL cr, TEMP dst);
8891 
8892   ins_cost(125);
8893   format %{ "MOVL   $dst.hi, 0\n\t"
8894             "BLSIL  $dst.lo, $src\n\t"
8895             "JNZ    done\n\t"
8896             "BLSIL  $dst.hi, $src+4\n"
8897             "done:"
8898          %}
8899 
8900   ins_encode %{
8901     Label done;
8902     Register Rdst = $dst$$Register;
8903     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8904 
8905     __ movl(HIGH_FROM_LOW(Rdst), 0);
8906     __ blsil(Rdst, $src$$Address);
8907     __ jccb(Assembler::notZero, done);
8908     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8909     __ bind(done);
8910   %}
8911   ins_pipe(ialu_reg_mem);
8912 %}
8913 
8914 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8915 %{
8916   match(Set dst (XorL (AddL src minus_1) src));
8917   predicate(UseBMI1Instructions);
8918   effect(KILL cr, TEMP dst);
8919 
8920   format %{ "MOVL    $dst.hi, 0\n\t"
8921             "BLSMSKL $dst.lo, $src.lo\n\t"
8922             "JNC     done\n\t"
8923             "BLSMSKL $dst.hi, $src.hi\n"
8924             "done:"
8925          %}
8926 
8927   ins_encode %{
8928     Label done;
8929     Register Rdst = $dst$$Register;
8930     Register Rsrc = $src$$Register;
8931     __ movl(HIGH_FROM_LOW(Rdst), 0);
8932     __ blsmskl(Rdst, Rsrc);
8933     __ jccb(Assembler::carryClear, done);
8934     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8935     __ bind(done);
8936   %}
8937 
8938   ins_pipe(ialu_reg);
8939 %}
8940 
8941 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8942 %{
8943   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8944   predicate(UseBMI1Instructions);
8945   effect(KILL cr, TEMP dst);
8946 
8947   ins_cost(125);
8948   format %{ "MOVL    $dst.hi, 0\n\t"
8949             "BLSMSKL $dst.lo, $src\n\t"
8950             "JNC     done\n\t"
8951             "BLSMSKL $dst.hi, $src+4\n"
8952             "done:"
8953          %}
8954 
8955   ins_encode %{
8956     Label done;
8957     Register Rdst = $dst$$Register;
8958     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8959 
8960     __ movl(HIGH_FROM_LOW(Rdst), 0);
8961     __ blsmskl(Rdst, $src$$Address);
8962     __ jccb(Assembler::carryClear, done);
8963     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8964     __ bind(done);
8965   %}
8966 
8967   ins_pipe(ialu_reg_mem);
8968 %}
8969 
8970 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8971 %{
8972   match(Set dst (AndL (AddL src minus_1) src) );
8973   predicate(UseBMI1Instructions);
8974   effect(KILL cr, TEMP dst);
8975 
8976   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8977             "BLSRL  $dst.lo, $src.lo\n\t"
8978             "JNC    done\n\t"
8979             "BLSRL  $dst.hi, $src.hi\n"
8980             "done:"
8981   %}
8982 
8983   ins_encode %{
8984     Label done;
8985     Register Rdst = $dst$$Register;
8986     Register Rsrc = $src$$Register;
8987     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8988     __ blsrl(Rdst, Rsrc);
8989     __ jccb(Assembler::carryClear, done);
8990     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8991     __ bind(done);
8992   %}
8993 
8994   ins_pipe(ialu_reg);
8995 %}
8996 
8997 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8998 %{
8999   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9000   predicate(UseBMI1Instructions);
9001   effect(KILL cr, TEMP dst);
9002 
9003   ins_cost(125);
9004   format %{ "MOVL   $dst.hi, $src+4\n\t"
9005             "BLSRL  $dst.lo, $src\n\t"
9006             "JNC    done\n\t"
9007             "BLSRL  $dst.hi, $src+4\n"
9008             "done:"
9009   %}
9010 
9011   ins_encode %{
9012     Label done;
9013     Register Rdst = $dst$$Register;
9014     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9015     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9016     __ blsrl(Rdst, $src$$Address);
9017     __ jccb(Assembler::carryClear, done);
9018     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9019     __ bind(done);
9020   %}
9021 
9022   ins_pipe(ialu_reg_mem);
9023 %}
9024 
9025 // Or Long Register with Register
9026 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9027   match(Set dst (OrL dst src));
9028   effect(KILL cr);
9029   format %{ "OR     $dst.lo,$src.lo\n\t"
9030             "OR     $dst.hi,$src.hi" %}
9031   opcode(0x0B,0x0B);
9032   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9033   ins_pipe( ialu_reg_reg_long );
9034 %}
9035 
9036 // Or Long Register with Immediate
9037 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9038   match(Set dst (OrL dst src));
9039   effect(KILL cr);
9040   format %{ "OR     $dst.lo,$src.lo\n\t"
9041             "OR     $dst.hi,$src.hi" %}
9042   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9043   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9044   ins_pipe( ialu_reg_long );
9045 %}
9046 
9047 // Or Long Register with Memory
9048 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9049   match(Set dst (OrL dst (LoadL mem)));
9050   effect(KILL cr);
9051   ins_cost(125);
9052   format %{ "OR     $dst.lo,$mem\n\t"
9053             "OR     $dst.hi,$mem+4" %}
9054   opcode(0x0B,0x0B);
9055   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9056   ins_pipe( ialu_reg_long_mem );
9057 %}
9058 
9059 // Xor Long Register with Register
9060 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9061   match(Set dst (XorL dst src));
9062   effect(KILL cr);
9063   format %{ "XOR    $dst.lo,$src.lo\n\t"
9064             "XOR    $dst.hi,$src.hi" %}
9065   opcode(0x33,0x33);
9066   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9067   ins_pipe( ialu_reg_reg_long );
9068 %}
9069 
9070 // Xor Long Register with Immediate -1
9071 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9072   match(Set dst (XorL dst imm));
9073   format %{ "NOT    $dst.lo\n\t"
9074             "NOT    $dst.hi" %}
9075   ins_encode %{
9076      __ notl($dst$$Register);
9077      __ notl(HIGH_FROM_LOW($dst$$Register));
9078   %}
9079   ins_pipe( ialu_reg_long );
9080 %}
9081 
9082 // Xor Long Register with Immediate
9083 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9084   match(Set dst (XorL dst src));
9085   effect(KILL cr);
9086   format %{ "XOR    $dst.lo,$src.lo\n\t"
9087             "XOR    $dst.hi,$src.hi" %}
9088   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9089   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9090   ins_pipe( ialu_reg_long );
9091 %}
9092 
9093 // Xor Long Register with Memory
9094 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9095   match(Set dst (XorL dst (LoadL mem)));
9096   effect(KILL cr);
9097   ins_cost(125);
9098   format %{ "XOR    $dst.lo,$mem\n\t"
9099             "XOR    $dst.hi,$mem+4" %}
9100   opcode(0x33,0x33);
9101   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9102   ins_pipe( ialu_reg_long_mem );
9103 %}
9104 
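     // Long shifts: constant counts of 1-31 use a SHLD/SHRD pair, counts of
     // 32-63 reduce to a word move plus one 32-bit shift, and variable counts
     // test bit 5 of ECX at run time to choose between the two sequences.
     // Small left shifts of 1-3 can also expand to ADD/ADC chains when
     // UseNewLongLShift is set.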
9105 // Shift Left Long by 1
9106 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9107   predicate(UseNewLongLShift);
9108   match(Set dst (LShiftL dst cnt));
9109   effect(KILL cr);
9110   ins_cost(100);
9111   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9112             "ADC    $dst.hi,$dst.hi" %}
9113   ins_encode %{
9114     __ addl($dst$$Register,$dst$$Register);
9115     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9116   %}
9117   ins_pipe( ialu_reg_long );
9118 %}
9119 
9120 // Shift Left Long by 2
9121 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9122   predicate(UseNewLongLShift);
9123   match(Set dst (LShiftL dst cnt));
9124   effect(KILL cr);
9125   ins_cost(100);
9126   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9127             "ADC    $dst.hi,$dst.hi\n\t"
9128             "ADD    $dst.lo,$dst.lo\n\t"
9129             "ADC    $dst.hi,$dst.hi" %}
9130   ins_encode %{
9131     __ addl($dst$$Register,$dst$$Register);
9132     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9133     __ addl($dst$$Register,$dst$$Register);
9134     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9135   %}
9136   ins_pipe( ialu_reg_long );
9137 %}
9138 
9139 // Shift Left Long by 3
9140 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9141   predicate(UseNewLongLShift);
9142   match(Set dst (LShiftL dst cnt));
9143   effect(KILL cr);
9144   ins_cost(100);
9145   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9146             "ADC    $dst.hi,$dst.hi\n\t"
9147             "ADD    $dst.lo,$dst.lo\n\t"
9148             "ADC    $dst.hi,$dst.hi\n\t"
9149             "ADD    $dst.lo,$dst.lo\n\t"
9150             "ADC    $dst.hi,$dst.hi" %}
9151   ins_encode %{
9152     __ addl($dst$$Register,$dst$$Register);
9153     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9154     __ addl($dst$$Register,$dst$$Register);
9155     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9156     __ addl($dst$$Register,$dst$$Register);
9157     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9158   %}
9159   ins_pipe( ialu_reg_long );
9160 %}
9161 
9162 // Shift Left Long by 1-31
9163 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9164   match(Set dst (LShiftL dst cnt));
9165   effect(KILL cr);
9166   ins_cost(200);
9167   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9168             "SHL    $dst.lo,$cnt" %}
9169   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9170   ins_encode( move_long_small_shift(dst,cnt) );
9171   ins_pipe( ialu_reg_long );
9172 %}
9173 
9174 // Shift Left Long by 32-63
9175 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9176   match(Set dst (LShiftL dst cnt));
9177   effect(KILL cr);
9178   ins_cost(300);
9179   format %{ "MOV    $dst.hi,$dst.lo\n"
9180           "\tSHL    $dst.hi,$cnt-32\n"
9181           "\tXOR    $dst.lo,$dst.lo" %}
9182   opcode(0xC1, 0x4);  /* C1 /4 ib */
9183   ins_encode( move_long_big_shift_clr(dst,cnt) );
9184   ins_pipe( ialu_reg_long );
9185 %}
9186 
9187 // Shift Left Long by variable
9188 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9189   match(Set dst (LShiftL dst shift));
9190   effect(KILL cr);
9191   ins_cost(500+200);
9192   size(17);
9193   format %{ "TEST   $shift,32\n\t"
9194             "JEQ,s  small\n\t"
9195             "MOV    $dst.hi,$dst.lo\n\t"
9196             "XOR    $dst.lo,$dst.lo\n"
9197     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9198             "SHL    $dst.lo,$shift" %}
9199   ins_encode( shift_left_long( dst, shift ) );
9200   ins_pipe( pipe_slow );
9201 %}
9202 
9203 // Shift Right Long by 1-31
9204 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9205   match(Set dst (URShiftL dst cnt));
9206   effect(KILL cr);
9207   ins_cost(200);
9208   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9209             "SHR    $dst.hi,$cnt" %}
9210   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9211   ins_encode( move_long_small_shift(dst,cnt) );
9212   ins_pipe( ialu_reg_long );
9213 %}
9214 
9215 // Shift Right Long by 32-63
9216 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9217   match(Set dst (URShiftL dst cnt));
9218   effect(KILL cr);
9219   ins_cost(300);
9220   format %{ "MOV    $dst.lo,$dst.hi\n"
9221           "\tSHR    $dst.lo,$cnt-32\n"
9222           "\tXOR    $dst.hi,$dst.hi" %}
9223   opcode(0xC1, 0x5);  /* C1 /5 ib */
9224   ins_encode( move_long_big_shift_clr(dst,cnt) );
9225   ins_pipe( ialu_reg_long );
9226 %}
9227 
9228 // Shift Right Long by variable
9229 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9230   match(Set dst (URShiftL dst shift));
9231   effect(KILL cr);
9232   ins_cost(600);
9233   size(17);
9234   format %{ "TEST   $shift,32\n\t"
9235             "JEQ,s  small\n\t"
9236             "MOV    $dst.lo,$dst.hi\n\t"
9237             "XOR    $dst.hi,$dst.hi\n"
9238     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9239             "SHR    $dst.hi,$shift" %}
9240   ins_encode( shift_right_long( dst, shift ) );
9241   ins_pipe( pipe_slow );
9242 %}
9243 
9244 // Arithmetic Shift Right Long by 1-31
9245 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9246   match(Set dst (RShiftL dst cnt));
9247   effect(KILL cr);
9248   ins_cost(200);
9249   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9250             "SAR    $dst.hi,$cnt" %}
9251   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9252   ins_encode( move_long_small_shift(dst,cnt) );
9253   ins_pipe( ialu_reg_long );
9254 %}
9255 
9256 // Arithmetic Shift Right Long by 32-63
9257 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9258   match(Set dst (RShiftL dst cnt));
9259   effect(KILL cr);
9260   ins_cost(300);
9261   format %{ "MOV    $dst.lo,$dst.hi\n"
9262           "\tSAR    $dst.lo,$cnt-32\n"
9263           "\tSAR    $dst.hi,31" %}
9264   opcode(0xC1, 0x7);  /* C1 /7 ib */
9265   ins_encode( move_long_big_shift_sign(dst,cnt) );
9266   ins_pipe( ialu_reg_long );
9267 %}
9268 
9269 // Arithmetic Shift Right Long by variable
9270 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9271   match(Set dst (RShiftL dst shift));
9272   effect(KILL cr);
9273   ins_cost(600);
9274   size(18);
9275   format %{ "TEST   $shift,32\n\t"
9276             "JEQ,s  small\n\t"
9277             "MOV    $dst.lo,$dst.hi\n\t"
9278             "SAR    $dst.hi,31\n"
9279     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9280             "SAR    $dst.hi,$shift" %}
9281   ins_encode( shift_right_arith_long( dst, shift ) );
9282   ins_pipe( pipe_slow );
9283 %}
9284 
9285 
9286 //----------Double Instructions------------------------------------------------
9287 // Double Math
9288 
9289 // Compare & branch
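     // Three strategies, selected by predicate: with UseSSE>=2 the compare is a
     // single UCOMISD; with x87 only, P6-class CPUs use FUCOMIP; older CPUs fall
     // back to FCOM/FNSTSW/SAHF.  The fix-up sequences adjust the flags after an
     // unordered result so that a NaN compares as "less than" (CF set, ZF clear).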
9290 
9291 // P6 version of double compare, sets condition codes in EFLAGS
9292 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9293   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9294   match(Set cr (CmpD src1 src2));
9295   effect(KILL rax);
9296   ins_cost(150);
9297   format %{ "FLD    $src1\n\t"
9298             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9299             "JNP    exit\n\t"
9300             "MOV    ah,1       // saw a NaN, set CF\n\t"
9301             "SAHF\n"
9302      "exit:\tNOP               // avoid branch to branch" %}
9303   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9304   ins_encode( Push_Reg_DPR(src1),
9305               OpcP, RegOpc(src2),
9306               cmpF_P6_fixup );
9307   ins_pipe( pipe_slow );
9308 %}
9309 
9310 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9311   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9312   match(Set cr (CmpD src1 src2));
9313   ins_cost(150);
9314   format %{ "FLD    $src1\n\t"
9315             "FUCOMIP ST,$src2  // P6 instruction" %}
9316   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9317   ins_encode( Push_Reg_DPR(src1),
9318               OpcP, RegOpc(src2));
9319   ins_pipe( pipe_slow );
9320 %}
9321 
9322 // Compare & branch
9323 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9324   predicate(UseSSE<=1);
9325   match(Set cr (CmpD src1 src2));
9326   effect(KILL rax);
9327   ins_cost(200);
9328   format %{ "FLD    $src1\n\t"
9329             "FCOMp  $src2\n\t"
9330             "FNSTSW AX\n\t"
9331             "TEST   AX,0x400\n\t"
9332             "JZ,s   flags\n\t"
9333             "MOV    AH,1\t# unordered treat as LT\n"
9334     "flags:\tSAHF" %}
9335   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9336   ins_encode( Push_Reg_DPR(src1),
9337               OpcP, RegOpc(src2),
9338               fpu_flags);
9339   ins_pipe( pipe_slow );
9340 %}
9341 
9342 // Compare vs zero into -1,0,1
9343 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9344   predicate(UseSSE<=1);
9345   match(Set dst (CmpD3 src1 zero));
9346   effect(KILL cr, KILL rax);
9347   ins_cost(280);
9348   format %{ "FTSTD  $dst,$src1" %}
9349   opcode(0xE4, 0xD9);
9350   ins_encode( Push_Reg_DPR(src1),
9351               OpcS, OpcP, PopFPU,
9352               CmpF_Result(dst));
9353   ins_pipe( pipe_slow );
9354 %}
9355 
9356 // Compare into -1,0,1
9357 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9358   predicate(UseSSE<=1);
9359   match(Set dst (CmpD3 src1 src2));
9360   effect(KILL cr, KILL rax);
9361   ins_cost(300);
9362   format %{ "FCMPD  $dst,$src1,$src2" %}
9363   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9364   ins_encode( Push_Reg_DPR(src1),
9365               OpcP, RegOpc(src2),
9366               CmpF_Result(dst));
9367   ins_pipe( pipe_slow );
9368 %}
9369 
9370 // double compare and set condition codes in EFLAGS by XMM regs
9371 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9372   predicate(UseSSE>=2);
9373   match(Set cr (CmpD src1 src2));
9374   ins_cost(145);
9375   format %{ "UCOMISD $src1,$src2\n\t"
9376             "JNP,s   exit\n\t"
9377             "PUSHF\t# saw NaN, set CF\n\t"
9378             "AND     [rsp], #0xffffff2b\n\t"
9379             "POPF\n"
9380     "exit:" %}
9381   ins_encode %{
9382     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9383     emit_cmpfp_fixup(_masm);
9384   %}
9385   ins_pipe( pipe_slow );
9386 %}
9387 
9388 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9389   predicate(UseSSE>=2);
9390   match(Set cr (CmpD src1 src2));
9391   ins_cost(100);
9392   format %{ "UCOMISD $src1,$src2" %}
9393   ins_encode %{
9394     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9395   %}
9396   ins_pipe( pipe_slow );
9397 %}
9398 
9399 // double compare and set condition codes in EFLAGS by XMM regs
9400 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9401   predicate(UseSSE>=2);
9402   match(Set cr (CmpD src1 (LoadD src2)));
9403   ins_cost(145);
9404   format %{ "UCOMISD $src1,$src2\n\t"
9405             "JNP,s   exit\n\t"
9406             "PUSHF\t# saw NaN, set CF\n\t"
9407             "AND     [rsp], #0xffffff2b\n\t"
9408             "POPF\n"
9409     "exit:" %}
9410   ins_encode %{
9411     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9412     emit_cmpfp_fixup(_masm);
9413   %}
9414   ins_pipe( pipe_slow );
9415 %}
9416 
9417 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9418   predicate(UseSSE>=2);
9419   match(Set cr (CmpD src1 (LoadD src2)));
9420   ins_cost(100);
9421   format %{ "UCOMISD $src1,$src2" %}
9422   ins_encode %{
9423     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9424   %}
9425   ins_pipe( pipe_slow );
9426 %}
9427 
9428 // Compare into -1,0,1 in XMM
9429 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9430   predicate(UseSSE>=2);
9431   match(Set dst (CmpD3 src1 src2));
9432   effect(KILL cr);
9433   ins_cost(255);
9434   format %{ "UCOMISD $src1, $src2\n\t"
9435             "MOV     $dst, #-1\n\t"
9436             "JP,s    done\n\t"
9437             "JB,s    done\n\t"
9438             "SETNE   $dst\n\t"
9439             "MOVZB   $dst, $dst\n"
9440     "done:" %}
9441   ins_encode %{
9442     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9443     emit_cmpfp3(_masm, $dst$$Register);
9444   %}
9445   ins_pipe( pipe_slow );
9446 %}
9447 
9448 // Compare into -1,0,1 in XMM and memory
9449 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9450   predicate(UseSSE>=2);
9451   match(Set dst (CmpD3 src1 (LoadD src2)));
9452   effect(KILL cr);
9453   ins_cost(275);
9454   format %{ "UCOMISD $src1, $src2\n\t"
9455             "MOV     $dst, #-1\n\t"
9456             "JP,s    done\n\t"
9457             "JB,s    done\n\t"
9458             "SETNE   $dst\n\t"
9459             "MOVZB   $dst, $dst\n"
9460     "done:" %}
9461   ins_encode %{
9462     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9463     emit_cmpfp3(_masm, $dst$$Register);
9464   %}
9465   ins_pipe( pipe_slow );
9466 %}
9467 
9468 
9469 instruct subDPR_reg(regDPR dst, regDPR src) %{
9470   predicate (UseSSE <=1);
9471   match(Set dst (SubD dst src));
9472 
9473   format %{ "FLD    $src\n\t"
9474             "DSUBp  $dst,ST" %}
9475   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9476   ins_cost(150);
9477   ins_encode( Push_Reg_DPR(src),
9478               OpcP, RegOpc(dst) );
9479   ins_pipe( fpu_reg_reg );
9480 %}
9481 
9482 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9483   predicate (UseSSE <=1);
9484   match(Set dst (RoundDouble (SubD src1 src2)));
9485   ins_cost(250);
9486 
9487   format %{ "FLD    $src2\n\t"
9488             "DSUB   ST,$src1\n\t"
9489             "FSTP_D $dst\t# D-round" %}
9490   opcode(0xD8, 0x5);
9491   ins_encode( Push_Reg_DPR(src2),
9492               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9493   ins_pipe( fpu_mem_reg_reg );
9494 %}
9495 
9496 
9497 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9498   predicate (UseSSE <=1);
9499   match(Set dst (SubD dst (LoadD src)));
9500   ins_cost(150);
9501 
9502   format %{ "FLD    $src\n\t"
9503             "DSUBp  $dst,ST" %}
9504   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9505   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9506               OpcP, RegOpc(dst) );
9507   ins_pipe( fpu_reg_mem );
9508 %}
9509 
9510 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9511   predicate (UseSSE<=1);
9512   match(Set dst (AbsD src));
9513   ins_cost(100);
9514   format %{ "FABS" %}
9515   opcode(0xE1, 0xD9);
9516   ins_encode( OpcS, OpcP );
9517   ins_pipe( fpu_reg_reg );
9518 %}
9519 
9520 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9521   predicate(UseSSE<=1);
9522   match(Set dst (NegD src));
9523   ins_cost(100);
9524   format %{ "FCHS" %}
9525   opcode(0xE0, 0xD9);
9526   ins_encode( OpcS, OpcP );
9527   ins_pipe( fpu_reg_reg );
9528 %}
9529 
9530 instruct addDPR_reg(regDPR dst, regDPR src) %{
9531   predicate(UseSSE<=1);
9532   match(Set dst (AddD dst src));
9533   format %{ "FLD    $src\n\t"
9534             "DADD   $dst,ST" %}
9535   size(4);
9536   ins_cost(150);
9537   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9538   ins_encode( Push_Reg_DPR(src),
9539               OpcP, RegOpc(dst) );
9540   ins_pipe( fpu_reg_reg );
9541 %}
9542 
9543 
9544 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9545   predicate(UseSSE<=1);
9546   match(Set dst (RoundDouble (AddD src1 src2)));
9547   ins_cost(250);
9548 
9549   format %{ "FLD    $src2\n\t"
9550             "DADD   ST,$src1\n\t"
9551             "FSTP_D $dst\t# D-round" %}
9552   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9553   ins_encode( Push_Reg_DPR(src2),
9554               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9555   ins_pipe( fpu_mem_reg_reg );
9556 %}
9557 
9558 
9559 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9560   predicate(UseSSE<=1);
9561   match(Set dst (AddD dst (LoadD src)));
9562   ins_cost(150);
9563 
9564   format %{ "FLD    $src\n\t"
9565             "DADDp  $dst,ST" %}
9566   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9567   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9568               OpcP, RegOpc(dst) );
9569   ins_pipe( fpu_reg_mem );
9570 %}
9571 
9572 // add-to-memory
9573 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9574   predicate(UseSSE<=1);
9575   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9576   ins_cost(150);
9577 
9578   format %{ "FLD_D  $dst\n\t"
9579             "DADD   ST,$src\n\t"
9580             "FST_D  $dst" %}
9581   opcode(0xDD, 0x0);
9582   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9583               Opcode(0xD8), RegOpc(src),
9584               set_instruction_start,
9585               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9586   ins_pipe( fpu_reg_mem );
9587 %}
9588 
9589 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9590   predicate(UseSSE<=1);
9591   match(Set dst (AddD dst con));
9592   ins_cost(125);
9593   format %{ "FLD1\n\t"
9594             "DADDp  $dst,ST" %}
9595   ins_encode %{
9596     __ fld1();
9597     __ faddp($dst$$reg);
9598   %}
9599   ins_pipe(fpu_reg);
9600 %}
9601 
9602 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9603   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9604   match(Set dst (AddD dst con));
9605   ins_cost(200);
9606   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9607             "DADDp  $dst,ST" %}
9608   ins_encode %{
9609     __ fld_d($constantaddress($con));
9610     __ faddp($dst$$reg);
9611   %}
9612   ins_pipe(fpu_reg_mem);
9613 %}
9614 
9615 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9616   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9617   match(Set dst (RoundDouble (AddD src con)));
9618   ins_cost(200);
9619   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9620             "DADD   ST,$src\n\t"
9621             "FSTP_D $dst\t# D-round" %}
9622   ins_encode %{
9623     __ fld_d($constantaddress($con));
9624     __ fadd($src$$reg);
9625     __ fstp_d(Address(rsp, $dst$$disp));
9626   %}
9627   ins_pipe(fpu_mem_reg_con);
9628 %}
9629 
9630 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9631   predicate(UseSSE<=1);
9632   match(Set dst (MulD dst src));
9633   format %{ "FLD    $src\n\t"
9634             "DMULp  $dst,ST" %}
9635   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9636   ins_cost(150);
9637   ins_encode( Push_Reg_DPR(src),
9638               OpcP, RegOpc(dst) );
9639   ins_pipe( fpu_reg_reg );
9640 %}
9641 
9642 // Strict FP instruction biases argument before multiply then
9643 // biases result to avoid double rounding of subnormals.
9644 //
9645 // scale arg1 by multiplying arg1 by 2^(-15360)
9646 // load arg2
9647 // multiply scaled arg1 by arg2
9648 // rescale product by 2^(15360)
9649 //
9650 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9651   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9652   match(Set dst (MulD dst src));
9653   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9654 
9655   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9656             "DMULp  $dst,ST\n\t"
9657             "FLD    $src\n\t"
9658             "DMULp  $dst,ST\n\t"
9659             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9660             "DMULp  $dst,ST\n\t" %}
9661   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9662   ins_encode( strictfp_bias1(dst),
9663               Push_Reg_DPR(src),
9664               OpcP, RegOpc(dst),
9665               strictfp_bias2(dst) );
9666   ins_pipe( fpu_reg_reg );
9667 %}
9668 
9669 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9670   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9671   match(Set dst (MulD dst con));
9672   ins_cost(200);
9673   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9674             "DMULp  $dst,ST" %}
9675   ins_encode %{
9676     __ fld_d($constantaddress($con));
9677     __ fmulp($dst$$reg);
9678   %}
9679   ins_pipe(fpu_reg_mem);
9680 %}
9681 
9682 
9683 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9684   predicate( UseSSE<=1 );
9685   match(Set dst (MulD dst (LoadD src)));
9686   ins_cost(200);
9687   format %{ "FLD_D  $src\n\t"
9688             "DMULp  $dst,ST" %}
9689   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9690   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9691               OpcP, RegOpc(dst) );
9692   ins_pipe( fpu_reg_mem );
9693 %}
9694 
9695 //
9696 // Cisc-alternate to reg-reg multiply
9697 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9698   predicate( UseSSE<=1 );
9699   match(Set dst (MulD src (LoadD mem)));
9700   ins_cost(250);
9701   format %{ "FLD_D  $mem\n\t"
9702             "DMUL   ST,$src\n\t"
9703             "FSTP_D $dst" %}
9704   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9705   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9706               OpcReg_FPR(src),
9707               Pop_Reg_DPR(dst) );
9708   ins_pipe( fpu_reg_reg_mem );
9709 %}
9710 
9711 
9712 // MACRO3 -- addDPR a mulDPR
9713 // This instruction is a '2-address' instruction in that the result goes
9714 // back to src2.  This eliminates a move from the macro; possibly the
9715 // register allocator will have to add it back (and maybe not).
9716 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9717   predicate( UseSSE<=1 );
9718   match(Set src2 (AddD (MulD src0 src1) src2));
9719   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9720             "DMUL   ST,$src1\n\t"
9721             "DADDp  $src2,ST" %}
9722   ins_cost(250);
9723   opcode(0xDD); /* LoadD DD /0 */
9724   ins_encode( Push_Reg_FPR(src0),
9725               FMul_ST_reg(src1),
9726               FAddP_reg_ST(src2) );
9727   ins_pipe( fpu_reg_reg_reg );
9728 %}
9729 
9730 
9731 // MACRO3 -- subDPR a mulDPR
9732 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9733   predicate( UseSSE<=1 );
9734   match(Set src2 (SubD (MulD src0 src1) src2));
9735   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9736             "DMUL   ST,$src1\n\t"
9737             "DSUBRp $src2,ST" %}
9738   ins_cost(250);
9739   ins_encode( Push_Reg_FPR(src0),
9740               FMul_ST_reg(src1),
9741               Opcode(0xDE), Opc_plus(0xE0,src2));
9742   ins_pipe( fpu_reg_reg_reg );
9743 %}
9744 
9745 
9746 instruct divDPR_reg(regDPR dst, regDPR src) %{
9747   predicate( UseSSE<=1 );
9748   match(Set dst (DivD dst src));
9749 
9750   format %{ "FLD    $src\n\t"
9751             "FDIVp  $dst,ST" %}
9752   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9753   ins_cost(150);
9754   ins_encode( Push_Reg_DPR(src),
9755               OpcP, RegOpc(dst) );
9756   ins_pipe( fpu_reg_reg );
9757 %}
9758 
9759 // Strict FP instruction biases argument before division then
9760 // biases result, to avoid double rounding of subnormals.
9761 //
9762 // scale dividend by multiplying dividend by 2^(-15360)
9763 // load divisor
9764 // divide scaled dividend by divisor
9765 // rescale quotient by 2^(15360)
9766 //
9767 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9772 
9773   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9774             "DMULp  $dst,ST\n\t"
9775             "FLD    $src\n\t"
9776             "FDIVp  $dst,ST\n\t"
9777             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9778             "DMULp  $dst,ST\n\t" %}
9779   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9780   ins_encode( strictfp_bias1(dst),
9781               Push_Reg_DPR(src),
9782               OpcP, RegOpc(dst),
9783               strictfp_bias2(dst) );
9784   ins_pipe( fpu_reg_reg );
9785 %}
9786 
9787 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9788   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9789   match(Set dst (RoundDouble (DivD src1 src2)));
9790 
9791   format %{ "FLD    $src1\n\t"
9792             "FDIV   ST,$src2\n\t"
9793             "FSTP_D $dst\t# D-round" %}
9794   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9795   ins_encode( Push_Reg_DPR(src1),
9796               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9797   ins_pipe( fpu_mem_reg_reg );
9798 %}
9799 
9800 
9801 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9802   predicate(UseSSE<=1);
9803   match(Set dst (ModD dst src));
9804   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9805 
9806   format %{ "DMOD   $dst,$src" %}
9807   ins_cost(250);
9808   ins_encode(Push_Reg_Mod_DPR(dst, src),
9809               emitModDPR(),
9810               Push_Result_Mod_DPR(src),
9811               Pop_Reg_DPR(dst));
9812   ins_pipe( pipe_slow );
9813 %}
9814 
9815 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9816   predicate(UseSSE>=2);
9817   match(Set dst (ModD src0 src1));
9818   effect(KILL rax, KILL cr);
9819 
9820   format %{ "SUB    ESP,8\t # DMOD\n"
9821           "\tMOVSD  [ESP+0],$src1\n"
9822           "\tFLD_D  [ESP+0]\n"
9823           "\tMOVSD  [ESP+0],$src0\n"
9824           "\tFLD_D  [ESP+0]\n"
9825      "loop:\tFPREM\n"
9826           "\tFWAIT\n"
9827           "\tFNSTSW AX\n"
9828           "\tSAHF\n"
9829           "\tJP     loop\n"
9830           "\tFSTP_D [ESP+0]\n"
9831           "\tMOVSD  $dst,[ESP+0]\n"
9832           "\tADD    ESP,8\n"
9833           "\tFSTP   ST0\t # Restore FPU Stack"
9834     %}
9835   ins_cost(250);
9836   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9837   ins_pipe( pipe_slow );
9838 %}
9839 
9840 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9841   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
9843   format %{ "DATA   $dst,$src" %}
9844   opcode(0xD9, 0xF3);
9845   ins_encode( Push_Reg_DPR(src),
9846               OpcP, OpcS, RegOpc(dst) );
9847   ins_pipe( pipe_slow );
9848 %}
9849 
9850 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9851   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
9853   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9854   format %{ "DATA   $dst,$src" %}
9855   opcode(0xD9, 0xF3);
9856   ins_encode( Push_SrcD(src),
9857               OpcP, OpcS, Push_ResultD(dst) );
9858   ins_pipe( pipe_slow );
9859 %}
9860 
9861 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9862   predicate (UseSSE<=1);
9863   match(Set dst (SqrtD src));
9864   format %{ "DSQRT  $dst,$src" %}
9865   opcode(0xFA, 0xD9);
9866   ins_encode( Push_Reg_DPR(src),
9867               OpcS, OpcP, Pop_Reg_DPR(dst) );
9868   ins_pipe( pipe_slow );
9869 %}
9870 
9871 //-------------Float Instructions-------------------------------
9872 // Float Math
9873 
9874 // Code for float compare:
9875 //     fcompp();
9876 //     fwait(); fnstsw_ax();
9877 //     sahf();
9878 //     movl(dst, unordered_result);
9879 //     jcc(Assembler::parity, exit);
9880 //     movl(dst, less_result);
9881 //     jcc(Assembler::below, exit);
9882 //     movl(dst, equal_result);
9883 //     jcc(Assembler::equal, exit);
9884 //     movl(dst, greater_result);
9885 //   exit:
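//
// The CmpF3 patterns below (like the CmpD3 ones above) collapse that
// selection into -1/0/1, with an unordered (NaN) compare treated as "less
// than".  A rough C equivalent of the result they produce (illustration only):
//
//   int cmp3(float a, float b) {
//     if (a != a || b != b) return -1;   // unordered: treat as less
//     if (a <  b)           return -1;
//     if (a == b)           return  0;
//     return 1;                          // a > b
//   }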
9886 
9887 // P6 version of float compare, sets condition codes in EFLAGS
9888 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9889   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9890   match(Set cr (CmpF src1 src2));
9891   effect(KILL rax);
9892   ins_cost(150);
9893   format %{ "FLD    $src1\n\t"
9894             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9895             "JNP    exit\n\t"
9896             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9897             "SAHF\n"
9898      "exit:\tNOP               // avoid branch to branch" %}
9899   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9900   ins_encode( Push_Reg_DPR(src1),
9901               OpcP, RegOpc(src2),
9902               cmpF_P6_fixup );
9903   ins_pipe( pipe_slow );
9904 %}
9905 
9906 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9907   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9908   match(Set cr (CmpF src1 src2));
9909   ins_cost(100);
9910   format %{ "FLD    $src1\n\t"
9911             "FUCOMIP ST,$src2  // P6 instruction" %}
9912   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9913   ins_encode( Push_Reg_DPR(src1),
9914               OpcP, RegOpc(src2));
9915   ins_pipe( pipe_slow );
9916 %}
9917 
9918 
9919 // Compare & branch
9920 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9921   predicate(UseSSE == 0);
9922   match(Set cr (CmpF src1 src2));
9923   effect(KILL rax);
9924   ins_cost(200);
9925   format %{ "FLD    $src1\n\t"
9926             "FCOMp  $src2\n\t"
9927             "FNSTSW AX\n\t"
9928             "TEST   AX,0x400\n\t"
9929             "JZ,s   flags\n\t"
9930             "MOV    AH,1\t# unordered treat as LT\n"
9931     "flags:\tSAHF" %}
9932   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9933   ins_encode( Push_Reg_DPR(src1),
9934               OpcP, RegOpc(src2),
9935               fpu_flags);
9936   ins_pipe( pipe_slow );
9937 %}
9938 
9939 // Compare vs zero into -1,0,1
9940 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9941   predicate(UseSSE == 0);
9942   match(Set dst (CmpF3 src1 zero));
9943   effect(KILL cr, KILL rax);
9944   ins_cost(280);
9945   format %{ "FTSTF  $dst,$src1" %}
9946   opcode(0xE4, 0xD9);
9947   ins_encode( Push_Reg_DPR(src1),
9948               OpcS, OpcP, PopFPU,
9949               CmpF_Result(dst));
9950   ins_pipe( pipe_slow );
9951 %}
9952 
9953 // Compare into -1,0,1
9954 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9955   predicate(UseSSE == 0);
9956   match(Set dst (CmpF3 src1 src2));
9957   effect(KILL cr, KILL rax);
9958   ins_cost(300);
9959   format %{ "FCMPF  $dst,$src1,$src2" %}
9960   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9961   ins_encode( Push_Reg_DPR(src1),
9962               OpcP, RegOpc(src2),
9963               CmpF_Result(dst));
9964   ins_pipe( pipe_slow );
9965 %}
9966 
9967 // float compare and set condition codes in EFLAGS by XMM regs
9968 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9969   predicate(UseSSE>=1);
9970   match(Set cr (CmpF src1 src2));
9971   ins_cost(145);
9972   format %{ "UCOMISS $src1,$src2\n\t"
9973             "JNP,s   exit\n\t"
9974             "PUSHF\t# saw NaN, set CF\n\t"
9975             "AND     [rsp], #0xffffff2b\n\t"
9976             "POPF\n"
9977     "exit:" %}
9978   ins_encode %{
9979     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9980     emit_cmpfp_fixup(_masm);
9981   %}
9982   ins_pipe( pipe_slow );
9983 %}
9984 
9985 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9986   predicate(UseSSE>=1);
9987   match(Set cr (CmpF src1 src2));
9988   ins_cost(100);
9989   format %{ "UCOMISS $src1,$src2" %}
9990   ins_encode %{
9991     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9992   %}
9993   ins_pipe( pipe_slow );
9994 %}
9995 
9996 // float compare and set condition codes in EFLAGS by XMM regs
9997 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
9998   predicate(UseSSE>=1);
9999   match(Set cr (CmpF src1 (LoadF src2)));
10000   ins_cost(165);
10001   format %{ "UCOMISS $src1,$src2\n\t"
10002             "JNP,s   exit\n\t"
10003             "PUSHF\t# saw NaN, set CF\n\t"
10004             "AND     [rsp], #0xffffff2b\n\t"
10005             "POPF\n"
10006     "exit:" %}
10007   ins_encode %{
10008     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10009     emit_cmpfp_fixup(_masm);
10010   %}
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10015   predicate(UseSSE>=1);
10016   match(Set cr (CmpF src1 (LoadF src2)));
10017   ins_cost(100);
10018   format %{ "UCOMISS $src1,$src2" %}
10019   ins_encode %{
10020     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10021   %}
10022   ins_pipe( pipe_slow );
10023 %}
10024 
10025 // Compare into -1,0,1 in XMM
10026 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10027   predicate(UseSSE>=1);
10028   match(Set dst (CmpF3 src1 src2));
10029   effect(KILL cr);
10030   ins_cost(255);
10031   format %{ "UCOMISS $src1, $src2\n\t"
10032             "MOV     $dst, #-1\n\t"
10033             "JP,s    done\n\t"
10034             "JB,s    done\n\t"
10035             "SETNE   $dst\n\t"
10036             "MOVZB   $dst, $dst\n"
10037     "done:" %}
10038   ins_encode %{
10039     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10040     emit_cmpfp3(_masm, $dst$$Register);
10041   %}
10042   ins_pipe( pipe_slow );
10043 %}
10044 
10045 // Compare into -1,0,1 in XMM and memory
10046 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10047   predicate(UseSSE>=1);
10048   match(Set dst (CmpF3 src1 (LoadF src2)));
10049   effect(KILL cr);
10050   ins_cost(275);
10051   format %{ "UCOMISS $src1, $src2\n\t"
10052             "MOV     $dst, #-1\n\t"
10053             "JP,s    done\n\t"
10054             "JB,s    done\n\t"
10055             "SETNE   $dst\n\t"
10056             "MOVZB   $dst, $dst\n"
10057     "done:" %}
10058   ins_encode %{
10059     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10060     emit_cmpfp3(_masm, $dst$$Register);
10061   %}
10062   ins_pipe( pipe_slow );
10063 %}
10064 
10065 // Spill to obtain 24-bit precision
10066 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10067   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10068   match(Set dst (SubF src1 src2));
10069 
10070   format %{ "FSUB   $dst,$src1 - $src2" %}
10071   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10072   ins_encode( Push_Reg_FPR(src1),
10073               OpcReg_FPR(src2),
10074               Pop_Mem_FPR(dst) );
10075   ins_pipe( fpu_mem_reg_reg );
10076 %}
10077 //
10078 // This instruction does not round to 24-bits
10079 instruct subFPR_reg(regFPR dst, regFPR src) %{
10080   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10081   match(Set dst (SubF dst src));
10082 
10083   format %{ "FSUB   $dst,$src" %}
10084   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10085   ins_encode( Push_Reg_FPR(src),
10086               OpcP, RegOpc(dst) );
10087   ins_pipe( fpu_reg_reg );
10088 %}
10089 
10090 // Spill to obtain 24-bit precision
10091 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10092   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10093   match(Set dst (AddF src1 src2));
10094 
10095   format %{ "FADD   $dst,$src1,$src2" %}
10096   opcode(0xD8, 0x0); /* D8 C0+i */
10097   ins_encode( Push_Reg_FPR(src2),
10098               OpcReg_FPR(src1),
10099               Pop_Mem_FPR(dst) );
10100   ins_pipe( fpu_mem_reg_reg );
10101 %}
10102 //
10103 // This instruction does not round to 24-bits
10104 instruct addFPR_reg(regFPR dst, regFPR src) %{
10105   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10106   match(Set dst (AddF dst src));
10107 
10108   format %{ "FLD    $src\n\t"
10109             "FADDp  $dst,ST" %}
10110   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10111   ins_encode( Push_Reg_FPR(src),
10112               OpcP, RegOpc(dst) );
10113   ins_pipe( fpu_reg_reg );
10114 %}
10115 
10116 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10117   predicate(UseSSE==0);
10118   match(Set dst (AbsF src));
10119   ins_cost(100);
10120   format %{ "FABS" %}
10121   opcode(0xE1, 0xD9);
10122   ins_encode( OpcS, OpcP );
10123   ins_pipe( fpu_reg_reg );
10124 %}
10125 
10126 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10127   predicate(UseSSE==0);
10128   match(Set dst (NegF src));
10129   ins_cost(100);
10130   format %{ "FCHS" %}
10131   opcode(0xE0, 0xD9);
10132   ins_encode( OpcS, OpcP );
10133   ins_pipe( fpu_reg_reg );
10134 %}
10135 
10136 // Cisc-alternate to addFPR_reg
10137 // Spill to obtain 24-bit precision
10138 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10139   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10140   match(Set dst (AddF src1 (LoadF src2)));
10141 
10142   format %{ "FLD    $src2\n\t"
10143             "FADD   ST,$src1\n\t"
10144             "FSTP_S $dst" %}
10145   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10146   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10147               OpcReg_FPR(src1),
10148               Pop_Mem_FPR(dst) );
10149   ins_pipe( fpu_mem_reg_mem );
10150 %}
10151 //
10152 // Cisc-alternate to addFPR_reg
10153 // This instruction does not round to 24-bits
10154 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10155   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10156   match(Set dst (AddF dst (LoadF src)));
10157 
10158   format %{ "FADD   $dst,$src" %}
10159   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10160   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10161               OpcP, RegOpc(dst) );
10162   ins_pipe( fpu_reg_mem );
10163 %}
10164 
10165 // // Following two instructions for _222_mpegaudio
10166 // Spill to obtain 24-bit precision
10167 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10168   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10169   match(Set dst (AddF src1 src2));
10170 
10171   format %{ "FADD   $dst,$src1,$src2" %}
10172   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10173   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10174               OpcReg_FPR(src2),
10175               Pop_Mem_FPR(dst) );
10176   ins_pipe( fpu_mem_reg_mem );
10177 %}
10178 
10179 // Cisc-spill variant
10180 // Spill to obtain 24-bit precision
10181 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10182   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10183   match(Set dst (AddF src1 (LoadF src2)));
10184 
10185   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10186   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10187   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10188               set_instruction_start,
10189               OpcP, RMopc_Mem(secondary,src1),
10190               Pop_Mem_FPR(dst) );
10191   ins_pipe( fpu_mem_mem_mem );
10192 %}
10193 
10194 // Spill to obtain 24-bit precision
10195 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10196   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10197   match(Set dst (AddF src1 src2));
10198 
10199   format %{ "FADD   $dst,$src1,$src2" %}
10200   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10201   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10202               set_instruction_start,
10203               OpcP, RMopc_Mem(secondary,src1),
10204               Pop_Mem_FPR(dst) );
10205   ins_pipe( fpu_mem_mem_mem );
10206 %}
10207 
10208 
10209 // Spill to obtain 24-bit precision
10210 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10211   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10212   match(Set dst (AddF src con));
10213   format %{ "FLD    $src\n\t"
10214             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10215             "FSTP_S $dst"  %}
10216   ins_encode %{
10217     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10218     __ fadd_s($constantaddress($con));
10219     __ fstp_s(Address(rsp, $dst$$disp));
10220   %}
10221   ins_pipe(fpu_mem_reg_con);
10222 %}
10223 //
10224 // This instruction does not round to 24-bits
10225 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10226   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10227   match(Set dst (AddF src con));
10228   format %{ "FLD    $src\n\t"
10229             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10230             "FSTP   $dst"  %}
10231   ins_encode %{
10232     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10233     __ fadd_s($constantaddress($con));
10234     __ fstp_d($dst$$reg);
10235   %}
10236   ins_pipe(fpu_reg_reg_con);
10237 %}
10238 
10239 // Spill to obtain 24-bit precision
10240 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10241   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10242   match(Set dst (MulF src1 src2));
10243 
10244   format %{ "FLD    $src1\n\t"
10245             "FMUL   $src2\n\t"
10246             "FSTP_S $dst"  %}
10247   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10248   ins_encode( Push_Reg_FPR(src1),
10249               OpcReg_FPR(src2),
10250               Pop_Mem_FPR(dst) );
10251   ins_pipe( fpu_mem_reg_reg );
10252 %}
10253 //
10254 // This instruction does not round to 24-bits
10255 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10256   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10257   match(Set dst (MulF src1 src2));
10258 
10259   format %{ "FLD    $src1\n\t"
10260             "FMUL   $src2\n\t"
10261             "FSTP_S $dst"  %}
10262   opcode(0xD8, 0x1); /* D8 C8+i */
10263   ins_encode( Push_Reg_FPR(src2),
10264               OpcReg_FPR(src1),
10265               Pop_Reg_FPR(dst) );
10266   ins_pipe( fpu_reg_reg_reg );
10267 %}
10268 
10269 
10270 // Spill to obtain 24-bit precision
10271 // Cisc-alternate to reg-reg multiply
10272 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10273   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10274   match(Set dst (MulF src1 (LoadF src2)));
10275 
10276   format %{ "FLD_S  $src2\n\t"
10277             "FMUL   $src1\n\t"
10278             "FSTP_S $dst"  %}
10279   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10280   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10281               OpcReg_FPR(src1),
10282               Pop_Mem_FPR(dst) );
10283   ins_pipe( fpu_mem_reg_mem );
10284 %}
10285 //
10286 // This instruction does not round to 24-bits
10287 // Cisc-alternate to reg-reg multiply
10288 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10289   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10290   match(Set dst (MulF src1 (LoadF src2)));
10291 
10292   format %{ "FMUL   $dst,$src1,$src2" %}
10293   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10294   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10295               OpcReg_FPR(src1),
10296               Pop_Reg_FPR(dst) );
10297   ins_pipe( fpu_reg_reg_mem );
10298 %}
10299 
10300 // Spill to obtain 24-bit precision
10301 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10302   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10303   match(Set dst (MulF src1 src2));
10304 
10305   format %{ "FMUL   $dst,$src1,$src2" %}
10306   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10307   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10308               set_instruction_start,
10309               OpcP, RMopc_Mem(secondary,src1),
10310               Pop_Mem_FPR(dst) );
10311   ins_pipe( fpu_mem_mem_mem );
10312 %}
10313 
10314 // Spill to obtain 24-bit precision
10315 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10316   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10317   match(Set dst (MulF src con));
10318 
10319   format %{ "FLD    $src\n\t"
10320             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10321             "FSTP_S $dst"  %}
10322   ins_encode %{
10323     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10324     __ fmul_s($constantaddress($con));
10325     __ fstp_s(Address(rsp, $dst$$disp));
10326   %}
10327   ins_pipe(fpu_mem_reg_con);
10328 %}
10329 //
10330 // This instruction does not round to 24-bits
10331 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10332   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10333   match(Set dst (MulF src con));
10334 
10335   format %{ "FLD    $src\n\t"
10336             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10337             "FSTP   $dst"  %}
10338   ins_encode %{
10339     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10340     __ fmul_s($constantaddress($con));
10341     __ fstp_d($dst$$reg);
10342   %}
10343   ins_pipe(fpu_reg_reg_con);
10344 %}
10345 
10346 
10347 //
10348 // MACRO1 -- subsume unshared load into mulFPR
10349 // This instruction does not round to 24-bits
10350 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10351   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10352   match(Set dst (MulF (LoadF mem1) src));
10353 
10354   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10355             "FMUL   ST,$src\n\t"
10356             "FSTP   $dst" %}
10357   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10358   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10359               OpcReg_FPR(src),
10360               Pop_Reg_FPR(dst) );
10361   ins_pipe( fpu_reg_reg_mem );
10362 %}
10363 //
10364 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10365 // This instruction does not round to 24-bits
10366 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10367   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10368   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10369   ins_cost(95);
10370 
10371   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10372             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10373             "FADD   ST,$src2\n\t"
10374             "FSTP   $dst" %}
10375   opcode(0xD9); /* LoadF D9 /0 */
10376   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10377               FMul_ST_reg(src1),
10378               FAdd_ST_reg(src2),
10379               Pop_Reg_FPR(dst) );
10380   ins_pipe( fpu_reg_mem_reg_reg );
10381 %}
10382 
10383 // MACRO3 -- addFPR a mulFPR
10384 // This instruction does not round to 24-bits.  It is a '2-address'
10385 // instruction in that the result goes back to src2.  This eliminates
10386 // a move from the macro; possibly the register allocator will have
10387 // to add it back (and maybe not).
10388 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10389   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10390   match(Set src2 (AddF (MulF src0 src1) src2));
10391 
10392   format %{ "FLD    $src0     ===MACRO3===\n\t"
10393             "FMUL   ST,$src1\n\t"
10394             "FADDP  $src2,ST" %}
10395   opcode(0xD9); /* LoadF D9 /0 */
10396   ins_encode( Push_Reg_FPR(src0),
10397               FMul_ST_reg(src1),
10398               FAddP_reg_ST(src2) );
10399   ins_pipe( fpu_reg_reg_reg );
10400 %}
10401 
10402 // MACRO4 -- divFPR subFPR
10403 // This instruction does not round to 24-bits
10404 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10405   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10406   match(Set dst (DivF (SubF src2 src1) src3));
10407 
10408   format %{ "FLD    $src2   ===MACRO4===\n\t"
10409             "FSUB   ST,$src1\n\t"
10410             "FDIV   ST,$src3\n\t"
10411             "FSTP  $dst" %}
10412   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10413   ins_encode( Push_Reg_FPR(src2),
10414               subFPR_divFPR_encode(src1,src3),
10415               Pop_Reg_FPR(dst) );
10416   ins_pipe( fpu_reg_reg_reg_reg );
10417 %}
10418 
10419 // Spill to obtain 24-bit precision
10420 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10421   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10422   match(Set dst (DivF src1 src2));
10423 
10424   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10426   ins_encode( Push_Reg_FPR(src1),
10427               OpcReg_FPR(src2),
10428               Pop_Mem_FPR(dst) );
10429   ins_pipe( fpu_mem_reg_reg );
10430 %}
10431 //
10432 // This instruction does not round to 24-bits
10433 instruct divFPR_reg(regFPR dst, regFPR src) %{
10434   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10435   match(Set dst (DivF dst src));
10436 
10437   format %{ "FDIV   $dst,$src" %}
10438   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10439   ins_encode( Push_Reg_FPR(src),
10440               OpcP, RegOpc(dst) );
10441   ins_pipe( fpu_reg_reg );
10442 %}
10443 
10444 
10445 // Spill to obtain 24-bit precision
10446 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10447   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10448   match(Set dst (ModF src1 src2));
10449   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10450 
10451   format %{ "FMOD   $dst,$src1,$src2" %}
10452   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10453               emitModDPR(),
10454               Push_Result_Mod_DPR(src2),
10455               Pop_Mem_FPR(dst));
10456   ins_pipe( pipe_slow );
10457 %}
10458 //
10459 // This instruction does not round to 24-bits
10460 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10461   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10462   match(Set dst (ModF dst src));
10463   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10464 
10465   format %{ "FMOD   $dst,$src" %}
10466   ins_encode(Push_Reg_Mod_DPR(dst, src),
10467               emitModDPR(),
10468               Push_Result_Mod_DPR(src),
10469               Pop_Reg_FPR(dst));
10470   ins_pipe( pipe_slow );
10471 %}
10472 
10473 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10474   predicate(UseSSE>=1);
10475   match(Set dst (ModF src0 src1));
10476   effect(KILL rax, KILL cr);
10477   format %{ "SUB    ESP,4\t # FMOD\n"
10478           "\tMOVSS  [ESP+0],$src1\n"
10479           "\tFLD_S  [ESP+0]\n"
10480           "\tMOVSS  [ESP+0],$src0\n"
10481           "\tFLD_S  [ESP+0]\n"
10482      "loop:\tFPREM\n"
10483           "\tFWAIT\n"
10484           "\tFNSTSW AX\n"
10485           "\tSAHF\n"
10486           "\tJP     loop\n"
10487           "\tFSTP_S [ESP+0]\n"
10488           "\tMOVSS  $dst,[ESP+0]\n"
10489           "\tADD    ESP,4\n"
10490           "\tFSTP   ST0\t # Restore FPU Stack"
10491     %}
10492   ins_cost(250);
10493   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10494   ins_pipe( pipe_slow );
10495 %}
10496 
10497 
10498 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10500 
10501 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10502   predicate(UseSSE==0);
10503   match(Set dst (RoundFloat src));
10504   ins_cost(125);
10505   format %{ "FST_S  $dst,$src\t# F-round" %}
10506   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10507   ins_pipe( fpu_mem_reg );
10508 %}
10509 
10510 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10511   predicate(UseSSE<=1);
10512   match(Set dst (RoundDouble src));
10513   ins_cost(125);
10514   format %{ "FST_D  $dst,$src\t# D-round" %}
10515   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10516   ins_pipe( fpu_mem_reg );
10517 %}
10518 
10519 // Force rounding to 24-bit precision and 6-bit exponent
10520 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10521   predicate(UseSSE==0);
10522   match(Set dst (ConvD2F src));
10523   format %{ "FST_S  $dst,$src\t# F-round" %}
10524   expand %{
10525     roundFloat_mem_reg(dst,src);
10526   %}
10527 %}
10528 
10529 // Force rounding to 24-bit precision and 6-bit exponent
10530 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10531   predicate(UseSSE==1);
10532   match(Set dst (ConvD2F src));
10533   effect( KILL cr );
10534   format %{ "SUB    ESP,4\n\t"
10535             "FST_S  [ESP],$src\t# F-round\n\t"
10536             "MOVSS  $dst,[ESP]\n\t"
10537             "ADD ESP,4" %}
10538   ins_encode %{
10539     __ subptr(rsp, 4);
10540     if ($src$$reg != FPR1L_enc) {
10541       __ fld_s($src$$reg-1);
10542       __ fstp_s(Address(rsp, 0));
10543     } else {
10544       __ fst_s(Address(rsp, 0));
10545     }
10546     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10547     __ addptr(rsp, 4);
10548   %}
10549   ins_pipe( pipe_slow );
10550 %}
10551 
10552 // Force rounding double precision to single precision
10553 instruct convD2F_reg(regF dst, regD src) %{
10554   predicate(UseSSE>=2);
10555   match(Set dst (ConvD2F src));
10556   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10557   ins_encode %{
10558     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10559   %}
10560   ins_pipe( pipe_slow );
10561 %}
10562 
10563 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10564   predicate(UseSSE==0);
10565   match(Set dst (ConvF2D src));
10566   format %{ "FST_S  $dst,$src\t# D-round" %}
10567   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10568   ins_pipe( fpu_reg_reg );
10569 %}
10570 
10571 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10572   predicate(UseSSE==1);
10573   match(Set dst (ConvF2D src));
10574   format %{ "FST_D  $dst,$src\t# D-round" %}
10575   expand %{
10576     roundDouble_mem_reg(dst,src);
10577   %}
10578 %}
10579 
10580 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10581   predicate(UseSSE==1);
10582   match(Set dst (ConvF2D src));
10583   effect( KILL cr );
10584   format %{ "SUB    ESP,4\n\t"
10585             "MOVSS  [ESP] $src\n\t"
10586             "FLD_S  [ESP]\n\t"
10587             "ADD    ESP,4\n\t"
10588             "FSTP   $dst\t# D-round" %}
10589   ins_encode %{
10590     __ subptr(rsp, 4);
10591     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10592     __ fld_s(Address(rsp, 0));
10593     __ addptr(rsp, 4);
10594     __ fstp_d($dst$$reg);
10595   %}
10596   ins_pipe( pipe_slow );
10597 %}
10598 
10599 instruct convF2D_reg(regD dst, regF src) %{
10600   predicate(UseSSE>=2);
10601   match(Set dst (ConvF2D src));
10602   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10603   ins_encode %{
10604     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10605   %}
10606   ins_pipe( pipe_slow );
10607 %}
10608 
10609 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10610 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10611   predicate(UseSSE<=1);
10612   match(Set dst (ConvD2I src));
10613   effect( KILL tmp, KILL cr );
10614   format %{ "FLD    $src\t# Convert double to int \n\t"
10615             "FLDCW  trunc mode\n\t"
10616             "SUB    ESP,4\n\t"
10617             "FISTp  [ESP + #0]\n\t"
10618             "FLDCW  std/24-bit mode\n\t"
10619             "POP    EAX\n\t"
10620             "CMP    EAX,0x80000000\n\t"
10621             "JNE,s  fast\n\t"
10622             "FLD_D  $src\n\t"
10623             "CALL   d2i_wrapper\n"
10624       "fast:" %}
10625   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10626   ins_pipe( pipe_slow );
10627 %}
10628 
10629 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10630 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10631   predicate(UseSSE>=2);
10632   match(Set dst (ConvD2I src));
10633   effect( KILL tmp, KILL cr );
10634   format %{ "CVTTSD2SI $dst, $src\n\t"
10635             "CMP    $dst,0x80000000\n\t"
10636             "JNE,s  fast\n\t"
10637             "SUB    ESP, 8\n\t"
10638             "MOVSD  [ESP], $src\n\t"
10639             "FLD_D  [ESP]\n\t"
10640             "ADD    ESP, 8\n\t"
10641             "CALL   d2i_wrapper\n"
10642       "fast:" %}
10643   ins_encode %{
10644     Label fast;
10645     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10646     __ cmpl($dst$$Register, 0x80000000);
10647     __ jccb(Assembler::notEqual, fast);
10648     __ subptr(rsp, 8);
10649     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10650     __ fld_d(Address(rsp, 0));
10651     __ addptr(rsp, 8);
10652     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10653     __ bind(fast);
10654   %}
10655   ins_pipe( pipe_slow );
10656 %}
10657 
10658 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10659   predicate(UseSSE<=1);
10660   match(Set dst (ConvD2L src));
10661   effect( KILL cr );
10662   format %{ "FLD    $src\t# Convert double to long\n\t"
10663             "FLDCW  trunc mode\n\t"
10664             "SUB    ESP,8\n\t"
10665             "FISTp  [ESP + #0]\n\t"
10666             "FLDCW  std/24-bit mode\n\t"
10667             "POP    EAX\n\t"
10668             "POP    EDX\n\t"
10669             "CMP    EDX,0x80000000\n\t"
10670             "JNE,s  fast\n\t"
10671             "TEST   EAX,EAX\n\t"
10672             "JNE,s  fast\n\t"
10673             "FLD    $src\n\t"
10674             "CALL   d2l_wrapper\n"
10675       "fast:" %}
10676   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10677   ins_pipe( pipe_slow );
10678 %}
10679 
10680 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10681 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10682   predicate (UseSSE>=2);
10683   match(Set dst (ConvD2L src));
10684   effect( KILL cr );
10685   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10686             "MOVSD  [ESP],$src\n\t"
10687             "FLD_D  [ESP]\n\t"
10688             "FLDCW  trunc mode\n\t"
10689             "FISTp  [ESP + #0]\n\t"
10690             "FLDCW  std/24-bit mode\n\t"
10691             "POP    EAX\n\t"
10692             "POP    EDX\n\t"
10693             "CMP    EDX,0x80000000\n\t"
10694             "JNE,s  fast\n\t"
10695             "TEST   EAX,EAX\n\t"
10696             "JNE,s  fast\n\t"
10697             "SUB    ESP,8\n\t"
10698             "MOVSD  [ESP],$src\n\t"
10699             "FLD_D  [ESP]\n\t"
10700             "ADD    ESP,8\n\t"
10701             "CALL   d2l_wrapper\n"
10702       "fast:" %}
10703   ins_encode %{
10704     Label fast;
10705     __ subptr(rsp, 8);
10706     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10707     __ fld_d(Address(rsp, 0));
10708     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10709     __ fistp_d(Address(rsp, 0));
10710     // Restore the rounding mode, mask the exception
10711     if (Compile::current()->in_24_bit_fp_mode()) {
10712       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10713     } else {
10714       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10715     }
10716     // Load the converted long, adjust CPU stack
10717     __ pop(rax);
10718     __ pop(rdx);
10719     __ cmpl(rdx, 0x80000000);
10720     __ jccb(Assembler::notEqual, fast);
10721     __ testl(rax, rax);
10722     __ jccb(Assembler::notEqual, fast);
10723     __ subptr(rsp, 8);
10724     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10725     __ fld_d(Address(rsp, 0));
10726     __ addptr(rsp, 8);
10727     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10728     __ bind(fast);
10729   %}
10730   ins_pipe( pipe_slow );
10731 %}
10732 
10733 // Convert a double to an int.  Java semantics require we do complex
10734 // manglations in the corner cases.  So we set the rounding mode to
10735 // 'zero', store the darned double down as an int, and reset the
10736 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
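// For reference, a hedged C sketch of the standard Java (int)-cast corner
// cases the slow path has to produce (not code from this file):
//
//   #include <stdint.h>
//   int32_t java_d2i(double d) {
//     if (d != d)              return 0;          // NaN -> 0
//     if (d >=  2147483648.0)  return INT32_MAX;  // saturate on overflow
//     if (d <= -2147483648.0)  return INT32_MIN;
//     return (int32_t)d;                          // truncate toward zero
//   }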
10739 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10740   predicate(UseSSE==0);
10741   match(Set dst (ConvF2I src));
10742   effect( KILL tmp, KILL cr );
10743   format %{ "FLD    $src\t# Convert float to int \n\t"
10744             "FLDCW  trunc mode\n\t"
10745             "SUB    ESP,4\n\t"
10746             "FISTp  [ESP + #0]\n\t"
10747             "FLDCW  std/24-bit mode\n\t"
10748             "POP    EAX\n\t"
10749             "CMP    EAX,0x80000000\n\t"
10750             "JNE,s  fast\n\t"
10751             "FLD    $src\n\t"
10752             "CALL   d2i_wrapper\n"
10753       "fast:" %}
10754   // DPR2I_encoding works for FPR2I
10755   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10756   ins_pipe( pipe_slow );
10757 %}
10758 
10759 // Convert a float in xmm to an int reg.
10760 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10761   predicate(UseSSE>=1);
10762   match(Set dst (ConvF2I src));
10763   effect( KILL tmp, KILL cr );
10764   format %{ "CVTTSS2SI $dst, $src\n\t"
10765             "CMP    $dst,0x80000000\n\t"
10766             "JNE,s  fast\n\t"
10767             "SUB    ESP, 4\n\t"
10768             "MOVSS  [ESP], $src\n\t"
10769             "FLD    [ESP]\n\t"
10770             "ADD    ESP, 4\n\t"
10771             "CALL   d2i_wrapper\n"
10772       "fast:" %}
10773   ins_encode %{
10774     Label fast;
10775     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10776     __ cmpl($dst$$Register, 0x80000000);
10777     __ jccb(Assembler::notEqual, fast);
10778     __ subptr(rsp, 4);
10779     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10780     __ fld_s(Address(rsp, 0));
10781     __ addptr(rsp, 4);
10782     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10783     __ bind(fast);
10784   %}
10785   ins_pipe( pipe_slow );
10786 %}
10787 
10788 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10789   predicate(UseSSE==0);
10790   match(Set dst (ConvF2L src));
10791   effect( KILL cr );
10792   format %{ "FLD    $src\t# Convert float to long\n\t"
10793             "FLDCW  trunc mode\n\t"
10794             "SUB    ESP,8\n\t"
10795             "FISTp  [ESP + #0]\n\t"
10796             "FLDCW  std/24-bit mode\n\t"
10797             "POP    EAX\n\t"
10798             "POP    EDX\n\t"
10799             "CMP    EDX,0x80000000\n\t"
10800             "JNE,s  fast\n\t"
10801             "TEST   EAX,EAX\n\t"
10802             "JNE,s  fast\n\t"
10803             "FLD    $src\n\t"
10804             "CALL   d2l_wrapper\n"
10805       "fast:" %}
10806   // DPR2L_encoding works for FPR2L
10807   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10808   ins_pipe( pipe_slow );
10809 %}
10810 
10811 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10812 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10813   predicate (UseSSE>=1);
10814   match(Set dst (ConvF2L src));
10815   effect( KILL cr );
10816   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10817             "MOVSS  [ESP],$src\n\t"
10818             "FLD_S  [ESP]\n\t"
10819             "FLDCW  trunc mode\n\t"
10820             "FISTp  [ESP + #0]\n\t"
10821             "FLDCW  std/24-bit mode\n\t"
10822             "POP    EAX\n\t"
10823             "POP    EDX\n\t"
10824             "CMP    EDX,0x80000000\n\t"
10825             "JNE,s  fast\n\t"
10826             "TEST   EAX,EAX\n\t"
10827             "JNE,s  fast\n\t"
10828             "SUB    ESP,4\t# Convert float to long\n\t"
10829             "MOVSS  [ESP],$src\n\t"
10830             "FLD_S  [ESP]\n\t"
10831             "ADD    ESP,4\n\t"
10832             "CALL   d2l_wrapper\n"
10833       "fast:" %}
10834   ins_encode %{
10835     Label fast;
10836     __ subptr(rsp, 8);
10837     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10838     __ fld_s(Address(rsp, 0));
10839     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10840     __ fistp_d(Address(rsp, 0));
10841     // Restore the rounding mode, mask the exception
10842     if (Compile::current()->in_24_bit_fp_mode()) {
10843       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10844     } else {
10845       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10846     }
10847     // Load the converted long, adjust CPU stack
10848     __ pop(rax);
10849     __ pop(rdx);
10850     __ cmpl(rdx, 0x80000000);
10851     __ jccb(Assembler::notEqual, fast);
10852     __ testl(rax, rax);
10853     __ jccb(Assembler::notEqual, fast);
10854     __ subptr(rsp, 4);
10855     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10856     __ fld_s(Address(rsp, 0));
10857     __ addptr(rsp, 4);
10858     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10859     __ bind(fast);
10860   %}
10861   ins_pipe( pipe_slow );
10862 %}
10863 
10864 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10865   predicate( UseSSE<=1 );
10866   match(Set dst (ConvI2D src));
10867   format %{ "FILD   $src\n\t"
10868             "FSTP   $dst" %}
10869   opcode(0xDB, 0x0);  /* DB /0 */
10870   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10871   ins_pipe( fpu_reg_mem );
10872 %}
10873 
10874 instruct convI2D_reg(regD dst, rRegI src) %{
10875   predicate( UseSSE>=2 && !UseXmmI2D );
10876   match(Set dst (ConvI2D src));
10877   format %{ "CVTSI2SD $dst,$src" %}
10878   ins_encode %{
10879     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10880   %}
10881   ins_pipe( pipe_slow );
10882 %}
10883 
10884 instruct convI2D_mem(regD dst, memory mem) %{
10885   predicate( UseSSE>=2 );
10886   match(Set dst (ConvI2D (LoadI mem)));
10887   format %{ "CVTSI2SD $dst,$mem" %}
10888   ins_encode %{
10889     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10890   %}
10891   ins_pipe( pipe_slow );
10892 %}
10893 
10894 instruct convXI2D_reg(regD dst, rRegI src)
10895 %{
10896   predicate( UseSSE>=2 && UseXmmI2D );
10897   match(Set dst (ConvI2D src));
10898 
10899   format %{ "MOVD  $dst,$src\n\t"
10900             "CVTDQ2PD $dst,$dst\t# i2d" %}
10901   ins_encode %{
10902     __ movdl($dst$$XMMRegister, $src$$Register);
10903     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10904   %}
10905   ins_pipe(pipe_slow); // XXX
10906 %}
10907 
10908 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10909   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10910   match(Set dst (ConvI2D (LoadI mem)));
10911   format %{ "FILD   $mem\n\t"
10912             "FSTP   $dst" %}
10913   opcode(0xDB);      /* DB /0 */
10914   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10915               Pop_Reg_DPR(dst));
10916   ins_pipe( fpu_reg_mem );
10917 %}
10918 
10919 // Convert a byte to a float; no rounding step needed.
10920 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10921   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10922   match(Set dst (ConvI2F src));
10923   format %{ "FILD   $src\n\t"
10924             "FSTP   $dst" %}
10925 
10926   opcode(0xDB, 0x0);  /* DB /0 */
10927   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10928   ins_pipe( fpu_reg_mem );
10929 %}
10930 
10931 // In 24-bit mode, force exponent rounding by storing back out
10932 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10933   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10934   match(Set dst (ConvI2F src));
10935   ins_cost(200);
10936   format %{ "FILD   $src\n\t"
10937             "FSTP_S $dst" %}
10938   opcode(0xDB, 0x0);  /* DB /0 */
10939   ins_encode( Push_Mem_I(src),
10940               Pop_Mem_FPR(dst));
10941   ins_pipe( fpu_mem_mem );
10942 %}
10943 
10944 // In 24-bit mode, force exponent rounding by storing back out
10945 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10946   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10947   match(Set dst (ConvI2F (LoadI mem)));
10948   ins_cost(200);
10949   format %{ "FILD   $mem\n\t"
10950             "FSTP_S $dst" %}
10951   opcode(0xDB);  /* DB /0 */
10952   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10953               Pop_Mem_FPR(dst));
10954   ins_pipe( fpu_mem_mem );
10955 %}
10956 
10957 // This instruction does not round to 24-bits
10958 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10959   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10960   match(Set dst (ConvI2F src));
10961   format %{ "FILD   $src\n\t"
10962             "FSTP   $dst" %}
10963   opcode(0xDB, 0x0);  /* DB /0 */
10964   ins_encode( Push_Mem_I(src),
10965               Pop_Reg_FPR(dst));
10966   ins_pipe( fpu_reg_mem );
10967 %}
10968 
10969 // This instruction does not round to 24-bits
10970 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10971   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10972   match(Set dst (ConvI2F (LoadI mem)));
10973   format %{ "FILD   $mem\n\t"
10974             "FSTP   $dst" %}
10975   opcode(0xDB);      /* DB /0 */
10976   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10977               Pop_Reg_FPR(dst));
10978   ins_pipe( fpu_reg_mem );
10979 %}
10980 
10981 // Convert an int to a float in xmm; no rounding step needed.
10982 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
10984   match(Set dst (ConvI2F src));
10985   format %{ "CVTSI2SS $dst, $src" %}
10986   ins_encode %{
10987     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10988   %}
10989   ins_pipe( pipe_slow );
10990 %}
10991 
10992  instruct convXI2F_reg(regF dst, rRegI src)
10993 %{
10994   predicate( UseSSE>=2 && UseXmmI2F );
10995   match(Set dst (ConvI2F src));
10996 
10997   format %{ "MOVD  $dst,$src\n\t"
10998             "CVTDQ2PS $dst,$dst\t# i2f" %}
10999   ins_encode %{
11000     __ movdl($dst$$XMMRegister, $src$$Register);
11001     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11002   %}
11003   ins_pipe(pipe_slow); // XXX
11004 %}
11005 
11006 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11007   match(Set dst (ConvI2L src));
11008   effect(KILL cr);
11009   ins_cost(375);
11010   format %{ "MOV    $dst.lo,$src\n\t"
11011             "MOV    $dst.hi,$src\n\t"
11012             "SAR    $dst.hi,31" %}
11013   ins_encode(convert_int_long(dst,src));
11014   ins_pipe( ialu_reg_reg_long );
11015 %}
11016 
11017 // Zero-extend convert int to long
11018 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11019   match(Set dst (AndL (ConvI2L src) mask) );
11020   effect( KILL flags );
11021   ins_cost(250);
11022   format %{ "MOV    $dst.lo,$src\n\t"
11023             "XOR    $dst.hi,$dst.hi" %}
11024   opcode(0x33); // XOR
11025   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11026   ins_pipe( ialu_reg_reg_long );
11027 %}
11028 
11029 // Zero-extend long
11030 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11031   match(Set dst (AndL src mask) );
11032   effect( KILL flags );
11033   ins_cost(250);
11034   format %{ "MOV    $dst.lo,$src.lo\n\t"
11035             "XOR    $dst.hi,$dst.hi\n\t" %}
11036   opcode(0x33); // XOR
11037   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11038   ins_pipe( ialu_reg_reg_long );
11039 %}
11040 
11041 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11042   predicate (UseSSE<=1);
11043   match(Set dst (ConvL2D src));
11044   effect( KILL cr );
11045   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11046             "PUSH   $src.lo\n\t"
11047             "FILD   ST,[ESP + #0]\n\t"
11048             "ADD    ESP,8\n\t"
11049             "FSTP_D $dst\t# D-round" %}
11050   opcode(0xDF, 0x5);  /* DF /5 */
11051   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11052   ins_pipe( pipe_slow );
11053 %}
11054 
11055 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11056   predicate (UseSSE>=2);
11057   match(Set dst (ConvL2D src));
11058   effect( KILL cr );
11059   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11060             "PUSH   $src.lo\n\t"
11061             "FILD_D [ESP]\n\t"
11062             "FSTP_D [ESP]\n\t"
11063             "MOVSD  $dst,[ESP]\n\t"
11064             "ADD    ESP,8" %}
11065   opcode(0xDF, 0x5);  /* DF /5 */
11066   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11067   ins_pipe( pipe_slow );
11068 %}
11069 
11070 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11071   predicate (UseSSE>=1);
11072   match(Set dst (ConvL2F src));
11073   effect( KILL cr );
11074   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11075             "PUSH   $src.lo\n\t"
11076             "FILD_D [ESP]\n\t"
11077             "FSTP_S [ESP]\n\t"
11078             "MOVSS  $dst,[ESP]\n\t"
11079             "ADD    ESP,8" %}
11080   opcode(0xDF, 0x5);  /* DF /5 */
11081   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11082   ins_pipe( pipe_slow );
11083 %}
11084 
11085 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11086   match(Set dst (ConvL2F src));
11087   effect( KILL cr );
11088   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11089             "PUSH   $src.lo\n\t"
11090             "FILD   ST,[ESP + #0]\n\t"
11091             "ADD    ESP,8\n\t"
11092             "FSTP_S $dst\t# F-round" %}
11093   opcode(0xDF, 0x5);  /* DF /5 */
11094   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11095   ins_pipe( pipe_slow );
11096 %}
11097 
11098 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11099   match(Set dst (ConvL2I src));
11100   effect( DEF dst, USE src );
11101   format %{ "MOV    $dst,$src.lo" %}
11102   ins_encode(enc_CopyL_Lo(dst,src));
11103   ins_pipe( ialu_reg_reg );
11104 %}
11105 
11106 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11107   match(Set dst (MoveF2I src));
11108   effect( DEF dst, USE src );
11109   ins_cost(100);
11110   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11111   ins_encode %{
11112     __ movl($dst$$Register, Address(rsp, $src$$disp));
11113   %}
11114   ins_pipe( ialu_reg_mem );
11115 %}
11116 
11117 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11118   predicate(UseSSE==0);
11119   match(Set dst (MoveF2I src));
11120   effect( DEF dst, USE src );
11121 
11122   ins_cost(125);
11123   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11124   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11125   ins_pipe( fpu_mem_reg );
11126 %}
11127 
11128 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11129   predicate(UseSSE>=1);
11130   match(Set dst (MoveF2I src));
11131   effect( DEF dst, USE src );
11132 
11133   ins_cost(95);
11134   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11135   ins_encode %{
11136     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11137   %}
11138   ins_pipe( pipe_slow );
11139 %}
11140 
11141 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11142   predicate(UseSSE>=2);
11143   match(Set dst (MoveF2I src));
11144   effect( DEF dst, USE src );
11145   ins_cost(85);
11146   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11147   ins_encode %{
11148     __ movdl($dst$$Register, $src$$XMMRegister);
11149   %}
11150   ins_pipe( pipe_slow );
11151 %}
11152 
11153 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11154   match(Set dst (MoveI2F src));
11155   effect( DEF dst, USE src );
11156 
11157   ins_cost(100);
11158   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11159   ins_encode %{
11160     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11161   %}
11162   ins_pipe( ialu_mem_reg );
11163 %}
11164 
11165 
11166 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11167   predicate(UseSSE==0);
11168   match(Set dst (MoveI2F src));
11169   effect(DEF dst, USE src);
11170 
11171   ins_cost(125);
11172   format %{ "FLD_S  $src\n\t"
11173             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11174   opcode(0xD9);               /* D9 /0, FLD m32real */
11175   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11176               Pop_Reg_FPR(dst) );
11177   ins_pipe( fpu_reg_mem );
11178 %}
11179 
11180 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11181   predicate(UseSSE>=1);
11182   match(Set dst (MoveI2F src));
11183   effect( DEF dst, USE src );
11184 
11185   ins_cost(95);
11186   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11187   ins_encode %{
11188     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11189   %}
11190   ins_pipe( pipe_slow );
11191 %}
11192 
11193 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11194   predicate(UseSSE>=2);
11195   match(Set dst (MoveI2F src));
11196   effect( DEF dst, USE src );
11197 
11198   ins_cost(85);
11199   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11200   ins_encode %{
11201     __ movdl($dst$$XMMRegister, $src$$Register);
11202   %}
11203   ins_pipe( pipe_slow );
11204 %}
11205 
11206 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11207   match(Set dst (MoveD2L src));
11208   effect(DEF dst, USE src);
11209 
11210   ins_cost(250);
11211   format %{ "MOV    $dst.lo,$src\n\t"
11212             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11213   opcode(0x8B, 0x8B);
11214   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11215   ins_pipe( ialu_mem_long_reg );
11216 %}
11217 
11218 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11219   predicate(UseSSE<=1);
11220   match(Set dst (MoveD2L src));
11221   effect(DEF dst, USE src);
11222 
11223   ins_cost(125);
11224   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11225   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11226   ins_pipe( fpu_mem_reg );
11227 %}
11228 
11229 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11230   predicate(UseSSE>=2);
11231   match(Set dst (MoveD2L src));
11232   effect(DEF dst, USE src);
11233   ins_cost(95);
11234   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11235   ins_encode %{
11236     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11237   %}
11238   ins_pipe( pipe_slow );
11239 %}
11240 
11241 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11242   predicate(UseSSE>=2);
11243   match(Set dst (MoveD2L src));
11244   effect(DEF dst, USE src, TEMP tmp);
11245   ins_cost(85);
11246   format %{ "MOVD   $dst.lo,$src\n\t"
11247             "PSHUFLW $tmp,$src,0x4E\n\t"
11248             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11249   ins_encode %{
11250     __ movdl($dst$$Register, $src$$XMMRegister);
11251     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11252     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11253   %}
11254   ins_pipe( pipe_slow );
11255 %}
11256 
11257 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11258   match(Set dst (MoveL2D src));
11259   effect(DEF dst, USE src);
11260 
11261   ins_cost(200);
11262   format %{ "MOV    $dst,$src.lo\n\t"
11263             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11264   opcode(0x89, 0x89);
11265   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11266   ins_pipe( ialu_mem_long_reg );
11267 %}
11268 
11269 
11270 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11271   predicate(UseSSE<=1);
11272   match(Set dst (MoveL2D src));
11273   effect(DEF dst, USE src);
11274   ins_cost(125);
11275 
11276   format %{ "FLD_D  $src\n\t"
11277             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11278   opcode(0xDD);               /* DD /0, FLD m64real */
11279   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11280               Pop_Reg_DPR(dst) );
11281   ins_pipe( fpu_reg_mem );
11282 %}
11283 
11284 
11285 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11286   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11287   match(Set dst (MoveL2D src));
11288   effect(DEF dst, USE src);
11289 
11290   ins_cost(95);
11291   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11292   ins_encode %{
11293     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11294   %}
11295   ins_pipe( pipe_slow );
11296 %}
11297 
11298 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11299   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11300   match(Set dst (MoveL2D src));
11301   effect(DEF dst, USE src);
11302 
11303   ins_cost(95);
11304   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11305   ins_encode %{
11306     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11307   %}
11308   ins_pipe( pipe_slow );
11309 %}
11310 
11311 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11312   predicate(UseSSE>=2);
11313   match(Set dst (MoveL2D src));
11314   effect(TEMP dst, USE src, TEMP tmp);
11315   ins_cost(85);
11316   format %{ "MOVD   $dst,$src.lo\n\t"
11317             "MOVD   $tmp,$src.hi\n\t"
11318             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11319   ins_encode %{
11320     __ movdl($dst$$XMMRegister, $src$$Register);
11321     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11322     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11323   %}
11324   ins_pipe( pipe_slow );
11325 %}
11326 
11327 
11328 // =======================================================================
11329 // fast clearing of an array
11330 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11331   predicate(!((ClearArrayNode*)n)->is_large());
11332   match(Set dummy (ClearArray cnt base));
11333   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11334 
11335   format %{ $$template
11336     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11337     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11338     $$emit$$"JG     LARGE\n\t"
11339     $$emit$$"SHL    ECX, 1\n\t"
11340     $$emit$$"DEC    ECX\n\t"
11341     $$emit$$"JS     DONE\t# Zero length\n\t"
11342     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11343     $$emit$$"DEC    ECX\n\t"
11344     $$emit$$"JGE    LOOP\n\t"
11345     $$emit$$"JMP    DONE\n\t"
11346     $$emit$$"# LARGE:\n\t"
11347     if (UseFastStosb) {
11348        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11349        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11350     } else {
11351        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11352        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11353     }
11354     $$emit$$"# DONE"
11355   %}
11356   ins_encode %{
11357     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11358   %}
11359   ins_pipe( pipe_slow );
11360 %}
11361 
11362 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11363   predicate(((ClearArrayNode*)n)->is_large());
11364   match(Set dummy (ClearArray cnt base));
11365   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11366   format %{ $$template
11367     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11368     if (UseFastStosb) {
11369        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11370        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11371     } else {
11372        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11373        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11374     }
11375     $$emit$$"# DONE"
11376   %}
11377   ins_encode %{
11378     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11379   %}
11380   ins_pipe( pipe_slow );
11381 %}
11382 
11383 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11384                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11385   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11386   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11387   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11388 
11389   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11390   ins_encode %{
11391     __ string_compare($str1$$Register, $str2$$Register,
11392                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11393                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11394   %}
11395   ins_pipe( pipe_slow );
11396 %}
11397 
11398 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11399                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11400   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11401   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11402   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11403 
11404   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11405   ins_encode %{
11406     __ string_compare($str1$$Register, $str2$$Register,
11407                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11408                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11409   %}
11410   ins_pipe( pipe_slow );
11411 %}
11412 
11413 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11414                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11415   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11416   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11417   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11418 
11419   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11420   ins_encode %{
11421     __ string_compare($str1$$Register, $str2$$Register,
11422                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11423                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11429                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11430   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11431   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11432   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11433 
11434   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11435   ins_encode %{
11436     __ string_compare($str2$$Register, $str1$$Register,
11437                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11438                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11439   %}
11440   ins_pipe( pipe_slow );
11441 %}
11442 
11443 // fast string equals
11444 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11445                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11446   match(Set result (StrEquals (Binary str1 str2) cnt));
11447   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11448 
11449   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11450   ins_encode %{
11451     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11452                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11453                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11454   %}
11455 
11456   ins_pipe( pipe_slow );
11457 %}
11458 
11459 // fast search of substring with known size.
11460 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11461                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11462   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11463   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11464   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11465 
11466   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11467   ins_encode %{
11468     int icnt2 = (int)$int_cnt2$$constant;
11469     if (icnt2 >= 16) {
11470       // IndexOf for constant substrings with size >= 16 elements
11471       // which don't need to be loaded through stack.
11472       __ string_indexofC8($str1$$Register, $str2$$Register,
11473                           $cnt1$$Register, $cnt2$$Register,
11474                           icnt2, $result$$Register,
11475                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11476     } else {
11477       // Small strings are loaded through stack if they cross page boundary.
11478       __ string_indexof($str1$$Register, $str2$$Register,
11479                         $cnt1$$Register, $cnt2$$Register,
11480                         icnt2, $result$$Register,
11481                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11482     }
11483   %}
11484   ins_pipe( pipe_slow );
11485 %}
11486 
11487 // fast search of substring with known size.
11488 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11489                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11490   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11491   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11492   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11493 
11494   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11495   ins_encode %{
11496     int icnt2 = (int)$int_cnt2$$constant;
11497     if (icnt2 >= 8) {
11498       // IndexOf for constant substrings with size >= 8 elements
11499       // which don't need to be loaded through stack.
11500       __ string_indexofC8($str1$$Register, $str2$$Register,
11501                           $cnt1$$Register, $cnt2$$Register,
11502                           icnt2, $result$$Register,
11503                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11504     } else {
11505       // Small strings are loaded through stack if they cross page boundary.
11506       __ string_indexof($str1$$Register, $str2$$Register,
11507                         $cnt1$$Register, $cnt2$$Register,
11508                         icnt2, $result$$Register,
11509                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11510     }
11511   %}
11512   ins_pipe( pipe_slow );
11513 %}
11514 
11515 // fast search of substring with known size.
11516 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11517                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11518   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11519   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11520   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11521 
11522   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11523   ins_encode %{
11524     int icnt2 = (int)$int_cnt2$$constant;
11525     if (icnt2 >= 8) {
11526       // IndexOf for constant substrings with size >= 8 elements
11527       // which don't need to be loaded through stack.
11528       __ string_indexofC8($str1$$Register, $str2$$Register,
11529                           $cnt1$$Register, $cnt2$$Register,
11530                           icnt2, $result$$Register,
11531                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11532     } else {
11533       // Small strings are loaded through stack if they cross page boundary.
11534       __ string_indexof($str1$$Register, $str2$$Register,
11535                         $cnt1$$Register, $cnt2$$Register,
11536                         icnt2, $result$$Register,
11537                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11538     }
11539   %}
11540   ins_pipe( pipe_slow );
11541 %}
11542 
11543 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11544                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11545   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11546   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11547   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11548 
11549   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11550   ins_encode %{
11551     __ string_indexof($str1$$Register, $str2$$Register,
11552                       $cnt1$$Register, $cnt2$$Register,
11553                       (-1), $result$$Register,
11554                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11555   %}
11556   ins_pipe( pipe_slow );
11557 %}
11558 
11559 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11560                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11561   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11562   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11563   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11564 
11565   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11566   ins_encode %{
11567     __ string_indexof($str1$$Register, $str2$$Register,
11568                       $cnt1$$Register, $cnt2$$Register,
11569                       (-1), $result$$Register,
11570                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11571   %}
11572   ins_pipe( pipe_slow );
11573 %}
11574 
11575 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11576                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11577   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11578   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11579   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11580 
11581   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11582   ins_encode %{
11583     __ string_indexof($str1$$Register, $str2$$Register,
11584                       $cnt1$$Register, $cnt2$$Register,
11585                       (-1), $result$$Register,
11586                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11587   %}
11588   ins_pipe( pipe_slow );
11589 %}
11590 
11591 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11592                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11593   predicate(UseSSE42Intrinsics);
11594   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11595   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11596   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11597   ins_encode %{
11598     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11599                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11600   %}
11601   ins_pipe( pipe_slow );
11602 %}
11603 
11604 // fast array equals
11605 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11606                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11607 %{
11608   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11609   match(Set result (AryEq ary1 ary2));
11610   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11611   //ins_cost(300);
11612 
11613   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11614   ins_encode %{
11615     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11616                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11617                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11618   %}
11619   ins_pipe( pipe_slow );
11620 %}
11621 
11622 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11623                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11624 %{
11625   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11626   match(Set result (AryEq ary1 ary2));
11627   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11628   //ins_cost(300);
11629 
11630   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11631   ins_encode %{
11632     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11633                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11634                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11635   %}
11636   ins_pipe( pipe_slow );
11637 %}
11638 
11639 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11640                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11641 %{
11642   match(Set result (HasNegatives ary1 len));
11643   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11644 
11645   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11646   ins_encode %{
11647     __ has_negatives($ary1$$Register, $len$$Register,
11648                      $result$$Register, $tmp3$$Register,
11649                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11650   %}
11651   ins_pipe( pipe_slow );
11652 %}
11653 
11654 // fast char[] to byte[] compression
11655 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11656                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11657   match(Set result (StrCompressedCopy src (Binary dst len)));
11658   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11659 
11660   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11661   ins_encode %{
11662     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11663                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11664                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11665   %}
11666   ins_pipe( pipe_slow );
11667 %}
11668 
11669 // fast byte[] to char[] inflation
11670 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11671                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11672   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11673   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11674 
11675   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11676   ins_encode %{
11677     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11678                           $tmp1$$XMMRegister, $tmp2$$Register);
11679   %}
11680   ins_pipe( pipe_slow );
11681 %}
11682 
11683 // encode char[] to byte[] in ISO_8859_1
11684 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11685                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11686                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11687   match(Set result (EncodeISOArray src (Binary dst len)));
11688   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11689 
11690   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11691   ins_encode %{
11692     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11693                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11694                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11695   %}
11696   ins_pipe( pipe_slow );
11697 %}
11698 
11699 
11700 //----------Control Flow Instructions------------------------------------------
11701 // Signed compare Instructions
11702 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11703   match(Set cr (CmpI op1 op2));
11704   effect( DEF cr, USE op1, USE op2 );
11705   format %{ "CMP    $op1,$op2" %}
11706   opcode(0x3B);  /* Opcode 3B /r */
11707   ins_encode( OpcP, RegReg( op1, op2) );
11708   ins_pipe( ialu_cr_reg_reg );
11709 %}
11710 
11711 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11712   match(Set cr (CmpI op1 op2));
11713   effect( DEF cr, USE op1 );
11714   format %{ "CMP    $op1,$op2" %}
11715   opcode(0x81,0x07);  /* Opcode 81 /7 */
11716   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11717   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11718   ins_pipe( ialu_cr_reg_imm );
11719 %}
11720 
11721 // Cisc-spilled version of cmpI_eReg
11722 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11723   match(Set cr (CmpI op1 (LoadI op2)));
11724 
11725   format %{ "CMP    $op1,$op2" %}
11726   ins_cost(500);
11727   opcode(0x3B);  /* Opcode 3B /r */
11728   ins_encode( OpcP, RegMem( op1, op2) );
11729   ins_pipe( ialu_cr_reg_mem );
11730 %}
11731 
11732 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11733   match(Set cr (CmpI src zero));
11734   effect( DEF cr, USE src );
11735 
11736   format %{ "TEST   $src,$src" %}
11737   opcode(0x85);
11738   ins_encode( OpcP, RegReg( src, src ) );
11739   ins_pipe( ialu_cr_reg_imm );
11740 %}
11741 
11742 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11743   match(Set cr (CmpI (AndI src con) zero));
11744 
11745   format %{ "TEST   $src,$con" %}
11746   opcode(0xF7,0x00);
11747   ins_encode( OpcP, RegOpc(src), Con32(con) );
11748   ins_pipe( ialu_cr_reg_imm );
11749 %}
11750 
11751 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11752   match(Set cr (CmpI (AndI src mem) zero));
11753 
11754   format %{ "TEST   $src,$mem" %}
11755   opcode(0x85);
11756   ins_encode( OpcP, RegMem( src, mem ) );
11757   ins_pipe( ialu_cr_reg_mem );
11758 %}
11759 
11760 // Unsigned compare Instructions; really, same as signed except they
11761 // produce an eFlagsRegU instead of eFlagsReg.
11762 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11763   match(Set cr (CmpU op1 op2));
11764 
11765   format %{ "CMPu   $op1,$op2" %}
11766   opcode(0x3B);  /* Opcode 3B /r */
11767   ins_encode( OpcP, RegReg( op1, op2) );
11768   ins_pipe( ialu_cr_reg_reg );
11769 %}
11770 
11771 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11772   match(Set cr (CmpU op1 op2));
11773 
11774   format %{ "CMPu   $op1,$op2" %}
11775   opcode(0x81,0x07);  /* Opcode 81 /7 */
11776   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11777   ins_pipe( ialu_cr_reg_imm );
11778 %}
11779 
// Cisc-spilled version of cmpU_eReg
11781 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11782   match(Set cr (CmpU op1 (LoadI op2)));
11783 
11784   format %{ "CMPu   $op1,$op2" %}
11785   ins_cost(500);
11786   opcode(0x3B);  /* Opcode 3B /r */
11787   ins_encode( OpcP, RegMem( op1, op2) );
11788   ins_pipe( ialu_cr_reg_mem );
11789 %}
11790 
11791 // // Cisc-spilled version of cmpU_eReg
11792 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11793 //  match(Set cr (CmpU (LoadI op1) op2));
11794 //
11795 //  format %{ "CMPu   $op1,$op2" %}
11796 //  ins_cost(500);
11797 //  opcode(0x39);  /* Opcode 39 /r */
11798 //  ins_encode( OpcP, RegMem( op1, op2) );
11799 //%}
11800 
11801 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11802   match(Set cr (CmpU src zero));
11803 
11804   format %{ "TESTu  $src,$src" %}
11805   opcode(0x85);
11806   ins_encode( OpcP, RegReg( src, src ) );
11807   ins_pipe( ialu_cr_reg_imm );
11808 %}
11809 
11810 // Unsigned pointer compare Instructions
11811 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11812   match(Set cr (CmpP op1 op2));
11813 
11814   format %{ "CMPu   $op1,$op2" %}
11815   opcode(0x3B);  /* Opcode 3B /r */
11816   ins_encode( OpcP, RegReg( op1, op2) );
11817   ins_pipe( ialu_cr_reg_reg );
11818 %}
11819 
11820 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11821   match(Set cr (CmpP op1 op2));
11822 
11823   format %{ "CMPu   $op1,$op2" %}
11824   opcode(0x81,0x07);  /* Opcode 81 /7 */
11825   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11826   ins_pipe( ialu_cr_reg_imm );
11827 %}
11828 
// Cisc-spilled version of cmpP_eReg
11830 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11831   match(Set cr (CmpP op1 (LoadP op2)));
11832 
11833   format %{ "CMPu   $op1,$op2" %}
11834   ins_cost(500);
11835   opcode(0x3B);  /* Opcode 3B /r */
11836   ins_encode( OpcP, RegMem( op1, op2) );
11837   ins_pipe( ialu_cr_reg_mem );
11838 %}
11839 
11840 // // Cisc-spilled version of cmpP_eReg
11841 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11842 //  match(Set cr (CmpP (LoadP op1) op2));
11843 //
11844 //  format %{ "CMPu   $op1,$op2" %}
11845 //  ins_cost(500);
11846 //  opcode(0x39);  /* Opcode 39 /r */
11847 //  ins_encode( OpcP, RegMem( op1, op2) );
11848 //%}
11849 
11850 // Compare raw pointer (used in out-of-heap check).
11851 // Only works because non-oop pointers must be raw pointers
11852 // and raw pointers have no anti-dependencies.
11853 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11854   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11855   match(Set cr (CmpP op1 (LoadP op2)));
11856 
11857   format %{ "CMPu   $op1,$op2" %}
11858   opcode(0x3B);  /* Opcode 3B /r */
11859   ins_encode( OpcP, RegMem( op1, op2) );
11860   ins_pipe( ialu_cr_reg_mem );
11861 %}
11862 
11863 //
11864 // This will generate a signed flags result. This should be ok
11865 // since any compare to a zero should be eq/neq.
11866 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11867   match(Set cr (CmpP src zero));
11868 
11869   format %{ "TEST   $src,$src" %}
11870   opcode(0x85);
11871   ins_encode( OpcP, RegReg( src, src ) );
11872   ins_pipe( ialu_cr_reg_imm );
11873 %}
11874 
11875 // Cisc-spilled version of testP_reg
11876 // This will generate a signed flags result. This should be ok
11877 // since any compare to a zero should be eq/neq.
11878 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11879   match(Set cr (CmpP (LoadP op) zero));
11880 
11881   format %{ "TEST   $op,0xFFFFFFFF" %}
11882   ins_cost(500);
11883   opcode(0xF7);               /* Opcode F7 /0 */
11884   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11885   ins_pipe( ialu_cr_reg_imm );
11886 %}
11887 
11888 // Yanked all unsigned pointer compare operations.
11889 // Pointer compares are done with CmpP which is already unsigned.
11890 
11891 //----------Max and Min--------------------------------------------------------
11892 // Min Instructions
11893 ////
11894 //   *** Min and Max using the conditional move are slower than the
11895 //   *** branch version on a Pentium III.
11896 // // Conditional move for min
11897 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11898 //  effect( USE_DEF op2, USE op1, USE cr );
11899 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11900 //  opcode(0x4C,0x0F);
11901 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11902 //  ins_pipe( pipe_cmov_reg );
11903 //%}
11904 //
11905 //// Min Register with Register (P6 version)
11906 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11907 //  predicate(VM_Version::supports_cmov() );
11908 //  match(Set op2 (MinI op1 op2));
11909 //  ins_cost(200);
11910 //  expand %{
11911 //    eFlagsReg cr;
11912 //    compI_eReg(cr,op1,op2);
11913 //    cmovI_reg_lt(op2,op1,cr);
11914 //  %}
11915 //%}
11916 
11917 // Min Register with Register (generic version)
11918 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11919   match(Set dst (MinI dst src));
11920   effect(KILL flags);
11921   ins_cost(300);
11922 
11923   format %{ "MIN    $dst,$src" %}
11924   opcode(0xCC);
11925   ins_encode( min_enc(dst,src) );
11926   ins_pipe( pipe_slow );
11927 %}
11928 
11929 // Max Register with Register
11930 //   *** Min and Max using the conditional move are slower than the
11931 //   *** branch version on a Pentium III.
11932 // // Conditional move for max
11933 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11934 //  effect( USE_DEF op2, USE op1, USE cr );
11935 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11936 //  opcode(0x4F,0x0F);
11937 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11938 //  ins_pipe( pipe_cmov_reg );
11939 //%}
11940 //
11941 // // Max Register with Register (P6 version)
11942 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11943 //  predicate(VM_Version::supports_cmov() );
11944 //  match(Set op2 (MaxI op1 op2));
11945 //  ins_cost(200);
11946 //  expand %{
11947 //    eFlagsReg cr;
11948 //    compI_eReg(cr,op1,op2);
11949 //    cmovI_reg_gt(op2,op1,cr);
11950 //  %}
11951 //%}
11952 
11953 // Max Register with Register (generic version)
11954 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11955   match(Set dst (MaxI dst src));
11956   effect(KILL flags);
11957   ins_cost(300);
11958 
11959   format %{ "MAX    $dst,$src" %}
11960   opcode(0xCC);
11961   ins_encode( max_enc(dst,src) );
11962   ins_pipe( pipe_slow );
11963 %}
11964 
11965 // ============================================================================
// Counted-loop limit node, which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range, since counted
// loops have a limit check on overflow.
11969 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11970   match(Set limit (LoopLimit (Binary init limit) stride));
11971   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11972   ins_cost(300);
11973 
11974   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11975   ins_encode %{
11976     int strd = (int)$stride$$constant;
11977     assert(strd != 1 && strd != -1, "sanity");
11978     int m1 = (strd > 0) ? 1 : -1;
11979     // Convert limit to long (EAX:EDX)
11980     __ cdql();
11981     // Convert init to long (init:tmp)
11982     __ movl($tmp$$Register, $init$$Register);
11983     __ sarl($tmp$$Register, 31);
11984     // $limit - $init
11985     __ subl($limit$$Register, $init$$Register);
11986     __ sbbl($limit_hi$$Register, $tmp$$Register);
11987     // + ($stride - 1)
11988     if (strd > 0) {
11989       __ addl($limit$$Register, (strd - 1));
11990       __ adcl($limit_hi$$Register, 0);
11991       __ movl($tmp$$Register, strd);
11992     } else {
11993       __ addl($limit$$Register, (strd + 1));
11994       __ adcl($limit_hi$$Register, -1);
11995       __ lneg($limit_hi$$Register, $limit$$Register);
11996       __ movl($tmp$$Register, -strd);
11997     }
    // signed division: (EAX:EDX) / pos_stride
11999     __ idivl($tmp$$Register);
12000     if (strd < 0) {
12001       // restore sign
12002       __ negl($tmp$$Register);
12003     }
12004     // (EAX) * stride
12005     __ mull($tmp$$Register);
12006     // + init (ignore upper bits)
12007     __ addl($limit$$Register, $init$$Register);
12008   %}
12009   ins_pipe( pipe_slow );
12010 %}
12011 
12012 // ============================================================================
12013 // Branch Instructions
12014 // Jump Table
12015 instruct jumpXtnd(rRegI switch_val) %{
12016   match(Jump switch_val);
12017   ins_cost(350);
12018   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12019   ins_encode %{
12020     // Jump to Address(table_base + switch_reg)
12021     Address index(noreg, $switch_val$$Register, Address::times_1);
12022     __ jump(ArrayAddress($constantaddress, index));
12023   %}
12024   ins_pipe(pipe_jmp);
12025 %}
12026 
12027 // Jump Direct - Label defines a relative address from JMP+1
12028 instruct jmpDir(label labl) %{
12029   match(Goto);
12030   effect(USE labl);
12031 
12032   ins_cost(300);
12033   format %{ "JMP    $labl" %}
12034   size(5);
12035   ins_encode %{
12036     Label* L = $labl$$label;
12037     __ jmp(*L, false); // Always long jump
12038   %}
12039   ins_pipe( pipe_jmp );
12040 %}
12041 
12042 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12043 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12044   match(If cop cr);
12045   effect(USE labl);
12046 
12047   ins_cost(300);
12048   format %{ "J$cop    $labl" %}
12049   size(6);
12050   ins_encode %{
12051     Label* L = $labl$$label;
12052     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12053   %}
12054   ins_pipe( pipe_jcc );
12055 %}
12056 
12057 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12058 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12059   predicate(!n->has_vector_mask_set());
12060   match(CountedLoopEnd cop cr);
12061   effect(USE labl);
12062 
12063   ins_cost(300);
12064   format %{ "J$cop    $labl\t# Loop end" %}
12065   size(6);
12066   ins_encode %{
12067     Label* L = $labl$$label;
12068     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12069   %}
12070   ins_pipe( pipe_jcc );
12071 %}
12072 
12073 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12074 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12075   predicate(!n->has_vector_mask_set());
12076   match(CountedLoopEnd cop cmp);
12077   effect(USE labl);
12078 
12079   ins_cost(300);
12080   format %{ "J$cop,u  $labl\t# Loop end" %}
12081   size(6);
12082   ins_encode %{
12083     Label* L = $labl$$label;
12084     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12085   %}
12086   ins_pipe( pipe_jcc );
12087 %}
12088 
12089 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12090   predicate(!n->has_vector_mask_set());
12091   match(CountedLoopEnd cop cmp);
12092   effect(USE labl);
12093 
12094   ins_cost(200);
12095   format %{ "J$cop,u  $labl\t# Loop end" %}
12096   size(6);
12097   ins_encode %{
12098     Label* L = $labl$$label;
12099     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12100   %}
12101   ins_pipe( pipe_jcc );
12102 %}
12103 
12104 // mask version
12105 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12106 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12107   predicate(n->has_vector_mask_set());
12108   match(CountedLoopEnd cop cr);
12109   effect(USE labl);
12110 
12111   ins_cost(400);
12112   format %{ "J$cop    $labl\t# Loop end\n\t"
12113             "restorevectmask \t# vector mask restore for loops" %}
12114   size(10);
12115   ins_encode %{
12116     Label* L = $labl$$label;
12117     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12118     __ restorevectmask();
12119   %}
12120   ins_pipe( pipe_jcc );
12121 %}
12122 
12123 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12124 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12125   predicate(n->has_vector_mask_set());
12126   match(CountedLoopEnd cop cmp);
12127   effect(USE labl);
12128 
12129   ins_cost(400);
12130   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12131             "restorevectmask \t# vector mask restore for loops" %}
12132   size(10);
12133   ins_encode %{
12134     Label* L = $labl$$label;
12135     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12136     __ restorevectmask();
12137   %}
12138   ins_pipe( pipe_jcc );
12139 %}
12140 
12141 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12142   predicate(n->has_vector_mask_set());
12143   match(CountedLoopEnd cop cmp);
12144   effect(USE labl);
12145 
12146   ins_cost(300);
12147   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12148             "restorevectmask \t# vector mask restore for loops" %}
12149   size(10);
12150   ins_encode %{
12151     Label* L = $labl$$label;
12152     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12153     __ restorevectmask();
12154   %}
12155   ins_pipe( pipe_jcc );
12156 %}
12157 
12158 // Jump Direct Conditional - using unsigned comparison
12159 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12160   match(If cop cmp);
12161   effect(USE labl);
12162 
12163   ins_cost(300);
12164   format %{ "J$cop,u  $labl" %}
12165   size(6);
12166   ins_encode %{
12167     Label* L = $labl$$label;
12168     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12169   %}
12170   ins_pipe(pipe_jcc);
12171 %}
12172 
12173 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12174   match(If cop cmp);
12175   effect(USE labl);
12176 
12177   ins_cost(200);
12178   format %{ "J$cop,u  $labl" %}
12179   size(6);
12180   ins_encode %{
12181     Label* L = $labl$$label;
12182     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12183   %}
12184   ins_pipe(pipe_jcc);
12185 %}
12186 
12187 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12188   match(If cop cmp);
12189   effect(USE labl);
12190 
12191   ins_cost(200);
12192   format %{ $$template
12193     if ($cop$$cmpcode == Assembler::notEqual) {
12194       $$emit$$"JP,u   $labl\n\t"
12195       $$emit$$"J$cop,u   $labl"
12196     } else {
12197       $$emit$$"JP,u   done\n\t"
12198       $$emit$$"J$cop,u   $labl\n\t"
12199       $$emit$$"done:"
12200     }
12201   %}
12202   ins_encode %{
12203     Label* l = $labl$$label;
12204     if ($cop$$cmpcode == Assembler::notEqual) {
12205       __ jcc(Assembler::parity, *l, false);
12206       __ jcc(Assembler::notEqual, *l, false);
12207     } else if ($cop$$cmpcode == Assembler::equal) {
12208       Label done;
12209       __ jccb(Assembler::parity, done);
12210       __ jcc(Assembler::equal, *l, false);
12211       __ bind(done);
12212     } else {
12213        ShouldNotReachHere();
12214     }
12215   %}
12216   ins_pipe(pipe_jcc);
12217 %}
12218 
12219 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superclass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
12224 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12225   match(Set result (PartialSubtypeCheck sub super));
12226   effect( KILL rcx, KILL cr );
12227 
12228   ins_cost(1100);  // slightly larger than the next version
12229   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12230             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12231             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12232             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12233             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12234             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12235             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12236      "miss:\t" %}
12237 
12238   opcode(0x1); // Force a XOR of EDI
12239   ins_encode( enc_PartialSubtypeCheck() );
12240   ins_pipe( pipe_slow );
12241 %}
12242 
12243 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12244   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12245   effect( KILL rcx, KILL result );
12246 
12247   ins_cost(1000);
12248   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12249             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12250             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12251             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12252             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12253             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12254      "miss:\t" %}
12255 
12256   opcode(0x0);  // No need to XOR EDI
12257   ins_encode( enc_PartialSubtypeCheck() );
12258   ins_pipe( pipe_slow );
12259 %}
12260 
12261 // ============================================================================
12262 // Branch Instructions -- short offset versions
12263 //
12264 // These instructions are used to replace jumps of a long offset (the default
12265 // match) with jumps of a shorter offset.  These instructions are all tagged
12266 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12267 // match rules in general matching.  Instead, the ADLC generates a conversion
12268 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// shorter variant can be used via the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
12272 
12273 // Jump Direct - Label defines a relative address from JMP+1
12274 instruct jmpDir_short(label labl) %{
12275   match(Goto);
12276   effect(USE labl);
12277 
12278   ins_cost(300);
12279   format %{ "JMP,s  $labl" %}
12280   size(2);
12281   ins_encode %{
12282     Label* L = $labl$$label;
12283     __ jmpb(*L);
12284   %}
12285   ins_pipe( pipe_jmp );
12286   ins_short_branch(1);
12287 %}
12288 
12289 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12290 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12291   match(If cop cr);
12292   effect(USE labl);
12293 
12294   ins_cost(300);
12295   format %{ "J$cop,s  $labl" %}
12296   size(2);
12297   ins_encode %{
12298     Label* L = $labl$$label;
12299     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12300   %}
12301   ins_pipe( pipe_jcc );
12302   ins_short_branch(1);
12303 %}
12304 
12305 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12306 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12307   match(CountedLoopEnd cop cr);
12308   effect(USE labl);
12309 
12310   ins_cost(300);
12311   format %{ "J$cop,s  $labl\t# Loop end" %}
12312   size(2);
12313   ins_encode %{
12314     Label* L = $labl$$label;
12315     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12316   %}
12317   ins_pipe( pipe_jcc );
12318   ins_short_branch(1);
12319 %}
12320 
12321 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12322 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12323   match(CountedLoopEnd cop cmp);
12324   effect(USE labl);
12325 
12326   ins_cost(300);
12327   format %{ "J$cop,us $labl\t# Loop end" %}
12328   size(2);
12329   ins_encode %{
12330     Label* L = $labl$$label;
12331     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12332   %}
12333   ins_pipe( pipe_jcc );
12334   ins_short_branch(1);
12335 %}
12336 
12337 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12338   match(CountedLoopEnd cop cmp);
12339   effect(USE labl);
12340 
12341   ins_cost(300);
12342   format %{ "J$cop,us $labl\t# Loop end" %}
12343   size(2);
12344   ins_encode %{
12345     Label* L = $labl$$label;
12346     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12347   %}
12348   ins_pipe( pipe_jcc );
12349   ins_short_branch(1);
12350 %}
12351 
12352 // Jump Direct Conditional - using unsigned comparison
12353 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12354   match(If cop cmp);
12355   effect(USE labl);
12356 
12357   ins_cost(300);
12358   format %{ "J$cop,us $labl" %}
12359   size(2);
12360   ins_encode %{
12361     Label* L = $labl$$label;
12362     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12363   %}
12364   ins_pipe( pipe_jcc );
12365   ins_short_branch(1);
12366 %}
12367 
12368 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12369   match(If cop cmp);
12370   effect(USE labl);
12371 
12372   ins_cost(300);
12373   format %{ "J$cop,us $labl" %}
12374   size(2);
12375   ins_encode %{
12376     Label* L = $labl$$label;
12377     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12378   %}
12379   ins_pipe( pipe_jcc );
12380   ins_short_branch(1);
12381 %}
12382 
12383 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12384   match(If cop cmp);
12385   effect(USE labl);
12386 
12387   ins_cost(300);
12388   format %{ $$template
12389     if ($cop$$cmpcode == Assembler::notEqual) {
12390       $$emit$$"JP,u,s   $labl\n\t"
12391       $$emit$$"J$cop,u,s   $labl"
12392     } else {
12393       $$emit$$"JP,u,s   done\n\t"
12394       $$emit$$"J$cop,u,s  $labl\n\t"
12395       $$emit$$"done:"
12396     }
12397   %}
12398   size(4);
12399   ins_encode %{
12400     Label* l = $labl$$label;
12401     if ($cop$$cmpcode == Assembler::notEqual) {
12402       __ jccb(Assembler::parity, *l);
12403       __ jccb(Assembler::notEqual, *l);
12404     } else if ($cop$$cmpcode == Assembler::equal) {
12405       Label done;
12406       __ jccb(Assembler::parity, done);
12407       __ jccb(Assembler::equal, *l);
12408       __ bind(done);
12409     } else {
12410        ShouldNotReachHere();
12411     }
12412   %}
12413   ins_pipe(pipe_jcc);
12414   ins_short_branch(1);
12415 %}
12416 
12417 // ============================================================================
12418 // Long Compare
12419 //
12420 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12421 // is tricky.  The flavor of compare used depends on whether we are testing
12422 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12423 // The GE test is the negated LT test.  The LE test can be had by commuting
12424 // the operands (yielding a GE test) and then negating; negate again for the
12425 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12426 // NE test is negated from that.
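
// A minimal C++ sketch (illustrative only) of the two cheap cases described
// above: an LT-against-zero test needs only the sign bit of the high half,
// and an EQ test needs only to know whether any bit differs, which ORing the
// XORed halves answers:
//
//   #include <cstdint>
//
//   bool long_lt_zero(int32_t x_hi, uint32_t x_lo) {   // TEST hi,hi; branch on sign
//     (void)x_lo;                                      // low half is irrelevant for the sign
//     return x_hi < 0;
//   }
//
//   bool long_eq(uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo) {
//     return ((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0;     // any differing bit => not equal
//   }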
12427 
12428 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12429 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12430 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12431 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12432 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12433 // foo match ends up with the wrong leaf.  One fix is to not match both
12434 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12435 // both forms beat the trinary form of long-compare and both are very useful
12436 // on Intel which has so few registers.
12437 
12438 // Manifest a CmpL result in an integer register.  Very painful.
12439 // This is the test to avoid.
12440 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12441   match(Set dst (CmpL3 src1 src2));
12442   effect( KILL flags );
12443   ins_cost(1000);
12444   format %{ "XOR    $dst,$dst\n\t"
12445             "CMP    $src1.hi,$src2.hi\n\t"
12446             "JLT,s  m_one\n\t"
12447             "JGT,s  p_one\n\t"
12448             "CMP    $src1.lo,$src2.lo\n\t"
12449             "JB,s   m_one\n\t"
12450             "JEQ,s  done\n"
12451     "p_one:\tINC    $dst\n\t"
12452             "JMP,s  done\n"
12453     "m_one:\tDEC    $dst\n"
12454      "done:" %}
12455   ins_encode %{
12456     Label p_one, m_one, done;
12457     __ xorptr($dst$$Register, $dst$$Register);
12458     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12459     __ jccb(Assembler::less,    m_one);
12460     __ jccb(Assembler::greater, p_one);
12461     __ cmpl($src1$$Register, $src2$$Register);
12462     __ jccb(Assembler::below,   m_one);
12463     __ jccb(Assembler::equal,   done);
12464     __ bind(p_one);
12465     __ incrementl($dst$$Register);
12466     __ jmpb(done);
12467     __ bind(m_one);
12468     __ decrementl($dst$$Register);
12469     __ bind(done);
12470   %}
12471   ins_pipe( pipe_slow );
12472 %}
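
// For reference (comment only, not a rule): the instruction above implements
// the usual three-way compare contract.  A C sketch of the same result, using
// hypothetical names, would be:
//
//   int cmpL3(int64_t x, int64_t y) {
//     if (x < y) return -1;   // the "m_one" path (DEC of the zeroed dst)
//     if (x > y) return  1;   // the "p_one" path (INC of the zeroed dst)
//     return 0;               // "done" with dst still zero
//   }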
12473 
12474 //======
12475 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12476 // compares.  Can be used for LE or GT compares by reversing arguments.
12477 // NOT GOOD FOR EQ/NE tests.
12478 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12479   match( Set flags (CmpL src zero ));
12480   ins_cost(100);
12481   format %{ "TEST   $src.hi,$src.hi" %}
12482   opcode(0x85);
12483   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12484   ins_pipe( ialu_cr_reg_reg );
12485 %}
12486 
12487 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12488 // compares.  Can be used for LE or GT compares by reversing arguments.
12489 // NOT GOOD FOR EQ/NE tests.
12490 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12491   match( Set flags (CmpL src1 src2 ));
12492   effect( TEMP tmp );
12493   ins_cost(300);
12494   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12495             "MOV    $tmp,$src1.hi\n\t"
12496             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12497   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12498   ins_pipe( ialu_cr_reg_reg );
12499 %}
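
// Why the CMP/SBB pair above works (informal note, not matcher code): the CMP
// of the low halves leaves the borrow of (src1.lo - src2.lo) in CF, and the
// SBB then computes src1.hi - src2.hi - CF, so SF and OF end up describing the
// full 64-bit subtraction src1 - src2 -- exactly what the signed LT/GE
// conditions read.  ZF, however, only reflects the high-half result, which is
// why these flags are good for LT/GE but not for EQ/NE.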
12500 
12501 // Long compares reg < zero/reg OR reg >= zero/reg.
12502 // Just a wrapper for a normal branch, plus the predicate test.
12503 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12504   match(If cmp flags);
12505   effect(USE labl);
12506   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12507   expand %{
12508     jmpCon(cmp,flags,labl);    // JLT or JGE...
12509   %}
12510 %}
12511 
12512 // Compare 2 longs and CMOVE longs.
12513 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12514   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12515   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12516   ins_cost(400);
12517   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12518             "CMOV$cmp $dst.hi,$src.hi" %}
12519   opcode(0x0F,0x40);
12520   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12521   ins_pipe( pipe_cmov_reg_long );
12522 %}
12523 
12524 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12525   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12526   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12527   ins_cost(500);
12528   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12529             "CMOV$cmp $dst.hi,$src.hi" %}
12530   opcode(0x0F,0x40);
12531   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12532   ins_pipe( pipe_cmov_reg_long );
12533 %}
12534 
12535 // Compare 2 longs and CMOVE ints.
12536 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12537   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12538   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12539   ins_cost(200);
12540   format %{ "CMOV$cmp $dst,$src" %}
12541   opcode(0x0F,0x40);
12542   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12543   ins_pipe( pipe_cmov_reg );
12544 %}
12545 
12546 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12547   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12548   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12549   ins_cost(250);
12550   format %{ "CMOV$cmp $dst,$src" %}
12551   opcode(0x0F,0x40);
12552   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12553   ins_pipe( pipe_cmov_mem );
12554 %}
12555 
12556 // Compare 2 longs and CMOVE ptrs.
12557 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12558   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12559   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12560   ins_cost(200);
12561   format %{ "CMOV$cmp $dst,$src" %}
12562   opcode(0x0F,0x40);
12563   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12564   ins_pipe( pipe_cmov_reg );
12565 %}
12566 
12567 // Compare 2 longs and CMOVE doubles
12568 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12569   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12570   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12571   ins_cost(200);
12572   expand %{
12573     fcmovDPR_regS(cmp,flags,dst,src);
12574   %}
12575 %}
12576 
12577 // Compare 2 longs and CMOVE doubles
12578 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12579   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12580   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12581   ins_cost(200);
12582   expand %{
12583     fcmovD_regS(cmp,flags,dst,src);
12584   %}
12585 %}
12586 
12587 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12588   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12589   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12590   ins_cost(200);
12591   expand %{
12592     fcmovFPR_regS(cmp,flags,dst,src);
12593   %}
12594 %}
12595 
12596 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12597   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12598   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12599   ins_cost(200);
12600   expand %{
12601     fcmovF_regS(cmp,flags,dst,src);
12602   %}
12603 %}
12604 
12605 //======
12606 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12607 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12608   match( Set flags (CmpL src zero ));
12609   effect(TEMP tmp);
12610   ins_cost(200);
12611   format %{ "MOV    $tmp,$src.lo\n\t"
12612             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12613   ins_encode( long_cmp_flags0( src, tmp ) );
12614   ins_pipe( ialu_reg_reg_long );
12615 %}
12616 
12617 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12618 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12619   match( Set flags (CmpL src1 src2 ));
12620   ins_cost(200+300);
12621   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12622             "JNE,s  skip\n\t"
12623             "CMP    $src1.hi,$src2.hi\n\t"
12624      "skip:\t" %}
12625   ins_encode( long_cmp_flags1( src1, src2 ) );
12626   ins_pipe( ialu_cr_reg_reg );
12627 %}
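
// Informal note on the two EQ/NE forms above (comment only): a long is zero
// exactly when (lo | hi) == 0, which the zero form computes into ZF with a
// single OR.  In the reg-reg form equality needs both halves to match, so the
// low halves are compared first and the high-half compare is skipped on a
// mismatch; either way ZF ends up describing full 64-bit equality, while the
// remaining flag bits carry no useful ordering information here.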
12628 
12629 // Long compare reg == zero/reg OR reg != zero/reg
12630 // Just a wrapper for a normal branch, plus the predicate test.
12631 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12632   match(If cmp flags);
12633   effect(USE labl);
12634   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12635   expand %{
12636     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12637   %}
12638 %}
12639 
12640 // Compare 2 longs and CMOVE longs.
12641 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12642   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12643   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12644   ins_cost(400);
12645   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12646             "CMOV$cmp $dst.hi,$src.hi" %}
12647   opcode(0x0F,0x40);
12648   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12649   ins_pipe( pipe_cmov_reg_long );
12650 %}
12651 
12652 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12653   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12654   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12655   ins_cost(500);
12656   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12657             "CMOV$cmp $dst.hi,$src.hi" %}
12658   opcode(0x0F,0x40);
12659   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12660   ins_pipe( pipe_cmov_reg_long );
12661 %}
12662 
12663 // Compare 2 longs and CMOVE ints.
12664 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12665   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12666   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12667   ins_cost(200);
12668   format %{ "CMOV$cmp $dst,$src" %}
12669   opcode(0x0F,0x40);
12670   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12671   ins_pipe( pipe_cmov_reg );
12672 %}
12673 
12674 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12675   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12676   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12677   ins_cost(250);
12678   format %{ "CMOV$cmp $dst,$src" %}
12679   opcode(0x0F,0x40);
12680   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12681   ins_pipe( pipe_cmov_mem );
12682 %}
12683 
12684 // Compare 2 longs and CMOVE ptrs.
12685 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12686   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12687   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12688   ins_cost(200);
12689   format %{ "CMOV$cmp $dst,$src" %}
12690   opcode(0x0F,0x40);
12691   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12692   ins_pipe( pipe_cmov_reg );
12693 %}
12694 
12695 // Compare 2 longs and CMOVE doubles
12696 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12697   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12698   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12699   ins_cost(200);
12700   expand %{
12701     fcmovDPR_regS(cmp,flags,dst,src);
12702   %}
12703 %}
12704 
12705 // Compare 2 longs and CMOVE doubles
12706 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12707   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12708   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12709   ins_cost(200);
12710   expand %{
12711     fcmovD_regS(cmp,flags,dst,src);
12712   %}
12713 %}
12714 
12715 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12716   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12717   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12718   ins_cost(200);
12719   expand %{
12720     fcmovFPR_regS(cmp,flags,dst,src);
12721   %}
12722 %}
12723 
12724 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12725   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12726   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12727   ins_cost(200);
12728   expand %{
12729     fcmovF_regS(cmp,flags,dst,src);
12730   %}
12731 %}
12732 
12733 //======
12734 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12735 // Same as cmpL_reg_flags_LEGT except that src must be negated (the compare is against zero).
12736 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12737   match( Set flags (CmpL src zero ));
12738   effect( TEMP tmp );
12739   ins_cost(300);
12740   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12741             "CMP    $tmp,$src.lo\n\t"
12742             "SBB    $tmp,$src.hi\n\t" %}
12743   ins_encode( long_cmp_flags3(src, tmp) );
12744   ins_pipe( ialu_reg_reg_long );
12745 %}
12746 
12747 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12748 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12749 // requires a commuted test to get the same result.
12750 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12751   match( Set flags (CmpL src1 src2 ));
12752   effect( TEMP tmp );
12753   ins_cost(300);
12754   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12755             "MOV    $tmp,$src2.hi\n\t"
12756             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12757   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12758   ins_pipe( ialu_cr_reg_reg );
12759 %}
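
// Informal note on the LE/GT forms (comment only): x <= y is the same test as
// y >= x, and x > y is the same as y < x, so the rules above reuse the LT/GE
// flag recipe with the operands swapped (or, in the zero form, with src
// negated by computing 0 - src) and rely on cmpOp_commute to supply the
// commuted condition code for the branch or CMOV that consumes the flags.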
12760 
12761 // Long compares reg <= zero/reg OR reg > zero/reg.
12762 // Just a wrapper for a normal branch, plus the predicate test.
12763 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12764   match(If cmp flags);
12765   effect(USE labl);
12766   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12767   ins_cost(300);
12768   expand %{
12769     jmpCon(cmp,flags,labl);    // JGT or JLE...
12770   %}
12771 %}
12772 
12773 // Compare 2 longs and CMOVE longs.
12774 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12775   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12776   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12777   ins_cost(400);
12778   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12779             "CMOV$cmp $dst.hi,$src.hi" %}
12780   opcode(0x0F,0x40);
12781   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12782   ins_pipe( pipe_cmov_reg_long );
12783 %}
12784 
12785 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12786   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12787   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12788   ins_cost(500);
12789   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12790             "CMOV$cmp $dst.hi,$src.hi+4" %}
12791   opcode(0x0F,0x40);
12792   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12793   ins_pipe( pipe_cmov_reg_long );
12794 %}
12795 
12796 // Compare 2 longs and CMOVE ints.
12797 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12798   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12799   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12800   ins_cost(200);
12801   format %{ "CMOV$cmp $dst,$src" %}
12802   opcode(0x0F,0x40);
12803   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12804   ins_pipe( pipe_cmov_reg );
12805 %}
12806 
12807 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12808   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12809   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12810   ins_cost(250);
12811   format %{ "CMOV$cmp $dst,$src" %}
12812   opcode(0x0F,0x40);
12813   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12814   ins_pipe( pipe_cmov_mem );
12815 %}
12816 
12817 // Compare 2 longs and CMOVE ptrs.
12818 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12819   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12820   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12821   ins_cost(200);
12822   format %{ "CMOV$cmp $dst,$src" %}
12823   opcode(0x0F,0x40);
12824   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12825   ins_pipe( pipe_cmov_reg );
12826 %}
12827 
12828 // Compare 2 longs and CMOVE doubles
12829 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12830   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12831   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12832   ins_cost(200);
12833   expand %{
12834     fcmovDPR_regS(cmp,flags,dst,src);
12835   %}
12836 %}
12837 
12838 // Compare 2 longs and CMOVE doubles
12839 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12840   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12841   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12842   ins_cost(200);
12843   expand %{
12844     fcmovD_regS(cmp,flags,dst,src);
12845   %}
12846 %}
12847 
12848 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12849   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12850   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12851   ins_cost(200);
12852   expand %{
12853     fcmovFPR_regS(cmp,flags,dst,src);
12854   %}
12855 %}
12856 
12857 
12858 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12859   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12860   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12861   ins_cost(200);
12862   expand %{
12863     fcmovF_regS(cmp,flags,dst,src);
12864   %}
12865 %}
12866 
12867 
12868 // ============================================================================
12869 // Procedure Call/Return Instructions
12870 // Call Java Static Instruction
12871 // Note: If this code changes, the corresponding ret_addr_offset() and
12872 //       compute_padding() functions will have to be adjusted.
12873 instruct CallStaticJavaDirect(method meth) %{
12874   match(CallStaticJava);
12875   effect(USE meth);
12876 
12877   ins_cost(300);
12878   format %{ "CALL,static " %}
12879   opcode(0xE8); /* E8 cd */
12880   ins_encode( pre_call_resets,
12881               Java_Static_Call( meth ),
12882               call_epilog,
12883               post_call_FPU );
12884   ins_pipe( pipe_slow );
12885   ins_alignment(4);
12886 %}
12887 
12888 // Call Java Dynamic Instruction
12889 // Note: If this code changes, the corresponding ret_addr_offset() and
12890 //       compute_padding() functions will have to be adjusted.
12891 instruct CallDynamicJavaDirect(method meth) %{
12892   match(CallDynamicJava);
12893   effect(USE meth);
12894 
12895   ins_cost(300);
12896   format %{ "MOV    EAX,(oop)-1\n\t"
12897             "CALL,dynamic" %}
12898   opcode(0xE8); /* E8 cd */
12899   ins_encode( pre_call_resets,
12900               Java_Dynamic_Call( meth ),
12901               call_epilog,
12902               post_call_FPU );
12903   ins_pipe( pipe_slow );
12904   ins_alignment(4);
12905 %}
12906 
12907 // Call Runtime Instruction
12908 instruct CallRuntimeDirect(method meth) %{
12909   match(CallRuntime );
12910   effect(USE meth);
12911 
12912   ins_cost(300);
12913   format %{ "CALL,runtime " %}
12914   opcode(0xE8); /* E8 cd */
12915   // Use FFREEs to clear entries in the float stack
12916   ins_encode( pre_call_resets,
12917               FFree_Float_Stack_All,
12918               Java_To_Runtime( meth ),
12919               post_call_FPU );
12920   ins_pipe( pipe_slow );
12921 %}
12922 
12923 // Call runtime without safepoint
12924 instruct CallLeafDirect(method meth) %{
12925   match(CallLeaf);
12926   effect(USE meth);
12927 
12928   ins_cost(300);
12929   format %{ "CALL_LEAF,runtime " %}
12930   opcode(0xE8); /* E8 cd */
12931   ins_encode( pre_call_resets,
12932               FFree_Float_Stack_All,
12933               Java_To_Runtime( meth ),
12934               Verify_FPU_For_Leaf, post_call_FPU );
12935   ins_pipe( pipe_slow );
12936 %}
12937 
12938 instruct CallLeafNoFPDirect(method meth) %{
12939   match(CallLeafNoFP);
12940   effect(USE meth);
12941 
12942   ins_cost(300);
12943   format %{ "CALL_LEAF_NOFP,runtime " %}
12944   opcode(0xE8); /* E8 cd */
12945   ins_encode(Java_To_Runtime(meth));
12946   ins_pipe( pipe_slow );
12947 %}
12948 
12949 
12950 // Return Instruction
12951 // Remove the return address & jump to it.
12952 instruct Ret() %{
12953   match(Return);
12954   format %{ "RET" %}
12955   opcode(0xC3);
12956   ins_encode(OpcP);
12957   ins_pipe( pipe_jmp );
12958 %}
12959 
12960 // Tail Call; Jump from runtime stub to Java code.
12961 // Also known as an 'interprocedural jump'.
12962 // Target of jump will eventually return to caller.
12963 // TailJump below removes the return address.
12964 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12965   match(TailCall jump_target method_oop );
12966   ins_cost(300);
12967   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12968   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12969   ins_encode( OpcP, RegOpc(jump_target) );
12970   ins_pipe( pipe_jmp );
12971 %}
12972 
12973 
12974 // Tail Jump; remove the return address; jump to target.
12975 // TailCall above leaves the return address around.
12976 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12977   match( TailJump jump_target ex_oop );
12978   ins_cost(300);
12979   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12980             "JMP    $jump_target " %}
12981   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12982   ins_encode( enc_pop_rdx,
12983               OpcP, RegOpc(jump_target) );
12984   ins_pipe( pipe_jmp );
12985 %}
12986 
12987 // Create exception oop: created by stack-crawling runtime code.
12988 // The created exception is now available to this handler, and is set up
12989 // just prior to jumping to this handler.  No code is emitted.
12990 instruct CreateException( eAXRegP ex_oop )
12991 %{
12992   match(Set ex_oop (CreateEx));
12993 
12994   size(0);
12995   // use the following format syntax
12996   format %{ "# exception oop is in EAX; no code emitted" %}
12997   ins_encode();
12998   ins_pipe( empty );
12999 %}
13000 
13001 
13002 // Rethrow exception:
13003 // The exception oop will come in the first argument position.
13004 // Then JUMP (not call) to the rethrow stub code.
13005 instruct RethrowException()
13006 %{
13007   match(Rethrow);
13008 
13009   // use the following format syntax
13010   format %{ "JMP    rethrow_stub" %}
13011   ins_encode(enc_rethrow);
13012   ins_pipe( pipe_jmp );
13013 %}
13014 
13015 // inlined locking and unlocking
13016 
13017 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13018   predicate(Compile::current()->use_rtm());
13019   match(Set cr (FastLock object box));
13020   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13021   ins_cost(300);
13022   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13023   ins_encode %{
13024     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13025                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13026                  _counters, _rtm_counters, _stack_rtm_counters,
13027                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13028                  true, ra_->C->profile_rtm());
13029   %}
13030   ins_pipe(pipe_slow);
13031 %}
13032 
13033 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13034   predicate(!Compile::current()->use_rtm());
13035   match(Set cr (FastLock object box));
13036   effect(TEMP tmp, TEMP scr, USE_KILL box);
13037   ins_cost(300);
13038   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13039   ins_encode %{
13040     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13041                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13042   %}
13043   ins_pipe(pipe_slow);
13044 %}
13045 
13046 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13047   match(Set cr (FastUnlock object box));
13048   effect(TEMP tmp, USE_KILL box);
13049   ins_cost(300);
13050   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13051   ins_encode %{
13052     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13053   %}
13054   ins_pipe(pipe_slow);
13055 %}
13056 
13057 
13058 
13059 // ============================================================================
13060 // Safepoint Instruction
13061 instruct safePoint_poll(eFlagsReg cr) %{
13062   match(SafePoint);
13063   effect(KILL cr);
13064 
13065   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13066   // On SPARC that might be acceptable as we can generate the address with
13067   // just a sethi, saving an or.  By polling at offset 0 we can end up
13068   // putting additional pressure on index 0 in the D$.  Because of
13069   // alignment (just like the situation at hand) the lower indices tend
13070   // to see more traffic.  It'd be better to change the polling address
13071   // to offset 0 of the last $line in the polling page.
13072 
13073   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13074   ins_cost(125);
13075   size(6);
13076   ins_encode( Safepoint_Poll() );
13077   ins_pipe( ialu_reg_mem );
13078 %}
13079 
13080 
13081 // ============================================================================
13082 // This name is KNOWN by the ADLC and cannot be changed.
13083 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13084 // for this guy.
13085 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13086   match(Set dst (ThreadLocal));
13087   effect(DEF dst, KILL cr);
13088 
13089   format %{ "MOV    $dst, Thread::current()" %}
13090   ins_encode %{
13091     Register dstReg = as_Register($dst$$reg);
13092     __ get_thread(dstReg);
13093   %}
13094   ins_pipe( ialu_reg_fat );
13095 %}
13096 
13097 
13098 
13099 //----------PEEPHOLE RULES-----------------------------------------------------
13100 // These must follow all instruction definitions as they use the names
13101 // defined in the instruction definitions.
13102 //
13103 // peepmatch ( root_instr_name [preceding_instruction]* );
13104 //
13105 // peepconstraint %{
13106 // (instruction_number.operand_name relational_op instruction_number.operand_name
13107 //  [, ...] );
13108 // // instruction numbers are zero-based using left to right order in peepmatch
13109 //
13110 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13111 // // provide an instruction_number.operand_name for each operand that appears
13112 // // in the replacement instruction's match rule
13113 //
13114 // ---------VM FLAGS---------------------------------------------------------
13115 //
13116 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13117 //
13118 // Each peephole rule is given an identifying number starting with zero and
13119 // increasing by one in the order seen by the parser.  An individual peephole
13120 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13121 // on the command-line.
13122 //
13123 // ---------CURRENT LIMITATIONS----------------------------------------------
13124 //
13125 // Only match adjacent instructions in same basic block
13126 // Only equality constraints
13127 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13128 // Only one replacement instruction
13129 //
13130 // ---------EXAMPLE----------------------------------------------------------
13131 //
13132 // // pertinent parts of existing instructions in architecture description
13133 // instruct movI(rRegI dst, rRegI src) %{
13134 //   match(Set dst (CopyI src));
13135 // %}
13136 //
13137 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13138 //   match(Set dst (AddI dst src));
13139 //   effect(KILL cr);
13140 // %}
13141 //
13142 // // Change (inc mov) to lea
13143 // peephole %{
13144 //   // increment preceded by register-register move
13145 //   peepmatch ( incI_eReg movI );
13146 //   // require that the destination register of the increment
13147 //   // match the destination register of the move
13148 //   peepconstraint ( 0.dst == 1.dst );
13149 //   // construct a replacement instruction that sets
13150 //   // the destination to ( move's source register + one )
13151 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13152 // %}
13153 //
13154 // Implementation no longer uses movX instructions since
13155 // machine-independent system no longer uses CopyX nodes.
13156 //
13157 // peephole %{
13158 //   peepmatch ( incI_eReg movI );
13159 //   peepconstraint ( 0.dst == 1.dst );
13160 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13161 // %}
13162 //
13163 // peephole %{
13164 //   peepmatch ( decI_eReg movI );
13165 //   peepconstraint ( 0.dst == 1.dst );
13166 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13167 // %}
13168 //
13169 // peephole %{
13170 //   peepmatch ( addI_eReg_imm movI );
13171 //   peepconstraint ( 0.dst == 1.dst );
13172 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13173 // %}
13174 //
13175 // peephole %{
13176 //   peepmatch ( addP_eReg_imm movP );
13177 //   peepconstraint ( 0.dst == 1.dst );
13178 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13179 // %}
13180 
13181 // // Change load of spilled value to only a spill
13182 // instruct storeI(memory mem, rRegI src) %{
13183 //   match(Set mem (StoreI mem src));
13184 // %}
13185 //
13186 // instruct loadI(rRegI dst, memory mem) %{
13187 //   match(Set dst (LoadI mem));
13188 // %}
13189 //
13190 peephole %{
13191   peepmatch ( loadI storeI );
13192   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13193   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13194 %}
13195 
13196 //----------SMARTSPILL RULES---------------------------------------------------
13197 // These must follow all instruction definitions as they use the names
13198 // defined in the instruction definitions.