1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously, EBX, ESI, and EDI were set as save-on-entry for Java code.
  64 // SOE was turned off in Java code due to frequent use of uncommon-traps.
  65 // Now that the allocator is better, ESI and EDI are turned on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emitting assembly for a machnode. During emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same numbering weirdness forces the
  86 // instruction encoding to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
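// (Concretely, this is why the FLD/FST encodings below use patterns such as
// 0xC0-1+Matcher::_regEncode[...]: the -1 compensates for the temporarily
// pushed stack element.)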
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI)
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is not ever allocated; we use clever encodings to fake
 229 // a 2-address instruction out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
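// Given the encodings defined in the register block above (EAX=0, EDX=2, ECX=1,
// EBX=3, EBP=5, EDI=7), adding 2 to the low half's encoding for each long pair
// (EDX:EAX, EBX:ECX, EDI:EBP) yields the encoding of its high half.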
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
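// A minimal illustrative sketch (not used by any encoding below) of what the
// masks above do to a single 32-bit lane; the SSE instructions simply apply
// the same bitwise operation to every lane of a 128-bit operand.  The helper
// names are hypothetical and exist only for this example.
static inline juint abs_float_bits(juint bits) { return bits & 0x7FFFFFFF; } // AbsF: clear the sign bit
static inline juint neg_float_bits(juint bits) { return bits ^ 0x80000000; } // NegF: flip the sign bit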
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
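// For example, a method compiled in 24-bit FP mode (6-byte fldcw) that also
// uses wide vectors (3-byte vzeroupper) has pre_call_resets_size() == 9, so a
// static call's return address sits 5 + 9 = 14 bytes past the start of the
// sequence.  The 5 is the call itself (1 opcode byte plus a 4-byte
// displacement); the dynamic case adds the 5-byte MOV that precedes the call
// (see CallDynamicJavaDirectNode::compute_padding below), hence 10.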
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw and/or vzeroupper, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw and/or vzeroupper, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
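// Worked example: with no pre-call resets and current_offset == 10 in the
// static case, 10 + 1 = 11 and round_to(11, 4) - 11 = 1, so one byte of
// padding is emitted.  The call opcode then lands at offset 11 and its 4-byte
// displacement at offsets 12..15, aligned so the patched word cannot straddle
// a cache line.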
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
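// Example of how these helpers compose: emit_opcode(cbuf, 0x8B) followed by
// emit_rm(cbuf, 0x3, 0, 1) produces 8B C1, i.e. MOV EAX,ECX (mod=11
// register-direct, reg=EAX=000, r/m=ECX=001).  encode_Copy() below emits
// exactly this pattern for reg-reg moves.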
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);                      // 8-bit displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);                      // 32-bit displacement
 412   }
 413 }
 414 
 415 // Emit the ModRM [+ SIB] [+ displacement] bytes for a register/memory operand (rRegI ereg, memory mem)  // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
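// Typical use in this file: impl_helper() calls this with base = ESP_enc,
// index = 0x4 (no index) and scale = 0, which always takes the SIB path and
// emits ModRM (mod|reg|100), SIB 0x24 (scale=0, no index, base=ESP) and a
// 0-, 8- or 32-bit displacement -- i.e. a plain [ESP + offset] operand.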
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
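// The resulting value in 'dst' is -1 when the comparison is unordered (NaN)
// or 'below', 0 when the operands compare equal, and +1 when 'above'.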
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users might be
 628     // emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 725   // Remove two words for return addr and rbp.
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     _masm.set_managed();
 799     if (reg_lo+1 == reg_hi) { // double move?
 800       if (is_load) {
 801         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 802       } else {
 803         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 804       }
 805     } else {
 806       if (is_load) {
 807         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 808       } else {
 809         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 810       }
 811     }
 812 #ifndef PRODUCT
 813   } else if (!do_size) {
 814     if (size != 0) st->print("\n\t");
 815     if (reg_lo+1 == reg_hi) { // double move?
 816       if (is_load) st->print("%s %s,[ESP + #%d]",
 817                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 818                               Matcher::regName[reg_lo], offset);
 819       else         st->print("MOVSD  [ESP + #%d],%s",
 820                               offset, Matcher::regName[reg_lo]);
 821     } else {
 822       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 823                               Matcher::regName[reg_lo], offset);
 824       else         st->print("MOVSS  [ESP + #%d],%s",
 825                               offset, Matcher::regName[reg_lo]);
 826     }
 827 #endif
 828   }
 829   bool is_single_byte = false;
 830   if ((UseAVX > 2) && (offset != 0)) {
 831     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 832   }
 833   int offset_size = 0;
 834   if (UseAVX > 2 ) {
 835     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 836   } else {
 837     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 838   }
 839   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 840   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 841   return size+5+offset_size;
 842 }
 843 
 844 
 845 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 846                             int src_hi, int dst_hi, int size, outputStream* st ) {
 847   if (cbuf) {
 848     MacroAssembler _masm(cbuf);
 849     _masm.set_managed();
 850     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 851       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 852                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 853     } else {
 854       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     }
 857 #ifndef PRODUCT
 858   } else if (!do_size) {
 859     if (size != 0) st->print("\n\t");
 860     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 861       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 862         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     } else {
 867       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 868         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 869       } else {
 870         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 871       }
 872     }
 873 #endif
 874   }
 875   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 876   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 877   int sz = (UseAVX > 2) ? 6 : 4;
 878   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 879       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 880   return size + sz;
 881 }
 882 
 883 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 884                             int src_hi, int dst_hi, int size, outputStream* st ) {
 885   // 32-bit
 886   if (cbuf) {
 887     MacroAssembler _masm(cbuf);
 888     _masm.set_managed();
 889     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 890              as_Register(Matcher::_regEncode[src_lo]));
 891 #ifndef PRODUCT
 892   } else if (!do_size) {
 893     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 894 #endif
 895   }
 896   return (UseAVX> 2) ? 6 : 4;
 897 }
 898 
 899 
 900 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 901                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 902   // 32-bit
 903   if (cbuf) {
 904     MacroAssembler _masm(cbuf);
 905     _masm.set_managed();
 906     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 907              as_XMMRegister(Matcher::_regEncode[src_lo]));
 908 #ifndef PRODUCT
 909   } else if (!do_size) {
 910     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 911 #endif
 912   }
 913   return (UseAVX> 2) ? 6 : 4;
 914 }
 915 
 916 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 917   if( cbuf ) {
 918     emit_opcode(*cbuf, 0x8B );
 919     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 920 #ifndef PRODUCT
 921   } else if( !do_size ) {
 922     if( size != 0 ) st->print("\n\t");
 923     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 924 #endif
 925   }
 926   return size+2;
 927 }
 928 
 929 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 930                                  int offset, int size, outputStream* st ) {
 931   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 932     if( cbuf ) {
 933       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 934       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 935 #ifndef PRODUCT
 936     } else if( !do_size ) {
 937       if( size != 0 ) st->print("\n\t");
 938       st->print("FLD    %s",Matcher::regName[src_lo]);
 939 #endif
 940     }
 941     size += 2;
 942   }
 943 
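  // EBX_num and EDX_num are used here purely for their encodings (3 and 2),
  // which double as the /digit opcode extension in the ModRM reg field:
  // /3 selects FSTP (store and pop), /2 selects FST (store without popping).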
 944   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 945   const char *op_str;
 946   int op;
 947   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 948     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 949     op = 0xDD;
 950   } else {                   // 32-bit store
 951     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 952     op = 0xD9;
 953     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 954   }
 955 
 956   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 957 }
 958 
 959 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 960 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 961                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 962 
 963 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 964                             int stack_offset, int reg, uint ireg, outputStream* st);
 965 
 966 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 967                                      int dst_offset, uint ireg, outputStream* st) {
 968   int calc_size = 0;
 969   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 970   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 971   switch (ireg) {
 972   case Op_VecS:
 973     calc_size = 3+src_offset_size + 3+dst_offset_size;
 974     break;
 975   case Op_VecD: {
 976     calc_size = 3+src_offset_size + 3+dst_offset_size;
 977     int tmp_src_offset = src_offset + 4;
 978     int tmp_dst_offset = dst_offset + 4;
 979     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 980     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 981     calc_size += 3+src_offset_size + 3+dst_offset_size;
 982     break;
 983   }   
 984   case Op_VecX:
 985   case Op_VecY:
 986   case Op_VecZ:
 987     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 988     break;
 989   default:
 990     ShouldNotReachHere();
 991   }
 992   if (cbuf) {
 993     MacroAssembler _masm(cbuf);
 994     int offset = __ offset();
 995     switch (ireg) {
 996     case Op_VecS:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       break;
1000     case Op_VecD:
1001       __ pushl(Address(rsp, src_offset));
1002       __ popl (Address(rsp, dst_offset));
1003       __ pushl(Address(rsp, src_offset+4));
1004       __ popl (Address(rsp, dst_offset+4));
1005       break;
1006     case Op_VecX:
1007       __ movdqu(Address(rsp, -16), xmm0);
1008       __ movdqu(xmm0, Address(rsp, src_offset));
1009       __ movdqu(Address(rsp, dst_offset), xmm0);
1010       __ movdqu(xmm0, Address(rsp, -16));
1011       break;
1012     case Op_VecY:
1013       __ vmovdqu(Address(rsp, -32), xmm0);
1014       __ vmovdqu(xmm0, Address(rsp, src_offset));
1015       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1016       __ vmovdqu(xmm0, Address(rsp, -32));
1017       break;
1018     case Op_VecZ:
1019       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1020       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1021       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1022       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1023       break;
1024     default:
1025       ShouldNotReachHere();
1026     }
1027     int size = __ offset() - offset;
1028     assert(size == calc_size, "incorrect size calculation");
1029     return size;
1030 #ifndef PRODUCT
1031   } else if (!do_size) {
1032     switch (ireg) {
1033     case Op_VecS:
1034       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1035                 "popl    [rsp + #%d]",
1036                 src_offset, dst_offset);
1037       break;
1038     case Op_VecD:
1039       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]\n\t"
1041                 "pushl   [rsp + #%d]\n\t"
1042                 "popl    [rsp + #%d]",
1043                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1044       break;
1045      case Op_VecX:
1046       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1047                 "movdqu  xmm0, [rsp + #%d]\n\t"
1048                 "movdqu  [rsp + #%d], xmm0\n\t"
1049                 "movdqu  xmm0, [rsp - #16]",
1050                 src_offset, dst_offset);
1051       break;
1052     case Op_VecY:
1053       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1054                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1055                 "vmovdqu [rsp + #%d], xmm0\n\t"
1056                 "vmovdqu xmm0, [rsp - #32]",
1057                 src_offset, dst_offset);
1058       break;
1059     case Op_VecZ:
1060       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1061                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1062                 "vmovdqu [rsp + #%d], xmm0\n\t"
1063                 "vmovdqu xmm0, [rsp - #64]",
1064                 src_offset, dst_offset);
1065       break;
1066     default:
1067       ShouldNotReachHere();
1068     }
1069 #endif
1070   }
1071   return calc_size;
1072 }
1073 
1074 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1075   // Get registers to move
1076   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1077   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1078   OptoReg::Name dst_second = ra_->get_reg_second(this );
1079   OptoReg::Name dst_first = ra_->get_reg_first(this );
1080 
1081   enum RC src_second_rc = rc_class(src_second);
1082   enum RC src_first_rc = rc_class(src_first);
1083   enum RC dst_second_rc = rc_class(dst_second);
1084   enum RC dst_first_rc = rc_class(dst_first);
1085 
1086   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1087 
1088   // Generate spill code!
1089   int size = 0;
1090 
1091   if( src_first == dst_first && src_second == dst_second )
1092     return size;            // Self copy, no move
1093 
1094   if (bottom_type()->isa_vect() != NULL) {
1095     uint ireg = ideal_reg();
1096     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1097     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1098     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1099     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1100       // mem -> mem
1101       int src_offset = ra_->reg2offset(src_first);
1102       int dst_offset = ra_->reg2offset(dst_first);
1103       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1104     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1105       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1106     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1107       int stack_offset = ra_->reg2offset(dst_first);
1108       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1109     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1110       int stack_offset = ra_->reg2offset(src_first);
1111       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1112     } else {
1113       ShouldNotReachHere();
1114     }
1115   }
1116 
1117   // --------------------------------------
1118   // Check for mem-mem move.  push/pop to move.
1119   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1120     if( src_second == dst_first ) { // overlapping stack copy ranges
1121       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1122       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1123       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1124       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1125     }
1126     // move low bits
1127     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1128     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1129     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1130       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1131       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1132     }
1133     return size;
1134   }
1135 
1136   // --------------------------------------
1137   // Check for integer reg-reg copy
1138   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1139     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1140 
1141   // Check for integer store
1142   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1144 
1145   // Check for integer load
1146   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1147     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1148 
1149   // Check for integer reg-xmm reg copy
1150   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1151     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1152             "no 64 bit integer-float reg moves" );
1153     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1154   }
1155   // --------------------------------------
1156   // Check for float reg-reg copy
1157   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1158     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1159             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1160     if( cbuf ) {
1161 
1162       // Note the mucking with the register encode to compensate for the 0/1
1163       // indexing issue mentioned in a comment in the reg_def sections
1164       // for FPR registers many lines above here.
1165 
1166       if( src_first != FPR1L_num ) {
1167         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1168         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1169         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1170         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1171      } else {
1172         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1173         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1174      }
1175 #ifndef PRODUCT
1176     } else if( !do_size ) {
1177       if( size != 0 ) st->print("\n\t");
1178       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1179       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1180 #endif
1181     }
1182     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1183   }
1184 
1185   // Check for float store
1186   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1187     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1188   }
1189 
1190   // Check for float load
1191   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1192     int offset = ra_->reg2offset(src_first);
1193     const char *op_str;
1194     int op;
1195     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1196       op_str = "FLD_D";
1197       op = 0xDD;
1198     } else {                   // 32-bit load
1199       op_str = "FLD_S";
1200       op = 0xD9;
1201       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1202     }
1203     if( cbuf ) {
1204       emit_opcode  (*cbuf, op );
1205       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1206       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1207       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1208 #ifndef PRODUCT
1209     } else if( !do_size ) {
1210       if( size != 0 ) st->print("\n\t");
1211       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1212 #endif
1213     }
1214     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1215     return size + 3+offset_size+2;
1216   }
1217 
1218   // Check for xmm reg-reg copy
1219   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1220     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1221             (src_first+1 == src_second && dst_first+1 == dst_second),
1222             "no non-adjacent float-moves" );
1223     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1224   }
1225 
1226   // Check for xmm reg-integer reg copy
1227   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1228     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1229             "no 64 bit float-integer reg moves" );
1230     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1231   }
1232 
1233   // Check for xmm store
1234   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1235     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1236   }
1237 
1238   // Check for float xmm load
1239   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1241   }
1242 
1243   // Copy from float reg to xmm reg
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1245     // copy to the top of stack from floating point reg
1246     // and use LEA to preserve flags
1247     if( cbuf ) {
1248       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1249       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1250       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1251       emit_d8(*cbuf,0xF8);
1252 #ifndef PRODUCT
1253     } else if( !do_size ) {
1254       if( size != 0 ) st->print("\n\t");
1255       st->print("LEA    ESP,[ESP-8]");
1256 #endif
1257     }
1258     size += 4;
1259 
1260     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1261 
1262     // Copy from the temp memory to the xmm reg.
1263     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1264 
1265     if( cbuf ) {
1266       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1267       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1268       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1269       emit_d8(*cbuf,0x08);
1270 #ifndef PRODUCT
1271     } else if( !do_size ) {
1272       if( size != 0 ) st->print("\n\t");
1273       st->print("LEA    ESP,[ESP+8]");
1274 #endif
1275     }
1276     size += 4;
1277     return size;
1278   }
1279 
1280   assert( size > 0, "missed a case" );
1281 
1282   // --------------------------------------------------------------------
1283   // Check for second bits still needing moving.
1284   if( src_second == dst_second )
1285     return size;               // Self copy; no move
1286   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1287 
1288   // Check for second word int-int move
1289   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1290     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1291 
1292   // Check for second word integer store
1293   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1295 
1296   // Check for second word integer load
1297   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1298     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1299 
1300 
1301   Unimplemented();
1302   return 0; // Mute compiler
1303 }
1304 
1305 #ifndef PRODUCT
1306 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1307   implementation( NULL, ra_, false, st );
1308 }
1309 #endif
1310 
1311 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1312   implementation( &cbuf, ra_, false, NULL );
1313 }
1314 
1315 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1316   return implementation( NULL, ra_, true, NULL );
1317 }
1318 
1319 
1320 //=============================================================================
1321 #ifndef PRODUCT
1322 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1323   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1324   int reg = ra_->get_reg_first(this);
1325   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1326 }
1327 #endif
1328 
1329 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1330   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1331   int reg = ra_->get_encode(this);
1332   if( offset >= 128 ) {
1333     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1334     emit_rm(cbuf, 0x2, reg, 0x04);
1335     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1336     emit_d32(cbuf, offset);
1337   }
1338   else {
1339     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1340     emit_rm(cbuf, 0x1, reg, 0x04);
1341     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1342     emit_d8(cbuf, offset);
1343   }
1344 }
1345 
1346 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1347   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1348   if( offset >= 128 ) {
1349     return 7;
1350   }
1351   else {
1352     return 4;
1353   }
1354 }
1355 
1356 //=============================================================================
1357 #ifndef PRODUCT
1358 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1359   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1360   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1361   st->print_cr("\tNOP");
1362   st->print_cr("\tNOP");
1363   if( !OptoBreakpoint )
1364     st->print_cr("\tNOP");
1365 }
1366 #endif
1367 
1368 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1369   MacroAssembler masm(&cbuf);
1370 #ifdef ASSERT
1371   uint insts_size = cbuf.insts_size();
1372 #endif
1373   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1374   masm.jump_cc(Assembler::notEqual,
1375                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1376   /* WARNING these NOPs are critical so that verified entry point is properly
1377      aligned for patching by NativeJump::patch_verified_entry() */
1378   int nops_cnt = 2;
1379   if( !OptoBreakpoint ) // Leave space for int3
1380      nops_cnt += 1;
1381   masm.nop(nops_cnt);
1382 
1383   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1384 }
1385 
1386 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1387   return OptoBreakpoint ? 11 : 12;
1388 }
1389 
1390 
1391 //=============================================================================
1392 
1393 int Matcher::regnum_to_fpu_offset(int regnum) {
1394   return regnum - 32; // The FP registers are in the second chunk
1395 }
1396 
// The name is UltraSparc-specific; returning true just means we have fast l2f conversion.
1398 const bool Matcher::convL2FSupported(void) {
1399   return true;
1400 }
1401 
1402 // Is this branch offset short enough that a short branch can be used?
1403 //
1404 // NOTE: If the platform does not provide any short branch variants, then
1405 //       this method should return false for offset 0.
1406 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1407   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1410   offset -= br_size;
1411 
1412   // the short version of jmpConUCF2 contains multiple branches,
1413   // making the reach slightly less
1414   if (rule == jmpConUCF2_rule)
1415     return (-126 <= offset && offset <= 125);
1416   return (-128 <= offset && offset <= 127);
1417 }
1418 
1419 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1421   return false;
1422 }
1423 
1424 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1425 const bool Matcher::init_array_count_is_in_bytes = false;
1426 
// Longs need two CMOVs, so charge one extra conditional move.
1428 const int Matcher::long_cmove_cost() { return 1; }
1429 
1430 // No CMOVF/CMOVD with SSE/SSE2
1431 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1432 
1433 // Does the CPU require late expand (see block.cpp for description of late expand)?
1434 const bool Matcher::require_postalloc_expand = false;
1435 
1436 // Should the Matcher clone shifts on addressing modes, expecting them to
1437 // be subsumed into complex addressing expressions or compute them into
1438 // registers?  True for Intel but false for most RISCs
1439 const bool Matcher::clone_shift_expressions = true;
1440 
1441 // Do we need to mask the count passed to shift instructions or does
1442 // the cpu only look at the lower 5/6 bits anyway?
1443 const bool Matcher::need_masked_shift_count = false;
1444 
1445 bool Matcher::narrow_oop_use_complex_address() {
1446   ShouldNotCallThis();
1447   return true;
1448 }
1449 
1450 bool Matcher::narrow_klass_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
    // These do not use EBP as the address register; they use { EDX, EBX, EDI, ESI }.
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // the code that uses a multiply.  Only do so when the constant
  // divisor fits into 32 bits (min_jint is excluded so that negating
  // it still yields a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value is known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (ie - encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
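//
// A hypothetical sketch (exampleAddI is not an instruction defined in this file;
// format and ins_pipe are omitted) of how the two keywords fit together, using the
// OpcP and RegReg encoding classes defined below:
//
//   instruct exampleAddI(rRegI dst, rRegI src) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                          // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) );   // opcode byte, then mod/rm byte
//   %}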
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
1671     //
    //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         rdx,edx
1677     //  83 F9 FF             cmp         rcx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
  // Opcode enc_class for 8/32-bit immediate instructions with sign extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878 
1879   enc_class pre_call_resets %{
    // If the method sets the FPU control word, restore the standard control word here
1881     debug_only(int off0 = cbuf.insts_size());
1882     if (ra_->C->in_24_bit_fp_mode()) {
1883       MacroAssembler _masm(&cbuf);
1884       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1885     }
1886     if (ra_->C->max_vector_size() > 16) {
1887       // Clear upper bits of YMM registers when current compiled code uses
1888       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1889       MacroAssembler _masm(&cbuf);
1890       __ vzeroupper();
1891     }
1892     debug_only(int off1 = cbuf.insts_size());
1893     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1894   %}
1895 
1896   enc_class post_call_FPU %{
    // If the method sets the FPU control word, restore its 24-bit mode here after the call
1898     if (Compile::current()->in_24_bit_fp_mode()) {
1899       MacroAssembler masm(&cbuf);
1900       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1901     }
1902   %}
1903 
1904   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1905     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1906     // who we intended to call.
1907     cbuf.set_insts_mark();
1908     $$$emit8$primary;
1909 
1910     if (!_method) {
1911       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1912                      runtime_call_Relocation::spec(),
1913                      RELOC_IMM32);
1914     } else {
1915       int method_index = resolved_method_index(cbuf);
1916       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1917                                                   : static_call_Relocation::spec(method_index);
1918       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919                      rspec, RELOC_DISP32);
1920       // Emit stubs for static call.
1921       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1922       if (stub == NULL) {
1923         ciEnv::current()->record_failure("CodeCache is full");
1924         return;
1925       }
1926     }
1927   %}
1928 
1929   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1930     MacroAssembler _masm(&cbuf);
1931     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1932   %}
1933 
1934   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1935     int disp = in_bytes(Method::from_compiled_offset());
1936     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1937 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1939     cbuf.set_insts_mark();
1940     $$$emit8$primary;
1941     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1942     emit_d8(cbuf, disp);             // Displacement
1943 
1944   %}
1945 
1946 //   Following encoding is no longer used, but may be restored if calling
1947 //   convention changes significantly.
1948 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1949 //
1950 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1951 //     // int ic_reg     = Matcher::inline_cache_reg();
1952 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1953 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1954 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1955 //
1956 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1957 //     // // so we load it immediately before the call
1958 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1959 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1960 //
1961 //     // xor rbp,ebp
1962 //     emit_opcode(cbuf, 0x33);
1963 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1964 //
1965 //     // CALL to interpreter.
1966 //     cbuf.set_insts_mark();
1967 //     $$$emit8$primary;
1968 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1969 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1970 //   %}
1971 
1972   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1973     $$$emit8$primary;
1974     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1975     $$$emit8$shift$$constant;
1976   %}
1977 
1978   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1979     // Load immediate does not have a zero or sign extended version
1980     // for 8-bit immediates
1981     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1982     $$$emit32$src$$constant;
1983   %}
1984 
1985   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1986     // Load immediate does not have a zero or sign extended version
1987     // for 8-bit immediates
1988     emit_opcode(cbuf, $primary + $dst$$reg);
1989     $$$emit32$src$$constant;
1990   %}
1991 
1992   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     int dst_enc = $dst$$reg;
1996     int src_con = $src$$constant & 0x0FFFFFFFFL;
1997     if (src_con == 0) {
1998       // xor dst, dst
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2001     } else {
2002       emit_opcode(cbuf, $primary + dst_enc);
2003       emit_d32(cbuf, src_con);
2004     }
2005   %}
2006 
2007   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2008     // Load immediate does not have a zero or sign extended version
2009     // for 8-bit immediates
2010     int dst_enc = $dst$$reg + 2;
2011     int src_con = ((julong)($src$$constant)) >> 32;
2012     if (src_con == 0) {
2013       // xor dst, dst
2014       emit_opcode(cbuf, 0x33);
2015       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2016     } else {
2017       emit_opcode(cbuf, $primary + dst_enc);
2018       emit_d32(cbuf, src_con);
2019     }
2020   %}
2021 
2022 
2023   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2024   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2025     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2026   %}
2027 
2028   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2029     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2030   %}
2031 
2032   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2033     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2034   %}
2035 
2036   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2037     $$$emit8$primary;
2038     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2039   %}
2040 
2041   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2042     $$$emit8$secondary;
2043     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2044   %}
2045 
2046   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2047     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2048   %}
2049 
2050   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2051     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2052   %}
2053 
2054   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2055     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2056   %}
2057 
2058   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2059     // Output immediate
2060     $$$emit32$src$$constant;
2061   %}
2062 
2063   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2064     // Output Float immediate bits
2065     jfloat jf = $src$$constant;
2066     int    jf_as_bits = jint_cast( jf );
2067     emit_d32(cbuf, jf_as_bits);
2068   %}
2069 
2070   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2071     // Output Float immediate bits
2072     jfloat jf = $src$$constant;
2073     int    jf_as_bits = jint_cast( jf );
2074     emit_d32(cbuf, jf_as_bits);
2075   %}
2076 
2077   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2078     // Output immediate
2079     $$$emit16$src$$constant;
2080   %}
2081 
2082   enc_class Con_d32(immI src) %{
2083     emit_d32(cbuf,$src$$constant);
2084   %}
2085 
2086   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2087     // Output immediate memory reference
2088     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2089     emit_d32(cbuf, 0x00);
2090   %}
2091 
2092   enc_class lock_prefix( ) %{
2093     if( os::is_MP() )
2094       emit_opcode(cbuf,0xF0);         // [Lock]
2095   %}
2096 
2097   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high-order word of the new value to store, but
  //       our register encoding uses rbx.
2102   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2103 
    // XCHG  EBX,ECX
2105     emit_opcode(cbuf,0x87);
2106     emit_opcode(cbuf,0xD9);
2107     // [Lock]
2108     if( os::is_MP() )
2109       emit_opcode(cbuf,0xF0);
2110     // CMPXCHG8 [Eptr]
2111     emit_opcode(cbuf,0x0F);
2112     emit_opcode(cbuf,0xC7);
2113     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  EBX,ECX
2115     emit_opcode(cbuf,0x87);
2116     emit_opcode(cbuf,0xD9);
2117   %}
2118 
2119   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2120     // [Lock]
2121     if( os::is_MP() )
2122       emit_opcode(cbuf,0xF0);
2123 
2124     // CMPXCHG [Eptr]
2125     emit_opcode(cbuf,0x0F);
2126     emit_opcode(cbuf,0xB1);
2127     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2128   %}
2129 
2130   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2131     int res_encoding = $res$$reg;
2132 
2133     // MOV  res,0
2134     emit_opcode( cbuf, 0xB8 + res_encoding);
2135     emit_d32( cbuf, 0 );
2136     // JNE,s  fail
2137     emit_opcode(cbuf,0x75);
2138     emit_d8(cbuf, 5 );
2139     // MOV  res,1
2140     emit_opcode( cbuf, 0xB8 + res_encoding);
2141     emit_d32( cbuf, 1 );
2142     // fail:
2143   %}
2144 
2145   enc_class set_instruction_start( ) %{
2146     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2147   %}
2148 
2149   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2150     int reg_encoding = $ereg$$reg;
2151     int base  = $mem$$base;
2152     int index = $mem$$index;
2153     int scale = $mem$$scale;
2154     int displace = $mem$$disp;
2155     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2156     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2157   %}
2158 
2159   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2160     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2161     int base  = $mem$$base;
2162     int index = $mem$$index;
2163     int scale = $mem$$scale;
2164     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2165     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2166     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2167   %}
2168 
2169   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2170     int r1, r2;
2171     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2172     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2173     emit_opcode(cbuf,0x0F);
2174     emit_opcode(cbuf,$tertiary);
2175     emit_rm(cbuf, 0x3, r1, r2);
2176     emit_d8(cbuf,$cnt$$constant);
2177     emit_d8(cbuf,$primary);
2178     emit_rm(cbuf, 0x3, $secondary, r1);
2179     emit_d8(cbuf,$cnt$$constant);
2180   %}
2181 
2182   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2183     emit_opcode( cbuf, 0x8B ); // Move
2184     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2185     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2186       emit_d8(cbuf,$primary);
2187       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2188       emit_d8(cbuf,$cnt$$constant-32);
2189     }
2190     emit_d8(cbuf,$primary);
2191     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2192     emit_d8(cbuf,31);
2193   %}
2194 
2195   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2196     int r1, r2;
2197     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2198     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2199 
2200     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2201     emit_rm(cbuf, 0x3, r1, r2);
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_opcode(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, r1);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_opcode(cbuf,0x33);  // XOR r2,r2
2208     emit_rm(cbuf, 0x3, r2, r2);
2209   %}
2210 
2211   // Clone of RegMem but accepts an extra parameter to access each
2212   // half of a double in memory; it never needs relocation info.
2213   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2214     emit_opcode(cbuf,$opcode$$constant);
2215     int reg_encoding = $rm_reg$$reg;
2216     int base     = $mem$$base;
2217     int index    = $mem$$index;
2218     int scale    = $mem$$scale;
2219     int displace = $mem$$disp + $disp_for_half$$constant;
2220     relocInfo::relocType disp_reloc = relocInfo::none;
2221     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2222   %}
2223 
2224   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2225   //
2226   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2227   // and it never needs relocation information.
2228   // Frequently used to move data between FPU's Stack Top and memory.
2229   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2230     int rm_byte_opcode = $rm_opcode$$constant;
2231     int base     = $mem$$base;
2232     int index    = $mem$$index;
2233     int scale    = $mem$$scale;
2234     int displace = $mem$$disp;
2235     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2236     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2237   %}
2238 
2239   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2240     int rm_byte_opcode = $rm_opcode$$constant;
2241     int base     = $mem$$base;
2242     int index    = $mem$$index;
2243     int scale    = $mem$$scale;
2244     int displace = $mem$$disp;
2245     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2246     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2247   %}
2248 
2249   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2250     int reg_encoding = $dst$$reg;
2251     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2252     int index        = 0x04;            // 0x04 indicates no index
2253     int scale        = 0x00;            // 0x00 indicates no scale
2254     int displace     = $src1$$constant; // 0x00 indicates no displacement
2255     relocInfo::relocType disp_reloc = relocInfo::none;
2256     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2257   %}
2258 
2259   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2260     // Compare dst,src
2261     emit_opcode(cbuf,0x3B);
2262     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // if dst < src, jump around the move (dst already holds the minimum)
2264     emit_opcode(cbuf,0x7C);
2265     emit_d8(cbuf,2);
2266     // move dst,src
2267     emit_opcode(cbuf,0x8B);
2268     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2269   %}
2270 
2271   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2272     // Compare dst,src
2273     emit_opcode(cbuf,0x3B);
2274     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // if dst > src, jump around the move (dst already holds the maximum)
2276     emit_opcode(cbuf,0x7F);
2277     emit_d8(cbuf,2);
2278     // move dst,src
2279     emit_opcode(cbuf,0x8B);
2280     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2281   %}
2282 
2283   enc_class enc_FPR_store(memory mem, regDPR src) %{
2284     // If src is FPR1, we can just FST to store it.
2285     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2286     int reg_encoding = 0x2; // Just store
2287     int base  = $mem$$base;
2288     int index = $mem$$index;
2289     int scale = $mem$$scale;
2290     int displace = $mem$$disp;
2291     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2292     if( $src$$reg != FPR1L_enc ) {
2293       reg_encoding = 0x3;  // Store & pop
2294       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2295       emit_d8( cbuf, 0xC0-1+$src$$reg );
2296     }
2297     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2298     emit_opcode(cbuf,$primary);
2299     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2300   %}
2301 
2302   enc_class neg_reg(rRegI dst) %{
2303     // NEG $dst
2304     emit_opcode(cbuf,0xF7);
2305     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2306   %}
2307 
2308   enc_class setLT_reg(eCXRegI dst) %{
2309     // SETLT $dst
2310     emit_opcode(cbuf,0x0F);
2311     emit_opcode(cbuf,0x9C);
2312     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2313   %}
2314 
2315   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2316     int tmpReg = $tmp$$reg;
2317 
2318     // SUB $p,$q
2319     emit_opcode(cbuf,0x2B);
2320     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2321     // SBB $tmp,$tmp
2322     emit_opcode(cbuf,0x1B);
2323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2324     // AND $tmp,$y
2325     emit_opcode(cbuf,0x23);
2326     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2327     // ADD $p,$tmp
2328     emit_opcode(cbuf,0x03);
2329     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2330   %}
2331 
2332   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2333     // TEST shift,32
2334     emit_opcode(cbuf,0xF7);
2335     emit_rm(cbuf, 0x3, 0, ECX_enc);
2336     emit_d32(cbuf,0x20);
2337     // JEQ,s small
2338     emit_opcode(cbuf, 0x74);
2339     emit_d8(cbuf, 0x04);
2340     // MOV    $dst.hi,$dst.lo
2341     emit_opcode( cbuf, 0x8B );
2342     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2343     // CLR    $dst.lo
2344     emit_opcode(cbuf, 0x33);
2345     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2346 // small:
2347     // SHLD   $dst.hi,$dst.lo,$shift
2348     emit_opcode(cbuf,0x0F);
2349     emit_opcode(cbuf,0xA5);
2350     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2352     emit_opcode(cbuf,0xD3);
2353     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2354   %}
2355 
2356   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2357     // TEST shift,32
2358     emit_opcode(cbuf,0xF7);
2359     emit_rm(cbuf, 0x3, 0, ECX_enc);
2360     emit_d32(cbuf,0x20);
2361     // JEQ,s small
2362     emit_opcode(cbuf, 0x74);
2363     emit_d8(cbuf, 0x04);
2364     // MOV    $dst.lo,$dst.hi
2365     emit_opcode( cbuf, 0x8B );
2366     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2367     // CLR    $dst.hi
2368     emit_opcode(cbuf, 0x33);
2369     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2370 // small:
2371     // SHRD   $dst.lo,$dst.hi,$shift
2372     emit_opcode(cbuf,0x0F);
2373     emit_opcode(cbuf,0xAD);
2374     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2376     emit_opcode(cbuf,0xD3);
2377     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2378   %}
2379 
2380   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2381     // TEST shift,32
2382     emit_opcode(cbuf,0xF7);
2383     emit_rm(cbuf, 0x3, 0, ECX_enc);
2384     emit_d32(cbuf,0x20);
2385     // JEQ,s small
2386     emit_opcode(cbuf, 0x74);
2387     emit_d8(cbuf, 0x05);
2388     // MOV    $dst.lo,$dst.hi
2389     emit_opcode( cbuf, 0x8B );
2390     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2391     // SAR    $dst.hi,31
2392     emit_opcode(cbuf, 0xC1);
2393     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2394     emit_d8(cbuf, 0x1F );
2395 // small:
2396     // SHRD   $dst.lo,$dst.hi,$shift
2397     emit_opcode(cbuf,0x0F);
2398     emit_opcode(cbuf,0xAD);
2399     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2401     emit_opcode(cbuf,0xD3);
2402     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2403   %}
2404 
2405 
2406   // ----------------- Encodings for floating point unit -----------------
2407   // May leave result in FPU-TOS or FPU reg depending on opcodes
2408   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2409     $$$emit8$primary;
2410     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2411   %}
2412 
2413   // Pop argument in FPR0 with FSTP ST(0)
2414   enc_class PopFPU() %{
2415     emit_opcode( cbuf, 0xDD );
2416     emit_d8( cbuf, 0xD8 );
2417   %}
2418 
2419   // !!!!! equivalent to Pop_Reg_F
2420   enc_class Pop_Reg_DPR( regDPR dst ) %{
2421     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2422     emit_d8( cbuf, 0xD8+$dst$$reg );
2423   %}
2424 
2425   enc_class Push_Reg_DPR( regDPR dst ) %{
2426     emit_opcode( cbuf, 0xD9 );
2427     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2428   %}
2429 
2430   enc_class strictfp_bias1( regDPR dst ) %{
2431     emit_opcode( cbuf, 0xDB );           // FLD m80real
2432     emit_opcode( cbuf, 0x2D );
2433     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2434     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2435     emit_opcode( cbuf, 0xC8+$dst$$reg );
2436   %}
2437 
2438   enc_class strictfp_bias2( regDPR dst ) %{
2439     emit_opcode( cbuf, 0xDB );           // FLD m80real
2440     emit_opcode( cbuf, 0x2D );
2441     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2442     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2443     emit_opcode( cbuf, 0xC8+$dst$$reg );
2444   %}
2445 
2446   // Special case for moving an integer register to a stack slot.
2447   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2448     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2449   %}
2450 
2451   // Special case for moving a register to a stack slot.
2452   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2453     // Opcode already emitted
2454     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2455     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2456     emit_d32(cbuf, $dst$$disp);   // Displacement
2457   %}
2458 
2459   // Push the integer in stackSlot 'src' onto FP-stack
2460   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2461     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2462   %}
2463 
2464   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2465   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2466     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2467   %}
2468 
2469   // Same as Pop_Mem_F except for opcode
2470   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2471   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2472     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2473   %}
2474 
2475   enc_class Pop_Reg_FPR( regFPR dst ) %{
2476     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2477     emit_d8( cbuf, 0xD8+$dst$$reg );
2478   %}
2479 
2480   enc_class Push_Reg_FPR( regFPR dst ) %{
2481     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2482     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2483   %}
2484 
2485   // Push FPU's float to a stack-slot, and pop FPU-stack
2486   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2487     int pop = 0x02;
2488     if ($src$$reg != FPR1L_enc) {
2489       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2490       emit_d8( cbuf, 0xC0-1+$src$$reg );
2491       pop = 0x03;
2492     }
2493     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2494   %}
2495 
2496   // Push FPU's double to a stack-slot, and pop FPU-stack
2497   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2498     int pop = 0x02;
2499     if ($src$$reg != FPR1L_enc) {
2500       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2501       emit_d8( cbuf, 0xC0-1+$src$$reg );
2502       pop = 0x03;
2503     }
2504     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2505   %}
2506 
2507   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2508   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2509     int pop = 0xD0 - 1; // -1 since we skip FLD
2510     if ($src$$reg != FPR1L_enc) {
2511       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2512       emit_d8( cbuf, 0xC0-1+$src$$reg );
2513       pop = 0xD8;
2514     }
2515     emit_opcode( cbuf, 0xDD );
2516     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2517   %}
2518 
2519 
2520   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2521     // load dst in FPR0
2522     emit_opcode( cbuf, 0xD9 );
2523     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2524     if ($src$$reg != FPR1L_enc) {
2525       // fincstp
2526       emit_opcode (cbuf, 0xD9);
2527       emit_opcode (cbuf, 0xF7);
2528       // swap src with FPR1:
2529       // FXCH FPR1 with src
2530       emit_opcode(cbuf, 0xD9);
2531       emit_d8(cbuf, 0xC8-1+$src$$reg );
2532       // fdecstp
2533       emit_opcode (cbuf, 0xD9);
2534       emit_opcode (cbuf, 0xF6);
2535     }
2536   %}
2537 
2538   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2539     MacroAssembler _masm(&cbuf);
2540     __ subptr(rsp, 8);
2541     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2542     __ fld_d(Address(rsp, 0));
2543     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2544     __ fld_d(Address(rsp, 0));
2545   %}
2546 
2547   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2548     MacroAssembler _masm(&cbuf);
2549     __ subptr(rsp, 4);
2550     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2551     __ fld_s(Address(rsp, 0));
2552     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2553     __ fld_s(Address(rsp, 0));
2554   %}
2555 
2556   enc_class Push_ResultD(regD dst) %{
2557     MacroAssembler _masm(&cbuf);
2558     __ fstp_d(Address(rsp, 0));
2559     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2560     __ addptr(rsp, 8);
2561   %}
2562 
2563   enc_class Push_ResultF(regF dst, immI d8) %{
2564     MacroAssembler _masm(&cbuf);
2565     __ fstp_s(Address(rsp, 0));
2566     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2567     __ addptr(rsp, $d8$$constant);
2568   %}
2569 
2570   enc_class Push_SrcD(regD src) %{
2571     MacroAssembler _masm(&cbuf);
2572     __ subptr(rsp, 8);
2573     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2574     __ fld_d(Address(rsp, 0));
2575   %}
2576 
2577   enc_class push_stack_temp_qword() %{
2578     MacroAssembler _masm(&cbuf);
2579     __ subptr(rsp, 8);
2580   %}
2581 
2582   enc_class pop_stack_temp_qword() %{
2583     MacroAssembler _masm(&cbuf);
2584     __ addptr(rsp, 8);
2585   %}
2586 
2587   enc_class push_xmm_to_fpr1(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2590     __ fld_d(Address(rsp, 0));
2591   %}
2592 
2593   enc_class Push_Result_Mod_DPR( regDPR src) %{
2594     if ($src$$reg != FPR1L_enc) {
2595       // fincstp
2596       emit_opcode (cbuf, 0xD9);
2597       emit_opcode (cbuf, 0xF7);
2598       // FXCH FPR1 with src
2599       emit_opcode(cbuf, 0xD9);
2600       emit_d8(cbuf, 0xC8-1+$src$$reg );
2601       // fdecstp
2602       emit_opcode (cbuf, 0xD9);
2603       emit_opcode (cbuf, 0xF6);
2604     }
2605     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2606     // // FSTP   FPR$dst$$reg
2607     // emit_opcode( cbuf, 0xDD );
2608     // emit_d8( cbuf, 0xD8+$dst$$reg );
2609   %}
2610 
2611   enc_class fnstsw_sahf_skip_parity() %{
2612     // fnstsw ax
2613     emit_opcode( cbuf, 0xDF );
2614     emit_opcode( cbuf, 0xE0 );
2615     // sahf
2616     emit_opcode( cbuf, 0x9E );
2617     // jnp  ::skip
2618     emit_opcode( cbuf, 0x7B );
2619     emit_opcode( cbuf, 0x05 );
2620   %}
2621 
2622   enc_class emitModDPR() %{
2623     // fprem must be iterative
2624     // :: loop
2625     // fprem
2626     emit_opcode( cbuf, 0xD9 );
2627     emit_opcode( cbuf, 0xF8 );
2628     // wait
2629     emit_opcode( cbuf, 0x9b );
2630     // fnstsw ax
2631     emit_opcode( cbuf, 0xDF );
2632     emit_opcode( cbuf, 0xE0 );
2633     // sahf
2634     emit_opcode( cbuf, 0x9E );
2635     // jp  ::loop
2636     emit_opcode( cbuf, 0x0F );
2637     emit_opcode( cbuf, 0x8A );
2638     emit_opcode( cbuf, 0xF4 );
2639     emit_opcode( cbuf, 0xFF );
2640     emit_opcode( cbuf, 0xFF );
2641     emit_opcode( cbuf, 0xFF );
2642   %}
2643 
2644   enc_class fpu_flags() %{
2645     // fnstsw_ax
2646     emit_opcode( cbuf, 0xDF);
2647     emit_opcode( cbuf, 0xE0);
2648     // test ax,0x0400
2649     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2650     emit_opcode( cbuf, 0xA9 );
2651     emit_d16   ( cbuf, 0x0400 );
2652     // // // This sequence works, but stalls for 12-16 cycles on PPro
2653     // // test rax,0x0400
2654     // emit_opcode( cbuf, 0xA9 );
2655     // emit_d32   ( cbuf, 0x00000400 );
2656     //
2657     // jz exit (no unordered comparison)
2658     emit_opcode( cbuf, 0x74 );
2659     emit_d8    ( cbuf, 0x02 );
2660     // mov ah,1 - treat as LT case (set carry flag)
2661     emit_opcode( cbuf, 0xB4 );
2662     emit_d8    ( cbuf, 0x01 );
2663     // sahf
2664     emit_opcode( cbuf, 0x9E);
2665   %}
2666 
2667   enc_class cmpF_P6_fixup() %{
2668     // Fixup the integer flags in case comparison involved a NaN
2669     //
2670     // JNP exit (no unordered comparison, P-flag is set by NaN)
2671     emit_opcode( cbuf, 0x7B );
2672     emit_d8    ( cbuf, 0x03 );
2673     // MOV AH,1 - treat as LT case (set carry flag)
2674     emit_opcode( cbuf, 0xB4 );
2675     emit_d8    ( cbuf, 0x01 );
2676     // SAHF
2677     emit_opcode( cbuf, 0x9E);
2678     // NOP     // target for branch to avoid branch to branch
2679     emit_opcode( cbuf, 0x90);
2680   %}
2681 
2682 //     fnstsw_ax();
2683 //     sahf();
2684 //     movl(dst, nan_result);
2685 //     jcc(Assembler::parity, exit);
2686 //     movl(dst, less_result);
2687 //     jcc(Assembler::below, exit);
2688 //     movl(dst, equal_result);
2689 //     jcc(Assembler::equal, exit);
2690 //     movl(dst, greater_result);
2691 
2692 // less_result     =  1;
2693 // greater_result  = -1;
2694 // equal_result    = 0;
2695 // nan_result      = -1;
2696 
2697   enc_class CmpF_Result(rRegI dst) %{
2698     // fnstsw_ax();
2699     emit_opcode( cbuf, 0xDF);
2700     emit_opcode( cbuf, 0xE0);
2701     // sahf
2702     emit_opcode( cbuf, 0x9E);
2703     // movl(dst, nan_result);
2704     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2705     emit_d32( cbuf, -1 );
2706     // jcc(Assembler::parity, exit);
2707     emit_opcode( cbuf, 0x7A );
2708     emit_d8    ( cbuf, 0x13 );
2709     // movl(dst, less_result);
2710     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2711     emit_d32( cbuf, -1 );
2712     // jcc(Assembler::below, exit);
2713     emit_opcode( cbuf, 0x72 );
2714     emit_d8    ( cbuf, 0x0C );
2715     // movl(dst, equal_result);
2716     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2717     emit_d32( cbuf, 0 );
2718     // jcc(Assembler::equal, exit);
2719     emit_opcode( cbuf, 0x74 );
2720     emit_d8    ( cbuf, 0x05 );
2721     // movl(dst, greater_result);
2722     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2723     emit_d32( cbuf, 1 );
2724   %}
2725 
2726 
2727   // Compare the longs and set flags
2728   // BROKEN!  Do Not use as-is
2729   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2730     // CMP    $src1.hi,$src2.hi
2731     emit_opcode( cbuf, 0x3B );
2732     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2733     // JNE,s  done
2734     emit_opcode(cbuf,0x75);
2735     emit_d8(cbuf, 2 );
2736     // CMP    $src1.lo,$src2.lo
2737     emit_opcode( cbuf, 0x3B );
2738     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2739 // done:
2740   %}
2741 
2742   enc_class convert_int_long( regL dst, rRegI src ) %{
2743     // mov $dst.lo,$src
2744     int dst_encoding = $dst$$reg;
2745     int src_encoding = $src$$reg;
2746     encode_Copy( cbuf, dst_encoding  , src_encoding );
2747     // mov $dst.hi,$src
2748     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2749     // sar $dst.hi,31
2750     emit_opcode( cbuf, 0xC1 );
2751     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2752     emit_d8(cbuf, 0x1F );
2753   %}
2754 
2755   enc_class convert_long_double( eRegL src ) %{
2756     // push $src.hi
2757     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2758     // push $src.lo
2759     emit_opcode(cbuf, 0x50+$src$$reg  );
2760     // fild 64-bits at [SP]
2761     emit_opcode(cbuf,0xdf);
2762     emit_d8(cbuf, 0x6C);
2763     emit_d8(cbuf, 0x24);
2764     emit_d8(cbuf, 0x00);
2765     // pop stack
2766     emit_opcode(cbuf, 0x83); // add  SP, #8
2767     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2768     emit_d8(cbuf, 0x8);
2769   %}
2770 
2771   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
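    // Note: the one-operand IMUL leaves the full 64-bit product in EDX:EAX,
    // so EDX already holds the product shifted right by 32; the SAR below by
    // ($cnt - 32) then leaves (EAX * $src1) >> $cnt in EDX for 32 <= $cnt <= 63.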
2772     // IMUL   EDX:EAX,$src1
2773     emit_opcode( cbuf, 0xF7 );
2774     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2775     // SAR    EDX,$cnt-32
2776     int shift_count = ((int)$cnt$$constant) - 32;
2777     if (shift_count > 0) {
2778       emit_opcode(cbuf, 0xC1);
2779       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2780       emit_d8(cbuf, shift_count);
2781     }
2782   %}
2783 
  // This version is identical to convert_long_double except that it leaves
  // the two pushed words on the stack (no trailing ADD ESP,8).
2785   enc_class convert_long_double2( eRegL src ) %{
2786     // push $src.hi
2787     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2788     // push $src.lo
2789     emit_opcode(cbuf, 0x50+$src$$reg  );
2790     // fild 64-bits at [SP]
2791     emit_opcode(cbuf,0xdf);
2792     emit_d8(cbuf, 0x6C);
2793     emit_d8(cbuf, 0x24);
2794     emit_d8(cbuf, 0x00);
2795   %}
2796 
2797   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2798     // Basic idea: long = (long)int * (long)int
2799     // IMUL EDX:EAX, src
2800     emit_opcode( cbuf, 0xF7 );
2801     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2802   %}
2803 
2804   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2805     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2806     // MUL EDX:EAX, src
2807     emit_opcode( cbuf, 0xF7 );
2808     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2809   %}
2810 
2811   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2812     // Basic idea: lo(result) = lo(x_lo * y_lo)
2813     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
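    //             (the x_hi * y_hi term is omitted: it only contributes to
    //             bits 64 and up, which fall outside the 64-bit result)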
2814     // MOV    $tmp,$src.lo
2815     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2816     // IMUL   $tmp,EDX
2817     emit_opcode( cbuf, 0x0F );
2818     emit_opcode( cbuf, 0xAF );
2819     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2820     // MOV    EDX,$src.hi
2821     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2822     // IMUL   EDX,EAX
2823     emit_opcode( cbuf, 0x0F );
2824     emit_opcode( cbuf, 0xAF );
2825     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2826     // ADD    $tmp,EDX
2827     emit_opcode( cbuf, 0x03 );
2828     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2829     // MUL   EDX:EAX,$src.lo
2830     emit_opcode( cbuf, 0xF7 );
2831     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2833     emit_opcode( cbuf, 0x03 );
2834     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2835   %}
2836 
2837   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2838     // Basic idea: lo(result) = lo(src * y_lo)
2839     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2840     // IMUL   $tmp,EDX,$src
2841     emit_opcode( cbuf, 0x6B );
2842     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2843     emit_d8( cbuf, (int)$src$$constant );
2844     // MOV    EDX,$src
2845     emit_opcode(cbuf, 0xB8 + EDX_enc);
2846     emit_d32( cbuf, (int)$src$$constant );
2847     // MUL   EDX:EAX,EDX
2848     emit_opcode( cbuf, 0xF7 );
2849     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2851     emit_opcode( cbuf, 0x03 );
2852     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2853   %}
2854 
2855   enc_class long_div( eRegL src1, eRegL src2 ) %{
2856     // PUSH src1.hi
2857     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2858     // PUSH src1.lo
2859     emit_opcode(cbuf,               0x50+$src1$$reg  );
2860     // PUSH src2.hi
2861     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2862     // PUSH src2.lo
2863     emit_opcode(cbuf,               0x50+$src2$$reg  );
2864     // CALL directly to the runtime
2865     cbuf.set_insts_mark();
2866     emit_opcode(cbuf,0xE8);       // Call into runtime
2867     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2868     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed words)
2870     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2871     emit_d8(cbuf, 4*4);
2872   %}
2873 
2874   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2875     // PUSH src1.hi
2876     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2877     // PUSH src1.lo
2878     emit_opcode(cbuf,               0x50+$src1$$reg  );
2879     // PUSH src2.hi
2880     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2881     // PUSH src2.lo
2882     emit_opcode(cbuf,               0x50+$src2$$reg  );
2883     // CALL directly to the runtime
2884     cbuf.set_insts_mark();
2885     emit_opcode(cbuf,0xE8);       // Call into runtime
2886     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2887     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed words)
2889     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2890     emit_d8(cbuf, 4*4);
2891   %}
2892 
2893   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2894     // MOV   $tmp,$src.lo
2895     emit_opcode(cbuf, 0x8B);
2896     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2897     // OR    $tmp,$src.hi
2898     emit_opcode(cbuf, 0x0B);
2899     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2900   %}
2901 
2902   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2903     // CMP    $src1.lo,$src2.lo
2904     emit_opcode( cbuf, 0x3B );
2905     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2906     // JNE,s  skip
2907     emit_cc(cbuf, 0x70, 0x5);
2908     emit_d8(cbuf,2);
2909     // CMP    $src1.hi,$src2.hi
2910     emit_opcode( cbuf, 0x3B );
2911     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2912   %}
2913 
2914   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2915     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2916     emit_opcode( cbuf, 0x3B );
2917     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2918     // MOV    $tmp,$src1.hi
2919     emit_opcode( cbuf, 0x8B );
2920     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2921     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2922     emit_opcode( cbuf, 0x1B );
2923     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2924   %}
2925 
2926   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
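    // Computes 0 - $src as a 64-bit subtraction: the CMP borrows out of the
    // low word and the SBB folds that borrow into the high word, so the sign,
    // carry and overflow flags reflect the full 64-bit difference.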
2927     // XOR    $tmp,$tmp
2928     emit_opcode(cbuf,0x33);  // XOR
2929     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2930     // CMP    $tmp,$src.lo
2931     emit_opcode( cbuf, 0x3B );
2932     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2933     // SBB    $tmp,$src.hi
2934     emit_opcode( cbuf, 0x1B );
2935     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2936   %}
2937 
  // Sniff, sniff... smells like GNU Superoptimizer
2939   enc_class neg_long( eRegL dst ) %{
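    // Two's-complement negate of the 64-bit pair: NEG the high word, NEG the
    // low word (which sets CF when the low word was non-zero), then SBB 0
    // from the high word to propagate the borrow into the upper half.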
2940     emit_opcode(cbuf,0xF7);    // NEG hi
2941     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2942     emit_opcode(cbuf,0xF7);    // NEG lo
2943     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2944     emit_opcode(cbuf,0x83);    // SBB hi,0
2945     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2946     emit_d8    (cbuf,0 );
2947   %}
2948 
2949   enc_class enc_pop_rdx() %{
2950     emit_opcode(cbuf,0x5A);
2951   %}
2952 
2953   enc_class enc_rethrow() %{
2954     cbuf.set_insts_mark();
2955     emit_opcode(cbuf, 0xE9);        // jmp    entry
2956     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2957                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2958   %}
2959 
2960 
2961   // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
2963   // 'zero', store the darned double down as an int, and reset the
2964   // rounding mode to 'nearest'.  The hardware throws an exception which
2965   // patches up the correct value directly to the stack.
2966   enc_class DPR2I_encoding( regDPR src ) %{
2967     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2970     // However, I2C adapters and other float-stack manglers leave pending
2971     // invalid-op exceptions hanging.  We would have to clear them before
2972     // enabling them and that is more expensive than just testing for the
2973     // invalid value Intel stores down in the corner cases.
2974     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2975     emit_opcode(cbuf,0x2D);
2976     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2977     // Allocate a word
2978     emit_opcode(cbuf,0x83);            // SUB ESP,4
2979     emit_opcode(cbuf,0xEC);
2980     emit_d8(cbuf,0x04);
2981     // Encoding assumes a double has been pushed into FPR0.
2982     // Store down the double as an int, popping the FPU stack
2983     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2984     emit_opcode(cbuf,0x1C);
2985     emit_d8(cbuf,0x24);
2986     // Restore the rounding mode; mask the exception
2987     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2988     emit_opcode(cbuf,0x2D);
2989     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2990         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2991         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2992 
2993     // Load the converted int; adjust CPU stack
2994     emit_opcode(cbuf,0x58);       // POP EAX
2995     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2996     emit_d32   (cbuf,0x80000000); //         0x80000000
2997     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2998     emit_d8    (cbuf,0x07);       // Size of slow_call
2999     // Push src onto stack slow-path
3000     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3001     emit_d8    (cbuf,0xC0-1+$src$$reg );
3002     // CALL directly to the runtime
3003     cbuf.set_insts_mark();
3004     emit_opcode(cbuf,0xE8);       // Call into runtime
3005     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3006     // Carry on here...
3007   %}
3008 
3009   enc_class DPR2L_encoding( regDPR src ) %{
3010     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3011     emit_opcode(cbuf,0x2D);
3012     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3013     // Allocate a word
3014     emit_opcode(cbuf,0x83);            // SUB ESP,8
3015     emit_opcode(cbuf,0xEC);
3016     emit_d8(cbuf,0x08);
3017     // Encoding assumes a double has been pushed into FPR0.
3018     // Store down the double as a long, popping the FPU stack
3019     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3020     emit_opcode(cbuf,0x3C);
3021     emit_d8(cbuf,0x24);
3022     // Restore the rounding mode; mask the exception
3023     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3024     emit_opcode(cbuf,0x2D);
3025     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3026         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3027         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3028 
3029     // Load the converted int; adjust CPU stack
3030     emit_opcode(cbuf,0x58);       // POP EAX
3031     emit_opcode(cbuf,0x5A);       // POP EDX
3032     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3033     emit_d8    (cbuf,0xFA);       // rdx
3034     emit_d32   (cbuf,0x80000000); //         0x80000000
3035     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3036     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3037     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3038     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3039     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3040     emit_d8    (cbuf,0x07);       // Size of slow_call
3041     // Push src onto stack slow-path
3042     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3043     emit_d8    (cbuf,0xC0-1+$src$$reg );
3044     // CALL directly to the runtime
3045     cbuf.set_insts_mark();
3046     emit_opcode(cbuf,0xE8);       // Call into runtime
3047     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3048     // Carry on here...
3049   %}
3050 
3051   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3052     // Operand was loaded from memory into fp ST (stack top)
3053     // FMUL   ST,$src  /* D8 C8+i */
3054     emit_opcode(cbuf, 0xD8);
3055     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3056   %}
3057 
3058   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3059     // FADDP  ST,src2  /* D8 C0+i */
3060     emit_opcode(cbuf, 0xD8);
3061     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
3063   %}
3064 
3065   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3066     // FADDP  src2,ST  /* DE C0+i */
3067     emit_opcode(cbuf, 0xDE);
3068     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3069   %}
3070 
3071   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3072     // Operand has been loaded into fp ST (stack top)
3073       // FSUB   ST,$src1
3074       emit_opcode(cbuf, 0xD8);
3075       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3076 
3077       // FDIV
3078       emit_opcode(cbuf, 0xD8);
3079       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3080   %}
3081 
3082   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3083     // Operand was loaded from memory into fp ST (stack top)
3084     // FADD   ST,$src  /* D8 C0+i */
3085     emit_opcode(cbuf, 0xD8);
3086     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3087 
3088     // FMUL  ST,src2  /* D8 C*+i */
3089     emit_opcode(cbuf, 0xD8);
3090     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3091   %}
3092 
3093 
3094   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3095     // Operand was loaded from memory into fp ST (stack top)
3096     // FADD   ST,$src  /* D8 C0+i */
3097     emit_opcode(cbuf, 0xD8);
3098     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3099 
3100     // FMULP  src2,ST  /* DE C8+i */
3101     emit_opcode(cbuf, 0xDE);
3102     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3103   %}
3104 
3105   // Atomically load the volatile long
3106   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
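    // FILD the 64-bit value straight from $mem (DF /5), then FISTP it into
    // the destination stack slot (DF /7); the single 64-bit FILD is what
    // makes the load atomic.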
3107     emit_opcode(cbuf,0xDF);
3108     int rm_byte_opcode = 0x05;
3109     int base     = $mem$$base;
3110     int index    = $mem$$index;
3111     int scale    = $mem$$scale;
3112     int displace = $mem$$disp;
3113     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3114     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3115     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3116   %}
3117 
3118   // Volatile Store Long.  Must be atomic, so move it into
3119   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3120   // target address before the store (for null-ptr checks)
3121   // so the memory operand is used twice in the encoding.
3122   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
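    // FILD the 64-bit source from its stack slot (DF /5), then FISTP it to
    // the target address (DF /7); each instruction touches all 64 bits in a
    // single memory access, which is what makes the store atomic.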
3123     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3124     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3125     emit_opcode(cbuf,0xDF);
3126     int rm_byte_opcode = 0x07;
3127     int base     = $mem$$base;
3128     int index    = $mem$$index;
3129     int scale    = $mem$$scale;
3130     int displace = $mem$$disp;
3131     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3132     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3133   %}
3134 
3135   // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI
3139   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3140 
3141   enc_class Safepoint_Poll() %{
3142     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
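    // TEST [polling_page],EDI : opcode 0x85, ModRM mod=00, reg=EDI,
    // rm=101 (absolute disp32).  The poll only reads the page; the flags
    // are the only register state it clobbers.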
3143     emit_opcode(cbuf,0x85);
3144     emit_rm (cbuf, 0x0, 0x7, 0x5);
3145     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3146   %}
3147 %}
3148 
3149 
3150 //----------FRAME--------------------------------------------------------------
3151 // Definition of frame structure and management information.
3152 //
3153 //  S T A C K   L A Y O U T    Allocators stack-slot number
3154 //                             |   (to get allocators register number
3155 //  G  Owned by    |        |  v    add OptoReg::stack0())
3156 //  r   CALLER     |        |
3157 //  o     |        +--------+      pad to even-align allocators stack-slot
3158 //  w     V        |  pad0  |        numbers; owned by CALLER
3159 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3160 //  h     ^        |   in   |  5
3161 //        |        |  args  |  4   Holes in incoming args owned by SELF
3162 //  |     |        |        |  3
3163 //  |     |        +--------+
3164 //  V     |        | old out|      Empty on Intel, window on Sparc
3165 //        |    old |preserve|      Must be even aligned.
3166 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3167 //        |        |   in   |  3   area for Intel ret address
3168 //     Owned by    |preserve|      Empty on Sparc.
3169 //       SELF      +--------+
3170 //        |        |  pad2  |  2   pad to align old SP
3171 //        |        +--------+  1
3172 //        |        | locks  |  0
3173 //        |        +--------+----> OptoReg::stack0(), even aligned
3174 //        |        |  pad1  | 11   pad to align new SP
3175 //        |        +--------+
3176 //        |        |        | 10
3177 //        |        | spills |  9   spills
3178 //        V        |        |  8   (pad0 slot for callee)
3179 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3180 //        ^        |  out   |  7
3181 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3182 //     Owned by    +--------+
3183 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3184 //        |    new |preserve|      Must be even-aligned.
3185 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3186 //        |        |        |
3187 //
3188 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3189 //         known from SELF's arguments and the Java calling convention.
3190 //         Region 6-7 is determined per call site.
3191 // Note 2: If the calling convention leaves holes in the incoming argument
3192 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3194 //         incoming area, as the Java calling convention is completely under
3195 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3197 //         varargs C calling conventions.
3198 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3199 //         even aligned with pad0 as needed.
3200 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3201 //         region 6-11 is even aligned; it may be padded out more so that
3202 //         the region from SP to FP meets the minimum stack alignment.
3203 
3204 frame %{
3205   // What direction does stack grow in (assumed to be same for C & Java)
3206   stack_direction(TOWARDS_LOW);
3207 
3208   // These three registers define part of the calling convention
3209   // between compiled code and the interpreter.
3210   inline_cache_reg(EAX);                // Inline Cache Register
3211   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3212 
3213   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3214   cisc_spilling_operand_name(indOffset32);
3215 
3216   // Number of stack slots consumed by locking an object
3217   sync_stack_slots(1);
3218 
3219   // Compiled code's Frame Pointer
3220   frame_pointer(ESP);
3221   // Interpreter stores its frame pointer in a register which is
3222   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
3224   interpreter_frame_pointer(EBP);
3225 
3226   // Stack alignment requirement
3227   // Alignment size in bytes (128-bit -> 16 bytes)
3228   stack_alignment(StackAlignmentInBytes);
3229 
3230   // Number of stack slots between incoming argument block and the start of
3231   // a new frame.  The PROLOG must add this many slots to the stack.  The
3232   // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp (must save rbp)
3234   in_preserve_stack_slots(2+VerifyStackAtCalls);
3235 
3236   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3237   // for calls to C.  Supports the var-args backing area for register parms.
3238   varargs_C_out_slots_killed(0);
3239 
3240   // The after-PROLOG location of the return address.  Location of
3241   // return address specifies a type (REG or STACK) and a number
3242   // representing the register number (i.e. - use a register name) or
3243   // stack slot.
3244   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
3246   return_addr(STACK - 1 +
3247               round_to((Compile::current()->in_preserve_stack_slots() +
3248                         Compile::current()->fixed_slots()),
3249                        stack_alignment_in_slots()));
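  // For example, assuming the usual 2 preserve slots, no fixed slots, and a
  // 16-byte (4-slot) stack alignment, round_to(2, 4) == 4 and the return
  // address lands in stack slot 3.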
3250 
3251   // Body of function which returns an integer array locating
3252   // arguments either in registers or in stack slots.  Passed an array
3253   // of ideal registers called "sig" and a "length" count.  Stack-slot
3254   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3255   // arguments for a CALLEE.  Incoming stack arguments are
3256   // automatically biased by the preserve_stack_slots field above.
3257   calling_convention %{
    // No difference between incoming and outgoing, so just pass false
3259     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3260   %}
3261 
3262 
3263   // Body of function which returns an integer array locating
3264   // arguments either in registers or in stack slots.  Passed an array
3265   // of ideal registers called "sig" and a "length" count.  Stack-slot
3266   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3267   // arguments for a CALLEE.  Incoming stack arguments are
3268   // automatically biased by the preserve_stack_slots field above.
3269   c_calling_convention %{
3270     // This is obviously always outgoing
3271     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3272   %}
3273 
3274   // Location of C & interpreter return values
3275   c_return_value %{
3276     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3277     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3278     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3279 
3280     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3281     // that C functions return float and double results in XMM0.
3282     if( ideal_reg == Op_RegD && UseSSE>=2 )
3283       return OptoRegPair(XMM0b_num,XMM0_num);
3284     if( ideal_reg == Op_RegF && UseSSE>=2 )
3285       return OptoRegPair(OptoReg::Bad,XMM0_num);
3286 
3287     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3288   %}
3289 
3290   // Location of return values
3291   return_value %{
3292     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3293     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3294     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3295     if( ideal_reg == Op_RegD && UseSSE>=2 )
3296       return OptoRegPair(XMM0b_num,XMM0_num);
3297     if( ideal_reg == Op_RegF && UseSSE>=1 )
3298       return OptoRegPair(OptoReg::Bad,XMM0_num);
3299     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3300   %}
3301 
3302 %}
3303 
3304 //----------ATTRIBUTES---------------------------------------------------------
3305 //----------Operand Attributes-------------------------------------------------
3306 op_attrib op_cost(0);        // Required cost attribute
3307 
3308 //----------Instruction Attributes---------------------------------------------
3309 ins_attrib ins_cost(100);       // Required cost attribute
3310 ins_attrib ins_size(8);         // Required size attribute (in bits)
3311 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3312                                 // non-matching short branch variant of some
                                // long branch?
3314 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3315                                 // specifies the alignment that some part of the instruction (not
3316                                 // necessarily the start) requires.  If > 1, a compute_padding()
3317                                 // function must be provided for the instruction
3318 
3319 //----------OPERANDS-----------------------------------------------------------
3320 // Operand definitions must precede instruction definitions for correct parsing
3321 // in the ADLC because operands constitute user defined types which are used in
3322 // instruction definitions.
3323 
3324 //----------Simple Operands----------------------------------------------------
3325 // Immediate Operands
3326 // Integer Immediate
3327 operand immI() %{
3328   match(ConI);
3329 
3330   op_cost(10);
3331   format %{ %}
3332   interface(CONST_INTER);
3333 %}
3334 
3335 // Constant for test vs zero
3336 operand immI0() %{
3337   predicate(n->get_int() == 0);
3338   match(ConI);
3339 
3340   op_cost(0);
3341   format %{ %}
3342   interface(CONST_INTER);
3343 %}
3344 
3345 // Constant for increment
3346 operand immI1() %{
3347   predicate(n->get_int() == 1);
3348   match(ConI);
3349 
3350   op_cost(0);
3351   format %{ %}
3352   interface(CONST_INTER);
3353 %}
3354 
3355 // Constant for decrement
3356 operand immI_M1() %{
3357   predicate(n->get_int() == -1);
3358   match(ConI);
3359 
3360   op_cost(0);
3361   format %{ %}
3362   interface(CONST_INTER);
3363 %}
3364 
3365 // Valid scale values for addressing modes
3366 operand immI2() %{
3367   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3368   match(ConI);
3369 
3370   format %{ %}
3371   interface(CONST_INTER);
3372 %}
3373 
3374 operand immI8() %{
3375   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3376   match(ConI);
3377 
3378   op_cost(5);
3379   format %{ %}
3380   interface(CONST_INTER);
3381 %}
3382 
3383 operand immI16() %{
3384   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3385   match(ConI);
3386 
3387   op_cost(10);
3388   format %{ %}
3389   interface(CONST_INTER);
3390 %}
3391 
3392 // Int Immediate non-negative
3393 operand immU31()
3394 %{
3395   predicate(n->get_int() >= 0);
3396   match(ConI);
3397 
3398   op_cost(0);
3399   format %{ %}
3400   interface(CONST_INTER);
3401 %}
3402 
3403 // Constant for long shifts
3404 operand immI_32() %{
3405   predicate( n->get_int() == 32 );
3406   match(ConI);
3407 
3408   op_cost(0);
3409   format %{ %}
3410   interface(CONST_INTER);
3411 %}
3412 
3413 operand immI_1_31() %{
3414   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3415   match(ConI);
3416 
3417   op_cost(0);
3418   format %{ %}
3419   interface(CONST_INTER);
3420 %}
3421 
3422 operand immI_32_63() %{
3423   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3424   match(ConI);
3425   op_cost(0);
3426 
3427   format %{ %}
3428   interface(CONST_INTER);
3429 %}
3430 
3431 operand immI_1() %{
3432   predicate( n->get_int() == 1 );
3433   match(ConI);
3434 
3435   op_cost(0);
3436   format %{ %}
3437   interface(CONST_INTER);
3438 %}
3439 
3440 operand immI_2() %{
3441   predicate( n->get_int() == 2 );
3442   match(ConI);
3443 
3444   op_cost(0);
3445   format %{ %}
3446   interface(CONST_INTER);
3447 %}
3448 
3449 operand immI_3() %{
3450   predicate( n->get_int() == 3 );
3451   match(ConI);
3452 
3453   op_cost(0);
3454   format %{ %}
3455   interface(CONST_INTER);
3456 %}
3457 
3458 // Pointer Immediate
3459 operand immP() %{
3460   match(ConP);
3461 
3462   op_cost(10);
3463   format %{ %}
3464   interface(CONST_INTER);
3465 %}
3466 
3467 // NULL Pointer Immediate
3468 operand immP0() %{
3469   predicate( n->get_ptr() == 0 );
3470   match(ConP);
3471   op_cost(0);
3472 
3473   format %{ %}
3474   interface(CONST_INTER);
3475 %}
3476 
3477 // Long Immediate
3478 operand immL() %{
3479   match(ConL);
3480 
3481   op_cost(20);
3482   format %{ %}
3483   interface(CONST_INTER);
3484 %}
3485 
3486 // Long Immediate zero
3487 operand immL0() %{
3488   predicate( n->get_long() == 0L );
3489   match(ConL);
3490   op_cost(0);
3491 
3492   format %{ %}
3493   interface(CONST_INTER);
3494 %}
3495 
// Long Immediate minus one
3497 operand immL_M1() %{
3498   predicate( n->get_long() == -1L );
3499   match(ConL);
3500   op_cost(0);
3501 
3502   format %{ %}
3503   interface(CONST_INTER);
3504 %}
3505 
3506 // Long immediate from 0 to 127.
3507 // Used for a shorter form of long mul by 10.
3508 operand immL_127() %{
3509   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3510   match(ConL);
3511   op_cost(0);
3512 
3513   format %{ %}
3514   interface(CONST_INTER);
3515 %}
3516 
3517 // Long Immediate: low 32-bit mask
3518 operand immL_32bits() %{
3519   predicate(n->get_long() == 0xFFFFFFFFL);
3520   match(ConL);
3521   op_cost(0);
3522 
3523   format %{ %}
3524   interface(CONST_INTER);
3525 %}
3526 
// Long Immediate: value fits in 32 bits (sign-extends to the same long)
3528 operand immL32() %{
3529   predicate(n->get_long() == (int)(n->get_long()));
3530   match(ConL);
3531   op_cost(20);
3532 
3533   format %{ %}
3534   interface(CONST_INTER);
3535 %}
3536 
// Double Immediate zero
3538 operand immDPR0() %{
3539   // Do additional (and counter-intuitive) test against NaN to work around VC++
3540   // bug that generates code such that NaNs compare equal to 0.0
3541   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3542   match(ConD);
3543 
3544   op_cost(5);
3545   format %{ %}
3546   interface(CONST_INTER);
3547 %}
3548 
3549 // Double Immediate one
3550 operand immDPR1() %{
3551   predicate( UseSSE<=1 && n->getd() == 1.0 );
3552   match(ConD);
3553 
3554   op_cost(5);
3555   format %{ %}
3556   interface(CONST_INTER);
3557 %}
3558 
3559 // Double Immediate
3560 operand immDPR() %{
3561   predicate(UseSSE<=1);
3562   match(ConD);
3563 
3564   op_cost(5);
3565   format %{ %}
3566   interface(CONST_INTER);
3567 %}
3568 
3569 operand immD() %{
3570   predicate(UseSSE>=2);
3571   match(ConD);
3572 
3573   op_cost(5);
3574   format %{ %}
3575   interface(CONST_INTER);
3576 %}
3577 
3578 // Double Immediate zero
3579 operand immD0() %{
3580   // Do additional (and counter-intuitive) test against NaN to work around VC++
3581   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3582   // compare equal to -0.0.
3583   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3584   match(ConD);
3585 
3586   format %{ %}
3587   interface(CONST_INTER);
3588 %}
3589 
3590 // Float Immediate zero
3591 operand immFPR0() %{
3592   predicate(UseSSE == 0 && n->getf() == 0.0F);
3593   match(ConF);
3594 
3595   op_cost(5);
3596   format %{ %}
3597   interface(CONST_INTER);
3598 %}
3599 
3600 // Float Immediate one
3601 operand immFPR1() %{
3602   predicate(UseSSE == 0 && n->getf() == 1.0F);
3603   match(ConF);
3604 
3605   op_cost(5);
3606   format %{ %}
3607   interface(CONST_INTER);
3608 %}
3609 
3610 // Float Immediate
3611 operand immFPR() %{
3612   predicate( UseSSE == 0 );
3613   match(ConF);
3614 
3615   op_cost(5);
3616   format %{ %}
3617   interface(CONST_INTER);
3618 %}
3619 
3620 // Float Immediate
3621 operand immF() %{
3622   predicate(UseSSE >= 1);
3623   match(ConF);
3624 
3625   op_cost(5);
3626   format %{ %}
3627   interface(CONST_INTER);
3628 %}
3629 
3630 // Float Immediate zero.  Zero and not -0.0
3631 operand immF0() %{
3632   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3633   match(ConF);
3634 
3635   op_cost(5);
3636   format %{ %}
3637   interface(CONST_INTER);
3638 %}
3639 
3640 // Immediates for special shifts (sign extend)
3641 
3642 // Constants for increment
3643 operand immI_16() %{
3644   predicate( n->get_int() == 16 );
3645   match(ConI);
3646 
3647   format %{ %}
3648   interface(CONST_INTER);
3649 %}
3650 
3651 operand immI_24() %{
3652   predicate( n->get_int() == 24 );
3653   match(ConI);
3654 
3655   format %{ %}
3656   interface(CONST_INTER);
3657 %}
3658 
3659 // Constant for byte-wide masking
3660 operand immI_255() %{
3661   predicate( n->get_int() == 255 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 // Constant for short-wide masking
3669 operand immI_65535() %{
3670   predicate(n->get_int() == 65535);
3671   match(ConI);
3672 
3673   format %{ %}
3674   interface(CONST_INTER);
3675 %}
3676 
3677 // Register Operands
3678 // Integer Register
3679 operand rRegI() %{
3680   constraint(ALLOC_IN_RC(int_reg));
3681   match(RegI);
3682   match(xRegI);
3683   match(eAXRegI);
3684   match(eBXRegI);
3685   match(eCXRegI);
3686   match(eDXRegI);
3687   match(eDIRegI);
3688   match(eSIRegI);
3689 
3690   format %{ %}
3691   interface(REG_INTER);
3692 %}
3693 
3694 // Subset of Integer Register
3695 operand xRegI(rRegI reg) %{
3696   constraint(ALLOC_IN_RC(int_x_reg));
3697   match(reg);
3698   match(eAXRegI);
3699   match(eBXRegI);
3700   match(eCXRegI);
3701   match(eDXRegI);
3702 
3703   format %{ %}
3704   interface(REG_INTER);
3705 %}
3706 
3707 // Special Registers
3708 operand eAXRegI(xRegI reg) %{
3709   constraint(ALLOC_IN_RC(eax_reg));
3710   match(reg);
3711   match(rRegI);
3712 
3713   format %{ "EAX" %}
3714   interface(REG_INTER);
3715 %}
3716 
3717 // Special Registers
3718 operand eBXRegI(xRegI reg) %{
3719   constraint(ALLOC_IN_RC(ebx_reg));
3720   match(reg);
3721   match(rRegI);
3722 
3723   format %{ "EBX" %}
3724   interface(REG_INTER);
3725 %}
3726 
3727 operand eCXRegI(xRegI reg) %{
3728   constraint(ALLOC_IN_RC(ecx_reg));
3729   match(reg);
3730   match(rRegI);
3731 
3732   format %{ "ECX" %}
3733   interface(REG_INTER);
3734 %}
3735 
3736 operand eDXRegI(xRegI reg) %{
3737   constraint(ALLOC_IN_RC(edx_reg));
3738   match(reg);
3739   match(rRegI);
3740 
3741   format %{ "EDX" %}
3742   interface(REG_INTER);
3743 %}
3744 
3745 operand eDIRegI(xRegI reg) %{
3746   constraint(ALLOC_IN_RC(edi_reg));
3747   match(reg);
3748   match(rRegI);
3749 
3750   format %{ "EDI" %}
3751   interface(REG_INTER);
3752 %}
3753 
3754 operand naxRegI() %{
3755   constraint(ALLOC_IN_RC(nax_reg));
3756   match(RegI);
3757   match(eCXRegI);
3758   match(eDXRegI);
3759   match(eSIRegI);
3760   match(eDIRegI);
3761 
3762   format %{ %}
3763   interface(REG_INTER);
3764 %}
3765 
3766 operand nadxRegI() %{
3767   constraint(ALLOC_IN_RC(nadx_reg));
3768   match(RegI);
3769   match(eBXRegI);
3770   match(eCXRegI);
3771   match(eSIRegI);
3772   match(eDIRegI);
3773 
3774   format %{ %}
3775   interface(REG_INTER);
3776 %}
3777 
3778 operand ncxRegI() %{
3779   constraint(ALLOC_IN_RC(ncx_reg));
3780   match(RegI);
3781   match(eAXRegI);
3782   match(eDXRegI);
3783   match(eSIRegI);
3784   match(eDIRegI);
3785 
3786   format %{ %}
3787   interface(REG_INTER);
3788 %}
3789 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
//
3792 operand eSIRegI(xRegI reg) %{
3793    constraint(ALLOC_IN_RC(esi_reg));
3794    match(reg);
3795    match(rRegI);
3796 
3797    format %{ "ESI" %}
3798    interface(REG_INTER);
3799 %}
3800 
3801 // Pointer Register
3802 operand anyRegP() %{
3803   constraint(ALLOC_IN_RC(any_reg));
3804   match(RegP);
3805   match(eAXRegP);
3806   match(eBXRegP);
3807   match(eCXRegP);
3808   match(eDIRegP);
3809   match(eRegP);
3810 
3811   format %{ %}
3812   interface(REG_INTER);
3813 %}
3814 
3815 operand eRegP() %{
3816   constraint(ALLOC_IN_RC(int_reg));
3817   match(RegP);
3818   match(eAXRegP);
3819   match(eBXRegP);
3820   match(eCXRegP);
3821   match(eDIRegP);
3822 
3823   format %{ %}
3824   interface(REG_INTER);
3825 %}
3826 
// On Windows 95, EBP is not safe to use for implicit null tests.
3828 operand eRegP_no_EBP() %{
3829   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3830   match(RegP);
3831   match(eAXRegP);
3832   match(eBXRegP);
3833   match(eCXRegP);
3834   match(eDIRegP);
3835 
3836   op_cost(100);
3837   format %{ %}
3838   interface(REG_INTER);
3839 %}
3840 
3841 operand naxRegP() %{
3842   constraint(ALLOC_IN_RC(nax_reg));
3843   match(RegP);
3844   match(eBXRegP);
3845   match(eDXRegP);
3846   match(eCXRegP);
3847   match(eSIRegP);
3848   match(eDIRegP);
3849 
3850   format %{ %}
3851   interface(REG_INTER);
3852 %}
3853 
3854 operand nabxRegP() %{
3855   constraint(ALLOC_IN_RC(nabx_reg));
3856   match(RegP);
3857   match(eCXRegP);
3858   match(eDXRegP);
3859   match(eSIRegP);
3860   match(eDIRegP);
3861 
3862   format %{ %}
3863   interface(REG_INTER);
3864 %}
3865 
3866 operand pRegP() %{
3867   constraint(ALLOC_IN_RC(p_reg));
3868   match(RegP);
3869   match(eBXRegP);
3870   match(eDXRegP);
3871   match(eSIRegP);
3872   match(eDIRegP);
3873 
3874   format %{ %}
3875   interface(REG_INTER);
3876 %}
3877 
3878 // Special Registers
3879 // Return a pointer value
3880 operand eAXRegP(eRegP reg) %{
3881   constraint(ALLOC_IN_RC(eax_reg));
3882   match(reg);
3883   format %{ "EAX" %}
3884   interface(REG_INTER);
3885 %}
3886 
3887 // Used in AtomicAdd
3888 operand eBXRegP(eRegP reg) %{
3889   constraint(ALLOC_IN_RC(ebx_reg));
3890   match(reg);
3891   format %{ "EBX" %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Tail-call (interprocedural jump) to interpreter
3896 operand eCXRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(ecx_reg));
3898   match(reg);
3899   format %{ "ECX" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 operand eSIRegP(eRegP reg) %{
3904   constraint(ALLOC_IN_RC(esi_reg));
3905   match(reg);
3906   format %{ "ESI" %}
3907   interface(REG_INTER);
3908 %}
3909 
3910 // Used in rep stosw
3911 operand eDIRegP(eRegP reg) %{
3912   constraint(ALLOC_IN_RC(edi_reg));
3913   match(reg);
3914   format %{ "EDI" %}
3915   interface(REG_INTER);
3916 %}
3917 
3918 operand eRegL() %{
3919   constraint(ALLOC_IN_RC(long_reg));
3920   match(RegL);
3921   match(eADXRegL);
3922 
3923   format %{ %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 operand eADXRegL( eRegL reg ) %{
3928   constraint(ALLOC_IN_RC(eadx_reg));
3929   match(reg);
3930 
3931   format %{ "EDX:EAX" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eBCXRegL( eRegL reg ) %{
3936   constraint(ALLOC_IN_RC(ebcx_reg));
3937   match(reg);
3938 
3939   format %{ "EBX:ECX" %}
3940   interface(REG_INTER);
3941 %}
3942 
3943 // Special case for integer high multiply
3944 operand eADXRegL_low_only() %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(RegL);
3947 
3948   format %{ "EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 // Flags register, used as output of compare instructions
3953 operand eFlagsReg() %{
3954   constraint(ALLOC_IN_RC(int_flags));
3955   match(RegFlags);
3956 
3957   format %{ "EFLAGS" %}
3958   interface(REG_INTER);
3959 %}
3960 
3961 // Flags register, used as output of FLOATING POINT compare instructions
3962 operand eFlagsRegU() %{
3963   constraint(ALLOC_IN_RC(int_flags));
3964   match(RegFlags);
3965 
3966   format %{ "EFLAGS_U" %}
3967   interface(REG_INTER);
3968 %}
3969 
3970 operand eFlagsRegUCF() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973   predicate(false);
3974 
3975   format %{ "EFLAGS_U_CF" %}
3976   interface(REG_INTER);
3977 %}
3978 
3979 // Condition Code Register used by long compare
3980 operand flagsReg_long_LTGE() %{
3981   constraint(ALLOC_IN_RC(int_flags));
3982   match(RegFlags);
3983   format %{ "FLAGS_LTGE" %}
3984   interface(REG_INTER);
3985 %}
3986 operand flagsReg_long_EQNE() %{
3987   constraint(ALLOC_IN_RC(int_flags));
3988   match(RegFlags);
3989   format %{ "FLAGS_EQNE" %}
3990   interface(REG_INTER);
3991 %}
3992 operand flagsReg_long_LEGT() %{
3993   constraint(ALLOC_IN_RC(int_flags));
3994   match(RegFlags);
3995   format %{ "FLAGS_LEGT" %}
3996   interface(REG_INTER);
3997 %}
3998 
3999 // Float register operands
4000 operand regDPR() %{
4001   predicate( UseSSE < 2 );
4002   constraint(ALLOC_IN_RC(fp_dbl_reg));
4003   match(RegD);
4004   match(regDPR1);
4005   match(regDPR2);
4006   format %{ %}
4007   interface(REG_INTER);
4008 %}
4009 
4010 operand regDPR1(regDPR reg) %{
4011   predicate( UseSSE < 2 );
4012   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4013   match(reg);
4014   format %{ "FPR1" %}
4015   interface(REG_INTER);
4016 %}
4017 
4018 operand regDPR2(regDPR reg) %{
4019   predicate( UseSSE < 2 );
4020   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4021   match(reg);
4022   format %{ "FPR2" %}
4023   interface(REG_INTER);
4024 %}
4025 
4026 operand regnotDPR1(regDPR reg) %{
4027   predicate( UseSSE < 2 );
4028   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4029   match(reg);
4030   format %{ %}
4031   interface(REG_INTER);
4032 %}
4033 
4034 // Float register operands
4035 operand regFPR() %{
4036   predicate( UseSSE < 2 );
4037   constraint(ALLOC_IN_RC(fp_flt_reg));
4038   match(RegF);
4039   match(regFPR1);
4040   format %{ %}
4041   interface(REG_INTER);
4042 %}
4043 
4044 // Float register operands
4045 operand regFPR1(regFPR reg) %{
4046   predicate( UseSSE < 2 );
4047   constraint(ALLOC_IN_RC(fp_flt_reg0));
4048   match(reg);
4049   format %{ "FPR1" %}
4050   interface(REG_INTER);
4051 %}
4052 
4053 // XMM Float register operands
4054 operand regF() %{
4055   predicate( UseSSE>=1 );
4056   constraint(ALLOC_IN_RC(float_reg_legacy));
4057   match(RegF);
4058   format %{ %}
4059   interface(REG_INTER);
4060 %}
4061 
4062 // XMM Double register operands
4063 operand regD() %{
4064   predicate( UseSSE>=2 );
4065   constraint(ALLOC_IN_RC(double_reg_legacy));
4066   match(RegD);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
// Vectors: note that we use legacy registers to avoid extra (unneeded in the 32-bit VM)
4072 // runtime code generation via reg_class_dynamic.
4073 operand vecS() %{
4074   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4075   match(VecS);
4076 
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 operand vecD() %{
4082   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4083   match(VecD);
4084 
4085   format %{ %}
4086   interface(REG_INTER);
4087 %}
4088 
4089 operand vecX() %{
4090   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4091   match(VecX);
4092 
4093   format %{ %}
4094   interface(REG_INTER);
4095 %}
4096 
4097 operand vecY() %{
4098   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4099   match(VecY);
4100 
4101   format %{ %}
4102   interface(REG_INTER);
4103 %}
4104 
4105 //----------Memory Operands----------------------------------------------------
4106 // Direct Memory Operand
4107 operand direct(immP addr) %{
4108   match(addr);
4109 
4110   format %{ "[$addr]" %}
4111   interface(MEMORY_INTER) %{
4112     base(0xFFFFFFFF);
4113     index(0x4);
4114     scale(0x0);
4115     disp($addr);
4116   %}
4117 %}
4118 
4119 // Indirect Memory Operand
4120 operand indirect(eRegP reg) %{
4121   constraint(ALLOC_IN_RC(int_reg));
4122   match(reg);
4123 
4124   format %{ "[$reg]" %}
4125   interface(MEMORY_INTER) %{
4126     base($reg);
4127     index(0x4);
4128     scale(0x0);
4129     disp(0x0);
4130   %}
4131 %}
4132 
4133 // Indirect Memory Plus Short Offset Operand
4134 operand indOffset8(eRegP reg, immI8 off) %{
4135   match(AddP reg off);
4136 
4137   format %{ "[$reg + $off]" %}
4138   interface(MEMORY_INTER) %{
4139     base($reg);
4140     index(0x4);
4141     scale(0x0);
4142     disp($off);
4143   %}
4144 %}
4145 
4146 // Indirect Memory Plus Long Offset Operand
4147 operand indOffset32(eRegP reg, immI off) %{
4148   match(AddP reg off);
4149 
4150   format %{ "[$reg + $off]" %}
4151   interface(MEMORY_INTER) %{
4152     base($reg);
4153     index(0x4);
4154     scale(0x0);
4155     disp($off);
4156   %}
4157 %}
4158 
4159 // Indirect Memory Plus Long Offset Operand
4160 operand indOffset32X(rRegI reg, immP off) %{
4161   match(AddP off reg);
4162 
4163   format %{ "[$reg + $off]" %}
4164   interface(MEMORY_INTER) %{
4165     base($reg);
4166     index(0x4);
4167     scale(0x0);
4168     disp($off);
4169   %}
4170 %}
4171 
4172 // Indirect Memory Plus Index Register Plus Offset Operand
4173 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4174   match(AddP (AddP reg ireg) off);
4175 
4176   op_cost(10);
4177   format %{"[$reg + $off + $ireg]" %}
4178   interface(MEMORY_INTER) %{
4179     base($reg);
4180     index($ireg);
4181     scale(0x0);
4182     disp($off);
4183   %}
4184 %}
4185 
4186 // Indirect Memory Plus Index Register Plus Offset Operand
4187 operand indIndex(eRegP reg, rRegI ireg) %{
4188   match(AddP reg ireg);
4189 
4190   op_cost(10);
4191   format %{"[$reg + $ireg]" %}
4192   interface(MEMORY_INTER) %{
4193     base($reg);
4194     index($ireg);
4195     scale(0x0);
4196     disp(0x0);
4197   %}
4198 %}
4199 
4200 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4202 // // -------------------------------------------------------------------------
4203 // // Scaled Memory Operands
4204 // // Indirect Memory Times Scale Plus Offset Operand
4205 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4206 //   match(AddP off (LShiftI ireg scale));
4207 //
4208 //   op_cost(10);
4209 //   format %{"[$off + $ireg << $scale]" %}
4210 //   interface(MEMORY_INTER) %{
4211 //     base(0x4);
4212 //     index($ireg);
4213 //     scale($scale);
4214 //     disp($off);
4215 //   %}
4216 // %}
4217 
4218 // Indirect Memory Times Scale Plus Index Register
4219 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4220   match(AddP reg (LShiftI ireg scale));
4221 
4222   op_cost(10);
4223   format %{"[$reg + $ireg << $scale]" %}
4224   interface(MEMORY_INTER) %{
4225     base($reg);
4226     index($ireg);
4227     scale($scale);
4228     disp(0x0);
4229   %}
4230 %}
4231 
4232 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4233 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4234   match(AddP (AddP reg (LShiftI ireg scale)) off);
4235 
4236   op_cost(10);
4237   format %{"[$reg + $off + $ireg << $scale]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index($ireg);
4241     scale($scale);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4248 // the first word of the long.  If the load-long destination overlaps with
4249 // registers used in the addressing expression, the 2nd half will be loaded
4250 // from a clobbered address.  Fix this by requiring that load-long use
4251 // address registers that do not overlap with the load-long target.
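// For example, a load-long whose destination pair includes EAX but whose
// address is formed from EAX would fetch one half and then rebuild the
// address from the already-clobbered EAX; the ESI-only operands below avoid
// that overlap.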
4252 
4253 // load-long support
4254 operand load_long_RegP() %{
4255   constraint(ALLOC_IN_RC(esi_reg));
4256   match(RegP);
4257   match(eSIRegP);
4258   op_cost(100);
4259   format %{  %}
4260   interface(REG_INTER);
4261 %}
4262 
4263 // Indirect Memory Operand Long
4264 operand load_long_indirect(load_long_RegP reg) %{
4265   constraint(ALLOC_IN_RC(esi_reg));
4266   match(reg);
4267 
4268   format %{ "[$reg]" %}
4269   interface(MEMORY_INTER) %{
4270     base($reg);
4271     index(0x4);
4272     scale(0x0);
4273     disp(0x0);
4274   %}
4275 %}
4276 
4277 // Indirect Memory Plus Long Offset Operand
4278 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4279   match(AddP reg off);
4280 
4281   format %{ "[$reg + $off]" %}
4282   interface(MEMORY_INTER) %{
4283     base($reg);
4284     index(0x4);
4285     scale(0x0);
4286     disp($off);
4287   %}
4288 %}
4289 
4290 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4291 
4292 
4293 //----------Special Memory Operands--------------------------------------------
4294 // Stack Slot Operand - This operand is used for loading and storing temporary
4295 //                      values on the stack where a match requires a value to
4296 //                      flow through memory.
4297 operand stackSlotP(sRegP reg) %{
4298   constraint(ALLOC_IN_RC(stack_slots));
4299   // No match rule because this operand is only generated in matching
4300   format %{ "[$reg]" %}
4301   interface(MEMORY_INTER) %{
4302     base(0x4);   // ESP
4303     index(0x4);  // No Index
4304     scale(0x0);  // No Scale
4305     disp($reg);  // Stack Offset
4306   %}
4307 %}
4308 
4309 operand stackSlotI(sRegI reg) %{
4310   constraint(ALLOC_IN_RC(stack_slots));
4311   // No match rule because this operand is only generated in matching
4312   format %{ "[$reg]" %}
4313   interface(MEMORY_INTER) %{
4314     base(0x4);   // ESP
4315     index(0x4);  // No Index
4316     scale(0x0);  // No Scale
4317     disp($reg);  // Stack Offset
4318   %}
4319 %}
4320 
4321 operand stackSlotF(sRegF reg) %{
4322   constraint(ALLOC_IN_RC(stack_slots));
4323   // No match rule because this operand is only generated in matching
4324   format %{ "[$reg]" %}
4325   interface(MEMORY_INTER) %{
4326     base(0x4);   // ESP
4327     index(0x4);  // No Index
4328     scale(0x0);  // No Scale
4329     disp($reg);  // Stack Offset
4330   %}
4331 %}
4332 
4333 operand stackSlotD(sRegD reg) %{
4334   constraint(ALLOC_IN_RC(stack_slots));
4335   // No match rule because this operand is only generated in matching
4336   format %{ "[$reg]" %}
4337   interface(MEMORY_INTER) %{
4338     base(0x4);   // ESP
4339     index(0x4);  // No Index
4340     scale(0x0);  // No Scale
4341     disp($reg);  // Stack Offset
4342   %}
4343 %}
4344 
4345 operand stackSlotL(sRegL reg) %{
4346   constraint(ALLOC_IN_RC(stack_slots));
4347   // No match rule because this operand is only generated in matching
4348   format %{ "[$reg]" %}
4349   interface(MEMORY_INTER) %{
4350     base(0x4);   // ESP
4351     index(0x4);  // No Index
4352     scale(0x0);  // No Scale
4353     disp($reg);  // Stack Offset
4354   %}
4355 %}
4356 
4357 //----------Memory Operands - Win95 Implicit Null Variants----------------
4358 // Indirect Memory Operand
4359 operand indirect_win95_safe(eRegP_no_EBP reg)
4360 %{
4361   constraint(ALLOC_IN_RC(int_reg));
4362   match(reg);
4363 
4364   op_cost(100);
4365   format %{ "[$reg]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp(0x0);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Plus Short Offset Operand
4375 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4376 %{
4377   match(AddP reg off);
4378 
4379   op_cost(100);
4380   format %{ "[$reg + $off]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index(0x4);
4384     scale(0x0);
4385     disp($off);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Plus Long Offset Operand
4390 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4391 %{
4392   match(AddP reg off);
4393 
4394   op_cost(100);
4395   format %{ "[$reg + $off]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index(0x4);
4399     scale(0x0);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 // Indirect Memory Plus Index Register Plus Offset Operand
4405 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4406 %{
4407   match(AddP (AddP reg ireg) off);
4408 
4409   op_cost(100);
4410   format %{"[$reg + $off + $ireg]" %}
4411   interface(MEMORY_INTER) %{
4412     base($reg);
4413     index($ireg);
4414     scale(0x0);
4415     disp($off);
4416   %}
4417 %}
4418 
4419 // Indirect Memory Times Scale Plus Index Register
4420 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4421 %{
4422   match(AddP reg (LShiftI ireg scale));
4423 
4424   op_cost(100);
4425   format %{"[$reg + $ireg << $scale]" %}
4426   interface(MEMORY_INTER) %{
4427     base($reg);
4428     index($ireg);
4429     scale($scale);
4430     disp(0x0);
4431   %}
4432 %}
4433 
4434 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4435 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4436 %{
4437   match(AddP (AddP reg (LShiftI ireg scale)) off);
4438 
4439   op_cost(100);
4440   format %{"[$reg + $off + $ireg << $scale]" %}
4441   interface(MEMORY_INTER) %{
4442     base($reg);
4443     index($ireg);
4444     scale($scale);
4445     disp($off);
4446   %}
4447 %}
4448 
4449 //----------Conditional Branch Operands----------------------------------------
4450 // Comparison Op  - This is the operation of the comparison, and is limited to
4451 //                  the following set of codes:
4452 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4453 //
4454 // Other attributes of the comparison, such as unsignedness, are specified
4455 // by the comparison instruction that sets a condition code flags register.
4456 // That result is represented by a flags operand whose subtype is appropriate
4457 // to the unsignedness (etc.) of the comparison.
4458 //
4459 // Later, the instruction which matches both the Comparison Op (a Bool) and
4460 // the flags (produced by the Cmp) specifies the coding of the comparison op
4461 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4462 
// Comparison Code
4464 operand cmpOp() %{
4465   match(Bool);
4466 
4467   format %{ "" %}
4468   interface(COND_INTER) %{
4469     equal(0x4, "e");
4470     not_equal(0x5, "ne");
4471     less(0xC, "l");
4472     greater_equal(0xD, "ge");
4473     less_equal(0xE, "le");
4474     greater(0xF, "g");
4475     overflow(0x0, "o");
4476     no_overflow(0x1, "no");
4477   %}
4478 %}
4479 
4480 // Comparison Code, unsigned compare.  Used by FP also, with
4481 // C2 (unordered) turned into GT or LT already.  The other bits
4482 // C0 and C3 are turned into Carry & Zero flags.
4483 operand cmpOpU() %{
4484   match(Bool);
4485 
4486   format %{ "" %}
4487   interface(COND_INTER) %{
4488     equal(0x4, "e");
4489     not_equal(0x5, "ne");
4490     less(0x2, "b");
4491     greater_equal(0x3, "nb");
4492     less_equal(0x6, "be");
4493     greater(0x7, "nbe");
4494     overflow(0x0, "o");
4495     no_overflow(0x1, "no");
4496   %}
4497 %}
4498 
4499 // Floating comparisons that don't require any fixup for the unordered case
4500 operand cmpOpUCF() %{
4501   match(Bool);
4502   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4503             n->as_Bool()->_test._test == BoolTest::ge ||
4504             n->as_Bool()->_test._test == BoolTest::le ||
4505             n->as_Bool()->_test._test == BoolTest::gt);
4506   format %{ "" %}
4507   interface(COND_INTER) %{
4508     equal(0x4, "e");
4509     not_equal(0x5, "ne");
4510     less(0x2, "b");
4511     greater_equal(0x3, "nb");
4512     less_equal(0x6, "be");
4513     greater(0x7, "nbe");
4514     overflow(0x0, "o");
4515     no_overflow(0x1, "no");
4516   %}
4517 %}
4518 
4519 
4520 // Floating comparisons that can be fixed up with extra conditional jumps
4521 operand cmpOpUCF2() %{
4522   match(Bool);
4523   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4524             n->as_Bool()->_test._test == BoolTest::eq);
4525   format %{ "" %}
4526   interface(COND_INTER) %{
4527     equal(0x4, "e");
4528     not_equal(0x5, "ne");
4529     less(0x2, "b");
4530     greater_equal(0x3, "nb");
4531     less_equal(0x6, "be");
4532     greater(0x7, "nbe");
4533     overflow(0x0, "o");
4534     no_overflow(0x1, "no");
4535   %}
4536 %}
4537 
4538 // Comparison Code for FP conditional move
4539 operand cmpOp_fcmov() %{
4540   match(Bool);
4541 
4542   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4543             n->as_Bool()->_test._test != BoolTest::no_overflow);
4544   format %{ "" %}
4545   interface(COND_INTER) %{
4546     equal        (0x0C8);
4547     not_equal    (0x1C8);
4548     less         (0x0C0);
4549     greater_equal(0x1C0);
4550     less_equal   (0x0D0);
4551     greater      (0x1D0);
4552     overflow(0x0, "o"); // not really supported by the instruction
4553     no_overflow(0x1, "no"); // not really supported by the instruction
4554   %}
4555 %}
4556 
4557 // Comparison Code used in long compares
4558 operand cmpOp_commute() %{
4559   match(Bool);
4560 
4561   format %{ "" %}
4562   interface(COND_INTER) %{
4563     equal(0x4, "e");
4564     not_equal(0x5, "ne");
4565     less(0xF, "g");
4566     greater_equal(0xE, "le");
4567     less_equal(0xD, "ge");
4568     greater(0xC, "l");
4569     overflow(0x0, "o");
4570     no_overflow(0x1, "no");
4571   %}
4572 %}
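
// Worked example (illustrative only): cmpOp_commute is used when a long
// compare is emitted with its operands swapped, so the flags describe the
// second operand relative to the first.  To test x < y after a CMP y,x has
// been generated, the "less" slot above deliberately encodes 'g' (0xF) and
// "greater" encodes 'l' (0xC); equal and not_equal are unaffected by the
// swap and keep the same codes as cmpOp.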
4573 
4574 //----------OPERAND CLASSES----------------------------------------------------
4575 // Operand Classes are groups of operands that are used to simplify
4576 // instruction definitions by not requiring the AD writer to specify separate
4577 // instructions for every form of operand when the instruction accepts
4578 // multiple operand types with the same basic encoding and format.  The classic
4579 // case of this is memory operands.
4580 
4581 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4582                indIndex, indIndexScale, indIndexScaleOffset);
4583 
4584 // Long memory operations are encoded in 2 instructions and a +4 offset.
4585 // This means some kind of offset is always required and you cannot use
4586 // an oop as the offset (done when working on static globals).
4587 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4588                     indIndex, indIndexScale, indIndexScaleOffset);
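
// Worked example (illustrative register choice): a long load through the
// long_memory class expands into two 32-bit moves, one at the operand's
// displacement and one at displacement+4, e.g. with base EBX and an 8-bit
// offset of 16:
//     MOV EAX,[EBX+16]    ; low  word of the long
//     MOV EDX,[EBX+20]    ; high word of the long
// which is why an offset slot must always be available -- see loadL further
// down, which builds the high-word address from $mem$$disp + 4.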
4589 
4590 
4591 //----------PIPELINE-----------------------------------------------------------
4592 // Rules which define the behavior of the target architecture's pipeline.
4593 pipeline %{
4594 
4595 //----------ATTRIBUTES---------------------------------------------------------
4596 attributes %{
4597   variable_size_instructions;        // Variable-size instructions
4598   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4599   instruction_unit_size = 1;         // An instruction unit is 1 byte
4600   instruction_fetch_unit_size = 16;  // The processor fetches one line
4601   instruction_fetch_units = 1;       // of 16 bytes
4602 
4603   // List of nop instructions
4604   nops( MachNop );
4605 %}
4606 
4607 //----------RESOURCES----------------------------------------------------------
4608 // Resources are the functional units available to the machine
4609 
4610 // Generic P2/P3 pipeline
4611 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4612 // 3 instructions decoded per cycle.
4613 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4614 // 2 ALU ops, only ALU0 handles mul/div instructions.
4615 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4616            MS0, MS1, MEM = MS0 | MS1,
4617            BR, FPU,
4618            ALU0, ALU1, ALU = ALU0 | ALU1 );
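
// Illustrative reading of the masks above (not ADLC syntax): a pipe class
// that claims the composite resource, e.g. "ALU : S3", lets the scheduler
// use either ALU0 or ALU1 in that cycle, while a class that claims
// "ALU0 : S3" (see ialu_reg_reg_alu0 below) pins the work to the one unit
// that, per the note above, handles mul/div.  The same holds for DECODE
// vs. D0 and for MEM vs. MS0/MS1.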
4619 
4620 //----------PIPELINE DESCRIPTION-----------------------------------------------
4621 // Pipeline Description specifies the stages in the machine's pipeline
4622 
4623 // Generic P2/P3 pipeline
4624 pipe_desc(S0, S1, S2, S3, S4, S5);
4625 
4626 //----------PIPELINE CLASSES---------------------------------------------------
4627 // Pipeline Classes describe the stages in which input and output are
4628 // referenced by the hardware pipeline.
4629 
4630 // Naming convention: ialu or fpu
4631 // Then: _reg for the first (destination) register
4632 // Then: _reg if there is a 2nd register
4633 // Then: _long if it's a pair of instructions implementing a long
4634 // Then: _fat if it requires the big decoder
4635 //   Or: _mem if it requires the big decoder and a memory unit.
4636 
4637 // Integer ALU reg operation
4638 pipe_class ialu_reg(rRegI dst) %{
4639     single_instruction;
4640     dst    : S4(write);
4641     dst    : S3(read);
4642     DECODE : S0;        // any decoder
4643     ALU    : S3;        // any alu
4644 %}
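
// Example of how a pipe class is consumed (descriptive only): an instruct
// that declares ins_pipe(ialu_reg), such as bytes_reverse_int further down,
// is modelled as a single instruction whose destination is read in stage S3
// and written in stage S4, needing any one decoder in S0 and either ALU in S3.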
4645 
4646 // Long ALU reg operation
4647 pipe_class ialu_reg_long(eRegL dst) %{
4648     instruction_count(2);
4649     dst    : S4(write);
4650     dst    : S3(read);
4651     DECODE : S0(2);     // any 2 decoders
4652     ALU    : S3(2);     // both alus
4653 %}
4654 
4655 // Integer ALU reg operation using big decoder
4656 pipe_class ialu_reg_fat(rRegI dst) %{
4657     single_instruction;
4658     dst    : S4(write);
4659     dst    : S3(read);
4660     D0     : S0;        // big decoder only
4661     ALU    : S3;        // any alu
4662 %}
4663 
4664 // Long ALU reg operation using big decoder
4665 pipe_class ialu_reg_long_fat(eRegL dst) %{
4666     instruction_count(2);
4667     dst    : S4(write);
4668     dst    : S3(read);
4669     D0     : S0(2);     // big decoder only; twice
4670     ALU    : S3(2);     // any 2 alus
4671 %}
4672 
4673 // Integer ALU reg-reg operation
4674 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4675     single_instruction;
4676     dst    : S4(write);
4677     src    : S3(read);
4678     DECODE : S0;        // any decoder
4679     ALU    : S3;        // any alu
4680 %}
4681 
4682 // Long ALU reg-reg operation
4683 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4684     instruction_count(2);
4685     dst    : S4(write);
4686     src    : S3(read);
4687     DECODE : S0(2);     // any 2 decoders
4688     ALU    : S3(2);     // both alus
4689 %}
4690 
4691 // Integer ALU reg-reg operation
4692 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4693     single_instruction;
4694     dst    : S4(write);
4695     src    : S3(read);
4696     D0     : S0;        // big decoder only
4697     ALU    : S3;        // any alu
4698 %}
4699 
4700 // Long ALU reg-reg operation
4701 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4702     instruction_count(2);
4703     dst    : S4(write);
4704     src    : S3(read);
4705     D0     : S0(2);     // big decoder only; twice
4706     ALU    : S3(2);     // both alus
4707 %}
4708 
4709 // Integer ALU reg-mem operation
4710 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4711     single_instruction;
4712     dst    : S5(write);
4713     mem    : S3(read);
4714     D0     : S0;        // big decoder only
4715     ALU    : S4;        // any alu
4716     MEM    : S3;        // any mem
4717 %}
4718 
4719 // Long ALU reg-mem operation
4720 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4721     instruction_count(2);
4722     dst    : S5(write);
4723     mem    : S3(read);
4724     D0     : S0(2);     // big decoder only; twice
4725     ALU    : S4(2);     // any 2 alus
4726     MEM    : S3(2);     // both mems
4727 %}
4728 
4729 // Integer mem operation (prefetch)
4730 pipe_class ialu_mem(memory mem)
4731 %{
4732     single_instruction;
4733     mem    : S3(read);
4734     D0     : S0;        // big decoder only
4735     MEM    : S3;        // any mem
4736 %}
4737 
4738 // Integer Store to Memory
4739 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4740     single_instruction;
4741     mem    : S3(read);
4742     src    : S5(read);
4743     D0     : S0;        // big decoder only
4744     ALU    : S4;        // any alu
4745     MEM    : S3;
4746 %}
4747 
4748 // Long Store to Memory
4749 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4750     instruction_count(2);
4751     mem    : S3(read);
4752     src    : S5(read);
4753     D0     : S0(2);     // big decoder only; twice
4754     ALU    : S4(2);     // any 2 alus
4755     MEM    : S3(2);     // Both mems
4756 %}
4757 
4758 // Integer Store to Memory
4759 pipe_class ialu_mem_imm(memory mem) %{
4760     single_instruction;
4761     mem    : S3(read);
4762     D0     : S0;        // big decoder only
4763     ALU    : S4;        // any alu
4764     MEM    : S3;
4765 %}
4766 
4767 // Integer ALU0 reg-reg operation
4768 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4769     single_instruction;
4770     dst    : S4(write);
4771     src    : S3(read);
4772     D0     : S0;        // Big decoder only
4773     ALU0   : S3;        // only alu0
4774 %}
4775 
4776 // Integer ALU0 reg-mem operation
4777 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4778     single_instruction;
4779     dst    : S5(write);
4780     mem    : S3(read);
4781     D0     : S0;        // big decoder only
4782     ALU0   : S4;        // ALU0 only
4783     MEM    : S3;        // any mem
4784 %}
4785 
4786 // Integer ALU reg-reg operation
4787 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4788     single_instruction;
4789     cr     : S4(write);
4790     src1   : S3(read);
4791     src2   : S3(read);
4792     DECODE : S0;        // any decoder
4793     ALU    : S3;        // any alu
4794 %}
4795 
4796 // Integer ALU reg-imm operation
4797 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4798     single_instruction;
4799     cr     : S4(write);
4800     src1   : S3(read);
4801     DECODE : S0;        // any decoder
4802     ALU    : S3;        // any alu
4803 %}
4804 
4805 // Integer ALU reg-mem operation
4806 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4807     single_instruction;
4808     cr     : S4(write);
4809     src1   : S3(read);
4810     src2   : S3(read);
4811     D0     : S0;        // big decoder only
4812     ALU    : S4;        // any alu
4813     MEM    : S3;
4814 %}
4815 
4816 // Conditional move reg-reg
4817 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4818     instruction_count(4);
4819     y      : S4(read);
4820     q      : S3(read);
4821     p      : S3(read);
4822     DECODE : S0(4);     // any decoder
4823 %}
4824 
4825 // Conditional move reg-reg
4826 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4827     single_instruction;
4828     dst    : S4(write);
4829     src    : S3(read);
4830     cr     : S3(read);
4831     DECODE : S0;        // any decoder
4832 %}
4833 
4834 // Conditional move reg-mem
4835 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4836     single_instruction;
4837     dst    : S4(write);
4838     src    : S3(read);
4839     cr     : S3(read);
4840     DECODE : S0;        // any decoder
4841     MEM    : S3;
4842 %}
4843 
4844 // Conditional move reg-reg long
4845 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4846     single_instruction;
4847     dst    : S4(write);
4848     src    : S3(read);
4849     cr     : S3(read);
4850     DECODE : S0(2);     // any 2 decoders
4851 %}
4852 
4853 // Conditional move double reg-reg
4854 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4855     single_instruction;
4856     dst    : S4(write);
4857     src    : S3(read);
4858     cr     : S3(read);
4859     DECODE : S0;        // any decoder
4860 %}
4861 
4862 // Float reg-reg operation
4863 pipe_class fpu_reg(regDPR dst) %{
4864     instruction_count(2);
4865     dst    : S3(read);
4866     DECODE : S0(2);     // any 2 decoders
4867     FPU    : S3;
4868 %}
4869 
4870 // Float reg-reg operation
4871 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4872     instruction_count(2);
4873     dst    : S4(write);
4874     src    : S3(read);
4875     DECODE : S0(2);     // any 2 decoders
4876     FPU    : S3;
4877 %}
4878 
4879 // Float reg-reg operation
4880 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4881     instruction_count(3);
4882     dst    : S4(write);
4883     src1   : S3(read);
4884     src2   : S3(read);
4885     DECODE : S0(3);     // any 3 decoders
4886     FPU    : S3(2);
4887 %}
4888 
4889 // Float reg-reg operation
4890 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4891     instruction_count(4);
4892     dst    : S4(write);
4893     src1   : S3(read);
4894     src2   : S3(read);
4895     src3   : S3(read);
4896     DECODE : S0(4);     // any 4 decoders
4897     FPU    : S3(2);
4898 %}
4899 
4900 // Float reg-reg operation
4901 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4902     instruction_count(4);
4903     dst    : S4(write);
4904     src1   : S3(read);
4905     src2   : S3(read);
4906     src3   : S3(read);
4907     DECODE : S1(3);     // any 3 decoders
4908     D0     : S0;        // Big decoder only
4909     FPU    : S3(2);
4910     MEM    : S3;
4911 %}
4912 
4913 // Float reg-mem operation
4914 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4915     instruction_count(2);
4916     dst    : S5(write);
4917     mem    : S3(read);
4918     D0     : S0;        // big decoder only
4919     DECODE : S1;        // any decoder for FPU POP
4920     FPU    : S4;
4921     MEM    : S3;        // any mem
4922 %}
4923 
4924 // Float reg-mem operation
4925 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4926     instruction_count(3);
4927     dst    : S5(write);
4928     src1   : S3(read);
4929     mem    : S3(read);
4930     D0     : S0;        // big decoder only
4931     DECODE : S1(2);     // any 2 decoders for FPU POP
4932     FPU    : S4;
4933     MEM    : S3;        // any mem
4934 %}
4935 
4936 // Float mem-reg operation
4937 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4938     instruction_count(2);
4939     src    : S5(read);
4940     mem    : S3(read);
4941     DECODE : S0;        // any decoder for FPU PUSH
4942     D0     : S1;        // big decoder only
4943     FPU    : S4;
4944     MEM    : S3;        // any mem
4945 %}
4946 
4947 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4948     instruction_count(3);
4949     src1   : S3(read);
4950     src2   : S3(read);
4951     mem    : S3(read);
4952     DECODE : S0(2);     // any 2 decoders for FPU PUSH
4953     D0     : S1;        // big decoder only
4954     FPU    : S4;
4955     MEM    : S3;        // any mem
4956 %}
4957 
4958 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4959     instruction_count(3);
4960     src1   : S3(read);
4961     src2   : S3(read);
4962     mem    : S4(read);
4963     DECODE : S0;        // any decoder for FPU PUSH
4964     D0     : S0(2);     // big decoder only
4965     FPU    : S4;
4966     MEM    : S3(2);     // any mem
4967 %}
4968 
4969 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4970     instruction_count(2);
4971     src1   : S3(read);
4972     dst    : S4(read);
4973     D0     : S0(2);     // big decoder only
4974     MEM    : S3(2);     // any mem
4975 %}
4976 
4977 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4978     instruction_count(3);
4979     src1   : S3(read);
4980     src2   : S3(read);
4981     dst    : S4(read);
4982     D0     : S0(3);     // big decoder only
4983     FPU    : S4;
4984     MEM    : S3(3);     // any mem
4985 %}
4986 
4987 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4988     instruction_count(3);
4989     src1   : S4(read);
4990     mem    : S4(read);
4991     DECODE : S0;        // any decoder for FPU PUSH
4992     D0     : S0(2);     // big decoder only
4993     FPU    : S4;
4994     MEM    : S3(2);     // any mem
4995 %}
4996 
4997 // Float load constant
4998 pipe_class fpu_reg_con(regDPR dst) %{
4999     instruction_count(2);
5000     dst    : S5(write);
5001     D0     : S0;        // big decoder only for the load
5002     DECODE : S1;        // any decoder for FPU POP
5003     FPU    : S4;
5004     MEM    : S3;        // any mem
5005 %}
5006 
5007 // Float load constant
5008 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5009     instruction_count(3);
5010     dst    : S5(write);
5011     src    : S3(read);
5012     D0     : S0;        // big decoder only for the load
5013     DECODE : S1(2);     // any 2 decoders for FPU POP
5014     FPU    : S4;
5015     MEM    : S3;        // any mem
5016 %}
5017 
5018 // UnConditional branch
5019 pipe_class pipe_jmp( label labl ) %{
5020     single_instruction;
5021     BR   : S3;
5022 %}
5023 
5024 // Conditional branch
5025 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5026     single_instruction;
5027     cr    : S1(read);
5028     BR    : S3;
5029 %}
5030 
5031 // Allocation idiom
5032 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5033     instruction_count(1); force_serialization;
5034     fixed_latency(6);
5035     heap_ptr : S3(read);
5036     DECODE   : S0(3);
5037     D0       : S2;
5038     MEM      : S3;
5039     ALU      : S3(2);
5040     dst      : S5(write);
5041     BR       : S5;
5042 %}
5043 
5044 // Generic big/slow expanded idiom
5045 pipe_class pipe_slow(  ) %{
5046     instruction_count(10); multiple_bundles; force_serialization;
5047     fixed_latency(100);
5048     D0  : S0(2);
5049     MEM : S3(2);
5050 %}
5051 
5052 // The real do-nothing guy
5053 pipe_class empty( ) %{
5054     instruction_count(0);
5055 %}
5056 
5057 // Define the class for the Nop node
5058 define %{
5059    MachNop = empty;
5060 %}
5061 
5062 %}
5063 
5064 //----------INSTRUCTIONS-------------------------------------------------------
5065 //
5066 // match      -- States which machine-independent subtree may be replaced
5067 //               by this instruction.
5068 // ins_cost   -- The estimated cost of this instruction is used by instruction
5069 //               selection to identify a minimum cost tree of machine
5070 //               instructions that matches a tree of machine-independent
5071 //               instructions.
5072 // format     -- A string providing the disassembly for this instruction.
5073 //               The value of an instruction's operand may be inserted
5074 //               by referring to it with a '$' prefix.
5075 // opcode     -- Three instruction opcodes may be provided.  These are referred
5076 //               to within an encode class as $primary, $secondary, and $tertiary
5077 //               respectively.  The primary opcode is commonly used to
5078 //               indicate the type of machine instruction, while secondary
5079 //               and tertiary are often used for prefix options or addressing
5080 //               modes.
5081 // ins_encode -- A list of encode classes with parameters. The encode class
5082 //               name must have been defined in an 'enc_class' specification
5083 //               in the encode section of the architecture description.
5084 
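// Worked example (descriptive only) of the pieces above, using the BSWAP
// instruct that follows: opcode(0x0F, 0xC8) makes $primary = 0x0F and
// $secondary = 0xC8, and ins_encode(OpcP, OpcSReg(dst)) emits the primary
// byte followed by the secondary byte plus the destination register number,
// i.e. 0F C8+rd -- for dst == ECX (encoding 1) that is 0F C9, BSWAP ECX.
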
5085 //----------BSWAP-Instruction--------------------------------------------------
5086 instruct bytes_reverse_int(rRegI dst) %{
5087   match(Set dst (ReverseBytesI dst));
5088 
5089   format %{ "BSWAP  $dst" %}
5090   opcode(0x0F, 0xC8);
5091   ins_encode( OpcP, OpcSReg(dst) );
5092   ins_pipe( ialu_reg );
5093 %}
5094 
5095 instruct bytes_reverse_long(eRegL dst) %{
5096   match(Set dst (ReverseBytesL dst));
5097 
5098   format %{ "BSWAP  $dst.lo\n\t"
5099             "BSWAP  $dst.hi\n\t"
5100             "XCHG   $dst.lo $dst.hi" %}
5101 
5102   ins_cost(125);
5103   ins_encode( bswap_long_bytes(dst) );
5104   ins_pipe( ialu_reg_reg);
5105 %}
5106 
5107 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5108   match(Set dst (ReverseBytesUS dst));
5109   effect(KILL cr);
5110 
5111   format %{ "BSWAP  $dst\n\t"
5112             "SHR    $dst,16\n\t" %}
5113   ins_encode %{
5114     __ bswapl($dst$$Register);
5115     __ shrl($dst$$Register, 16);
5116   %}
5117   ins_pipe( ialu_reg );
5118 %}
5119 
5120 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5121   match(Set dst (ReverseBytesS dst));
5122   effect(KILL cr);
5123 
5124   format %{ "BSWAP  $dst\n\t"
5125             "SAR    $dst,16\n\t" %}
5126   ins_encode %{
5127     __ bswapl($dst$$Register);
5128     __ sarl($dst$$Register, 16);
5129   %}
5130   ins_pipe( ialu_reg );
5131 %}
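
// Worked example (values only): reversing the signed short 0x1234 held in a
// 32-bit register: BSWAP turns 0x00001234 into 0x34120000 and SAR by 16
// brings the swapped bytes back down with sign extension, giving 0x00003412.
// For a value whose reversed form is negative, e.g. 0x12F0, BSWAP yields
// 0xF0120000 and SAR 16 yields 0xFFFFF012, the properly signed result.  The
// unsigned variant above uses SHR instead, so the upper half is zeroed.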
5132 
5133 
5134 //---------- Zeros Count Instructions ------------------------------------------
5135 
5136 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5137   predicate(UseCountLeadingZerosInstruction);
5138   match(Set dst (CountLeadingZerosI src));
5139   effect(KILL cr);
5140 
5141   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5142   ins_encode %{
5143     __ lzcntl($dst$$Register, $src$$Register);
5144   %}
5145   ins_pipe(ialu_reg);
5146 %}
5147 
5148 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5149   predicate(!UseCountLeadingZerosInstruction);
5150   match(Set dst (CountLeadingZerosI src));
5151   effect(KILL cr);
5152 
5153   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5154             "JNZ    skip\n\t"
5155             "MOV    $dst, -1\n"
5156       "skip:\n\t"
5157             "NEG    $dst\n\t"
5158             "ADD    $dst, 31" %}
5159   ins_encode %{
5160     Register Rdst = $dst$$Register;
5161     Register Rsrc = $src$$Register;
5162     Label skip;
5163     __ bsrl(Rdst, Rsrc);
5164     __ jccb(Assembler::notZero, skip);
5165     __ movl(Rdst, -1);
5166     __ bind(skip);
5167     __ negl(Rdst);
5168     __ addl(Rdst, BitsPerInt - 1);
5169   %}
5170   ins_pipe(ialu_reg);
5171 %}
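
// Worked example (values only) for the BSR fallback above: for src = 1, BSR
// writes index 0, so NEG gives 0 and ADD 31 gives 31 = numberOfLeadingZeros(1).
// For src = 0x00010000, BSR writes 16, NEG gives -16, ADD 31 gives 15.  For
// src = 0, BSR sets ZF and leaves the destination undefined, so the JNZ is
// not taken, dst is forced to -1, NEG gives 1 and ADD 31 gives 32.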
5172 
5173 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5174   predicate(UseCountLeadingZerosInstruction);
5175   match(Set dst (CountLeadingZerosL src));
5176   effect(TEMP dst, KILL cr);
5177 
5178   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5179             "JNC    done\n\t"
5180             "LZCNT  $dst, $src.lo\n\t"
5181             "ADD    $dst, 32\n"
5182       "done:" %}
5183   ins_encode %{
5184     Register Rdst = $dst$$Register;
5185     Register Rsrc = $src$$Register;
5186     Label done;
5187     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5188     __ jccb(Assembler::carryClear, done);
5189     __ lzcntl(Rdst, Rsrc);
5190     __ addl(Rdst, BitsPerInt);
5191     __ bind(done);
5192   %}
5193   ins_pipe(ialu_reg);
5194 %}
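
// Note on the carry-flag trick above (descriptive only): LZCNT sets CF when
// its source operand is zero (the count then equals the operand width, 32
// here).  If $src.hi is non-zero, CF is clear and the JNC branches straight
// to done with the high-word count; if $src.hi is zero, the low word is
// counted and 32 is added.  E.g. for src.hi = 0 and src.lo = 0x100,
// LZCNT(src.lo) = 23 and the result is 23 + 32 = 55 leading zeros.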
5195 
5196 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5197   predicate(!UseCountLeadingZerosInstruction);
5198   match(Set dst (CountLeadingZerosL src));
5199   effect(TEMP dst, KILL cr);
5200 
5201   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5202             "JZ     msw_is_zero\n\t"
5203             "ADD    $dst, 32\n\t"
5204             "JMP    not_zero\n"
5205       "msw_is_zero:\n\t"
5206             "BSR    $dst, $src.lo\n\t"
5207             "JNZ    not_zero\n\t"
5208             "MOV    $dst, -1\n"
5209       "not_zero:\n\t"
5210             "NEG    $dst\n\t"
5211             "ADD    $dst, 63\n" %}
5212   ins_encode %{
5213     Register Rdst = $dst$$Register;
5214     Register Rsrc = $src$$Register;
5215     Label msw_is_zero;
5216     Label not_zero;
5217     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5218     __ jccb(Assembler::zero, msw_is_zero);
5219     __ addl(Rdst, BitsPerInt);
5220     __ jmpb(not_zero);
5221     __ bind(msw_is_zero);
5222     __ bsrl(Rdst, Rsrc);
5223     __ jccb(Assembler::notZero, not_zero);
5224     __ movl(Rdst, -1);
5225     __ bind(not_zero);
5226     __ negl(Rdst);
5227     __ addl(Rdst, BitsPerLong - 1);
5228   %}
5229   ins_pipe(ialu_reg);
5230 %}
5231 
5232 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5233   predicate(UseCountTrailingZerosInstruction);
5234   match(Set dst (CountTrailingZerosI src));
5235   effect(KILL cr);
5236 
5237   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5238   ins_encode %{
5239     __ tzcntl($dst$$Register, $src$$Register);
5240   %}
5241   ins_pipe(ialu_reg);
5242 %}
5243 
5244 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5245   predicate(!UseCountTrailingZerosInstruction);
5246   match(Set dst (CountTrailingZerosI src));
5247   effect(KILL cr);
5248 
5249   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5250             "JNZ    done\n\t"
5251             "MOV    $dst, 32\n"
5252       "done:" %}
5253   ins_encode %{
5254     Register Rdst = $dst$$Register;
5255     Label done;
5256     __ bsfl(Rdst, $src$$Register);
5257     __ jccb(Assembler::notZero, done);
5258     __ movl(Rdst, BitsPerInt);
5259     __ bind(done);
5260   %}
5261   ins_pipe(ialu_reg);
5262 %}
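
// Worked example (values only) for the BSF fallback above: for src = 8
// (bit 3 set), BSF writes 3 and clears ZF, so the JNZ skips the MOV and the
// result is 3.  For src = 0, BSF sets ZF and leaves dst undefined, so the
// branch is not taken and dst is overwritten with 32 =
// numberOfTrailingZeros(0).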
5263 
5264 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5265   predicate(UseCountTrailingZerosInstruction);
5266   match(Set dst (CountTrailingZerosL src));
5267   effect(TEMP dst, KILL cr);
5268 
5269   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5270             "JNC    done\n\t"
5271             "TZCNT  $dst, $src.hi\n\t"
5272             "ADD    $dst, 32\n"
5273             "done:" %}
5274   ins_encode %{
5275     Register Rdst = $dst$$Register;
5276     Register Rsrc = $src$$Register;
5277     Label done;
5278     __ tzcntl(Rdst, Rsrc);
5279     __ jccb(Assembler::carryClear, done);
5280     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5281     __ addl(Rdst, BitsPerInt);
5282     __ bind(done);
5283   %}
5284   ins_pipe(ialu_reg);
5285 %}
5286 
5287 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5288   predicate(!UseCountTrailingZerosInstruction);
5289   match(Set dst (CountTrailingZerosL src));
5290   effect(TEMP dst, KILL cr);
5291 
5292   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5293             "JNZ    done\n\t"
5294             "BSF    $dst, $src.hi\n\t"
5295             "JNZ    msw_not_zero\n\t"
5296             "MOV    $dst, 32\n"
5297       "msw_not_zero:\n\t"
5298             "ADD    $dst, 32\n"
5299       "done:" %}
5300   ins_encode %{
5301     Register Rdst = $dst$$Register;
5302     Register Rsrc = $src$$Register;
5303     Label msw_not_zero;
5304     Label done;
5305     __ bsfl(Rdst, Rsrc);
5306     __ jccb(Assembler::notZero, done);
5307     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5308     __ jccb(Assembler::notZero, msw_not_zero);
5309     __ movl(Rdst, BitsPerInt);
5310     __ bind(msw_not_zero);
5311     __ addl(Rdst, BitsPerInt);
5312     __ bind(done);
5313   %}
5314   ins_pipe(ialu_reg);
5315 %}
5316 
5317 
5318 //---------- Population Count Instructions -------------------------------------
5319 
5320 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5321   predicate(UsePopCountInstruction);
5322   match(Set dst (PopCountI src));
5323   effect(KILL cr);
5324 
5325   format %{ "POPCNT $dst, $src" %}
5326   ins_encode %{
5327     __ popcntl($dst$$Register, $src$$Register);
5328   %}
5329   ins_pipe(ialu_reg);
5330 %}
5331 
5332 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5333   predicate(UsePopCountInstruction);
5334   match(Set dst (PopCountI (LoadI mem)));
5335   effect(KILL cr);
5336 
5337   format %{ "POPCNT $dst, $mem" %}
5338   ins_encode %{
5339     __ popcntl($dst$$Register, $mem$$Address);
5340   %}
5341   ins_pipe(ialu_reg);
5342 %}
5343 
5344 // Note: Long.bitCount(long) returns an int.
5345 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5346   predicate(UsePopCountInstruction);
5347   match(Set dst (PopCountL src));
5348   effect(KILL cr, TEMP tmp, TEMP dst);
5349 
5350   format %{ "POPCNT $dst, $src.lo\n\t"
5351             "POPCNT $tmp, $src.hi\n\t"
5352             "ADD    $dst, $tmp" %}
5353   ins_encode %{
5354     __ popcntl($dst$$Register, $src$$Register);
5355     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5356     __ addl($dst$$Register, $tmp$$Register);
5357   %}
5358   ins_pipe(ialu_reg);
5359 %}
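
// Worked example (values only): Long.bitCount(0x0000000300000007L) is
// computed as POPCNT(lo = 0x00000007) = 3 plus POPCNT(hi = 0x00000003) = 2,
// giving 5; the second count is why the rule needs the TEMP register before
// the final ADD.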
5360 
5361 // Note: Long.bitCount(long) returns an int.
5362 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5363   predicate(UsePopCountInstruction);
5364   match(Set dst (PopCountL (LoadL mem)));
5365   effect(KILL cr, TEMP tmp, TEMP dst);
5366 
5367   format %{ "POPCNT $dst, $mem\n\t"
5368             "POPCNT $tmp, $mem+4\n\t"
5369             "ADD    $dst, $tmp" %}
5370   ins_encode %{
5371     //__ popcntl($dst$$Register, $mem$$Address$$first);
5372     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5373     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5374     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5375     __ addl($dst$$Register, $tmp$$Register);
5376   %}
5377   ins_pipe(ialu_reg);
5378 %}
5379 
5380 
5381 //----------Load/Store/Move Instructions---------------------------------------
5382 //----------Load Instructions--------------------------------------------------
5383 // Load Byte (8bit signed)
5384 instruct loadB(xRegI dst, memory mem) %{
5385   match(Set dst (LoadB mem));
5386 
5387   ins_cost(125);
5388   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5389 
5390   ins_encode %{
5391     __ movsbl($dst$$Register, $mem$$Address);
5392   %}
5393 
5394   ins_pipe(ialu_reg_mem);
5395 %}
5396 
5397 // Load Byte (8bit signed) into Long Register
5398 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5399   match(Set dst (ConvI2L (LoadB mem)));
5400   effect(KILL cr);
5401 
5402   ins_cost(375);
5403   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5404             "MOV    $dst.hi,$dst.lo\n\t"
5405             "SAR    $dst.hi,7" %}
5406 
5407   ins_encode %{
5408     __ movsbl($dst$$Register, $mem$$Address);
5409     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5410     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5411   %}
5412 
5413   ins_pipe(ialu_reg_mem);
5414 %}
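
// Why the shift count above is 7 rather than 31 (values only): MOVSX8 has
// already sign-extended the byte to 32 bits, so for a loaded byte of 0x80
// the low word is 0xFFFFFF80; copying it to $dst.hi and shifting right
// arithmetically by 7 gives 0xFFFFFFFF.  For 0x7F the low word is 0x0000007F
// and the same shift gives 0x00000000.  Only the 7 low bits can differ from
// the sign bit, so a 7-bit shift is enough to fill the high word.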
5415 
5416 // Load Unsigned Byte (8bit UNsigned)
5417 instruct loadUB(xRegI dst, memory mem) %{
5418   match(Set dst (LoadUB mem));
5419 
5420   ins_cost(125);
5421   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5422 
5423   ins_encode %{
5424     __ movzbl($dst$$Register, $mem$$Address);
5425   %}
5426 
5427   ins_pipe(ialu_reg_mem);
5428 %}
5429 
5430 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5431 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5432   match(Set dst (ConvI2L (LoadUB mem)));
5433   effect(KILL cr);
5434 
5435   ins_cost(250);
5436   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5437             "XOR    $dst.hi,$dst.hi" %}
5438 
5439   ins_encode %{
5440     Register Rdst = $dst$$Register;
5441     __ movzbl(Rdst, $mem$$Address);
5442     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5443   %}
5444 
5445   ins_pipe(ialu_reg_mem);
5446 %}
5447 
5448 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5449 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5450   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5451   effect(KILL cr);
5452 
5453   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5454             "XOR    $dst.hi,$dst.hi\n\t"
5455             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5456   ins_encode %{
5457     Register Rdst = $dst$$Register;
5458     __ movzbl(Rdst, $mem$$Address);
5459     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5460     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5461   %}
5462   ins_pipe(ialu_reg_mem);
5463 %}
5464 
5465 // Load Short (16bit signed)
5466 instruct loadS(rRegI dst, memory mem) %{
5467   match(Set dst (LoadS mem));
5468 
5469   ins_cost(125);
5470   format %{ "MOVSX  $dst,$mem\t# short" %}
5471 
5472   ins_encode %{
5473     __ movswl($dst$$Register, $mem$$Address);
5474   %}
5475 
5476   ins_pipe(ialu_reg_mem);
5477 %}
5478 
5479 // Load Short (16 bit signed) to Byte (8 bit signed)
5480 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5481   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5482 
5483   ins_cost(125);
5484   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5485   ins_encode %{
5486     __ movsbl($dst$$Register, $mem$$Address);
5487   %}
5488   ins_pipe(ialu_reg_mem);
5489 %}
5490 
5491 // Load Short (16bit signed) into Long Register
5492 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5493   match(Set dst (ConvI2L (LoadS mem)));
5494   effect(KILL cr);
5495 
5496   ins_cost(375);
5497   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5498             "MOV    $dst.hi,$dst.lo\n\t"
5499             "SAR    $dst.hi,15" %}
5500 
5501   ins_encode %{
5502     __ movswl($dst$$Register, $mem$$Address);
5503     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5504     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5505   %}
5506 
5507   ins_pipe(ialu_reg_mem);
5508 %}
5509 
5510 // Load Unsigned Short/Char (16bit unsigned)
5511 instruct loadUS(rRegI dst, memory mem) %{
5512   match(Set dst (LoadUS mem));
5513 
5514   ins_cost(125);
5515   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5516 
5517   ins_encode %{
5518     __ movzwl($dst$$Register, $mem$$Address);
5519   %}
5520 
5521   ins_pipe(ialu_reg_mem);
5522 %}
5523 
5524 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5525 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5526   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5527 
5528   ins_cost(125);
5529   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5530   ins_encode %{
5531     __ movsbl($dst$$Register, $mem$$Address);
5532   %}
5533   ins_pipe(ialu_reg_mem);
5534 %}
5535 
5536 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5537 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5538   match(Set dst (ConvI2L (LoadUS mem)));
5539   effect(KILL cr);
5540 
5541   ins_cost(250);
5542   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5543             "XOR    $dst.hi,$dst.hi" %}
5544 
5545   ins_encode %{
5546     __ movzwl($dst$$Register, $mem$$Address);
5547     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5548   %}
5549 
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5554 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5555   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5556   effect(KILL cr);
5557 
5558   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5559             "XOR    $dst.hi,$dst.hi" %}
5560   ins_encode %{
5561     Register Rdst = $dst$$Register;
5562     __ movzbl(Rdst, $mem$$Address);
5563     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5564   %}
5565   ins_pipe(ialu_reg_mem);
5566 %}
5567 
5568 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5569 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5570   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5571   effect(KILL cr);
5572 
5573   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5574             "XOR    $dst.hi,$dst.hi\n\t"
5575             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5576   ins_encode %{
5577     Register Rdst = $dst$$Register;
5578     __ movzwl(Rdst, $mem$$Address);
5579     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5580     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5581   %}
5582   ins_pipe(ialu_reg_mem);
5583 %}
5584 
5585 // Load Integer
5586 instruct loadI(rRegI dst, memory mem) %{
5587   match(Set dst (LoadI mem));
5588 
5589   ins_cost(125);
5590   format %{ "MOV    $dst,$mem\t# int" %}
5591 
5592   ins_encode %{
5593     __ movl($dst$$Register, $mem$$Address);
5594   %}
5595 
5596   ins_pipe(ialu_reg_mem);
5597 %}
5598 
5599 // Load Integer (32 bit signed) to Byte (8 bit signed)
5600 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5601   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5602 
5603   ins_cost(125);
5604   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5605   ins_encode %{
5606     __ movsbl($dst$$Register, $mem$$Address);
5607   %}
5608   ins_pipe(ialu_reg_mem);
5609 %}
5610 
5611 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5612 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5613   match(Set dst (AndI (LoadI mem) mask));
5614 
5615   ins_cost(125);
5616   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5617   ins_encode %{
5618     __ movzbl($dst$$Register, $mem$$Address);
5619   %}
5620   ins_pipe(ialu_reg_mem);
5621 %}
5622 
5623 // Load Integer (32 bit signed) to Short (16 bit signed)
5624 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5625   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5626 
5627   ins_cost(125);
5628   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5629   ins_encode %{
5630     __ movswl($dst$$Register, $mem$$Address);
5631   %}
5632   ins_pipe(ialu_reg_mem);
5633 %}
5634 
5635 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5636 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5637   match(Set dst (AndI (LoadI mem) mask));
5638 
5639   ins_cost(125);
5640   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5641   ins_encode %{
5642     __ movzwl($dst$$Register, $mem$$Address);
5643   %}
5644   ins_pipe(ialu_reg_mem);
5645 %}
5646 
5647 // Load Integer into Long Register
5648 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5649   match(Set dst (ConvI2L (LoadI mem)));
5650   effect(KILL cr);
5651 
5652   ins_cost(375);
5653   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5654             "MOV    $dst.hi,$dst.lo\n\t"
5655             "SAR    $dst.hi,31" %}
5656 
5657   ins_encode %{
5658     __ movl($dst$$Register, $mem$$Address);
5659     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5660     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5661   %}
5662 
5663   ins_pipe(ialu_reg_mem);
5664 %}
5665 
5666 // Load Integer with mask 0xFF into Long Register
5667 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5668   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5669   effect(KILL cr);
5670 
5671   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5672             "XOR    $dst.hi,$dst.hi" %}
5673   ins_encode %{
5674     Register Rdst = $dst$$Register;
5675     __ movzbl(Rdst, $mem$$Address);
5676     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5677   %}
5678   ins_pipe(ialu_reg_mem);
5679 %}
5680 
5681 // Load Integer with mask 0xFFFF into Long Register
5682 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5683   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5684   effect(KILL cr);
5685 
5686   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5687             "XOR    $dst.hi,$dst.hi" %}
5688   ins_encode %{
5689     Register Rdst = $dst$$Register;
5690     __ movzwl(Rdst, $mem$$Address);
5691     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5692   %}
5693   ins_pipe(ialu_reg_mem);
5694 %}
5695 
5696 // Load Integer with 31-bit mask into Long Register
5697 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5698   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5699   effect(KILL cr);
5700 
5701   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5702             "XOR    $dst.hi,$dst.hi\n\t"
5703             "AND    $dst.lo,$mask" %}
5704   ins_encode %{
5705     Register Rdst = $dst$$Register;
5706     __ movl(Rdst, $mem$$Address);
5707     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5708     __ andl(Rdst, $mask$$constant);
5709   %}
5710   ins_pipe(ialu_reg_mem);
5711 %}
5712 
5713 // Load Unsigned Integer into Long Register
5714 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5715   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5716   effect(KILL cr);
5717 
5718   ins_cost(250);
5719   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5720             "XOR    $dst.hi,$dst.hi" %}
5721 
5722   ins_encode %{
5723     __ movl($dst$$Register, $mem$$Address);
5724     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5725   %}
5726 
5727   ins_pipe(ialu_reg_mem);
5728 %}
5729 
5730 // Load Long.  Cannot clobber address while loading, so restrict address
5731 // register to ESI
5732 instruct loadL(eRegL dst, load_long_memory mem) %{
5733   predicate(!((LoadLNode*)n)->require_atomic_access());
5734   match(Set dst (LoadL mem));
5735 
5736   ins_cost(250);
5737   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5738             "MOV    $dst.hi,$mem+4" %}
5739 
5740   ins_encode %{
5741     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5742     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5743     __ movl($dst$$Register, Amemlo);
5744     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5745   %}
5746 
5747   ins_pipe(ialu_reg_long_mem);
5748 %}
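
// Note on the split above (descriptive only): the two MOVs read from
// $mem$$disp and $mem$$disp + 4, so the halves arrive in two separate 32-bit
// accesses and the pair is not atomic.  That is what the
// require_atomic_access() predicate guards: volatile long loads instead use
// the FILD/FISTp or MOVSD forms that follow, which read all 64 bits in a
// single access.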
5749 
5750 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5751 // then store it down to the stack and reload on the int
5752 // side.
5753 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5754   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5755   match(Set dst (LoadL mem));
5756 
5757   ins_cost(200);
5758   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5759             "FISTp  $dst" %}
5760   ins_encode(enc_loadL_volatile(mem,dst));
5761   ins_pipe( fpu_reg_mem );
5762 %}
5763 
5764 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5765   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5766   match(Set dst (LoadL mem));
5767   effect(TEMP tmp);
5768   ins_cost(180);
5769   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5770             "MOVSD  $dst,$tmp" %}
5771   ins_encode %{
5772     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5773     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5774   %}
5775   ins_pipe( pipe_slow );
5776 %}
5777 
5778 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5779   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5780   match(Set dst (LoadL mem));
5781   effect(TEMP tmp);
5782   ins_cost(160);
5783   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5784             "MOVD   $dst.lo,$tmp\n\t"
5785             "PSRLQ  $tmp,32\n\t"
5786             "MOVD   $dst.hi,$tmp" %}
5787   ins_encode %{
5788     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5789     __ movdl($dst$$Register, $tmp$$XMMRegister);
5790     __ psrlq($tmp$$XMMRegister, 32);
5791     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5792   %}
5793   ins_pipe( pipe_slow );
5794 %}
5795 
5796 // Load Range
5797 instruct loadRange(rRegI dst, memory mem) %{
5798   match(Set dst (LoadRange mem));
5799 
5800   ins_cost(125);
5801   format %{ "MOV    $dst,$mem" %}
5802   opcode(0x8B);
5803   ins_encode( OpcP, RegMem(dst,mem));
5804   ins_pipe( ialu_reg_mem );
5805 %}
5806 
5807 
5808 // Load Pointer
5809 instruct loadP(eRegP dst, memory mem) %{
5810   match(Set dst (LoadP mem));
5811 
5812   ins_cost(125);
5813   format %{ "MOV    $dst,$mem" %}
5814   opcode(0x8B);
5815   ins_encode( OpcP, RegMem(dst,mem));
5816   ins_pipe( ialu_reg_mem );
5817 %}
5818 
5819 // Load Klass Pointer
5820 instruct loadKlass(eRegP dst, memory mem) %{
5821   match(Set dst (LoadKlass mem));
5822 
5823   ins_cost(125);
5824   format %{ "MOV    $dst,$mem" %}
5825   opcode(0x8B);
5826   ins_encode( OpcP, RegMem(dst,mem));
5827   ins_pipe( ialu_reg_mem );
5828 %}
5829 
5830 // Load Double
5831 instruct loadDPR(regDPR dst, memory mem) %{
5832   predicate(UseSSE<=1);
5833   match(Set dst (LoadD mem));
5834 
5835   ins_cost(150);
5836   format %{ "FLD_D  ST,$mem\n\t"
5837             "FSTP   $dst" %}
5838   opcode(0xDD);               /* DD /0 */
5839   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5840               Pop_Reg_DPR(dst) );
5841   ins_pipe( fpu_reg_mem );
5842 %}
5843 
5844 // Load Double to XMM
5845 instruct loadD(regD dst, memory mem) %{
5846   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5847   match(Set dst (LoadD mem));
5848   ins_cost(145);
5849   format %{ "MOVSD  $dst,$mem" %}
5850   ins_encode %{
5851     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5852   %}
5853   ins_pipe( pipe_slow );
5854 %}
5855 
5856 instruct loadD_partial(regD dst, memory mem) %{
5857   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5858   match(Set dst (LoadD mem));
5859   ins_cost(145);
5860   format %{ "MOVLPD $dst,$mem" %}
5861   ins_encode %{
5862     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5863   %}
5864   ins_pipe( pipe_slow );
5865 %}
5866 
5867 // Load to XMM register (single-precision floating point)
5868 // MOVSS instruction
5869 instruct loadF(regF dst, memory mem) %{
5870   predicate(UseSSE>=1);
5871   match(Set dst (LoadF mem));
5872   ins_cost(145);
5873   format %{ "MOVSS  $dst,$mem" %}
5874   ins_encode %{
5875     __ movflt ($dst$$XMMRegister, $mem$$Address);
5876   %}
5877   ins_pipe( pipe_slow );
5878 %}
5879 
5880 // Load Float
5881 instruct loadFPR(regFPR dst, memory mem) %{
5882   predicate(UseSSE==0);
5883   match(Set dst (LoadF mem));
5884 
5885   ins_cost(150);
5886   format %{ "FLD_S  ST,$mem\n\t"
5887             "FSTP   $dst" %}
5888   opcode(0xD9);               /* D9 /0 */
5889   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5890               Pop_Reg_FPR(dst) );
5891   ins_pipe( fpu_reg_mem );
5892 %}
5893 
5894 // Load Effective Address
5895 instruct leaP8(eRegP dst, indOffset8 mem) %{
5896   match(Set dst mem);
5897 
5898   ins_cost(110);
5899   format %{ "LEA    $dst,$mem" %}
5900   opcode(0x8D);
5901   ins_encode( OpcP, RegMem(dst,mem));
5902   ins_pipe( ialu_reg_reg_fat );
5903 %}
5904 
5905 instruct leaP32(eRegP dst, indOffset32 mem) %{
5906   match(Set dst mem);
5907 
5908   ins_cost(110);
5909   format %{ "LEA    $dst,$mem" %}
5910   opcode(0x8D);
5911   ins_encode( OpcP, RegMem(dst,mem));
5912   ins_pipe( ialu_reg_reg_fat );
5913 %}
5914 
5915 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5916   match(Set dst mem);
5917 
5918   ins_cost(110);
5919   format %{ "LEA    $dst,$mem" %}
5920   opcode(0x8D);
5921   ins_encode( OpcP, RegMem(dst,mem));
5922   ins_pipe( ialu_reg_reg_fat );
5923 %}
5924 
5925 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5926   match(Set dst mem);
5927 
5928   ins_cost(110);
5929   format %{ "LEA    $dst,$mem" %}
5930   opcode(0x8D);
5931   ins_encode( OpcP, RegMem(dst,mem));
5932   ins_pipe( ialu_reg_reg_fat );
5933 %}
5934 
5935 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5936   match(Set dst mem);
5937 
5938   ins_cost(110);
5939   format %{ "LEA    $dst,$mem" %}
5940   opcode(0x8D);
5941   ins_encode( OpcP, RegMem(dst,mem));
5942   ins_pipe( ialu_reg_reg_fat );
5943 %}
5944 
5945 // Load Constant
5946 instruct loadConI(rRegI dst, immI src) %{
5947   match(Set dst src);
5948 
5949   format %{ "MOV    $dst,$src" %}
5950   ins_encode( LdImmI(dst, src) );
5951   ins_pipe( ialu_reg_fat );
5952 %}
5953 
5954 // Load Constant zero
5955 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5956   match(Set dst src);
5957   effect(KILL cr);
5958 
5959   ins_cost(50);
5960   format %{ "XOR    $dst,$dst" %}
5961   opcode(0x33);  /* + rd */
5962   ins_encode( OpcP, RegReg( dst, dst ) );
5963   ins_pipe( ialu_reg );
5964 %}
5965 
5966 instruct loadConP(eRegP dst, immP src) %{
5967   match(Set dst src);
5968 
5969   format %{ "MOV    $dst,$src" %}
5970   opcode(0xB8);  /* + rd */
5971   ins_encode( LdImmP(dst, src) );
5972   ins_pipe( ialu_reg_fat );
5973 %}
5974 
5975 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5976   match(Set dst src);
5977   effect(KILL cr);
5978   ins_cost(200);
5979   format %{ "MOV    $dst.lo,$src.lo\n\t"
5980             "MOV    $dst.hi,$src.hi" %}
5981   opcode(0xB8);
5982   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5983   ins_pipe( ialu_reg_long_fat );
5984 %}
5985 
5986 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5987   match(Set dst src);
5988   effect(KILL cr);
5989   ins_cost(150);
5990   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5991             "XOR    $dst.hi,$dst.hi" %}
5992   opcode(0x33,0x33);
5993   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5994   ins_pipe( ialu_reg_long );
5995 %}
5996 
5997 // The instruction usage is guarded by predicate in operand immFPR().
5998 instruct loadConFPR(regFPR dst, immFPR con) %{
5999   match(Set dst con);
6000   ins_cost(125);
6001   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6002             "FSTP   $dst" %}
6003   ins_encode %{
6004     __ fld_s($constantaddress($con));
6005     __ fstp_d($dst$$reg);
6006   %}
6007   ins_pipe(fpu_reg_con);
6008 %}
6009 
6010 // The instruction usage is guarded by predicate in operand immFPR0().
6011 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6012   match(Set dst con);
6013   ins_cost(125);
6014   format %{ "FLDZ   ST\n\t"
6015             "FSTP   $dst" %}
6016   ins_encode %{
6017     __ fldz();
6018     __ fstp_d($dst$$reg);
6019   %}
6020   ins_pipe(fpu_reg_con);
6021 %}
6022 
6023 // The instruction usage is guarded by predicate in operand immFPR1().
6024 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6025   match(Set dst con);
6026   ins_cost(125);
6027   format %{ "FLD1   ST\n\t"
6028             "FSTP   $dst" %}
6029   ins_encode %{
6030     __ fld1();
6031     __ fstp_d($dst$$reg);
6032   %}
6033   ins_pipe(fpu_reg_con);
6034 %}
6035 
6036 // The instruction usage is guarded by predicate in operand immF().
6037 instruct loadConF(regF dst, immF con) %{
6038   match(Set dst con);
6039   ins_cost(125);
6040   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6041   ins_encode %{
6042     __ movflt($dst$$XMMRegister, $constantaddress($con));
6043   %}
6044   ins_pipe(pipe_slow);
6045 %}
6046 
6047 // The instruction usage is guarded by predicate in operand immF0().
6048 instruct loadConF0(regF dst, immF0 src) %{
6049   match(Set dst src);
6050   ins_cost(100);
6051   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6052   ins_encode %{
6053     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6054   %}
6055   ins_pipe(pipe_slow);
6056 %}
6057 
6058 // The instruction usage is guarded by predicate in operand immDPR().
6059 instruct loadConDPR(regDPR dst, immDPR con) %{
6060   match(Set dst con);
6061   ins_cost(125);
6062 
6063   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6064             "FSTP   $dst" %}
6065   ins_encode %{
6066     __ fld_d($constantaddress($con));
6067     __ fstp_d($dst$$reg);
6068   %}
6069   ins_pipe(fpu_reg_con);
6070 %}
6071 
6072 // The instruction usage is guarded by predicate in operand immDPR0().
6073 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6074   match(Set dst con);
6075   ins_cost(125);
6076 
6077   format %{ "FLDZ   ST\n\t"
6078             "FSTP   $dst" %}
6079   ins_encode %{
6080     __ fldz();
6081     __ fstp_d($dst$$reg);
6082   %}
6083   ins_pipe(fpu_reg_con);
6084 %}
6085 
6086 // The instruction usage is guarded by predicate in operand immDPR1().
6087 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6088   match(Set dst con);
6089   ins_cost(125);
6090 
6091   format %{ "FLD1   ST\n\t"
6092             "FSTP   $dst" %}
6093   ins_encode %{
6094     __ fld1();
6095     __ fstp_d($dst$$reg);
6096   %}
6097   ins_pipe(fpu_reg_con);
6098 %}
6099 
6100 // The instruction usage is guarded by predicate in operand immD().
6101 instruct loadConD(regD dst, immD con) %{
6102   match(Set dst con);
6103   ins_cost(125);
6104   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6105   ins_encode %{
6106     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6107   %}
6108   ins_pipe(pipe_slow);
6109 %}
6110 
6111 // The instruction usage is guarded by predicate in operand immD0().
6112 instruct loadConD0(regD dst, immD0 src) %{
6113   match(Set dst src);
6114   ins_cost(100);
6115   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6116   ins_encode %{
6117     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6118   %}
6119   ins_pipe( pipe_slow );
6120 %}
6121 
6122 // Load Stack Slot
6123 instruct loadSSI(rRegI dst, stackSlotI src) %{
6124   match(Set dst src);
6125   ins_cost(125);
6126 
6127   format %{ "MOV    $dst,$src" %}
6128   opcode(0x8B);
6129   ins_encode( OpcP, RegMem(dst,src));
6130   ins_pipe( ialu_reg_mem );
6131 %}
6132 
6133 instruct loadSSL(eRegL dst, stackSlotL src) %{
6134   match(Set dst src);
6135 
6136   ins_cost(200);
6137   format %{ "MOV    $dst,$src.lo\n\t"
6138             "MOV    $dst+4,$src.hi" %}
6139   opcode(0x8B, 0x8B);
6140   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6141   ins_pipe( ialu_mem_long_reg );
6142 %}
6143 
6144 // Load Stack Slot
6145 instruct loadSSP(eRegP dst, stackSlotP src) %{
6146   match(Set dst src);
6147   ins_cost(125);
6148 
6149   format %{ "MOV    $dst,$src" %}
6150   opcode(0x8B);
6151   ins_encode( OpcP, RegMem(dst,src));
6152   ins_pipe( ialu_reg_mem );
6153 %}
6154 
6155 // Load Stack Slot
6156 instruct loadSSF(regFPR dst, stackSlotF src) %{
6157   match(Set dst src);
6158   ins_cost(125);
6159 
6160   format %{ "FLD_S  $src\n\t"
6161             "FSTP   $dst" %}
6162   opcode(0xD9);               /* D9 /0, FLD m32real */
6163   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6164               Pop_Reg_FPR(dst) );
6165   ins_pipe( fpu_reg_mem );
6166 %}
6167 
6168 // Load Stack Slot
6169 instruct loadSSD(regDPR dst, stackSlotD src) %{
6170   match(Set dst src);
6171   ins_cost(125);
6172 
6173   format %{ "FLD_D  $src\n\t"
6174             "FSTP   $dst" %}
6175   opcode(0xDD);               /* DD /0, FLD m64real */
6176   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6177               Pop_Reg_DPR(dst) );
6178   ins_pipe( fpu_reg_mem );
6179 %}
6180 
6181 // Prefetch instructions for allocation.
6182 // Must be safe to execute with invalid address (cannot fault).
6183 
6184 instruct prefetchAlloc0( memory mem ) %{
6185   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6186   match(PrefetchAllocation mem);
6187   ins_cost(0);
6188   size(0);
6189   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6190   ins_encode();
6191   ins_pipe(empty);
6192 %}
6193 
6194 instruct prefetchAlloc( memory mem ) %{
6195   predicate(AllocatePrefetchInstr==3);
6196   match( PrefetchAllocation mem );
6197   ins_cost(100);
6198 
6199   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6200   ins_encode %{
6201     __ prefetchw($mem$$Address);
6202   %}
6203   ins_pipe(ialu_mem);
6204 %}
6205 
6206 instruct prefetchAllocNTA( memory mem ) %{
6207   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6208   match(PrefetchAllocation mem);
6209   ins_cost(100);
6210 
6211   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6212   ins_encode %{
6213     __ prefetchnta($mem$$Address);
6214   %}
6215   ins_pipe(ialu_mem);
6216 %}
6217 
6218 instruct prefetchAllocT0( memory mem ) %{
6219   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6220   match(PrefetchAllocation mem);
6221   ins_cost(100);
6222 
6223   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6224   ins_encode %{
6225     __ prefetcht0($mem$$Address);
6226   %}
6227   ins_pipe(ialu_mem);
6228 %}
6229 
6230 instruct prefetchAllocT2( memory mem ) %{
6231   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6232   match(PrefetchAllocation mem);
6233   ins_cost(100);
6234 
6235   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6236   ins_encode %{
6237     __ prefetcht2($mem$$Address);
6238   %}
6239   ins_pipe(ialu_mem);
6240 %}
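
// Summary of the prefetch selection above (derived from the predicates):
// AllocatePrefetchInstr == 0 emits PREFETCHNTA, 1 emits PREFETCHT0, 2 emits
// PREFETCHT2 (each requiring UseSSE >= 1), 3 emits PREFETCHW, and with
// UseSSE == 0 and any setting other than 3 the prefetch degenerates to an
// empty encoding.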
6241 
6242 //----------Store Instructions-------------------------------------------------
6243 
6244 // Store Byte
6245 instruct storeB(memory mem, xRegI src) %{
6246   match(Set mem (StoreB mem src));
6247 
6248   ins_cost(125);
6249   format %{ "MOV8   $mem,$src" %}
6250   opcode(0x88);
6251   ins_encode( OpcP, RegMem( src, mem ) );
6252   ins_pipe( ialu_mem_reg );
6253 %}
6254 
6255 // Store Char/Short
6256 instruct storeC(memory mem, rRegI src) %{
6257   match(Set mem (StoreC mem src));
6258 
6259   ins_cost(125);
6260   format %{ "MOV16  $mem,$src" %}
6261   opcode(0x89, 0x66);
6262   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6263   ins_pipe( ialu_mem_reg );
6264 %}
6265 
6266 // Store Integer
6267 instruct storeI(memory mem, rRegI src) %{
6268   match(Set mem (StoreI mem src));
6269 
6270   ins_cost(125);
6271   format %{ "MOV    $mem,$src" %}
6272   opcode(0x89);
6273   ins_encode( OpcP, RegMem( src, mem ) );
6274   ins_pipe( ialu_mem_reg );
6275 %}
6276 
6277 // Store Long
6278 instruct storeL(long_memory mem, eRegL src) %{
6279   predicate(!((StoreLNode*)n)->require_atomic_access());
6280   match(Set mem (StoreL mem src));
6281 
6282   ins_cost(200);
6283   format %{ "MOV    $mem,$src.lo\n\t"
6284             "MOV    $mem+4,$src.hi" %}
6285   opcode(0x89, 0x89);
6286   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6287   ins_pipe( ialu_mem_long_reg );
6288 %}
6289 
6290 // Store Long to Integer
6291 instruct storeL2I(memory mem, eRegL src) %{
6292   match(Set mem (StoreI mem (ConvL2I src)));
6293 
6294   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6295   ins_encode %{
6296     __ movl($mem$$Address, $src$$Register);
6297   %}
6298   ins_pipe(ialu_mem_reg);
6299 %}
6300 
6301 // Volatile Store Long.  Must be atomic, so move it into
6302 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6303 // target address before the store (for null-ptr checks)
6304 // so the memory operand is used twice in the encoding.
6305 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6306   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6307   match(Set mem (StoreL mem src));
6308   effect( KILL cr );
6309   ins_cost(400);
6310   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6311             "FILD   $src\n\t"
6312             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6313   opcode(0x3B);
6314   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6315   ins_pipe( fpu_reg_mem );
6316 %}
6317 
6318 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6319   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6320   match(Set mem (StoreL mem src));
6321   effect( TEMP tmp, KILL cr );
6322   ins_cost(380);
6323   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6324             "MOVSD  $tmp,$src\n\t"
6325             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6326   ins_encode %{
6327     __ cmpl(rax, $mem$$Address);
6328     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6329     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6330   %}
6331   ins_pipe( pipe_slow );
6332 %}
6333 
6334 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6335   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6336   match(Set mem (StoreL mem src));
6337   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6338   ins_cost(360);
6339   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6340             "MOVD   $tmp,$src.lo\n\t"
6341             "MOVD   $tmp2,$src.hi\n\t"
6342             "PUNPCKLDQ $tmp,$tmp2\n\t"
6343             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6344   ins_encode %{
6345     __ cmpl(rax, $mem$$Address);
6346     __ movdl($tmp$$XMMRegister, $src$$Register);
6347     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6348     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6349     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6350   %}
6351   ins_pipe( pipe_slow );
6352 %}
6353 
6354 // Store Pointer; for storing unknown oops and raw pointers
6355 instruct storeP(memory mem, anyRegP src) %{
6356   match(Set mem (StoreP mem src));
6357 
6358   ins_cost(125);
6359   format %{ "MOV    $mem,$src" %}
6360   opcode(0x89);
6361   ins_encode( OpcP, RegMem( src, mem ) );
6362   ins_pipe( ialu_mem_reg );
6363 %}
6364 
6365 // Store Integer Immediate
6366 instruct storeImmI(memory mem, immI src) %{
6367   match(Set mem (StoreI mem src));
6368 
6369   ins_cost(150);
6370   format %{ "MOV    $mem,$src" %}
6371   opcode(0xC7);               /* C7 /0 */
6372   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6373   ins_pipe( ialu_mem_imm );
6374 %}
6375 
6376 // Store Short/Char Immediate
6377 instruct storeImmI16(memory mem, immI16 src) %{
6378   predicate(UseStoreImmI16);
6379   match(Set mem (StoreC mem src));
6380 
6381   ins_cost(150);
6382   format %{ "MOV16  $mem,$src" %}
6383   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6384   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6385   ins_pipe( ialu_mem_imm );
6386 %}
6387 
6388 // Store Pointer Immediate; null pointers or constant oops that do not
6389 // need card-mark barriers.
6390 instruct storeImmP(memory mem, immP src) %{
6391   match(Set mem (StoreP mem src));
6392 
6393   ins_cost(150);
6394   format %{ "MOV    $mem,$src" %}
6395   opcode(0xC7);               /* C7 /0 */
6396   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6397   ins_pipe( ialu_mem_imm );
6398 %}
6399 
6400 // Store Byte Immediate
6401 instruct storeImmB(memory mem, immI8 src) %{
6402   match(Set mem (StoreB mem src));
6403 
6404   ins_cost(150);
6405   format %{ "MOV8   $mem,$src" %}
6406   opcode(0xC6);               /* C6 /0 */
6407   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6408   ins_pipe( ialu_mem_imm );
6409 %}
6410 
6411 // Store CMS card-mark Immediate
6412 instruct storeImmCM(memory mem, immI8 src) %{
6413   match(Set mem (StoreCM mem src));
6414 
6415   ins_cost(150);
6416   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6417   opcode(0xC6);               /* C6 /0 */
6418   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6419   ins_pipe( ialu_mem_imm );
6420 %}
6421 
6422 // Store Double
6423 instruct storeDPR( memory mem, regDPR1 src) %{
6424   predicate(UseSSE<=1);
6425   match(Set mem (StoreD mem src));
6426 
6427   ins_cost(100);
6428   format %{ "FST_D  $mem,$src" %}
6429   opcode(0xDD);       /* DD /2 */
6430   ins_encode( enc_FPR_store(mem,src) );
6431   ins_pipe( fpu_mem_reg );
6432 %}
6433 
6434 // Store double does rounding on x86
6435 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6436   predicate(UseSSE<=1);
6437   match(Set mem (StoreD mem (RoundDouble src)));
6438 
6439   ins_cost(100);
6440   format %{ "FST_D  $mem,$src\t# round" %}
6441   opcode(0xDD);       /* DD /2 */
6442   ins_encode( enc_FPR_store(mem,src) );
6443   ins_pipe( fpu_mem_reg );
6444 %}
6445 
// Store XMM register to memory (double-precision floating point)
6447 // MOVSD instruction
6448 instruct storeD(memory mem, regD src) %{
6449   predicate(UseSSE>=2);
6450   match(Set mem (StoreD mem src));
6451   ins_cost(95);
6452   format %{ "MOVSD  $mem,$src" %}
6453   ins_encode %{
6454     __ movdbl($mem$$Address, $src$$XMMRegister);
6455   %}
6456   ins_pipe( pipe_slow );
6457 %}
6458 
6459 // Store XMM register to memory (single-precision floating point)
6460 // MOVSS instruction
6461 instruct storeF(memory mem, regF src) %{
6462   predicate(UseSSE>=1);
6463   match(Set mem (StoreF mem src));
6464   ins_cost(95);
6465   format %{ "MOVSS  $mem,$src" %}
6466   ins_encode %{
6467     __ movflt($mem$$Address, $src$$XMMRegister);
6468   %}
6469   ins_pipe( pipe_slow );
6470 %}
6471 
6472 // Store Float
6473 instruct storeFPR( memory mem, regFPR1 src) %{
6474   predicate(UseSSE==0);
6475   match(Set mem (StoreF mem src));
6476 
6477   ins_cost(100);
6478   format %{ "FST_S  $mem,$src" %}
6479   opcode(0xD9);       /* D9 /2 */
6480   ins_encode( enc_FPR_store(mem,src) );
6481   ins_pipe( fpu_mem_reg );
6482 %}
6483 
6484 // Store Float does rounding on x86
6485 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6486   predicate(UseSSE==0);
6487   match(Set mem (StoreF mem (RoundFloat src)));
6488 
6489   ins_cost(100);
6490   format %{ "FST_S  $mem,$src\t# round" %}
6491   opcode(0xD9);       /* D9 /2 */
6492   ins_encode( enc_FPR_store(mem,src) );
6493   ins_pipe( fpu_mem_reg );
6494 %}
6495 
// Store Float converted from Double; the conversion rounds on x86
6497 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6498   predicate(UseSSE<=1);
6499   match(Set mem (StoreF mem (ConvD2F src)));
6500 
6501   ins_cost(100);
6502   format %{ "FST_S  $mem,$src\t# D-round" %}
6503   opcode(0xD9);       /* D9 /2 */
6504   ins_encode( enc_FPR_store(mem,src) );
6505   ins_pipe( fpu_mem_reg );
6506 %}
6507 
6508 // Store immediate Float value (it is faster than store from FPU register)
6509 // The instruction usage is guarded by predicate in operand immFPR().
6510 instruct storeFPR_imm( memory mem, immFPR src) %{
6511   match(Set mem (StoreF mem src));
6512 
6513   ins_cost(50);
6514   format %{ "MOV    $mem,$src\t# store float" %}
6515   opcode(0xC7);               /* C7 /0 */
6516   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6517   ins_pipe( ialu_mem_imm );
6518 %}
6519 
6520 // Store immediate Float value (it is faster than store from XMM register)
6521 // The instruction usage is guarded by predicate in operand immF().
6522 instruct storeF_imm( memory mem, immF src) %{
6523   match(Set mem (StoreF mem src));
6524 
6525   ins_cost(50);
6526   format %{ "MOV    $mem,$src\t# store float" %}
6527   opcode(0xC7);               /* C7 /0 */
6528   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6529   ins_pipe( ialu_mem_imm );
6530 %}
6531 
6532 // Store Integer to stack slot
6533 instruct storeSSI(stackSlotI dst, rRegI src) %{
6534   match(Set dst src);
6535 
6536   ins_cost(100);
6537   format %{ "MOV    $dst,$src" %}
6538   opcode(0x89);
6539   ins_encode( OpcPRegSS( dst, src ) );
6540   ins_pipe( ialu_mem_reg );
6541 %}
6542 
// Store Pointer to stack slot
6544 instruct storeSSP(stackSlotP dst, eRegP src) %{
6545   match(Set dst src);
6546 
6547   ins_cost(100);
6548   format %{ "MOV    $dst,$src" %}
6549   opcode(0x89);
6550   ins_encode( OpcPRegSS( dst, src ) );
6551   ins_pipe( ialu_mem_reg );
6552 %}
6553 
6554 // Store Long to stack slot
6555 instruct storeSSL(stackSlotL dst, eRegL src) %{
6556   match(Set dst src);
6557 
6558   ins_cost(200);
6559   format %{ "MOV    $dst,$src.lo\n\t"
6560             "MOV    $dst+4,$src.hi" %}
6561   opcode(0x89, 0x89);
6562   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6563   ins_pipe( ialu_mem_long_reg );
6564 %}
6565 
6566 //----------MemBar Instructions-----------------------------------------------
6567 // Memory barrier flavors
6568 
6569 instruct membar_acquire() %{
6570   match(MemBarAcquire);
6571   match(LoadFence);
6572   ins_cost(400);
6573 
6574   size(0);
6575   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6576   ins_encode();
6577   ins_pipe(empty);
6578 %}
6579 
6580 instruct membar_acquire_lock() %{
6581   match(MemBarAcquireLock);
6582   ins_cost(0);
6583 
6584   size(0);
6585   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6586   ins_encode( );
6587   ins_pipe(empty);
6588 %}
6589 
6590 instruct membar_release() %{
6591   match(MemBarRelease);
6592   match(StoreFence);
6593   ins_cost(400);
6594 
6595   size(0);
6596   format %{ "MEMBAR-release ! (empty encoding)" %}
6597   ins_encode( );
6598   ins_pipe(empty);
6599 %}
6600 
6601 instruct membar_release_lock() %{
6602   match(MemBarReleaseLock);
6603   ins_cost(0);
6604 
6605   size(0);
6606   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6607   ins_encode( );
6608   ins_pipe(empty);
6609 %}
6610 
6611 instruct membar_volatile(eFlagsReg cr) %{
6612   match(MemBarVolatile);
6613   effect(KILL cr);
6614   ins_cost(400);
6615 
6616   format %{
6617     $$template
6618     if (os::is_MP()) {
6619       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6620     } else {
6621       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6622     }
6623   %}
6624   ins_encode %{
6625     __ membar(Assembler::StoreLoad);
6626   %}
6627   ins_pipe(pipe_slow);
6628 %}
6629 
6630 instruct unnecessary_membar_volatile() %{
6631   match(MemBarVolatile);
6632   predicate(Matcher::post_store_load_barrier(n));
6633   ins_cost(0);
6634 
6635   size(0);
6636   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6637   ins_encode( );
6638   ins_pipe(empty);
6639 %}
6640 
6641 instruct membar_storestore() %{
6642   match(MemBarStoreStore);
6643   ins_cost(0);
6644 
6645   size(0);
6646   format %{ "MEMBAR-storestore (empty encoding)" %}
6647   ins_encode( );
6648   ins_pipe(empty);
6649 %}
6650 
6651 //----------Move Instructions--------------------------------------------------
6652 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6653   match(Set dst (CastX2P src));
6654   format %{ "# X2P  $dst, $src" %}
6655   ins_encode( /*empty encoding*/ );
6656   ins_cost(0);
6657   ins_pipe(empty);
6658 %}
6659 
6660 instruct castP2X(rRegI dst, eRegP src ) %{
6661   match(Set dst (CastP2X src));
6662   ins_cost(50);
6663   format %{ "MOV    $dst, $src\t# CastP2X" %}
6664   ins_encode( enc_Copy( dst, src) );
6665   ins_pipe( ialu_reg_reg );
6666 %}
6667 
6668 //----------Conditional Move---------------------------------------------------
6669 // Conditional move
6670 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6671   predicate(!VM_Version::supports_cmov() );
6672   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6673   ins_cost(200);
6674   format %{ "J$cop,us skip\t# signed cmove\n\t"
6675             "MOV    $dst,$src\n"
6676       "skip:" %}
6677   ins_encode %{
6678     Label Lskip;
6679     // Invert sense of branch from sense of CMOV
6680     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6681     __ movl($dst$$Register, $src$$Register);
6682     __ bind(Lskip);
6683   %}
6684   ins_pipe( pipe_cmov_reg );
6685 %}
6686 
6687 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6688   predicate(!VM_Version::supports_cmov() );
6689   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6690   ins_cost(200);
6691   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6692             "MOV    $dst,$src\n"
6693       "skip:" %}
6694   ins_encode %{
6695     Label Lskip;
6696     // Invert sense of branch from sense of CMOV
6697     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6698     __ movl($dst$$Register, $src$$Register);
6699     __ bind(Lskip);
6700   %}
6701   ins_pipe( pipe_cmov_reg );
6702 %}
6703 
6704 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6705   predicate(VM_Version::supports_cmov() );
6706   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6707   ins_cost(200);
6708   format %{ "CMOV$cop $dst,$src" %}
6709   opcode(0x0F,0x40);
6710   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6711   ins_pipe( pipe_cmov_reg );
6712 %}
6713 
6714 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6715   predicate(VM_Version::supports_cmov() );
6716   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6717   ins_cost(200);
6718   format %{ "CMOV$cop $dst,$src" %}
6719   opcode(0x0F,0x40);
6720   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6721   ins_pipe( pipe_cmov_reg );
6722 %}
6723 
6724 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6725   predicate(VM_Version::supports_cmov() );
6726   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6727   ins_cost(200);
6728   expand %{
6729     cmovI_regU(cop, cr, dst, src);
6730   %}
6731 %}
6732 
6733 // Conditional move
6734 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6735   predicate(VM_Version::supports_cmov() );
6736   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6737   ins_cost(250);
6738   format %{ "CMOV$cop $dst,$src" %}
6739   opcode(0x0F,0x40);
6740   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6741   ins_pipe( pipe_cmov_mem );
6742 %}
6743 
6744 // Conditional move
6745 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6746   predicate(VM_Version::supports_cmov() );
6747   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6748   ins_cost(250);
6749   format %{ "CMOV$cop $dst,$src" %}
6750   opcode(0x0F,0x40);
6751   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6752   ins_pipe( pipe_cmov_mem );
6753 %}
6754 
6755 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6756   predicate(VM_Version::supports_cmov() );
6757   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6758   ins_cost(250);
6759   expand %{
6760     cmovI_memU(cop, cr, dst, src);
6761   %}
6762 %}
6763 
6764 // Conditional move
6765 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6766   predicate(VM_Version::supports_cmov() );
6767   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6768   ins_cost(200);
6769   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6770   opcode(0x0F,0x40);
6771   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6772   ins_pipe( pipe_cmov_reg );
6773 %}
6774 
// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
6779 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6780   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6781   ins_cost(300);
6782   format %{ "Jn$cop   skip\n\t"
6783           "MOV    $dst,$src\t# pointer\n"
6784       "skip:" %}
6785   opcode(0x8b);
6786   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6787   ins_pipe( pipe_cmov_reg );
6788 %}
6789 
6790 // Conditional move
6791 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6792   predicate(VM_Version::supports_cmov() );
6793   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6794   ins_cost(200);
6795   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6796   opcode(0x0F,0x40);
6797   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6798   ins_pipe( pipe_cmov_reg );
6799 %}
6800 
6801 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6802   predicate(VM_Version::supports_cmov() );
6803   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6804   ins_cost(200);
6805   expand %{
6806     cmovP_regU(cop, cr, dst, src);
6807   %}
6808 %}
6809 
6810 // DISABLED: Requires the ADLC to emit a bottom_type call that
6811 // correctly meets the two pointer arguments; one is an incoming
6812 // register but the other is a memory operand.  ALSO appears to
6813 // be buggy with implicit null checks.
6814 //
6815 //// Conditional move
6816 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6817 //  predicate(VM_Version::supports_cmov() );
6818 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6819 //  ins_cost(250);
6820 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6821 //  opcode(0x0F,0x40);
6822 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6823 //  ins_pipe( pipe_cmov_mem );
6824 //%}
6825 //
6826 //// Conditional move
6827 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6828 //  predicate(VM_Version::supports_cmov() );
6829 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6830 //  ins_cost(250);
6831 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6832 //  opcode(0x0F,0x40);
6833 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6834 //  ins_pipe( pipe_cmov_mem );
6835 //%}
6836 
6837 // Conditional move
6838 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6839   predicate(UseSSE<=1);
6840   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6841   ins_cost(200);
6842   format %{ "FCMOV$cop $dst,$src\t# double" %}
6843   opcode(0xDA);
6844   ins_encode( enc_cmov_dpr(cop,src) );
6845   ins_pipe( pipe_cmovDPR_reg );
6846 %}
6847 
6848 // Conditional move
6849 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6850   predicate(UseSSE==0);
6851   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6852   ins_cost(200);
6853   format %{ "FCMOV$cop $dst,$src\t# float" %}
6854   opcode(0xDA);
6855   ins_encode( enc_cmov_dpr(cop,src) );
6856   ins_pipe( pipe_cmovDPR_reg );
6857 %}
6858 
6859 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6860 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6861   predicate(UseSSE<=1);
6862   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6863   ins_cost(200);
6864   format %{ "Jn$cop   skip\n\t"
6865             "MOV    $dst,$src\t# double\n"
6866       "skip:" %}
6867   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6868   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6869   ins_pipe( pipe_cmovDPR_reg );
6870 %}
6871 
6872 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6873 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6874   predicate(UseSSE==0);
6875   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6876   ins_cost(200);
6877   format %{ "Jn$cop    skip\n\t"
6878             "MOV    $dst,$src\t# float\n"
6879       "skip:" %}
6880   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6881   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6882   ins_pipe( pipe_cmovDPR_reg );
6883 %}
6884 
// There is no CMOV for SSE/SSE2 registers, so emulate it with a short branch
6886 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6887   predicate (UseSSE>=1);
6888   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6889   ins_cost(200);
6890   format %{ "Jn$cop   skip\n\t"
6891             "MOVSS  $dst,$src\t# float\n"
6892       "skip:" %}
6893   ins_encode %{
6894     Label skip;
6895     // Invert sense of branch from sense of CMOV
6896     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6897     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6898     __ bind(skip);
6899   %}
6900   ins_pipe( pipe_slow );
6901 %}
6902 
// There is no CMOV for SSE/SSE2 registers, so emulate it with a short branch
6904 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6905   predicate (UseSSE>=2);
6906   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6907   ins_cost(200);
6908   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
6910       "skip:" %}
6911   ins_encode %{
6912     Label skip;
6913     // Invert sense of branch from sense of CMOV
6914     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6915     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6916     __ bind(skip);
6917   %}
6918   ins_pipe( pipe_slow );
6919 %}
6920 
6921 // unsigned version
6922 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6923   predicate (UseSSE>=1);
6924   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6925   ins_cost(200);
6926   format %{ "Jn$cop   skip\n\t"
6927             "MOVSS  $dst,$src\t# float\n"
6928       "skip:" %}
6929   ins_encode %{
6930     Label skip;
6931     // Invert sense of branch from sense of CMOV
6932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6933     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6934     __ bind(skip);
6935   %}
6936   ins_pipe( pipe_slow );
6937 %}
6938 
6939 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6940   predicate (UseSSE>=1);
6941   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6942   ins_cost(200);
6943   expand %{
6944     fcmovF_regU(cop, cr, dst, src);
6945   %}
6946 %}
6947 
6948 // unsigned version
6949 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6950   predicate (UseSSE>=2);
6951   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6952   ins_cost(200);
6953   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
6955       "skip:" %}
6956   ins_encode %{
6957     Label skip;
6958     // Invert sense of branch from sense of CMOV
6959     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6960     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6961     __ bind(skip);
6962   %}
6963   ins_pipe( pipe_slow );
6964 %}
6965 
6966 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6967   predicate (UseSSE>=2);
6968   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6969   ins_cost(200);
6970   expand %{
6971     fcmovD_regU(cop, cr, dst, src);
6972   %}
6973 %}
6974 
6975 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6976   predicate(VM_Version::supports_cmov() );
6977   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6978   ins_cost(200);
6979   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6980             "CMOV$cop $dst.hi,$src.hi" %}
6981   opcode(0x0F,0x40);
6982   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6983   ins_pipe( pipe_cmov_reg_long );
6984 %}
6985 
6986 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6987   predicate(VM_Version::supports_cmov() );
6988   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6989   ins_cost(200);
6990   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6991             "CMOV$cop $dst.hi,$src.hi" %}
6992   opcode(0x0F,0x40);
6993   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6994   ins_pipe( pipe_cmov_reg_long );
6995 %}
6996 
6997 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6998   predicate(VM_Version::supports_cmov() );
6999   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7000   ins_cost(200);
7001   expand %{
7002     cmovL_regU(cop, cr, dst, src);
7003   %}
7004 %}
7005 
7006 //----------Arithmetic Instructions--------------------------------------------
7007 //----------Addition Instructions----------------------------------------------
7008 
7009 // Integer Addition Instructions
7010 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7011   match(Set dst (AddI dst src));
7012   effect(KILL cr);
7013 
7014   size(2);
7015   format %{ "ADD    $dst,$src" %}
7016   opcode(0x03);
7017   ins_encode( OpcP, RegReg( dst, src) );
7018   ins_pipe( ialu_reg_reg );
7019 %}
7020 
7021 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7022   match(Set dst (AddI dst src));
7023   effect(KILL cr);
7024 
7025   format %{ "ADD    $dst,$src" %}
7026   opcode(0x81, 0x00); /* /0 id */
7027   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7028   ins_pipe( ialu_reg );
7029 %}
7030 
7031 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7032   predicate(UseIncDec);
7033   match(Set dst (AddI dst src));
7034   effect(KILL cr);
7035 
7036   size(1);
7037   format %{ "INC    $dst" %}
7038   opcode(0x40); /*  */
7039   ins_encode( Opc_plus( primary, dst ) );
7040   ins_pipe( ialu_reg );
7041 %}
7042 
7043 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7044   match(Set dst (AddI src0 src1));
7045   ins_cost(110);
7046 
7047   format %{ "LEA    $dst,[$src0 + $src1]" %}
7048   opcode(0x8D); /* 0x8D /r */
7049   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7050   ins_pipe( ialu_reg_reg );
7051 %}
7052 
7053 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7054   match(Set dst (AddP src0 src1));
7055   ins_cost(110);
7056 
7057   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7058   opcode(0x8D); /* 0x8D /r */
7059   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7060   ins_pipe( ialu_reg_reg );
7061 %}
7062 
7063 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7064   predicate(UseIncDec);
7065   match(Set dst (AddI dst src));
7066   effect(KILL cr);
7067 
7068   size(1);
7069   format %{ "DEC    $dst" %}
7070   opcode(0x48); /*  */
7071   ins_encode( Opc_plus( primary, dst ) );
7072   ins_pipe( ialu_reg );
7073 %}
7074 
7075 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7076   match(Set dst (AddP dst src));
7077   effect(KILL cr);
7078 
7079   size(2);
7080   format %{ "ADD    $dst,$src" %}
7081   opcode(0x03);
7082   ins_encode( OpcP, RegReg( dst, src) );
7083   ins_pipe( ialu_reg_reg );
7084 %}
7085 
7086 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7087   match(Set dst (AddP dst src));
7088   effect(KILL cr);
7089 
7090   format %{ "ADD    $dst,$src" %}
7091   opcode(0x81,0x00); /* Opcode 81 /0 id */
7092   // ins_encode( RegImm( dst, src) );
7093   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7094   ins_pipe( ialu_reg );
7095 %}
7096 
7097 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7098   match(Set dst (AddI dst (LoadI src)));
7099   effect(KILL cr);
7100 
7101   ins_cost(125);
7102   format %{ "ADD    $dst,$src" %}
7103   opcode(0x03);
7104   ins_encode( OpcP, RegMem( dst, src) );
7105   ins_pipe( ialu_reg_mem );
7106 %}
7107 
7108 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7109   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7110   effect(KILL cr);
7111 
7112   ins_cost(150);
7113   format %{ "ADD    $dst,$src" %}
7114   opcode(0x01);  /* Opcode 01 /r */
7115   ins_encode( OpcP, RegMem( src, dst ) );
7116   ins_pipe( ialu_mem_reg );
7117 %}
7118 
7119 // Add Memory with Immediate
7120 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7121   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7122   effect(KILL cr);
7123 
7124   ins_cost(125);
7125   format %{ "ADD    $dst,$src" %}
7126   opcode(0x81);               /* Opcode 81 /0 id */
7127   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7128   ins_pipe( ialu_mem_imm );
7129 %}
7130 
7131 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7132   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7133   effect(KILL cr);
7134 
7135   ins_cost(125);
7136   format %{ "INC    $dst" %}
7137   opcode(0xFF);               /* Opcode FF /0 */
7138   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7139   ins_pipe( ialu_mem_imm );
7140 %}
7141 
7142 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7143   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7144   effect(KILL cr);
7145 
7146   ins_cost(125);
7147   format %{ "DEC    $dst" %}
7148   opcode(0xFF);               /* Opcode FF /1 */
7149   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7150   ins_pipe( ialu_mem_imm );
7151 %}
7152 
7153 
7154 instruct checkCastPP( eRegP dst ) %{
7155   match(Set dst (CheckCastPP dst));
7156 
7157   size(0);
7158   format %{ "#checkcastPP of $dst" %}
7159   ins_encode( /*empty encoding*/ );
7160   ins_pipe( empty );
7161 %}
7162 
7163 instruct castPP( eRegP dst ) %{
7164   match(Set dst (CastPP dst));
7165   format %{ "#castPP of $dst" %}
7166   ins_encode( /*empty encoding*/ );
7167   ins_pipe( empty );
7168 %}
7169 
7170 instruct castII( rRegI dst ) %{
7171   match(Set dst (CastII dst));
7172   format %{ "#castII of $dst" %}
7173   ins_encode( /*empty encoding*/ );
7174   ins_cost(0);
7175   ins_pipe( empty );
7176 %}
7177 
7178 
7179 // Load-locked - same as a regular pointer load when used with compare-swap
7180 instruct loadPLocked(eRegP dst, memory mem) %{
7181   match(Set dst (LoadPLocked mem));
7182 
7183   ins_cost(125);
7184   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7185   opcode(0x8B);
7186   ins_encode( OpcP, RegMem(dst,mem));
7187   ins_pipe( ialu_reg_mem );
7188 %}
7189 
7190 // Conditional-store of the updated heap-top.
7191 // Used during allocation of the shared heap.
7192 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7193 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7194   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7195   // EAX is killed if there is contention, but then it's also unused.
7196   // In the common case of no contention, EAX holds the new oop address.
7197   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7198   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7199   ins_pipe( pipe_cmpxchg );
7200 %}
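
// Illustrative sketch of the heap-top bump-allocation retry loop that the rule
// above supports (shape and names are hypothetical, not the VM's actual code):
//
//   char* old_top = *top_addr;
//   char* new_top = old_top + size_in_bytes;
//   // storePConditional: the LOCK CMPXCHG leaves ZF set iff *top_addr was
//   // still old_top, i.e. iff new_top was actually stored.
//   bool stored = __sync_bool_compare_and_swap(top_addr, old_top, new_top);
//   if (!stored) { /* reload the heap top and retry */ }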
7201 
7202 // Conditional-store of an int value.
7203 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7204 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7205   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7206   effect(KILL oldval);
7207   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7208   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7209   ins_pipe( pipe_cmpxchg );
7210 %}
7211 
7212 // Conditional-store of a long value.
7213 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7214 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7215   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7216   effect(KILL oldval);
7217   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7218             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7219             "XCHG   EBX,ECX"
7220   %}
7221   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7223     //       cmpxchg8 instruction because the instruction uses
7224     //       rcx as the high order word of the new value to store but
7225     //       our register encoding uses rbx.
7226     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7227     if( os::is_MP() )
7228       __ lock();
7229     __ cmpxchg8($mem$$Address);
7230     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7231   %}
7232   ins_pipe( pipe_cmpxchg );
7233 %}
7234 
7235 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7236 
7237 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7238   predicate(VM_Version::supports_cx8());
7239   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7240   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7241   effect(KILL cr, KILL oldval);
7242   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7243             "MOV    $res,0\n\t"
7244             "JNE,s  fail\n\t"
7245             "MOV    $res,1\n"
7246           "fail:" %}
7247   ins_encode( enc_cmpxchg8(mem_ptr),
7248               enc_flags_ne_to_boolean(res) );
7249   ins_pipe( pipe_cmpxchg );
7250 %}
7251 
7252 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7253   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7254   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7255   effect(KILL cr, KILL oldval);
7256   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7257             "MOV    $res,0\n\t"
7258             "JNE,s  fail\n\t"
7259             "MOV    $res,1\n"
7260           "fail:" %}
7261   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7262   ins_pipe( pipe_cmpxchg );
7263 %}
7264 
7265 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7266   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7267   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7268   effect(KILL cr, KILL oldval);
7269   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7270             "MOV    $res,0\n\t"
7271             "JNE,s  fail\n\t"
7272             "MOV    $res,1\n"
7273           "fail:" %}
7274   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7275   ins_pipe( pipe_cmpxchg );
7276 %}
7277 
7278 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7279   predicate(VM_Version::supports_cx8());
7280   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7281   effect(KILL cr);
7282   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7283   ins_encode( enc_cmpxchg8(mem_ptr) );
7284   ins_pipe( pipe_cmpxchg );
7285 %}
7286 
7287 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7288   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7289   effect(KILL cr);
7290   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7291   ins_encode( enc_cmpxchg(mem_ptr) );
7292   ins_pipe( pipe_cmpxchg );
7293 %}
7294 
7295 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7296   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7297   effect(KILL cr);
7298   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7299   ins_encode( enc_cmpxchg(mem_ptr) );
7300   ins_pipe( pipe_cmpxchg );
7301 %}
7302 
7303 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7304   predicate(n->as_LoadStore()->result_not_used());
7305   match(Set dummy (GetAndAddI mem add));
7306   effect(KILL cr);
7307   format %{ "ADDL  [$mem],$add" %}
7308   ins_encode %{
7309     if (os::is_MP()) { __ lock(); }
7310     __ addl($mem$$Address, $add$$constant);
7311   %}
7312   ins_pipe( pipe_cmpxchg );
7313 %}
7314 
7315 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7316   match(Set newval (GetAndAddI mem newval));
7317   effect(KILL cr);
7318   format %{ "XADDL  [$mem],$newval" %}
7319   ins_encode %{
7320     if (os::is_MP()) { __ lock(); }
7321     __ xaddl($mem$$Address, $newval$$Register);
7322   %}
7323   ins_pipe( pipe_cmpxchg );
7324 %}
7325 
7326 instruct xchgI( memory mem, rRegI newval) %{
7327   match(Set newval (GetAndSetI mem newval));
7328   format %{ "XCHGL  $newval,[$mem]" %}
7329   ins_encode %{
7330     __ xchgl($newval$$Register, $mem$$Address);
7331   %}
7332   ins_pipe( pipe_cmpxchg );
7333 %}
7334 
7335 instruct xchgP( memory mem, pRegP newval) %{
7336   match(Set newval (GetAndSetP mem newval));
7337   format %{ "XCHGL  $newval,[$mem]" %}
7338   ins_encode %{
7339     __ xchgl($newval$$Register, $mem$$Address);
7340   %}
7341   ins_pipe( pipe_cmpxchg );
7342 %}
7343 
7344 //----------Subtraction Instructions-------------------------------------------
7345 
7346 // Integer Subtraction Instructions
7347 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7348   match(Set dst (SubI dst src));
7349   effect(KILL cr);
7350 
7351   size(2);
7352   format %{ "SUB    $dst,$src" %}
7353   opcode(0x2B);
7354   ins_encode( OpcP, RegReg( dst, src) );
7355   ins_pipe( ialu_reg_reg );
7356 %}
7357 
7358 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7359   match(Set dst (SubI dst src));
7360   effect(KILL cr);
7361 
7362   format %{ "SUB    $dst,$src" %}
7363   opcode(0x81,0x05);  /* Opcode 81 /5 */
7364   // ins_encode( RegImm( dst, src) );
7365   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7366   ins_pipe( ialu_reg );
7367 %}
7368 
7369 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7370   match(Set dst (SubI dst (LoadI src)));
7371   effect(KILL cr);
7372 
7373   ins_cost(125);
7374   format %{ "SUB    $dst,$src" %}
7375   opcode(0x2B);
7376   ins_encode( OpcP, RegMem( dst, src) );
7377   ins_pipe( ialu_reg_mem );
7378 %}
7379 
7380 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7381   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7382   effect(KILL cr);
7383 
7384   ins_cost(150);
7385   format %{ "SUB    $dst,$src" %}
7386   opcode(0x29);  /* Opcode 29 /r */
7387   ins_encode( OpcP, RegMem( src, dst ) );
7388   ins_pipe( ialu_mem_reg );
7389 %}
7390 
7391 // Subtract from a pointer
7392 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7393   match(Set dst (AddP dst (SubI zero src)));
7394   effect(KILL cr);
7395 
7396   size(2);
7397   format %{ "SUB    $dst,$src" %}
7398   opcode(0x2B);
7399   ins_encode( OpcP, RegReg( dst, src) );
7400   ins_pipe( ialu_reg_reg );
7401 %}
7402 
7403 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7404   match(Set dst (SubI zero dst));
7405   effect(KILL cr);
7406 
7407   size(2);
7408   format %{ "NEG    $dst" %}
7409   opcode(0xF7,0x03);  // Opcode F7 /3
7410   ins_encode( OpcP, RegOpc( dst ) );
7411   ins_pipe( ialu_reg );
7412 %}
7413 
7414 //----------Multiplication/Division Instructions-------------------------------
7415 // Integer Multiplication Instructions
7416 // Multiply Register
7417 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7418   match(Set dst (MulI dst src));
7419   effect(KILL cr);
7420 
7421   size(3);
7422   ins_cost(300);
7423   format %{ "IMUL   $dst,$src" %}
7424   opcode(0xAF, 0x0F);
7425   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7426   ins_pipe( ialu_reg_reg_alu0 );
7427 %}
7428 
7429 // Multiply 32-bit Immediate
7430 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7431   match(Set dst (MulI src imm));
7432   effect(KILL cr);
7433 
7434   ins_cost(300);
7435   format %{ "IMUL   $dst,$src,$imm" %}
7436   opcode(0x69);  /* 69 /r id */
7437   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7438   ins_pipe( ialu_reg_reg_alu0 );
7439 %}
7440 
7441 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7442   match(Set dst src);
7443   effect(KILL cr);
7444 
7445   // Note that this is artificially increased to make it more expensive than loadConL
7446   ins_cost(250);
7447   format %{ "MOV    EAX,$src\t// low word only" %}
7448   opcode(0xB8);
7449   ins_encode( LdImmL_Lo(dst, src) );
7450   ins_pipe( ialu_reg_fat );
7451 %}
7452 
7453 // Multiply by 32-bit Immediate, taking the shifted high order results
7454 //  (special case for shift by 32)
7455 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7456   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7457   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7458              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7459              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7460   effect(USE src1, KILL cr);
7461 
7462   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7463   ins_cost(0*100 + 1*400 - 150);
7464   format %{ "IMUL   EDX:EAX,$src1" %}
7465   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7466   ins_pipe( pipe_slow );
7467 %}
7468 
7469 // Multiply by 32-bit Immediate, taking the shifted high order results
7470 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7471   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7472   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7473              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7474              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7475   effect(USE src1, KILL cr);
7476 
7477   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7478   ins_cost(1*100 + 1*400 - 150);
7479   format %{ "IMUL   EDX:EAX,$src1\n\t"
7480             "SAR    EDX,$cnt-32" %}
7481   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7482   ins_pipe( pipe_slow );
7483 %}
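
// The two rules above extract the high half of a 32x32->64 multiply from EDX.
// A minimal C sketch of the shape they are intended to match (the function name
// and parameter names are hypothetical, for illustration only):
//
//   int mul_shift_high(int x, int con /* constant in [min_jint, max_jint] */,
//                      int cnt /* 32..63 */) {
//     return (int)(((long long)x * con) >> cnt);  // IMUL leaves the high word in EDX;
//   }                                             // SAR EDX,cnt-32 when cnt > 32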
7484 
7485 // Multiply Memory 32-bit Immediate
7486 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7487   match(Set dst (MulI (LoadI src) imm));
7488   effect(KILL cr);
7489 
7490   ins_cost(300);
7491   format %{ "IMUL   $dst,$src,$imm" %}
7492   opcode(0x69);  /* 69 /r id */
7493   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7494   ins_pipe( ialu_reg_mem_alu0 );
7495 %}
7496 
7497 // Multiply Memory
7498 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7499   match(Set dst (MulI dst (LoadI src)));
7500   effect(KILL cr);
7501 
7502   ins_cost(350);
7503   format %{ "IMUL   $dst,$src" %}
7504   opcode(0xAF, 0x0F);
7505   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7506   ins_pipe( ialu_reg_mem_alu0 );
7507 %}
7508 
7509 // Multiply Register Int to Long
7510 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7511   // Basic Idea: long = (long)int * (long)int
7512   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7513   effect(DEF dst, USE src, USE src1, KILL flags);
7514 
7515   ins_cost(300);
7516   format %{ "IMUL   $dst,$src1" %}
7517 
7518   ins_encode( long_int_multiply( dst, src1 ) );
7519   ins_pipe( ialu_reg_reg_alu0 );
7520 %}
7521 
7522 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7523   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7524   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7525   effect(KILL flags);
7526 
7527   ins_cost(300);
7528   format %{ "MUL    $dst,$src1" %}
7529 
7530   ins_encode( long_uint_multiply(dst, src1) );
7531   ins_pipe( ialu_reg_reg_alu0 );
7532 %}
7533 
7534 // Multiply Register Long
7535 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7536   match(Set dst (MulL dst src));
7537   effect(KILL cr, TEMP tmp);
7538   ins_cost(4*100+3*400);
7539 // Basic idea: lo(result) = lo(x_lo * y_lo)
7540 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7541   format %{ "MOV    $tmp,$src.lo\n\t"
7542             "IMUL   $tmp,EDX\n\t"
7543             "MOV    EDX,$src.hi\n\t"
7544             "IMUL   EDX,EAX\n\t"
7545             "ADD    $tmp,EDX\n\t"
7546             "MUL    EDX:EAX,$src.lo\n\t"
7547             "ADD    EDX,$tmp" %}
7548   ins_encode( long_multiply( dst, src, tmp ) );
7549   ins_pipe( pipe_slow );
7550 %}
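
// A minimal C sketch of the 64x64->64 decomposition used above, written on
// 32-bit unsigned halves (illustration only; names are hypothetical, types
// assume <stdint.h>):
//
//   uint64_t mul64(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
//     uint64_t lo_prod = (uint64_t)x_lo * y_lo;   // MUL: EDX:EAX = x_lo * y_lo
//     uint32_t hi = (uint32_t)(lo_prod >> 32)     // hi(x_lo * y_lo)
//                 + x_hi * y_lo                   // lo(x_hi * y_lo)
//                 + x_lo * y_hi;                  // lo(x_lo * y_hi)
//     return ((uint64_t)hi << 32) | (uint32_t)lo_prod;
//   }
//
// The specialized rules below simply drop the terms whose operand high word is
// known to be zero.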
7551 
7552 // Multiply Register Long where the left operand's high 32 bits are zero
7553 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7554   predicate(is_operand_hi32_zero(n->in(1)));
7555   match(Set dst (MulL dst src));
7556   effect(KILL cr, TEMP tmp);
7557   ins_cost(2*100+2*400);
7558 // Basic idea: lo(result) = lo(x_lo * y_lo)
7559 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7560   format %{ "MOV    $tmp,$src.hi\n\t"
7561             "IMUL   $tmp,EAX\n\t"
7562             "MUL    EDX:EAX,$src.lo\n\t"
7563             "ADD    EDX,$tmp" %}
7564   ins_encode %{
7565     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7566     __ imull($tmp$$Register, rax);
7567     __ mull($src$$Register);
7568     __ addl(rdx, $tmp$$Register);
7569   %}
7570   ins_pipe( pipe_slow );
7571 %}
7572 
7573 // Multiply Register Long where the right operand's high 32 bits are zero
7574 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7575   predicate(is_operand_hi32_zero(n->in(2)));
7576   match(Set dst (MulL dst src));
7577   effect(KILL cr, TEMP tmp);
7578   ins_cost(2*100+2*400);
7579 // Basic idea: lo(result) = lo(x_lo * y_lo)
7580 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7581   format %{ "MOV    $tmp,$src.lo\n\t"
7582             "IMUL   $tmp,EDX\n\t"
7583             "MUL    EDX:EAX,$src.lo\n\t"
7584             "ADD    EDX,$tmp" %}
7585   ins_encode %{
7586     __ movl($tmp$$Register, $src$$Register);
7587     __ imull($tmp$$Register, rdx);
7588     __ mull($src$$Register);
7589     __ addl(rdx, $tmp$$Register);
7590   %}
7591   ins_pipe( pipe_slow );
7592 %}
7593 
7594 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7595 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7596   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7597   match(Set dst (MulL dst src));
7598   effect(KILL cr);
7599   ins_cost(1*400);
7600 // Basic idea: lo(result) = lo(x_lo * y_lo)
7601 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7602   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7603   ins_encode %{
7604     __ mull($src$$Register);
7605   %}
7606   ins_pipe( pipe_slow );
7607 %}
7608 
7609 // Multiply Register Long by small constant
7610 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7611   match(Set dst (MulL dst src));
7612   effect(KILL cr, TEMP tmp);
7613   ins_cost(2*100+2*400);
7614   size(12);
7615 // Basic idea: lo(result) = lo(src * EAX)
7616 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7617   format %{ "IMUL   $tmp,EDX,$src\n\t"
7618             "MOV    EDX,$src\n\t"
7619             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7620             "ADD    EDX,$tmp" %}
7621   ins_encode( long_multiply_con( dst, src, tmp ) );
7622   ins_pipe( pipe_slow );
7623 %}
7624 
7625 // Integer DIV with Register
7626 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7627   match(Set rax (DivI rax div));
7628   effect(KILL rdx, KILL cr);
7629   size(26);
7630   ins_cost(30*100+10*100);
7631   format %{ "CMP    EAX,0x80000000\n\t"
7632             "JNE,s  normal\n\t"
7633             "XOR    EDX,EDX\n\t"
7634             "CMP    ECX,-1\n\t"
7635             "JE,s   done\n"
7636     "normal: CDQ\n\t"
7637             "IDIV   $div\n\t"
7638     "done:"        %}
7639   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7640   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7641   ins_pipe( ialu_reg_reg_alu0 );
7642 %}
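
// Why the 0x80000000 / -1 guard above: IDIV raises a divide error for
// min_jint / -1, while Java defines that quotient as min_jint with remainder 0.
// A minimal C sketch of the semantics the encoding implements (the function
// name is hypothetical, for illustration only):
//
//   int java_idiv(int num, int div) {
//     if (num == (int)0x80000000 && div == -1)
//       return num;           // quotient = min_jint, remainder (EDX) = 0
//     return num / div;       // normal path: CDQ then IDIV
//   }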
7643 
7644 // Divide Register Long
7645 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7646   match(Set dst (DivL src1 src2));
7647   effect( KILL cr, KILL cx, KILL bx );
7648   ins_cost(10000);
7649   format %{ "PUSH   $src1.hi\n\t"
7650             "PUSH   $src1.lo\n\t"
7651             "PUSH   $src2.hi\n\t"
7652             "PUSH   $src2.lo\n\t"
7653             "CALL   SharedRuntime::ldiv\n\t"
7654             "ADD    ESP,16" %}
7655   ins_encode( long_div(src1,src2) );
7656   ins_pipe( pipe_slow );
7657 %}
7658 
7659 // Integer DIVMOD with Register, both quotient and mod results
7660 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7661   match(DivModI rax div);
7662   effect(KILL cr);
7663   size(26);
7664   ins_cost(30*100+10*100);
7665   format %{ "CMP    EAX,0x80000000\n\t"
7666             "JNE,s  normal\n\t"
7667             "XOR    EDX,EDX\n\t"
7668             "CMP    ECX,-1\n\t"
7669             "JE,s   done\n"
7670     "normal: CDQ\n\t"
7671             "IDIV   $div\n\t"
7672     "done:"        %}
7673   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7674   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7675   ins_pipe( pipe_slow );
7676 %}
7677 
7678 // Integer MOD with Register
7679 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7680   match(Set rdx (ModI rax div));
7681   effect(KILL rax, KILL cr);
7682 
7683   size(26);
7684   ins_cost(300);
7685   format %{ "CDQ\n\t"
7686             "IDIV   $div" %}
7687   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7688   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7689   ins_pipe( ialu_reg_reg_alu0 );
7690 %}
7691 
7692 // Remainder Register Long
7693 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7694   match(Set dst (ModL src1 src2));
7695   effect( KILL cr, KILL cx, KILL bx );
7696   ins_cost(10000);
7697   format %{ "PUSH   $src1.hi\n\t"
7698             "PUSH   $src1.lo\n\t"
7699             "PUSH   $src2.hi\n\t"
7700             "PUSH   $src2.lo\n\t"
7701             "CALL   SharedRuntime::lrem\n\t"
7702             "ADD    ESP,16" %}
7703   ins_encode( long_mod(src1,src2) );
7704   ins_pipe( pipe_slow );
7705 %}
7706 
7707 // Divide Register Long (no special case since divisor != -1)
7708 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7709   match(Set dst (DivL dst imm));
7710   effect( TEMP tmp, TEMP tmp2, KILL cr );
7711   ins_cost(1000);
7712   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7713             "XOR    $tmp2,$tmp2\n\t"
7714             "CMP    $tmp,EDX\n\t"
7715             "JA,s   fast\n\t"
7716             "MOV    $tmp2,EAX\n\t"
7717             "MOV    EAX,EDX\n\t"
7718             "MOV    EDX,0\n\t"
7719             "JLE,s  pos\n\t"
7720             "LNEG   EAX : $tmp2\n\t"
7721             "DIV    $tmp # unsigned division\n\t"
7722             "XCHG   EAX,$tmp2\n\t"
7723             "DIV    $tmp\n\t"
7724             "LNEG   $tmp2 : EAX\n\t"
7725             "JMP,s  done\n"
7726     "pos:\n\t"
7727             "DIV    $tmp\n\t"
7728             "XCHG   EAX,$tmp2\n"
7729     "fast:\n\t"
7730             "DIV    $tmp\n"
7731     "done:\n\t"
7732             "MOV    EDX,$tmp2\n\t"
7733             "NEG    EDX:EAX # if $imm < 0" %}
7734   ins_encode %{
7735     int con = (int)$imm$$constant;
7736     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7737     int pcon = (con > 0) ? con : -con;
7738     Label Lfast, Lpos, Ldone;
7739 
7740     __ movl($tmp$$Register, pcon);
7741     __ xorl($tmp2$$Register,$tmp2$$Register);
7742     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
7744 
7745     __ movl($tmp2$$Register, $dst$$Register); // save
7746     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7747     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7748     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7749 
7750     // Negative dividend.
7751     // convert value to positive to use unsigned division
7752     __ lneg($dst$$Register, $tmp2$$Register);
7753     __ divl($tmp$$Register);
7754     __ xchgl($dst$$Register, $tmp2$$Register);
7755     __ divl($tmp$$Register);
7756     // revert result back to negative
7757     __ lneg($tmp2$$Register, $dst$$Register);
7758     __ jmpb(Ldone);
7759 
7760     __ bind(Lpos);
7761     __ divl($tmp$$Register); // Use unsigned division
7762     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 holds the 32-bit hi result
7764 
7765     __ bind(Lfast);
7766     // fast path: src is positive
7767     __ divl($tmp$$Register); // Use unsigned division
7768 
7769     __ bind(Ldone);
7770     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7771     if (con < 0) {
7772       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7773     }
7774   %}
7775   ins_pipe( pipe_slow );
7776 %}
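
// The rule above reduces the 64-by-32 divide to at most two unsigned 32-bit
// DIVs (schoolbook long division with 32-bit "digits"). A minimal C sketch of
// the unsigned core, illustration only (types assume <stdint.h>); the second
// DIV cannot overflow because the remainder of the first is always < d:
//
//   uint64_t udiv_64_by_32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     uint32_t q_hi = n_hi / d;
//     uint32_t r    = n_hi % d;                              // left in EDX by DIV
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | n_lo) / d);
//     return ((uint64_t)q_hi << 32) | q_lo;
//   }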
7777 
7778 // Remainder Register Long (remainder fit into 32 bits)
7779 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7780   match(Set dst (ModL dst imm));
7781   effect( TEMP tmp, TEMP tmp2, KILL cr );
7782   ins_cost(1000);
7783   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7784             "CMP    $tmp,EDX\n\t"
7785             "JA,s   fast\n\t"
7786             "MOV    $tmp2,EAX\n\t"
7787             "MOV    EAX,EDX\n\t"
7788             "MOV    EDX,0\n\t"
7789             "JLE,s  pos\n\t"
7790             "LNEG   EAX : $tmp2\n\t"
7791             "DIV    $tmp # unsigned division\n\t"
7792             "MOV    EAX,$tmp2\n\t"
7793             "DIV    $tmp\n\t"
7794             "NEG    EDX\n\t"
7795             "JMP,s  done\n"
7796     "pos:\n\t"
7797             "DIV    $tmp\n\t"
7798             "MOV    EAX,$tmp2\n"
7799     "fast:\n\t"
7800             "DIV    $tmp\n"
7801     "done:\n\t"
7802             "MOV    EAX,EDX\n\t"
7803             "SAR    EDX,31\n\t" %}
7804   ins_encode %{
7805     int con = (int)$imm$$constant;
7806     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7807     int pcon = (con > 0) ? con : -con;
7808     Label  Lfast, Lpos, Ldone;
7809 
7810     __ movl($tmp$$Register, pcon);
7811     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and the result fits into 32 bits
7813 
7814     __ movl($tmp2$$Register, $dst$$Register); // save
7815     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7816     __ movl(HIGH_FROM_LOW($dst$$Register),0); // zero with MOV, not XOR, so the CMP flags survive for the JLE below
7817     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7818 
7819     // Negative dividend.
7820     // convert value to positive to use unsigned division
7821     __ lneg($dst$$Register, $tmp2$$Register);
7822     __ divl($tmp$$Register);
7823     __ movl($dst$$Register, $tmp2$$Register);
7824     __ divl($tmp$$Register);
7825     // revert remainder back to negative
7826     __ negl(HIGH_FROM_LOW($dst$$Register));
7827     __ jmpb(Ldone);
7828 
7829     __ bind(Lpos);
7830     __ divl($tmp$$Register);
7831     __ movl($dst$$Register, $tmp2$$Register);
7832 
7833     __ bind(Lfast);
7834     // fast path: src is positive
7835     __ divl($tmp$$Register);
7836 
7837     __ bind(Ldone);
7838     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7839     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7840 
7841   %}
7842   ins_pipe( pipe_slow );
7843 %}
7844 
7845 // Integer Shift Instructions
7846 // Shift Left by one
7847 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7848   match(Set dst (LShiftI dst shift));
7849   effect(KILL cr);
7850 
7851   size(2);
7852   format %{ "SHL    $dst,$shift" %}
7853   opcode(0xD1, 0x4);  /* D1 /4 */
7854   ins_encode( OpcP, RegOpc( dst ) );
7855   ins_pipe( ialu_reg );
7856 %}
7857 
7858 // Shift Left by 8-bit immediate
7859 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7860   match(Set dst (LShiftI dst shift));
7861   effect(KILL cr);
7862 
7863   size(3);
7864   format %{ "SHL    $dst,$shift" %}
7865   opcode(0xC1, 0x4);  /* C1 /4 ib */
7866   ins_encode( RegOpcImm( dst, shift) );
7867   ins_pipe( ialu_reg );
7868 %}
7869 
7870 // Shift Left by variable
7871 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7872   match(Set dst (LShiftI dst shift));
7873   effect(KILL cr);
7874 
7875   size(2);
7876   format %{ "SHL    $dst,$shift" %}
7877   opcode(0xD3, 0x4);  /* D3 /4 */
7878   ins_encode( OpcP, RegOpc( dst ) );
7879   ins_pipe( ialu_reg_reg );
7880 %}
7881 
7882 // Arithmetic shift right by one
7883 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7884   match(Set dst (RShiftI dst shift));
7885   effect(KILL cr);
7886 
7887   size(2);
7888   format %{ "SAR    $dst,$shift" %}
7889   opcode(0xD1, 0x7);  /* D1 /7 */
7890   ins_encode( OpcP, RegOpc( dst ) );
7891   ins_pipe( ialu_reg );
7892 %}
7893 
7894 // Arithmetic shift right by one
7895 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7896   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7897   effect(KILL cr);
7898   format %{ "SAR    $dst,$shift" %}
7899   opcode(0xD1, 0x7);  /* D1 /7 */
7900   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7901   ins_pipe( ialu_mem_imm );
7902 %}
7903 
7904 // Arithmetic Shift Right by 8-bit immediate
7905 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7906   match(Set dst (RShiftI dst shift));
7907   effect(KILL cr);
7908 
7909   size(3);
7910   format %{ "SAR    $dst,$shift" %}
7911   opcode(0xC1, 0x7);  /* C1 /7 ib */
7912   ins_encode( RegOpcImm( dst, shift ) );
7913   ins_pipe( ialu_mem_imm );
7914 %}
7915 
7916 // Arithmetic Shift Right by 8-bit immediate
7917 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7918   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7919   effect(KILL cr);
7920 
7921   format %{ "SAR    $dst,$shift" %}
7922   opcode(0xC1, 0x7);  /* C1 /7 ib */
7923   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7924   ins_pipe( ialu_mem_imm );
7925 %}
7926 
7927 // Arithmetic Shift Right by variable
7928 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7929   match(Set dst (RShiftI dst shift));
7930   effect(KILL cr);
7931 
7932   size(2);
7933   format %{ "SAR    $dst,$shift" %}
7934   opcode(0xD3, 0x7);  /* D3 /7 */
7935   ins_encode( OpcP, RegOpc( dst ) );
7936   ins_pipe( ialu_reg_reg );
7937 %}
7938 
7939 // Logical shift right by one
7940 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7941   match(Set dst (URShiftI dst shift));
7942   effect(KILL cr);
7943 
7944   size(2);
7945   format %{ "SHR    $dst,$shift" %}
7946   opcode(0xD1, 0x5);  /* D1 /5 */
7947   ins_encode( OpcP, RegOpc( dst ) );
7948   ins_pipe( ialu_reg );
7949 %}
7950 
7951 // Logical Shift Right by 8-bit immediate
7952 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7953   match(Set dst (URShiftI dst shift));
7954   effect(KILL cr);
7955 
7956   size(3);
7957   format %{ "SHR    $dst,$shift" %}
7958   opcode(0xC1, 0x5);  /* C1 /5 ib */
7959   ins_encode( RegOpcImm( dst, shift) );
7960   ins_pipe( ialu_reg );
7961 %}
7962 
7963 
7964 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7965 // This idiom is used by the compiler for the i2b bytecode.
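     // For example, a Java-level cast such as  byte b = (byte) x;  reaches the
     // matcher as (x << 24) >> 24 and collapses into this one sign-extending move.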
7966 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7967   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7968 
7969   size(3);
7970   format %{ "MOVSX  $dst,$src :8" %}
7971   ins_encode %{
7972     __ movsbl($dst$$Register, $src$$Register);
7973   %}
7974   ins_pipe(ialu_reg_reg);
7975 %}
7976 
7977 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7978 // This idiom is used by the compiler for the i2s bytecode.
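     // Likewise, a (short) cast arrives as (x << 16) >> 16.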
7979 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7980   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7981 
7982   size(3);
7983   format %{ "MOVSX  $dst,$src :16" %}
7984   ins_encode %{
7985     __ movswl($dst$$Register, $src$$Register);
7986   %}
7987   ins_pipe(ialu_reg_reg);
7988 %}
7989 
7990 
7991 // Logical Shift Right by variable
7992 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7993   match(Set dst (URShiftI dst shift));
7994   effect(KILL cr);
7995 
7996   size(2);
7997   format %{ "SHR    $dst,$shift" %}
7998   opcode(0xD3, 0x5);  /* D3 /5 */
7999   ins_encode( OpcP, RegOpc( dst ) );
8000   ins_pipe( ialu_reg_reg );
8001 %}
8002 
8003 
8004 //----------Logical Instructions-----------------------------------------------
8005 //----------Integer Logical Instructions---------------------------------------
8006 // And Instructions
8007 // And Register with Register
8008 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8009   match(Set dst (AndI dst src));
8010   effect(KILL cr);
8011 
8012   size(2);
8013   format %{ "AND    $dst,$src" %}
8014   opcode(0x23);
8015   ins_encode( OpcP, RegReg( dst, src) );
8016   ins_pipe( ialu_reg_reg );
8017 %}
8018 
8019 // And Register with Immediate
8020 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8021   match(Set dst (AndI dst src));
8022   effect(KILL cr);
8023 
8024   format %{ "AND    $dst,$src" %}
8025   opcode(0x81,0x04);  /* Opcode 81 /4 */
8026   // ins_encode( RegImm( dst, src) );
8027   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8028   ins_pipe( ialu_reg );
8029 %}
8030 
8031 // And Register with Memory
8032 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8033   match(Set dst (AndI dst (LoadI src)));
8034   effect(KILL cr);
8035 
8036   ins_cost(125);
8037   format %{ "AND    $dst,$src" %}
8038   opcode(0x23);
8039   ins_encode( OpcP, RegMem( dst, src) );
8040   ins_pipe( ialu_reg_mem );
8041 %}
8042 
8043 // And Memory with Register
8044 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8045   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8046   effect(KILL cr);
8047 
8048   ins_cost(150);
8049   format %{ "AND    $dst,$src" %}
8050   opcode(0x21);  /* Opcode 21 /r */
8051   ins_encode( OpcP, RegMem( src, dst ) );
8052   ins_pipe( ialu_mem_reg );
8053 %}
8054 
8055 // And Memory with Immediate
8056 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8057   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8058   effect(KILL cr);
8059 
8060   ins_cost(125);
8061   format %{ "AND    $dst,$src" %}
8062   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8063   // ins_encode( MemImm( dst, src) );
8064   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8065   ins_pipe( ialu_mem_imm );
8066 %}
8067 
8068 // BMI1 instructions
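     // These rules match the ideal-graph shapes of the usual BMI1 bit tricks:
     //   ANDNL    ~x & y
     //   BLSIL     x & -x        (isolate lowest set bit)
     //   BLSMSKL   x ^ (x - 1)   (mask up to and including lowest set bit)
     //   BLSRL     x & (x - 1)   (clear lowest set bit)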
8069 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8070   match(Set dst (AndI (XorI src1 minus_1) src2));
8071   predicate(UseBMI1Instructions);
8072   effect(KILL cr);
8073 
8074   format %{ "ANDNL  $dst, $src1, $src2" %}
8075 
8076   ins_encode %{
8077     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8078   %}
8079   ins_pipe(ialu_reg);
8080 %}
8081 
8082 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8083   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8084   predicate(UseBMI1Instructions);
8085   effect(KILL cr);
8086 
8087   ins_cost(125);
8088   format %{ "ANDNL  $dst, $src1, $src2" %}
8089 
8090   ins_encode %{
8091     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8092   %}
8093   ins_pipe(ialu_reg_mem);
8094 %}
8095 
8096 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8097   match(Set dst (AndI (SubI imm_zero src) src));
8098   predicate(UseBMI1Instructions);
8099   effect(KILL cr);
8100 
8101   format %{ "BLSIL  $dst, $src" %}
8102 
8103   ins_encode %{
8104     __ blsil($dst$$Register, $src$$Register);
8105   %}
8106   ins_pipe(ialu_reg);
8107 %}
8108 
8109 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8110   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8111   predicate(UseBMI1Instructions);
8112   effect(KILL cr);
8113 
8114   ins_cost(125);
8115   format %{ "BLSIL  $dst, $src" %}
8116 
8117   ins_encode %{
8118     __ blsil($dst$$Register, $src$$Address);
8119   %}
8120   ins_pipe(ialu_reg_mem);
8121 %}
8122 
8123 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8124 %{
8125   match(Set dst (XorI (AddI src minus_1) src));
8126   predicate(UseBMI1Instructions);
8127   effect(KILL cr);
8128 
8129   format %{ "BLSMSKL $dst, $src" %}
8130 
8131   ins_encode %{
8132     __ blsmskl($dst$$Register, $src$$Register);
8133   %}
8134 
8135   ins_pipe(ialu_reg);
8136 %}
8137 
8138 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8139 %{
8140   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8141   predicate(UseBMI1Instructions);
8142   effect(KILL cr);
8143 
8144   ins_cost(125);
8145   format %{ "BLSMSKL $dst, $src" %}
8146 
8147   ins_encode %{
8148     __ blsmskl($dst$$Register, $src$$Address);
8149   %}
8150 
8151   ins_pipe(ialu_reg_mem);
8152 %}
8153 
8154 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8155 %{
8156   match(Set dst (AndI (AddI src minus_1) src) );
8157   predicate(UseBMI1Instructions);
8158   effect(KILL cr);
8159 
8160   format %{ "BLSRL  $dst, $src" %}
8161 
8162   ins_encode %{
8163     __ blsrl($dst$$Register, $src$$Register);
8164   %}
8165 
8166   ins_pipe(ialu_reg);
8167 %}
8168 
8169 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8170 %{
8171   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8172   predicate(UseBMI1Instructions);
8173   effect(KILL cr);
8174 
8175   ins_cost(125);
8176   format %{ "BLSRL  $dst, $src" %}
8177 
8178   ins_encode %{
8179     __ blsrl($dst$$Register, $src$$Address);
8180   %}
8181 
8182   ins_pipe(ialu_reg_mem);
8183 %}
8184 
8185 // Or Instructions
8186 // Or Register with Register
8187 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8188   match(Set dst (OrI dst src));
8189   effect(KILL cr);
8190 
8191   size(2);
8192   format %{ "OR     $dst,$src" %}
8193   opcode(0x0B);
8194   ins_encode( OpcP, RegReg( dst, src) );
8195   ins_pipe( ialu_reg_reg );
8196 %}
8197 
8198 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8199   match(Set dst (OrI dst (CastP2X src)));
8200   effect(KILL cr);
8201 
8202   size(2);
8203   format %{ "OR     $dst,$src" %}
8204   opcode(0x0B);
8205   ins_encode( OpcP, RegReg( dst, src) );
8206   ins_pipe( ialu_reg_reg );
8207 %}
8208 
8209 
8210 // Or Register with Immediate
8211 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8212   match(Set dst (OrI dst src));
8213   effect(KILL cr);
8214 
8215   format %{ "OR     $dst,$src" %}
8216   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8217   // ins_encode( RegImm( dst, src) );
8218   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8219   ins_pipe( ialu_reg );
8220 %}
8221 
8222 // Or Register with Memory
8223 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8224   match(Set dst (OrI dst (LoadI src)));
8225   effect(KILL cr);
8226 
8227   ins_cost(125);
8228   format %{ "OR     $dst,$src" %}
8229   opcode(0x0B);
8230   ins_encode( OpcP, RegMem( dst, src) );
8231   ins_pipe( ialu_reg_mem );
8232 %}
8233 
8234 // Or Memory with Register
8235 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8236   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8237   effect(KILL cr);
8238 
8239   ins_cost(150);
8240   format %{ "OR     $dst,$src" %}
8241   opcode(0x09);  /* Opcode 09 /r */
8242   ins_encode( OpcP, RegMem( src, dst ) );
8243   ins_pipe( ialu_mem_reg );
8244 %}
8245 
8246 // Or Memory with Immediate
8247 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8248   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8249   effect(KILL cr);
8250 
8251   ins_cost(125);
8252   format %{ "OR     $dst,$src" %}
8253   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8254   // ins_encode( MemImm( dst, src) );
8255   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8256   ins_pipe( ialu_mem_imm );
8257 %}
8258 
8259 // ROL/ROR
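     // The rules below recognize the rotate idiom  (x << s) | (x >>> (32 - s))
     // (e.g. Integer.rotateLeft) and reduce it to a single ROL/ROR; the 32-s
     // count appears either as a constant pair summing to 0 mod 32 or as
     // (SubI zero shift) / (SubI c32 shift) for variable counts.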
8260 // ROL expand
8261 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8262   effect(USE_DEF dst, USE shift, KILL cr);
8263 
8264   format %{ "ROL    $dst, $shift" %}
8265   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8266   ins_encode( OpcP, RegOpc( dst ));
8267   ins_pipe( ialu_reg );
8268 %}
8269 
8270 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8271   effect(USE_DEF dst, USE shift, KILL cr);
8272 
8273   format %{ "ROL    $dst, $shift" %}
8274   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8275   ins_encode( RegOpcImm(dst, shift) );
8276   ins_pipe(ialu_reg);
8277 %}
8278 
8279 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8280   effect(USE_DEF dst, USE shift, KILL cr);
8281 
8282   format %{ "ROL    $dst, $shift" %}
8283   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8284   ins_encode(OpcP, RegOpc(dst));
8285   ins_pipe( ialu_reg_reg );
8286 %}
8287 // end of ROL expand
8288 
8289 // ROL 32bit by one once
8290 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8291   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8292 
8293   expand %{
8294     rolI_eReg_imm1(dst, lshift, cr);
8295   %}
8296 %}
8297 
8298 // ROL 32bit var by imm8 once
8299 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8300   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8301   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8302 
8303   expand %{
8304     rolI_eReg_imm8(dst, lshift, cr);
8305   %}
8306 %}
8307 
8308 // ROL 32bit var by var once
8309 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8310   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8311 
8312   expand %{
8313     rolI_eReg_CL(dst, shift, cr);
8314   %}
8315 %}
8316 
8317 // ROL 32bit var by var once
8318 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8319   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8320 
8321   expand %{
8322     rolI_eReg_CL(dst, shift, cr);
8323   %}
8324 %}
8325 
8326 // ROR expand
8327 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8328   effect(USE_DEF dst, USE shift, KILL cr);
8329 
8330   format %{ "ROR    $dst, $shift" %}
8331   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8332   ins_encode( OpcP, RegOpc( dst ) );
8333   ins_pipe( ialu_reg );
8334 %}
8335 
8336 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8337   effect (USE_DEF dst, USE shift, KILL cr);
8338 
8339   format %{ "ROR    $dst, $shift" %}
8340   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8341   ins_encode( RegOpcImm(dst, shift) );
8342   ins_pipe( ialu_reg );
8343 %}
8344 
8345 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8346   effect(USE_DEF dst, USE shift, KILL cr);
8347 
8348   format %{ "ROR    $dst, $shift" %}
8349   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8350   ins_encode(OpcP, RegOpc(dst));
8351   ins_pipe( ialu_reg_reg );
8352 %}
8353 // end of ROR expand
8354 
8355 // ROR 32bit by one once
8356 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8357   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8358 
8359   expand %{
8360     rorI_eReg_imm1(dst, rshift, cr);
8361   %}
8362 %}
8363 
8364 // ROR 32bit by immI8 once
8365 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8366   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8367   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8368 
8369   expand %{
8370     rorI_eReg_imm8(dst, rshift, cr);
8371   %}
8372 %}
8373 
8374 // ROR 32bit var by var once
8375 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8376   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8377 
8378   expand %{
8379     rorI_eReg_CL(dst, shift, cr);
8380   %}
8381 %}
8382 
8383 // ROR 32bit var by var once
8384 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8385   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8386 
8387   expand %{
8388     rorI_eReg_CL(dst, shift, cr);
8389   %}
8390 %}
8391 
8392 // Xor Instructions
8393 // Xor Register with Register
8394 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8395   match(Set dst (XorI dst src));
8396   effect(KILL cr);
8397 
8398   size(2);
8399   format %{ "XOR    $dst,$src" %}
8400   opcode(0x33);
8401   ins_encode( OpcP, RegReg( dst, src) );
8402   ins_pipe( ialu_reg_reg );
8403 %}
8404 
8405 // Xor Register with Immediate -1
8406 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8407   match(Set dst (XorI dst imm));
8408 
8409   size(2);
8410   format %{ "NOT    $dst" %}
8411   ins_encode %{
8412      __ notl($dst$$Register);
8413   %}
8414   ins_pipe( ialu_reg );
8415 %}
8416 
8417 // Xor Register with Immediate
8418 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8419   match(Set dst (XorI dst src));
8420   effect(KILL cr);
8421 
8422   format %{ "XOR    $dst,$src" %}
8423   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8424   // ins_encode( RegImm( dst, src) );
8425   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8426   ins_pipe( ialu_reg );
8427 %}
8428 
8429 // Xor Register with Memory
8430 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8431   match(Set dst (XorI dst (LoadI src)));
8432   effect(KILL cr);
8433 
8434   ins_cost(125);
8435   format %{ "XOR    $dst,$src" %}
8436   opcode(0x33);
8437   ins_encode( OpcP, RegMem(dst, src) );
8438   ins_pipe( ialu_reg_mem );
8439 %}
8440 
8441 // Xor Memory with Register
8442 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8443   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8444   effect(KILL cr);
8445 
8446   ins_cost(150);
8447   format %{ "XOR    $dst,$src" %}
8448   opcode(0x31);  /* Opcode 31 /r */
8449   ins_encode( OpcP, RegMem( src, dst ) );
8450   ins_pipe( ialu_mem_reg );
8451 %}
8452 
8453 // Xor Memory with Immediate
8454 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8455   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8456   effect(KILL cr);
8457 
8458   ins_cost(125);
8459   format %{ "XOR    $dst,$src" %}
8460   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8461   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8462   ins_pipe( ialu_mem_imm );
8463 %}
8464 
8465 //----------Convert Int to Boolean---------------------------------------------
8466 
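     // Conv2B produces 0 for a zero input and 1 otherwise.  The expansions below
     // copy the source, NEG it (which sets CF exactly when the value is non-zero)
     // and then ADC the original back in, leaving -x + x + CF == CF in the register.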
8467 instruct movI_nocopy(rRegI dst, rRegI src) %{
8468   effect( DEF dst, USE src );
8469   format %{ "MOV    $dst,$src" %}
8470   ins_encode( enc_Copy( dst, src) );
8471   ins_pipe( ialu_reg_reg );
8472 %}
8473 
8474 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8475   effect( USE_DEF dst, USE src, KILL cr );
8476 
8477   size(4);
8478   format %{ "NEG    $dst\n\t"
8479             "ADC    $dst,$src" %}
8480   ins_encode( neg_reg(dst),
8481               OpcRegReg(0x13,dst,src) );
8482   ins_pipe( ialu_reg_reg_long );
8483 %}
8484 
8485 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8486   match(Set dst (Conv2B src));
8487 
8488   expand %{
8489     movI_nocopy(dst,src);
8490     ci2b(dst,src,cr);
8491   %}
8492 %}
8493 
8494 instruct movP_nocopy(rRegI dst, eRegP src) %{
8495   effect( DEF dst, USE src );
8496   format %{ "MOV    $dst,$src" %}
8497   ins_encode( enc_Copy( dst, src) );
8498   ins_pipe( ialu_reg_reg );
8499 %}
8500 
8501 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8502   effect( USE_DEF dst, USE src, KILL cr );
8503   format %{ "NEG    $dst\n\t"
8504             "ADC    $dst,$src" %}
8505   ins_encode( neg_reg(dst),
8506               OpcRegReg(0x13,dst,src) );
8507   ins_pipe( ialu_reg_reg_long );
8508 %}
8509 
8510 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8511   match(Set dst (Conv2B src));
8512 
8513   expand %{
8514     movP_nocopy(dst,src);
8515     cp2b(dst,src,cr);
8516   %}
8517 %}
8518 
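     // CmpLTMask yields -1 when p < q (signed) and 0 otherwise; the SETlt/NEG
     // sequence below materializes that mask without a branch.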
8519 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8520   match(Set dst (CmpLTMask p q));
8521   effect(KILL cr);
8522   ins_cost(400);
8523 
8524   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8525   format %{ "XOR    $dst,$dst\n\t"
8526             "CMP    $p,$q\n\t"
8527             "SETlt  $dst\n\t"
8528             "NEG    $dst" %}
8529   ins_encode %{
8530     Register Rp = $p$$Register;
8531     Register Rq = $q$$Register;
8532     Register Rd = $dst$$Register;
8533     Label done;
8534     __ xorl(Rd, Rd);
8535     __ cmpl(Rp, Rq);
8536     __ setb(Assembler::less, Rd);
8537     __ negl(Rd);
8538   %}
8539 
8540   ins_pipe(pipe_slow);
8541 %}
8542 
8543 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8544   match(Set dst (CmpLTMask dst zero));
8545   effect(DEF dst, KILL cr);
8546   ins_cost(100);
8547 
8548   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8549   ins_encode %{
8550   __ sarl($dst$$Register, 31);
8551   %}
8552   ins_pipe(ialu_reg);
8553 %}
8554 
8555 /* better to save a register than avoid a branch */
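     // Matches (p < q) ? p - q + y : p - q, i.e. p - q with a conditional add of y.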
8556 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8557   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8558   effect(KILL cr);
8559   ins_cost(400);
8560   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8561             "JGE    done\n\t"
8562             "ADD    $p,$y\n"
8563             "done:  " %}
8564   ins_encode %{
8565     Register Rp = $p$$Register;
8566     Register Rq = $q$$Register;
8567     Register Ry = $y$$Register;
8568     Label done;
8569     __ subl(Rp, Rq);
8570     __ jccb(Assembler::greaterEqual, done);
8571     __ addl(Rp, Ry);
8572     __ bind(done);
8573   %}
8574 
8575   ins_pipe(pipe_cmplt);
8576 %}
8577 
8578 /* better to save a register than avoid a branch */
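     // Matches (p < q) ? y : 0.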
8579 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8580   match(Set y (AndI (CmpLTMask p q) y));
8581   effect(KILL cr);
8582 
8583   ins_cost(300);
8584 
8585   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8586             "JLT      done\n\t"
8587             "XORL     $y, $y\n"
8588             "done:  " %}
8589   ins_encode %{
8590     Register Rp = $p$$Register;
8591     Register Rq = $q$$Register;
8592     Register Ry = $y$$Register;
8593     Label done;
8594     __ cmpl(Rp, Rq);
8595     __ jccb(Assembler::less, done);
8596     __ xorl(Ry, Ry);
8597     __ bind(done);
8598   %}
8599 
8600   ins_pipe(pipe_cmplt);
8601 %}
8602 
8603 /* If I enable this, I encourage spilling in the inner loop of compress.
8604 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8605   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8606 */
8607 //----------Overflow Math Instructions-----------------------------------------
8608 
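     // These nodes exist only for their flags result: OverflowSubI can use CMP
     // and leave its operands untouched, while the ADD/NEG/IMUL forms clobber a
     // register and therefore USE_KILL it (or compute into a TEMP).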
8609 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8610 %{
8611   match(Set cr (OverflowAddI op1 op2));
8612   effect(DEF cr, USE_KILL op1, USE op2);
8613 
8614   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8615 
8616   ins_encode %{
8617     __ addl($op1$$Register, $op2$$Register);
8618   %}
8619   ins_pipe(ialu_reg_reg);
8620 %}
8621 
8622 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8623 %{
8624   match(Set cr (OverflowAddI op1 op2));
8625   effect(DEF cr, USE_KILL op1, USE op2);
8626 
8627   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8628 
8629   ins_encode %{
8630     __ addl($op1$$Register, $op2$$constant);
8631   %}
8632   ins_pipe(ialu_reg_reg);
8633 %}
8634 
8635 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8636 %{
8637   match(Set cr (OverflowSubI op1 op2));
8638 
8639   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8640   ins_encode %{
8641     __ cmpl($op1$$Register, $op2$$Register);
8642   %}
8643   ins_pipe(ialu_reg_reg);
8644 %}
8645 
8646 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8647 %{
8648   match(Set cr (OverflowSubI op1 op2));
8649 
8650   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8651   ins_encode %{
8652     __ cmpl($op1$$Register, $op2$$constant);
8653   %}
8654   ins_pipe(ialu_reg_reg);
8655 %}
8656 
8657 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8658 %{
8659   match(Set cr (OverflowSubI zero op2));
8660   effect(DEF cr, USE_KILL op2);
8661 
8662   format %{ "NEG    $op2\t# overflow check int" %}
8663   ins_encode %{
8664     __ negl($op2$$Register);
8665   %}
8666   ins_pipe(ialu_reg_reg);
8667 %}
8668 
8669 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8670 %{
8671   match(Set cr (OverflowMulI op1 op2));
8672   effect(DEF cr, USE_KILL op1, USE op2);
8673 
8674   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8675   ins_encode %{
8676     __ imull($op1$$Register, $op2$$Register);
8677   %}
8678   ins_pipe(ialu_reg_reg_alu0);
8679 %}
8680 
8681 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8682 %{
8683   match(Set cr (OverflowMulI op1 op2));
8684   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8685 
8686   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8687   ins_encode %{
8688     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8689   %}
8690   ins_pipe(ialu_reg_reg_alu0);
8691 %}
8692 
8693 //----------Long Instructions------------------------------------------------
8694 // Add Long Register with Register
8695 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8696   match(Set dst (AddL dst src));
8697   effect(KILL cr);
8698   ins_cost(200);
8699   format %{ "ADD    $dst.lo,$src.lo\n\t"
8700             "ADC    $dst.hi,$src.hi" %}
8701   opcode(0x03, 0x13);
8702   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8703   ins_pipe( ialu_reg_reg_long );
8704 %}
8705 
8706 // Add Long Register with Immediate
8707 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8708   match(Set dst (AddL dst src));
8709   effect(KILL cr);
8710   format %{ "ADD    $dst.lo,$src.lo\n\t"
8711             "ADC    $dst.hi,$src.hi" %}
8712   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8713   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8714   ins_pipe( ialu_reg_long );
8715 %}
8716 
8717 // Add Long Register with Memory
8718 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8719   match(Set dst (AddL dst (LoadL mem)));
8720   effect(KILL cr);
8721   ins_cost(125);
8722   format %{ "ADD    $dst.lo,$mem\n\t"
8723             "ADC    $dst.hi,$mem+4" %}
8724   opcode(0x03, 0x13);
8725   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8726   ins_pipe( ialu_reg_long_mem );
8727 %}
8728 
8729 // Subtract Long Register with Register.
8730 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8731   match(Set dst (SubL dst src));
8732   effect(KILL cr);
8733   ins_cost(200);
8734   format %{ "SUB    $dst.lo,$src.lo\n\t"
8735             "SBB    $dst.hi,$src.hi" %}
8736   opcode(0x2B, 0x1B);
8737   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8738   ins_pipe( ialu_reg_reg_long );
8739 %}
8740 
8741 // Subtract Long Register with Immediate
8742 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8743   match(Set dst (SubL dst src));
8744   effect(KILL cr);
8745   format %{ "SUB    $dst.lo,$src.lo\n\t"
8746             "SBB    $dst.hi,$src.hi" %}
8747   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8748   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8749   ins_pipe( ialu_reg_long );
8750 %}
8751 
8752 // Subtract Long Register with Memory
8753 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8754   match(Set dst (SubL dst (LoadL mem)));
8755   effect(KILL cr);
8756   ins_cost(125);
8757   format %{ "SUB    $dst.lo,$mem\n\t"
8758             "SBB    $dst.hi,$mem+4" %}
8759   opcode(0x2B, 0x1B);
8760   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8761   ins_pipe( ialu_reg_long_mem );
8762 %}
8763 
8764 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8765   match(Set dst (SubL zero dst));
8766   effect(KILL cr);
8767   ins_cost(300);
8768   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8769   ins_encode( neg_long(dst) );
8770   ins_pipe( ialu_reg_reg_long );
8771 %}
8772 
8773 // And Long Register with Register
8774 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8775   match(Set dst (AndL dst src));
8776   effect(KILL cr);
8777   format %{ "AND    $dst.lo,$src.lo\n\t"
8778             "AND    $dst.hi,$src.hi" %}
8779   opcode(0x23,0x23);
8780   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8781   ins_pipe( ialu_reg_reg_long );
8782 %}
8783 
8784 // And Long Register with Immediate
8785 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8786   match(Set dst (AndL dst src));
8787   effect(KILL cr);
8788   format %{ "AND    $dst.lo,$src.lo\n\t"
8789             "AND    $dst.hi,$src.hi" %}
8790   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8791   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8792   ins_pipe( ialu_reg_long );
8793 %}
8794 
8795 // And Long Register with Memory
8796 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8797   match(Set dst (AndL dst (LoadL mem)));
8798   effect(KILL cr);
8799   ins_cost(125);
8800   format %{ "AND    $dst.lo,$mem\n\t"
8801             "AND    $dst.hi,$mem+4" %}
8802   opcode(0x23, 0x23);
8803   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8804   ins_pipe( ialu_reg_long_mem );
8805 %}
8806 
8807 // BMI1 instructions
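     // The 64-bit BMI1 forms are stitched together from two 32-bit ops.  ANDN is
     // simply applied to both halves; the BLS* forms process the low word first
     // and use the flag it sets (ZF for BLSI, CF for BLSMSK/BLSR) to decide
     // whether the high word still needs the same treatment.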
8808 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8809   match(Set dst (AndL (XorL src1 minus_1) src2));
8810   predicate(UseBMI1Instructions);
8811   effect(KILL cr, TEMP dst);
8812 
8813   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8814             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8815          %}
8816 
8817   ins_encode %{
8818     Register Rdst = $dst$$Register;
8819     Register Rsrc1 = $src1$$Register;
8820     Register Rsrc2 = $src2$$Register;
8821     __ andnl(Rdst, Rsrc1, Rsrc2);
8822     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8823   %}
8824   ins_pipe(ialu_reg_reg_long);
8825 %}
8826 
8827 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8828   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8829   predicate(UseBMI1Instructions);
8830   effect(KILL cr, TEMP dst);
8831 
8832   ins_cost(125);
8833   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8834             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8835          %}
8836 
8837   ins_encode %{
8838     Register Rdst = $dst$$Register;
8839     Register Rsrc1 = $src1$$Register;
8840     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8841 
8842     __ andnl(Rdst, Rsrc1, $src2$$Address);
8843     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8844   %}
8845   ins_pipe(ialu_reg_mem);
8846 %}
8847 
8848 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8849   match(Set dst (AndL (SubL imm_zero src) src));
8850   predicate(UseBMI1Instructions);
8851   effect(KILL cr, TEMP dst);
8852 
8853   format %{ "MOVL   $dst.hi, 0\n\t"
8854             "BLSIL  $dst.lo, $src.lo\n\t"
8855             "JNZ    done\n\t"
8856             "BLSIL  $dst.hi, $src.hi\n"
8857             "done:"
8858          %}
8859 
8860   ins_encode %{
8861     Label done;
8862     Register Rdst = $dst$$Register;
8863     Register Rsrc = $src$$Register;
8864     __ movl(HIGH_FROM_LOW(Rdst), 0);
8865     __ blsil(Rdst, Rsrc);
8866     __ jccb(Assembler::notZero, done);
8867     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8868     __ bind(done);
8869   %}
8870   ins_pipe(ialu_reg);
8871 %}
8872 
8873 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8874   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8875   predicate(UseBMI1Instructions);
8876   effect(KILL cr, TEMP dst);
8877 
8878   ins_cost(125);
8879   format %{ "MOVL   $dst.hi, 0\n\t"
8880             "BLSIL  $dst.lo, $src\n\t"
8881             "JNZ    done\n\t"
8882             "BLSIL  $dst.hi, $src+4\n"
8883             "done:"
8884          %}
8885 
8886   ins_encode %{
8887     Label done;
8888     Register Rdst = $dst$$Register;
8889     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8890 
8891     __ movl(HIGH_FROM_LOW(Rdst), 0);
8892     __ blsil(Rdst, $src$$Address);
8893     __ jccb(Assembler::notZero, done);
8894     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8895     __ bind(done);
8896   %}
8897   ins_pipe(ialu_reg_mem);
8898 %}
8899 
8900 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8901 %{
8902   match(Set dst (XorL (AddL src minus_1) src));
8903   predicate(UseBMI1Instructions);
8904   effect(KILL cr, TEMP dst);
8905 
8906   format %{ "MOVL    $dst.hi, 0\n\t"
8907             "BLSMSKL $dst.lo, $src.lo\n\t"
8908             "JNC     done\n\t"
8909             "BLSMSKL $dst.hi, $src.hi\n"
8910             "done:"
8911          %}
8912 
8913   ins_encode %{
8914     Label done;
8915     Register Rdst = $dst$$Register;
8916     Register Rsrc = $src$$Register;
8917     __ movl(HIGH_FROM_LOW(Rdst), 0);
8918     __ blsmskl(Rdst, Rsrc);
8919     __ jccb(Assembler::carryClear, done);
8920     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8921     __ bind(done);
8922   %}
8923 
8924   ins_pipe(ialu_reg);
8925 %}
8926 
8927 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8928 %{
8929   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8930   predicate(UseBMI1Instructions);
8931   effect(KILL cr, TEMP dst);
8932 
8933   ins_cost(125);
8934   format %{ "MOVL    $dst.hi, 0\n\t"
8935             "BLSMSKL $dst.lo, $src\n\t"
8936             "JNC     done\n\t"
8937             "BLSMSKL $dst.hi, $src+4\n"
8938             "done:"
8939          %}
8940 
8941   ins_encode %{
8942     Label done;
8943     Register Rdst = $dst$$Register;
8944     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8945 
8946     __ movl(HIGH_FROM_LOW(Rdst), 0);
8947     __ blsmskl(Rdst, $src$$Address);
8948     __ jccb(Assembler::carryClear, done);
8949     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8950     __ bind(done);
8951   %}
8952 
8953   ins_pipe(ialu_reg_mem);
8954 %}
8955 
8956 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8957 %{
8958   match(Set dst (AndL (AddL src minus_1) src) );
8959   predicate(UseBMI1Instructions);
8960   effect(KILL cr, TEMP dst);
8961 
8962   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8963             "BLSRL  $dst.lo, $src.lo\n\t"
8964             "JNC    done\n\t"
8965             "BLSRL  $dst.hi, $src.hi\n"
8966             "done:"
8967   %}
8968 
8969   ins_encode %{
8970     Label done;
8971     Register Rdst = $dst$$Register;
8972     Register Rsrc = $src$$Register;
8973     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8974     __ blsrl(Rdst, Rsrc);
8975     __ jccb(Assembler::carryClear, done);
8976     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8977     __ bind(done);
8978   %}
8979 
8980   ins_pipe(ialu_reg);
8981 %}
8982 
8983 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8984 %{
8985   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8986   predicate(UseBMI1Instructions);
8987   effect(KILL cr, TEMP dst);
8988 
8989   ins_cost(125);
8990   format %{ "MOVL   $dst.hi, $src+4\n\t"
8991             "BLSRL  $dst.lo, $src\n\t"
8992             "JNC    done\n\t"
8993             "BLSRL  $dst.hi, $src+4\n"
8994             "done:"
8995   %}
8996 
8997   ins_encode %{
8998     Label done;
8999     Register Rdst = $dst$$Register;
9000     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9001     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9002     __ blsrl(Rdst, $src$$Address);
9003     __ jccb(Assembler::carryClear, done);
9004     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9005     __ bind(done);
9006   %}
9007 
9008   ins_pipe(ialu_reg_mem);
9009 %}
9010 
9011 // Or Long Register with Register
9012 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9013   match(Set dst (OrL dst src));
9014   effect(KILL cr);
9015   format %{ "OR     $dst.lo,$src.lo\n\t"
9016             "OR     $dst.hi,$src.hi" %}
9017   opcode(0x0B,0x0B);
9018   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9019   ins_pipe( ialu_reg_reg_long );
9020 %}
9021 
9022 // Or Long Register with Immediate
9023 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9024   match(Set dst (OrL dst src));
9025   effect(KILL cr);
9026   format %{ "OR     $dst.lo,$src.lo\n\t"
9027             "OR     $dst.hi,$src.hi" %}
9028   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9029   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Or Long Register with Memory
9034 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9035   match(Set dst (OrL dst (LoadL mem)));
9036   effect(KILL cr);
9037   ins_cost(125);
9038   format %{ "OR     $dst.lo,$mem\n\t"
9039             "OR     $dst.hi,$mem+4" %}
9040   opcode(0x0B,0x0B);
9041   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9042   ins_pipe( ialu_reg_long_mem );
9043 %}
9044 
9045 // Xor Long Register with Register
9046 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9047   match(Set dst (XorL dst src));
9048   effect(KILL cr);
9049   format %{ "XOR    $dst.lo,$src.lo\n\t"
9050             "XOR    $dst.hi,$src.hi" %}
9051   opcode(0x33,0x33);
9052   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9053   ins_pipe( ialu_reg_reg_long );
9054 %}
9055 
9056 // Xor Long Register with Immediate -1
9057 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9058   match(Set dst (XorL dst imm));
9059   format %{ "NOT    $dst.lo\n\t"
9060             "NOT    $dst.hi" %}
9061   ins_encode %{
9062      __ notl($dst$$Register);
9063      __ notl(HIGH_FROM_LOW($dst$$Register));
9064   %}
9065   ins_pipe( ialu_reg_long );
9066 %}
9067 
9068 // Xor Long Register with Immediate
9069 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9070   match(Set dst (XorL dst src));
9071   effect(KILL cr);
9072   format %{ "XOR    $dst.lo,$src.lo\n\t"
9073             "XOR    $dst.hi,$src.hi" %}
9074   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9075   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9076   ins_pipe( ialu_reg_long );
9077 %}
9078 
9079 // Xor Long Register with Memory
9080 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9081   match(Set dst (XorL dst (LoadL mem)));
9082   effect(KILL cr);
9083   ins_cost(125);
9084   format %{ "XOR    $dst.lo,$mem\n\t"
9085             "XOR    $dst.hi,$mem+4" %}
9086   opcode(0x33,0x33);
9087   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9088   ins_pipe( ialu_reg_long_mem );
9089 %}
9090 
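     // A long left shift by one is just an add-with-carry of the value to itself;
     // the small constant-shift forms below simply repeat that ADD/ADC pair.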
9091 // Shift Left Long by 1
9092 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9093   predicate(UseNewLongLShift);
9094   match(Set dst (LShiftL dst cnt));
9095   effect(KILL cr);
9096   ins_cost(100);
9097   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9098             "ADC    $dst.hi,$dst.hi" %}
9099   ins_encode %{
9100     __ addl($dst$$Register,$dst$$Register);
9101     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9102   %}
9103   ins_pipe( ialu_reg_long );
9104 %}
9105 
9106 // Shift Left Long by 2
9107 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9108   predicate(UseNewLongLShift);
9109   match(Set dst (LShiftL dst cnt));
9110   effect(KILL cr);
9111   ins_cost(100);
9112   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9113             "ADC    $dst.hi,$dst.hi\n\t"
9114             "ADD    $dst.lo,$dst.lo\n\t"
9115             "ADC    $dst.hi,$dst.hi" %}
9116   ins_encode %{
9117     __ addl($dst$$Register,$dst$$Register);
9118     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9119     __ addl($dst$$Register,$dst$$Register);
9120     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9121   %}
9122   ins_pipe( ialu_reg_long );
9123 %}
9124 
9125 // Shift Left Long by 3
9126 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9127   predicate(UseNewLongLShift);
9128   match(Set dst (LShiftL dst cnt));
9129   effect(KILL cr);
9130   ins_cost(100);
9131   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9132             "ADC    $dst.hi,$dst.hi\n\t"
9133             "ADD    $dst.lo,$dst.lo\n\t"
9134             "ADC    $dst.hi,$dst.hi\n\t"
9135             "ADD    $dst.lo,$dst.lo\n\t"
9136             "ADC    $dst.hi,$dst.hi" %}
9137   ins_encode %{
9138     __ addl($dst$$Register,$dst$$Register);
9139     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9140     __ addl($dst$$Register,$dst$$Register);
9141     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9142     __ addl($dst$$Register,$dst$$Register);
9143     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9144   %}
9145   ins_pipe( ialu_reg_long );
9146 %}
9147 
9148 // Shift Left Long by 1-31
9149 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9150   match(Set dst (LShiftL dst cnt));
9151   effect(KILL cr);
9152   ins_cost(200);
9153   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9154             "SHL    $dst.lo,$cnt" %}
9155   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9156   ins_encode( move_long_small_shift(dst,cnt) );
9157   ins_pipe( ialu_reg_long );
9158 %}
9159 
9160 // Shift Left Long by 32-63
9161 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9162   match(Set dst (LShiftL dst cnt));
9163   effect(KILL cr);
9164   ins_cost(300);
9165   format %{ "MOV    $dst.hi,$dst.lo\n"
9166           "\tSHL    $dst.hi,$cnt-32\n"
9167           "\tXOR    $dst.lo,$dst.lo" %}
9168   opcode(0xC1, 0x4);  /* C1 /4 ib */
9169   ins_encode( move_long_big_shift_clr(dst,cnt) );
9170   ins_pipe( ialu_reg_long );
9171 %}
9172 
9173 // Shift Left Long by variable
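     // This and the variable right-shift forms below rely on SHLD/SHRD and the
     // plain shifts honoring only the low five bits of CL: counts of 32-63 first
     // move one word across (zeroing or sign-filling the vacated half) and then
     // do the remaining 0-31 bit double shift.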
9174 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9175   match(Set dst (LShiftL dst shift));
9176   effect(KILL cr);
9177   ins_cost(500+200);
9178   size(17);
9179   format %{ "TEST   $shift,32\n\t"
9180             "JEQ,s  small\n\t"
9181             "MOV    $dst.hi,$dst.lo\n\t"
9182             "XOR    $dst.lo,$dst.lo\n"
9183     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9184             "SHL    $dst.lo,$shift" %}
9185   ins_encode( shift_left_long( dst, shift ) );
9186   ins_pipe( pipe_slow );
9187 %}
9188 
9189 // Shift Right Long by 1-31
9190 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9191   match(Set dst (URShiftL dst cnt));
9192   effect(KILL cr);
9193   ins_cost(200);
9194   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9195             "SHR    $dst.hi,$cnt" %}
9196   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9197   ins_encode( move_long_small_shift(dst,cnt) );
9198   ins_pipe( ialu_reg_long );
9199 %}
9200 
9201 // Shift Right Long by 32-63
9202 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9203   match(Set dst (URShiftL dst cnt));
9204   effect(KILL cr);
9205   ins_cost(300);
9206   format %{ "MOV    $dst.lo,$dst.hi\n"
9207           "\tSHR    $dst.lo,$cnt-32\n"
9208           "\tXOR    $dst.hi,$dst.hi" %}
9209   opcode(0xC1, 0x5);  /* C1 /5 ib */
9210   ins_encode( move_long_big_shift_clr(dst,cnt) );
9211   ins_pipe( ialu_reg_long );
9212 %}
9213 
9214 // Shift Right Long by variable
9215 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9216   match(Set dst (URShiftL dst shift));
9217   effect(KILL cr);
9218   ins_cost(600);
9219   size(17);
9220   format %{ "TEST   $shift,32\n\t"
9221             "JEQ,s  small\n\t"
9222             "MOV    $dst.lo,$dst.hi\n\t"
9223             "XOR    $dst.hi,$dst.hi\n"
9224     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9225             "SHR    $dst.hi,$shift" %}
9226   ins_encode( shift_right_long( dst, shift ) );
9227   ins_pipe( pipe_slow );
9228 %}
9229 
9230 // Shift Right arithmetic Long by 1-31
9231 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9232   match(Set dst (RShiftL dst cnt));
9233   effect(KILL cr);
9234   ins_cost(200);
9235   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9236             "SAR    $dst.hi,$cnt" %}
9237   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9238   ins_encode( move_long_small_shift(dst,cnt) );
9239   ins_pipe( ialu_reg_long );
9240 %}
9241 
9242 // Shift Right arithmetic Long by 32-63
9243 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9244   match(Set dst (RShiftL dst cnt));
9245   effect(KILL cr);
9246   ins_cost(300);
9247   format %{ "MOV    $dst.lo,$dst.hi\n"
9248           "\tSAR    $dst.lo,$cnt-32\n"
9249           "\tSAR    $dst.hi,31" %}
9250   opcode(0xC1, 0x7);  /* C1 /7 ib */
9251   ins_encode( move_long_big_shift_sign(dst,cnt) );
9252   ins_pipe( ialu_reg_long );
9253 %}
9254 
9255 // Shift Right arithmetic Long by variable
9256 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9257   match(Set dst (RShiftL dst shift));
9258   effect(KILL cr);
9259   ins_cost(600);
9260   size(18);
9261   format %{ "TEST   $shift,32\n\t"
9262             "JEQ,s  small\n\t"
9263             "MOV    $dst.lo,$dst.hi\n\t"
9264             "SAR    $dst.hi,31\n"
9265     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9266             "SAR    $dst.hi,$shift" %}
9267   ins_encode( shift_right_arith_long( dst, shift ) );
9268   ins_pipe( pipe_slow );
9269 %}
9270 
9271 
9272 //----------Double Instructions------------------------------------------------
9273 // Double Math
9274 
9275 // Compare & branch
9276 
9277 // P6 version of double compare, sets condition codes in EFLAGS
9278 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9279   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9280   match(Set cr (CmpD src1 src2));
9281   effect(KILL rax);
9282   ins_cost(150);
9283   format %{ "FLD    $src1\n\t"
9284             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9285             "JNP    exit\n\t"
9286             "MOV    AH,1       // saw a NaN, set CF\n\t"
9287             "SAHF\n"
9288      "exit:\tNOP               // avoid branch to branch" %}
9289   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9290   ins_encode( Push_Reg_DPR(src1),
9291               OpcP, RegOpc(src2),
9292               cmpF_P6_fixup );
9293   ins_pipe( pipe_slow );
9294 %}
9295 
9296 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9297   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9298   match(Set cr (CmpD src1 src2));
9299   ins_cost(150);
9300   format %{ "FLD    $src1\n\t"
9301             "FUCOMIP ST,$src2  // P6 instruction" %}
9302   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9303   ins_encode( Push_Reg_DPR(src1),
9304               OpcP, RegOpc(src2));
9305   ins_pipe( pipe_slow );
9306 %}
9307 
9308 // Compare & branch
9309 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9310   predicate(UseSSE<=1);
9311   match(Set cr (CmpD src1 src2));
9312   effect(KILL rax);
9313   ins_cost(200);
9314   format %{ "FLD    $src1\n\t"
9315             "FCOMp  $src2\n\t"
9316             "FNSTSW AX\n\t"
9317             "TEST   AX,0x400\n\t"
9318             "JZ,s   flags\n\t"
9319             "MOV    AH,1\t# unordered treat as LT\n"
9320     "flags:\tSAHF" %}
9321   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9322   ins_encode( Push_Reg_DPR(src1),
9323               OpcP, RegOpc(src2),
9324               fpu_flags);
9325   ins_pipe( pipe_slow );
9326 %}
9327 
9328 // Compare vs zero into -1,0,1
9329 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9330   predicate(UseSSE<=1);
9331   match(Set dst (CmpD3 src1 zero));
9332   effect(KILL cr, KILL rax);
9333   ins_cost(280);
9334   format %{ "FTSTD  $dst,$src1" %}
9335   opcode(0xE4, 0xD9);
9336   ins_encode( Push_Reg_DPR(src1),
9337               OpcS, OpcP, PopFPU,
9338               CmpF_Result(dst));
9339   ins_pipe( pipe_slow );
9340 %}
9341 
9342 // Compare into -1,0,1
9343 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9344   predicate(UseSSE<=1);
9345   match(Set dst (CmpD3 src1 src2));
9346   effect(KILL cr, KILL rax);
9347   ins_cost(300);
9348   format %{ "FCMPD  $dst,$src1,$src2" %}
9349   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9350   ins_encode( Push_Reg_DPR(src1),
9351               OpcP, RegOpc(src2),
9352               CmpF_Result(dst));
9353   ins_pipe( pipe_slow );
9354 %}
9355 
9356 // double compare and set condition codes in EFLAGS by XMM regs
9357 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9358   predicate(UseSSE>=2);
9359   match(Set cr (CmpD src1 src2));
9360   ins_cost(145);
9361   format %{ "UCOMISD $src1,$src2\n\t"
9362             "JNP,s   exit\n\t"
9363             "PUSHF\t# saw NaN, set CF\n\t"
9364             "AND     [rsp], #0xffffff2b\n\t"
9365             "POPF\n"
9366     "exit:" %}
9367   ins_encode %{
9368     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9369     emit_cmpfp_fixup(_masm);
9370   %}
9371   ins_pipe( pipe_slow );
9372 %}
9373 
9374 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9375   predicate(UseSSE>=2);
9376   match(Set cr (CmpD src1 src2));
9377   ins_cost(100);
9378   format %{ "UCOMISD $src1,$src2" %}
9379   ins_encode %{
9380     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9381   %}
9382   ins_pipe( pipe_slow );
9383 %}
9384 
9385 // double compare and set condition codes in EFLAGS by XMM regs
9386 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9387   predicate(UseSSE>=2);
9388   match(Set cr (CmpD src1 (LoadD src2)));
9389   ins_cost(145);
9390   format %{ "UCOMISD $src1,$src2\n\t"
9391             "JNP,s   exit\n\t"
9392             "PUSHF\t# saw NaN, set CF\n\t"
9393             "AND     [rsp], #0xffffff2b\n\t"
9394             "POPF\n"
9395     "exit:" %}
9396   ins_encode %{
9397     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9398     emit_cmpfp_fixup(_masm);
9399   %}
9400   ins_pipe( pipe_slow );
9401 %}
9402 
9403 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9404   predicate(UseSSE>=2);
9405   match(Set cr (CmpD src1 (LoadD src2)));
9406   ins_cost(100);
9407   format %{ "UCOMISD $src1,$src2" %}
9408   ins_encode %{
9409     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9410   %}
9411   ins_pipe( pipe_slow );
9412 %}
9413 
9414 // Compare into -1,0,1 in XMM
9415 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9416   predicate(UseSSE>=2);
9417   match(Set dst (CmpD3 src1 src2));
9418   effect(KILL cr);
9419   ins_cost(255);
9420   format %{ "UCOMISD $src1, $src2\n\t"
9421             "MOV     $dst, #-1\n\t"
9422             "JP,s    done\n\t"
9423             "JB,s    done\n\t"
9424             "SETNE   $dst\n\t"
9425             "MOVZB   $dst, $dst\n"
9426     "done:" %}
9427   ins_encode %{
9428     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9429     emit_cmpfp3(_masm, $dst$$Register);
9430   %}
9431   ins_pipe( pipe_slow );
9432 %}
9433 
9434 // Compare into -1,0,1 in XMM and memory
9435 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9436   predicate(UseSSE>=2);
9437   match(Set dst (CmpD3 src1 (LoadD src2)));
9438   effect(KILL cr);
9439   ins_cost(275);
9440   format %{ "UCOMISD $src1, $src2\n\t"
9441             "MOV     $dst, #-1\n\t"
9442             "JP,s    done\n\t"
9443             "JB,s    done\n\t"
9444             "SETNE   $dst\n\t"
9445             "MOVZB   $dst, $dst\n"
9446     "done:" %}
9447   ins_encode %{
9448     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9449     emit_cmpfp3(_masm, $dst$$Register);
9450   %}
9451   ins_pipe( pipe_slow );
9452 %}
9453 
9454 
9455 instruct subDPR_reg(regDPR dst, regDPR src) %{
9456   predicate (UseSSE <=1);
9457   match(Set dst (SubD dst src));
9458 
9459   format %{ "FLD    $src\n\t"
9460             "DSUBp  $dst,ST" %}
9461   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9462   ins_cost(150);
9463   ins_encode( Push_Reg_DPR(src),
9464               OpcP, RegOpc(dst) );
9465   ins_pipe( fpu_reg_reg );
9466 %}
9467 
9468 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9469   predicate (UseSSE <=1);
9470   match(Set dst (RoundDouble (SubD src1 src2)));
9471   ins_cost(250);
9472 
9473   format %{ "FLD    $src2\n\t"
9474             "DSUB   ST,$src1\n\t"
9475             "FSTP_D $dst\t# D-round" %}
9476   opcode(0xD8, 0x5);
9477   ins_encode( Push_Reg_DPR(src2),
9478               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9479   ins_pipe( fpu_mem_reg_reg );
9480 %}
9481 
9482 
9483 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9484   predicate (UseSSE <=1);
9485   match(Set dst (SubD dst (LoadD src)));
9486   ins_cost(150);
9487 
9488   format %{ "FLD    $src\n\t"
9489             "DSUBp  $dst,ST" %}
9490   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9491   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9492               OpcP, RegOpc(dst) );
9493   ins_pipe( fpu_reg_mem );
9494 %}
9495 
9496 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9497   predicate (UseSSE<=1);
9498   match(Set dst (AbsD src));
9499   ins_cost(100);
9500   format %{ "FABS" %}
9501   opcode(0xE1, 0xD9);
9502   ins_encode( OpcS, OpcP );
9503   ins_pipe( fpu_reg_reg );
9504 %}
9505 
9506 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9507   predicate(UseSSE<=1);
9508   match(Set dst (NegD src));
9509   ins_cost(100);
9510   format %{ "FCHS" %}
9511   opcode(0xE0, 0xD9);
9512   ins_encode( OpcS, OpcP );
9513   ins_pipe( fpu_reg_reg );
9514 %}
9515 
9516 instruct addDPR_reg(regDPR dst, regDPR src) %{
9517   predicate(UseSSE<=1);
9518   match(Set dst (AddD dst src));
9519   format %{ "FLD    $src\n\t"
9520             "DADD   $dst,ST" %}
9521   size(4);
9522   ins_cost(150);
9523   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9524   ins_encode( Push_Reg_DPR(src),
9525               OpcP, RegOpc(dst) );
9526   ins_pipe( fpu_reg_reg );
9527 %}
9528 
9529 
9530 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9531   predicate(UseSSE<=1);
9532   match(Set dst (RoundDouble (AddD src1 src2)));
9533   ins_cost(250);
9534 
9535   format %{ "FLD    $src2\n\t"
9536             "DADD   ST,$src1\n\t"
9537             "FSTP_D $dst\t# D-round" %}
9538   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9539   ins_encode( Push_Reg_DPR(src2),
9540               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9541   ins_pipe( fpu_mem_reg_reg );
9542 %}
9543 
9544 
9545 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9546   predicate(UseSSE<=1);
9547   match(Set dst (AddD dst (LoadD src)));
9548   ins_cost(150);
9549 
9550   format %{ "FLD    $src\n\t"
9551             "DADDp  $dst,ST" %}
9552   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9553   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9554               OpcP, RegOpc(dst) );
9555   ins_pipe( fpu_reg_mem );
9556 %}
9557 
9558 // add-to-memory
9559 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9560   predicate(UseSSE<=1);
9561   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9562   ins_cost(150);
9563 
9564   format %{ "FLD_D  $dst\n\t"
9565             "DADD   ST,$src\n\t"
9566             "FST_D  $dst" %}
9567   opcode(0xDD, 0x0);
9568   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9569               Opcode(0xD8), RegOpc(src),
9570               set_instruction_start,
9571               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9572   ins_pipe( fpu_reg_mem );
9573 %}
9574 
9575 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9576   predicate(UseSSE<=1);
9577   match(Set dst (AddD dst con));
9578   ins_cost(125);
9579   format %{ "FLD1\n\t"
9580             "DADDp  $dst,ST" %}
9581   ins_encode %{
9582     __ fld1();
9583     __ faddp($dst$$reg);
9584   %}
9585   ins_pipe(fpu_reg);
9586 %}
9587 
9588 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9589   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9590   match(Set dst (AddD dst con));
9591   ins_cost(200);
9592   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9593             "DADDp  $dst,ST" %}
9594   ins_encode %{
9595     __ fld_d($constantaddress($con));
9596     __ faddp($dst$$reg);
9597   %}
9598   ins_pipe(fpu_reg_mem);
9599 %}
9600 
9601 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9602   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9603   match(Set dst (RoundDouble (AddD src con)));
9604   ins_cost(200);
9605   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9606             "DADD   ST,$src\n\t"
9607             "FSTP_D $dst\t# D-round" %}
9608   ins_encode %{
9609     __ fld_d($constantaddress($con));
9610     __ fadd($src$$reg);
9611     __ fstp_d(Address(rsp, $dst$$disp));
9612   %}
9613   ins_pipe(fpu_mem_reg_con);
9614 %}
9615 
9616 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9617   predicate(UseSSE<=1);
9618   match(Set dst (MulD dst src));
9619   format %{ "FLD    $src\n\t"
9620             "DMULp  $dst,ST" %}
9621   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9622   ins_cost(150);
9623   ins_encode( Push_Reg_DPR(src),
9624               OpcP, RegOpc(dst) );
9625   ins_pipe( fpu_reg_reg );
9626 %}
9627 
9628 // Strict FP instruction biases argument before multiply then
9629 // biases result to avoid double rounding of subnormals.
9630 //
9631 // scale arg1 by multiplying arg1 by 2^(-15360)
9632 // load arg2
9633 // multiply scaled arg1 by arg2
9634 // rescale product by 2^(15360)
9635 //
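// A sketch of the intended identity (the bias constants live in
// StubRoutines::_fpu_subnormal_bias1/_bias2; the steps above imply they are
// 2^-15360 and 2^+15360):
//
//   result = ((arg1 * 2^-15360) * arg2) * 2^+15360
//
// 15360 is the difference between the extended and double exponent biases
// (16383 - 1023), so a product that must be a double subnormal lands in the
// x87 subnormal range and is rounded exactly once at the reduced precision;
// the final rescale only restores the exponent and is exact.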
9636 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9637   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9638   match(Set dst (MulD dst src));
9639   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9640 
9641   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9642             "DMULp  $dst,ST\n\t"
9643             "FLD    $src\n\t"
9644             "DMULp  $dst,ST\n\t"
9645             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9646             "DMULp  $dst,ST\n\t" %}
9647   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9648   ins_encode( strictfp_bias1(dst),
9649               Push_Reg_DPR(src),
9650               OpcP, RegOpc(dst),
9651               strictfp_bias2(dst) );
9652   ins_pipe( fpu_reg_reg );
9653 %}
9654 
9655 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9656   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9657   match(Set dst (MulD dst con));
9658   ins_cost(200);
9659   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9660             "DMULp  $dst,ST" %}
9661   ins_encode %{
9662     __ fld_d($constantaddress($con));
9663     __ fmulp($dst$$reg);
9664   %}
9665   ins_pipe(fpu_reg_mem);
9666 %}
9667 
9668 
9669 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9670   predicate( UseSSE<=1 );
9671   match(Set dst (MulD dst (LoadD src)));
9672   ins_cost(200);
9673   format %{ "FLD_D  $src\n\t"
9674             "DMULp  $dst,ST" %}
9675   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9676   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9677               OpcP, RegOpc(dst) );
9678   ins_pipe( fpu_reg_mem );
9679 %}
9680 
9681 //
9682 // Cisc-alternate to reg-reg multiply
9683 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9684   predicate( UseSSE<=1 );
9685   match(Set dst (MulD src (LoadD mem)));
9686   ins_cost(250);
9687   format %{ "FLD_D  $mem\n\t"
9688             "DMUL   ST,$src\n\t"
9689             "FSTP_D $dst" %}
9690   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9691   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9692               OpcReg_FPR(src),
9693               Pop_Reg_DPR(dst) );
9694   ins_pipe( fpu_reg_reg_mem );
9695 %}
9696 
9697 
9698 // MACRO3 -- addDPR a mulDPR
9699 // This instruction is a '2-address' instruction in that the result goes
9700 // back to src2.  This eliminates a move from the macro; possibly the
9701 // register allocator will have to add it back (and maybe not).
9702 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9703   predicate( UseSSE<=1 );
9704   match(Set src2 (AddD (MulD src0 src1) src2));
9705   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9706             "DMUL   ST,$src1\n\t"
9707             "DADDp  $src2,ST" %}
9708   ins_cost(250);
9709   opcode(0xDD); /* LoadD DD /0 */
9710   ins_encode( Push_Reg_FPR(src0),
9711               FMul_ST_reg(src1),
9712               FAddP_reg_ST(src2) );
9713   ins_pipe( fpu_reg_reg_reg );
9714 %}
9715 
9716 
9717 // MACRO3 -- subDPR a mulDPR
9718 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9719   predicate( UseSSE<=1 );
9720   match(Set src2 (SubD (MulD src0 src1) src2));
9721   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9722             "DMUL   ST,$src1\n\t"
9723             "DSUBRp $src2,ST" %}
9724   ins_cost(250);
9725   ins_encode( Push_Reg_FPR(src0),
9726               FMul_ST_reg(src1),
9727               Opcode(0xDE), Opc_plus(0xE0,src2));
9728   ins_pipe( fpu_reg_reg_reg );
9729 %}
9730 
9731 
9732 instruct divDPR_reg(regDPR dst, regDPR src) %{
9733   predicate( UseSSE<=1 );
9734   match(Set dst (DivD dst src));
9735 
9736   format %{ "FLD    $src\n\t"
9737             "FDIVp  $dst,ST" %}
9738   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9739   ins_cost(150);
9740   ins_encode( Push_Reg_DPR(src),
9741               OpcP, RegOpc(dst) );
9742   ins_pipe( fpu_reg_reg );
9743 %}
9744 
9745 // Strict FP instruction biases argument before division then
9746 // biases result, to avoid double rounding of subnormals.
9747 //
9748 // scale dividend by multiplying dividend by 2^(-15360)
9749 // load divisor
9750 // divide scaled dividend by divisor
9751 // rescale quotient by 2^(15360)
9752 //
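// The same biasing identity as for the strict multiply above, with the
// dividend playing the role of arg1:
//
//   result = ((dividend * 2^-15360) / divisor) * 2^+15360
//
// A quotient that must be a double subnormal is denormalized (and therefore
// rounded) once at the correct precision; the rescale by 2^+15360 is exact.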
9753 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9758 
9759   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9760             "DMULp  $dst,ST\n\t"
9761             "FLD    $src\n\t"
9762             "FDIVp  $dst,ST\n\t"
9763             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9764             "DMULp  $dst,ST\n\t" %}
9765   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9766   ins_encode( strictfp_bias1(dst),
9767               Push_Reg_DPR(src),
9768               OpcP, RegOpc(dst),
9769               strictfp_bias2(dst) );
9770   ins_pipe( fpu_reg_reg );
9771 %}
9772 
9773 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9774   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9775   match(Set dst (RoundDouble (DivD src1 src2)));
9776 
9777   format %{ "FLD    $src1\n\t"
9778             "FDIV   ST,$src2\n\t"
9779             "FSTP_D $dst\t# D-round" %}
9780   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9781   ins_encode( Push_Reg_DPR(src1),
9782               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9783   ins_pipe( fpu_mem_reg_reg );
9784 %}
9785 
9786 
9787 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9788   predicate(UseSSE<=1);
9789   match(Set dst (ModD dst src));
9790   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9791 
9792   format %{ "DMOD   $dst,$src" %}
9793   ins_cost(250);
9794   ins_encode(Push_Reg_Mod_DPR(dst, src),
9795               emitModDPR(),
9796               Push_Result_Mod_DPR(src),
9797               Pop_Reg_DPR(dst));
9798   ins_pipe( pipe_slow );
9799 %}
9800 
9801 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9802   predicate(UseSSE>=2);
9803   match(Set dst (ModD src0 src1));
9804   effect(KILL rax, KILL cr);
9805 
9806   format %{ "SUB    ESP,8\t # DMOD\n"
9807           "\tMOVSD  [ESP+0],$src1\n"
9808           "\tFLD_D  [ESP+0]\n"
9809           "\tMOVSD  [ESP+0],$src0\n"
9810           "\tFLD_D  [ESP+0]\n"
9811      "loop:\tFPREM\n"
9812           "\tFWAIT\n"
9813           "\tFNSTSW AX\n"
9814           "\tSAHF\n"
9815           "\tJP     loop\n"
9816           "\tFSTP_D [ESP+0]\n"
9817           "\tMOVSD  $dst,[ESP+0]\n"
9818           "\tADD    ESP,8\n"
9819           "\tFSTP   ST0\t # Restore FPU Stack"
9820     %}
9821   ins_cost(250);
9822   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9823   ins_pipe( pipe_slow );
9824 %}
9825 
9826 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9827   predicate (UseSSE<=1);
9828   match(Set dst(TanD src));
9829   format %{ "DTAN   $dst" %}
9830   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9831               Opcode(0xDD), Opcode(0xD8));   // fstp st
9832   ins_pipe( pipe_slow );
9833 %}
9834 
9835 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9836   predicate (UseSSE>=2);
9837   match(Set dst(TanD dst));
9838   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9839   format %{ "DTAN   $dst" %}
9840   ins_encode( Push_SrcD(dst),
9841               Opcode(0xD9), Opcode(0xF2),    // fptan
9842               Opcode(0xDD), Opcode(0xD8),   // fstp st
9843               Push_ResultD(dst) );
9844   ins_pipe( pipe_slow );
9845 %}
9846 
9847 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9848   predicate (UseSSE<=1);
9849   match(Set dst(AtanD dst src));
9850   format %{ "DATA   $dst,$src" %}
9851   opcode(0xD9, 0xF3);
9852   ins_encode( Push_Reg_DPR(src),
9853               OpcP, OpcS, RegOpc(dst) );
9854   ins_pipe( pipe_slow );
9855 %}
9856 
9857 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9858   predicate (UseSSE>=2);
9859   match(Set dst(AtanD dst src));
9860   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9861   format %{ "DATA   $dst,$src" %}
9862   opcode(0xD9, 0xF3);
9863   ins_encode( Push_SrcD(src),
9864               OpcP, OpcS, Push_ResultD(dst) );
9865   ins_pipe( pipe_slow );
9866 %}
9867 
9868 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9869   predicate (UseSSE<=1);
9870   match(Set dst (SqrtD src));
9871   format %{ "DSQRT  $dst,$src" %}
9872   opcode(0xFA, 0xD9);
9873   ins_encode( Push_Reg_DPR(src),
9874               OpcS, OpcP, Pop_Reg_DPR(dst) );
9875   ins_pipe( pipe_slow );
9876 %}
9877 
9878 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9879   predicate (UseSSE<=1);
  // The source Double operand is on the FPU stack
9881   match(Set dst (Log10D src));
9882   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9883   // fxch         ; swap ST(0) with ST(1)
9884   // fyl2x        ; compute log_10(2) * log_2(x)
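  // The identity being exploited (ordinary logarithm algebra):
  //   log_10(x) = log_10(2) * log_2(x)
  // FYL2X computes ST(1) * log_2(ST(0)) and pops, so once FLDLG2 has pushed
  // log_10(2) and FXCH has put x back on top, a single FYL2X yields log_10(x).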
9885   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9886             "FXCH   \n\t"
9887             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9888          %}
9889   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9890               Opcode(0xD9), Opcode(0xC9),   // fxch
9891               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9892 
9893   ins_pipe( pipe_slow );
9894 %}
9895 
9896 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9897   predicate (UseSSE>=2);
9898   effect(KILL cr);
9899   match(Set dst (Log10D src));
9900   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9901   // fyl2x        ; compute log_10(2) * log_2(x)
9902   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9903             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9904          %}
9905   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9906               Push_SrcD(src),
9907               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9908               Push_ResultD(dst));
9909 
9910   ins_pipe( pipe_slow );
9911 %}
9912 
9913 //-------------Float Instructions-------------------------------
9914 // Float Math
9915 
9916 // Code for float compare:
9917 //     fcompp();
9918 //     fwait(); fnstsw_ax();
9919 //     sahf();
9920 //     movl(dst, unordered_result);
9921 //     jcc(Assembler::parity, exit);
9922 //     movl(dst, less_result);
9923 //     jcc(Assembler::below, exit);
9924 //     movl(dst, equal_result);
9925 //     jcc(Assembler::equal, exit);
9926 //     movl(dst, greater_result);
9927 //   exit:
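//
// For reference, the EFLAGS produced by the unordered compares used below
// (FUCOMIP, or FNSTSW/SAHF, on the FPU paths; UCOMISS/UCOMISD on the XMM
// paths) follow the standard IA-32 mapping:
//   unordered -> ZF=1 PF=1 CF=1
//   less      -> CF=1
//   equal     -> ZF=1
//   greater   -> ZF=PF=CF=0
// which is why the -1,0,1 forms test parity first, then below, then not-equal.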
9928 
9929 // P6 version of float compare, sets condition codes in EFLAGS
9930 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9931   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9932   match(Set cr (CmpF src1 src2));
9933   effect(KILL rax);
9934   ins_cost(150);
9935   format %{ "FLD    $src1\n\t"
9936             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9937             "JNP    exit\n\t"
9938             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9939             "SAHF\n"
9940      "exit:\tNOP               // avoid branch to branch" %}
9941   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9942   ins_encode( Push_Reg_DPR(src1),
9943               OpcP, RegOpc(src2),
9944               cmpF_P6_fixup );
9945   ins_pipe( pipe_slow );
9946 %}
9947 
9948 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9949   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9950   match(Set cr (CmpF src1 src2));
9951   ins_cost(100);
9952   format %{ "FLD    $src1\n\t"
9953             "FUCOMIP ST,$src2  // P6 instruction" %}
9954   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9955   ins_encode( Push_Reg_DPR(src1),
9956               OpcP, RegOpc(src2));
9957   ins_pipe( pipe_slow );
9958 %}
9959 
9960 
9961 // Compare & branch
9962 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9963   predicate(UseSSE == 0);
9964   match(Set cr (CmpF src1 src2));
9965   effect(KILL rax);
9966   ins_cost(200);
9967   format %{ "FLD    $src1\n\t"
9968             "FCOMp  $src2\n\t"
9969             "FNSTSW AX\n\t"
9970             "TEST   AX,0x400\n\t"
9971             "JZ,s   flags\n\t"
9972             "MOV    AH,1\t# unordered treat as LT\n"
9973     "flags:\tSAHF" %}
9974   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9975   ins_encode( Push_Reg_DPR(src1),
9976               OpcP, RegOpc(src2),
9977               fpu_flags);
9978   ins_pipe( pipe_slow );
9979 %}
9980 
9981 // Compare vs zero into -1,0,1
9982 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9983   predicate(UseSSE == 0);
9984   match(Set dst (CmpF3 src1 zero));
9985   effect(KILL cr, KILL rax);
9986   ins_cost(280);
9987   format %{ "FTSTF  $dst,$src1" %}
9988   opcode(0xE4, 0xD9);
9989   ins_encode( Push_Reg_DPR(src1),
9990               OpcS, OpcP, PopFPU,
9991               CmpF_Result(dst));
9992   ins_pipe( pipe_slow );
9993 %}
9994 
9995 // Compare into -1,0,1
9996 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9997   predicate(UseSSE == 0);
9998   match(Set dst (CmpF3 src1 src2));
9999   effect(KILL cr, KILL rax);
10000   ins_cost(300);
10001   format %{ "FCMPF  $dst,$src1,$src2" %}
10002   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10003   ins_encode( Push_Reg_DPR(src1),
10004               OpcP, RegOpc(src2),
10005               CmpF_Result(dst));
10006   ins_pipe( pipe_slow );
10007 %}
10008 
10009 // float compare and set condition codes in EFLAGS by XMM regs
10010 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10011   predicate(UseSSE>=1);
10012   match(Set cr (CmpF src1 src2));
10013   ins_cost(145);
10014   format %{ "UCOMISS $src1,$src2\n\t"
10015             "JNP,s   exit\n\t"
10016             "PUSHF\t# saw NaN, set CF\n\t"
10017             "AND     [rsp], #0xffffff2b\n\t"
10018             "POPF\n"
10019     "exit:" %}
10020   ins_encode %{
10021     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10022     emit_cmpfp_fixup(_masm);
10023   %}
10024   ins_pipe( pipe_slow );
10025 %}
10026 
10027 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10028   predicate(UseSSE>=1);
10029   match(Set cr (CmpF src1 src2));
10030   ins_cost(100);
10031   format %{ "UCOMISS $src1,$src2" %}
10032   ins_encode %{
10033     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10034   %}
10035   ins_pipe( pipe_slow );
10036 %}
10037 
10038 // float compare and set condition codes in EFLAGS by XMM regs
10039 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10040   predicate(UseSSE>=1);
10041   match(Set cr (CmpF src1 (LoadF src2)));
10042   ins_cost(165);
10043   format %{ "UCOMISS $src1,$src2\n\t"
10044             "JNP,s   exit\n\t"
10045             "PUSHF\t# saw NaN, set CF\n\t"
10046             "AND     [rsp], #0xffffff2b\n\t"
10047             "POPF\n"
10048     "exit:" %}
10049   ins_encode %{
10050     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10051     emit_cmpfp_fixup(_masm);
10052   %}
10053   ins_pipe( pipe_slow );
10054 %}
10055 
10056 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10057   predicate(UseSSE>=1);
10058   match(Set cr (CmpF src1 (LoadF src2)));
10059   ins_cost(100);
10060   format %{ "UCOMISS $src1,$src2" %}
10061   ins_encode %{
10062     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10063   %}
10064   ins_pipe( pipe_slow );
10065 %}
10066 
10067 // Compare into -1,0,1 in XMM
10068 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10069   predicate(UseSSE>=1);
10070   match(Set dst (CmpF3 src1 src2));
10071   effect(KILL cr);
10072   ins_cost(255);
10073   format %{ "UCOMISS $src1, $src2\n\t"
10074             "MOV     $dst, #-1\n\t"
10075             "JP,s    done\n\t"
10076             "JB,s    done\n\t"
10077             "SETNE   $dst\n\t"
10078             "MOVZB   $dst, $dst\n"
10079     "done:" %}
10080   ins_encode %{
10081     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10082     emit_cmpfp3(_masm, $dst$$Register);
10083   %}
10084   ins_pipe( pipe_slow );
10085 %}
10086 
10087 // Compare into -1,0,1 in XMM and memory
10088 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10089   predicate(UseSSE>=1);
10090   match(Set dst (CmpF3 src1 (LoadF src2)));
10091   effect(KILL cr);
10092   ins_cost(275);
10093   format %{ "UCOMISS $src1, $src2\n\t"
10094             "MOV     $dst, #-1\n\t"
10095             "JP,s    done\n\t"
10096             "JB,s    done\n\t"
10097             "SETNE   $dst\n\t"
10098             "MOVZB   $dst, $dst\n"
10099     "done:" %}
10100   ins_encode %{
10101     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10102     emit_cmpfp3(_masm, $dst$$Register);
10103   %}
10104   ins_pipe( pipe_slow );
10105 %}
10106 
10107 // Spill to obtain 24-bit precision
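// (These spill forms rely on FST_S/FSTP_S storing the 80-bit x87 value as an
//  IEEE single, which is what actually forces the 24-bit rounding.)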
10108 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10109   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10110   match(Set dst (SubF src1 src2));
10111 
10112   format %{ "FSUB   $dst,$src1 - $src2" %}
10113   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10114   ins_encode( Push_Reg_FPR(src1),
10115               OpcReg_FPR(src2),
10116               Pop_Mem_FPR(dst) );
10117   ins_pipe( fpu_mem_reg_reg );
10118 %}
10119 //
10120 // This instruction does not round to 24-bits
10121 instruct subFPR_reg(regFPR dst, regFPR src) %{
10122   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10123   match(Set dst (SubF dst src));
10124 
10125   format %{ "FSUB   $dst,$src" %}
10126   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10127   ins_encode( Push_Reg_FPR(src),
10128               OpcP, RegOpc(dst) );
10129   ins_pipe( fpu_reg_reg );
10130 %}
10131 
10132 // Spill to obtain 24-bit precision
10133 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10134   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10135   match(Set dst (AddF src1 src2));
10136 
10137   format %{ "FADD   $dst,$src1,$src2" %}
10138   opcode(0xD8, 0x0); /* D8 C0+i */
10139   ins_encode( Push_Reg_FPR(src2),
10140               OpcReg_FPR(src1),
10141               Pop_Mem_FPR(dst) );
10142   ins_pipe( fpu_mem_reg_reg );
10143 %}
10144 //
10145 // This instruction does not round to 24-bits
10146 instruct addFPR_reg(regFPR dst, regFPR src) %{
10147   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10148   match(Set dst (AddF dst src));
10149 
10150   format %{ "FLD    $src\n\t"
10151             "FADDp  $dst,ST" %}
10152   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10153   ins_encode( Push_Reg_FPR(src),
10154               OpcP, RegOpc(dst) );
10155   ins_pipe( fpu_reg_reg );
10156 %}
10157 
10158 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10159   predicate(UseSSE==0);
10160   match(Set dst (AbsF src));
10161   ins_cost(100);
10162   format %{ "FABS" %}
10163   opcode(0xE1, 0xD9);
10164   ins_encode( OpcS, OpcP );
10165   ins_pipe( fpu_reg_reg );
10166 %}
10167 
10168 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10169   predicate(UseSSE==0);
10170   match(Set dst (NegF src));
10171   ins_cost(100);
10172   format %{ "FCHS" %}
10173   opcode(0xE0, 0xD9);
10174   ins_encode( OpcS, OpcP );
10175   ins_pipe( fpu_reg_reg );
10176 %}
10177 
10178 // Cisc-alternate to addFPR_reg
10179 // Spill to obtain 24-bit precision
10180 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10181   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10182   match(Set dst (AddF src1 (LoadF src2)));
10183 
10184   format %{ "FLD    $src2\n\t"
10185             "FADD   ST,$src1\n\t"
10186             "FSTP_S $dst" %}
10187   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10188   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10189               OpcReg_FPR(src1),
10190               Pop_Mem_FPR(dst) );
10191   ins_pipe( fpu_mem_reg_mem );
10192 %}
10193 //
10194 // Cisc-alternate to addFPR_reg
10195 // This instruction does not round to 24-bits
10196 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10197   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10198   match(Set dst (AddF dst (LoadF src)));
10199 
10200   format %{ "FADD   $dst,$src" %}
10201   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10202   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10203               OpcP, RegOpc(dst) );
10204   ins_pipe( fpu_reg_mem );
10205 %}
10206 
// The following two instructions are for _222_mpegaudio
10208 // Spill to obtain 24-bit precision
10209 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10210   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10211   match(Set dst (AddF src1 src2));
10212 
10213   format %{ "FADD   $dst,$src1,$src2" %}
10214   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10215   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10216               OpcReg_FPR(src2),
10217               Pop_Mem_FPR(dst) );
10218   ins_pipe( fpu_mem_reg_mem );
10219 %}
10220 
10221 // Cisc-spill variant
10222 // Spill to obtain 24-bit precision
10223 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10224   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10225   match(Set dst (AddF src1 (LoadF src2)));
10226 
10227   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10228   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10229   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10230               set_instruction_start,
10231               OpcP, RMopc_Mem(secondary,src1),
10232               Pop_Mem_FPR(dst) );
10233   ins_pipe( fpu_mem_mem_mem );
10234 %}
10235 
10236 // Spill to obtain 24-bit precision
10237 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10238   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10239   match(Set dst (AddF src1 src2));
10240 
10241   format %{ "FADD   $dst,$src1,$src2" %}
10242   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10243   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10244               set_instruction_start,
10245               OpcP, RMopc_Mem(secondary,src1),
10246               Pop_Mem_FPR(dst) );
10247   ins_pipe( fpu_mem_mem_mem );
10248 %}
10249 
10250 
10251 // Spill to obtain 24-bit precision
10252 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10253   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254   match(Set dst (AddF src con));
10255   format %{ "FLD    $src\n\t"
10256             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10257             "FSTP_S $dst"  %}
10258   ins_encode %{
10259     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10260     __ fadd_s($constantaddress($con));
10261     __ fstp_s(Address(rsp, $dst$$disp));
10262   %}
10263   ins_pipe(fpu_mem_reg_con);
10264 %}
10265 //
10266 // This instruction does not round to 24-bits
10267 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (AddF src con));
10270   format %{ "FLD    $src\n\t"
10271             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10272             "FSTP   $dst"  %}
10273   ins_encode %{
10274     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10275     __ fadd_s($constantaddress($con));
10276     __ fstp_d($dst$$reg);
10277   %}
10278   ins_pipe(fpu_reg_reg_con);
10279 %}
10280 
10281 // Spill to obtain 24-bit precision
10282 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10283   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10284   match(Set dst (MulF src1 src2));
10285 
10286   format %{ "FLD    $src1\n\t"
10287             "FMUL   $src2\n\t"
10288             "FSTP_S $dst"  %}
10289   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10290   ins_encode( Push_Reg_FPR(src1),
10291               OpcReg_FPR(src2),
10292               Pop_Mem_FPR(dst) );
10293   ins_pipe( fpu_mem_reg_reg );
10294 %}
10295 //
10296 // This instruction does not round to 24-bits
10297 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10298   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10299   match(Set dst (MulF src1 src2));
10300 
10301   format %{ "FLD    $src1\n\t"
10302             "FMUL   $src2\n\t"
10303             "FSTP_S $dst"  %}
10304   opcode(0xD8, 0x1); /* D8 C8+i */
10305   ins_encode( Push_Reg_FPR(src2),
10306               OpcReg_FPR(src1),
10307               Pop_Reg_FPR(dst) );
10308   ins_pipe( fpu_reg_reg_reg );
10309 %}
10310 
10311 
10312 // Spill to obtain 24-bit precision
10313 // Cisc-alternate to reg-reg multiply
10314 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10315   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10316   match(Set dst (MulF src1 (LoadF src2)));
10317 
10318   format %{ "FLD_S  $src2\n\t"
10319             "FMUL   $src1\n\t"
10320             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10322   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10323               OpcReg_FPR(src1),
10324               Pop_Mem_FPR(dst) );
10325   ins_pipe( fpu_mem_reg_mem );
10326 %}
10327 //
10328 // This instruction does not round to 24-bits
10329 // Cisc-alternate to reg-reg multiply
10330 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10331   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10332   match(Set dst (MulF src1 (LoadF src2)));
10333 
10334   format %{ "FMUL   $dst,$src1,$src2" %}
10335   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10336   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10337               OpcReg_FPR(src1),
10338               Pop_Reg_FPR(dst) );
10339   ins_pipe( fpu_reg_reg_mem );
10340 %}
10341 
10342 // Spill to obtain 24-bit precision
10343 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10344   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10345   match(Set dst (MulF src1 src2));
10346 
10347   format %{ "FMUL   $dst,$src1,$src2" %}
10348   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10349   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10350               set_instruction_start,
10351               OpcP, RMopc_Mem(secondary,src1),
10352               Pop_Mem_FPR(dst) );
10353   ins_pipe( fpu_mem_mem_mem );
10354 %}
10355 
10356 // Spill to obtain 24-bit precision
10357 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10358   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359   match(Set dst (MulF src con));
10360 
10361   format %{ "FLD    $src\n\t"
10362             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10363             "FSTP_S $dst"  %}
10364   ins_encode %{
10365     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10366     __ fmul_s($constantaddress($con));
10367     __ fstp_s(Address(rsp, $dst$$disp));
10368   %}
10369   ins_pipe(fpu_mem_reg_con);
10370 %}
10371 //
10372 // This instruction does not round to 24-bits
10373 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10374   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10375   match(Set dst (MulF src con));
10376 
10377   format %{ "FLD    $src\n\t"
10378             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10379             "FSTP   $dst"  %}
10380   ins_encode %{
10381     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10382     __ fmul_s($constantaddress($con));
10383     __ fstp_d($dst$$reg);
10384   %}
10385   ins_pipe(fpu_reg_reg_con);
10386 %}
10387 
10388 
10389 //
10390 // MACRO1 -- subsume unshared load into mulFPR
10391 // This instruction does not round to 24-bits
10392 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10393   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10394   match(Set dst (MulF (LoadF mem1) src));
10395 
10396   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10397             "FMUL   ST,$src\n\t"
10398             "FSTP   $dst" %}
10399   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10400   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10401               OpcReg_FPR(src),
10402               Pop_Reg_FPR(dst) );
10403   ins_pipe( fpu_reg_reg_mem );
10404 %}
10405 //
10406 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10407 // This instruction does not round to 24-bits
10408 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10409   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10410   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10411   ins_cost(95);
10412 
10413   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10414             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10415             "FADD   ST,$src2\n\t"
10416             "FSTP   $dst" %}
10417   opcode(0xD9); /* LoadF D9 /0 */
10418   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10419               FMul_ST_reg(src1),
10420               FAdd_ST_reg(src2),
10421               Pop_Reg_FPR(dst) );
10422   ins_pipe( fpu_reg_mem_reg_reg );
10423 %}
10424 
10425 // MACRO3 -- addFPR a mulFPR
10426 // This instruction does not round to 24-bits.  It is a '2-address'
10427 // instruction in that the result goes back to src2.  This eliminates
10428 // a move from the macro; possibly the register allocator will have
10429 // to add it back (and maybe not).
10430 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10431   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10432   match(Set src2 (AddF (MulF src0 src1) src2));
10433 
10434   format %{ "FLD    $src0     ===MACRO3===\n\t"
10435             "FMUL   ST,$src1\n\t"
10436             "FADDP  $src2,ST" %}
10437   opcode(0xD9); /* LoadF D9 /0 */
10438   ins_encode( Push_Reg_FPR(src0),
10439               FMul_ST_reg(src1),
10440               FAddP_reg_ST(src2) );
10441   ins_pipe( fpu_reg_reg_reg );
10442 %}
10443 
10444 // MACRO4 -- divFPR subFPR
10445 // This instruction does not round to 24-bits
10446 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10447   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10448   match(Set dst (DivF (SubF src2 src1) src3));
10449 
10450   format %{ "FLD    $src2   ===MACRO4===\n\t"
10451             "FSUB   ST,$src1\n\t"
10452             "FDIV   ST,$src3\n\t"
10453             "FSTP  $dst" %}
10454   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10455   ins_encode( Push_Reg_FPR(src2),
10456               subFPR_divFPR_encode(src1,src3),
10457               Pop_Reg_FPR(dst) );
10458   ins_pipe( fpu_reg_reg_reg_reg );
10459 %}
10460 
10461 // Spill to obtain 24-bit precision
10462 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10463   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10464   match(Set dst (DivF src1 src2));
10465 
10466   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10468   ins_encode( Push_Reg_FPR(src1),
10469               OpcReg_FPR(src2),
10470               Pop_Mem_FPR(dst) );
10471   ins_pipe( fpu_mem_reg_reg );
10472 %}
10473 //
10474 // This instruction does not round to 24-bits
10475 instruct divFPR_reg(regFPR dst, regFPR src) %{
10476   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10477   match(Set dst (DivF dst src));
10478 
10479   format %{ "FDIV   $dst,$src" %}
10480   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10481   ins_encode( Push_Reg_FPR(src),
10482               OpcP, RegOpc(dst) );
10483   ins_pipe( fpu_reg_reg );
10484 %}
10485 
10486 
10487 // Spill to obtain 24-bit precision
10488 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10489   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10490   match(Set dst (ModF src1 src2));
10491   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10492 
10493   format %{ "FMOD   $dst,$src1,$src2" %}
10494   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10495               emitModDPR(),
10496               Push_Result_Mod_DPR(src2),
10497               Pop_Mem_FPR(dst));
10498   ins_pipe( pipe_slow );
10499 %}
10500 //
10501 // This instruction does not round to 24-bits
10502 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10503   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10504   match(Set dst (ModF dst src));
10505   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10506 
10507   format %{ "FMOD   $dst,$src" %}
10508   ins_encode(Push_Reg_Mod_DPR(dst, src),
10509               emitModDPR(),
10510               Push_Result_Mod_DPR(src),
10511               Pop_Reg_FPR(dst));
10512   ins_pipe( pipe_slow );
10513 %}
10514 
10515 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10516   predicate(UseSSE>=1);
10517   match(Set dst (ModF src0 src1));
10518   effect(KILL rax, KILL cr);
10519   format %{ "SUB    ESP,4\t # FMOD\n"
10520           "\tMOVSS  [ESP+0],$src1\n"
10521           "\tFLD_S  [ESP+0]\n"
10522           "\tMOVSS  [ESP+0],$src0\n"
10523           "\tFLD_S  [ESP+0]\n"
10524      "loop:\tFPREM\n"
10525           "\tFWAIT\n"
10526           "\tFNSTSW AX\n"
10527           "\tSAHF\n"
10528           "\tJP     loop\n"
10529           "\tFSTP_S [ESP+0]\n"
10530           "\tMOVSS  $dst,[ESP+0]\n"
10531           "\tADD    ESP,4\n"
10532           "\tFSTP   ST0\t # Restore FPU Stack"
10533     %}
10534   ins_cost(250);
10535   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10536   ins_pipe( pipe_slow );
10537 %}
10538 
10539 
10540 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10542 
10543 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10544   predicate(UseSSE==0);
10545   match(Set dst (RoundFloat src));
10546   ins_cost(125);
10547   format %{ "FST_S  $dst,$src\t# F-round" %}
10548   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10549   ins_pipe( fpu_mem_reg );
10550 %}
10551 
10552 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10553   predicate(UseSSE<=1);
10554   match(Set dst (RoundDouble src));
10555   ins_cost(125);
10556   format %{ "FST_D  $dst,$src\t# D-round" %}
10557   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10558   ins_pipe( fpu_mem_reg );
10559 %}
10560 
// Force rounding to 24-bit precision and 8-bit exponent
10562 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10563   predicate(UseSSE==0);
10564   match(Set dst (ConvD2F src));
10565   format %{ "FST_S  $dst,$src\t# F-round" %}
10566   expand %{
10567     roundFloat_mem_reg(dst,src);
10568   %}
10569 %}
10570 
// Force rounding to 24-bit precision and 8-bit exponent
10572 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10573   predicate(UseSSE==1);
10574   match(Set dst (ConvD2F src));
10575   effect( KILL cr );
10576   format %{ "SUB    ESP,4\n\t"
10577             "FST_S  [ESP],$src\t# F-round\n\t"
10578             "MOVSS  $dst,[ESP]\n\t"
10579             "ADD ESP,4" %}
10580   ins_encode %{
10581     __ subptr(rsp, 4);
10582     if ($src$$reg != FPR1L_enc) {
10583       __ fld_s($src$$reg-1);
10584       __ fstp_s(Address(rsp, 0));
10585     } else {
10586       __ fst_s(Address(rsp, 0));
10587     }
10588     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10589     __ addptr(rsp, 4);
10590   %}
10591   ins_pipe( pipe_slow );
10592 %}
10593 
10594 // Force rounding double precision to single precision
10595 instruct convD2F_reg(regF dst, regD src) %{
10596   predicate(UseSSE>=2);
10597   match(Set dst (ConvD2F src));
10598   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10599   ins_encode %{
10600     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10601   %}
10602   ins_pipe( pipe_slow );
10603 %}
10604 
10605 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10606   predicate(UseSSE==0);
10607   match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
10609   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10610   ins_pipe( fpu_reg_reg );
10611 %}
10612 
10613 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10614   predicate(UseSSE==1);
10615   match(Set dst (ConvF2D src));
10616   format %{ "FST_D  $dst,$src\t# D-round" %}
10617   expand %{
10618     roundDouble_mem_reg(dst,src);
10619   %}
10620 %}
10621 
10622 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10623   predicate(UseSSE==1);
10624   match(Set dst (ConvF2D src));
10625   effect( KILL cr );
10626   format %{ "SUB    ESP,4\n\t"
10627             "MOVSS  [ESP] $src\n\t"
10628             "FLD_S  [ESP]\n\t"
10629             "ADD    ESP,4\n\t"
10630             "FSTP   $dst\t# D-round" %}
10631   ins_encode %{
10632     __ subptr(rsp, 4);
10633     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10634     __ fld_s(Address(rsp, 0));
10635     __ addptr(rsp, 4);
10636     __ fstp_d($dst$$reg);
10637   %}
10638   ins_pipe( pipe_slow );
10639 %}
10640 
10641 instruct convF2D_reg(regD dst, regF src) %{
10642   predicate(UseSSE>=2);
10643   match(Set dst (ConvF2D src));
10644   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10645   ins_encode %{
10646     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10647   %}
10648   ins_pipe( pipe_slow );
10649 %}
10650 
10651 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10652 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10653   predicate(UseSSE<=1);
10654   match(Set dst (ConvD2I src));
10655   effect( KILL tmp, KILL cr );
10656   format %{ "FLD    $src\t# Convert double to int \n\t"
10657             "FLDCW  trunc mode\n\t"
10658             "SUB    ESP,4\n\t"
10659             "FISTp  [ESP + #0]\n\t"
10660             "FLDCW  std/24-bit mode\n\t"
10661             "POP    EAX\n\t"
10662             "CMP    EAX,0x80000000\n\t"
10663             "JNE,s  fast\n\t"
10664             "FLD_D  $src\n\t"
10665             "CALL   d2i_wrapper\n"
10666       "fast:" %}
10667   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10672 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10673   predicate(UseSSE>=2);
10674   match(Set dst (ConvD2I src));
10675   effect( KILL tmp, KILL cr );
10676   format %{ "CVTTSD2SI $dst, $src\n\t"
10677             "CMP    $dst,0x80000000\n\t"
10678             "JNE,s  fast\n\t"
10679             "SUB    ESP, 8\n\t"
10680             "MOVSD  [ESP], $src\n\t"
10681             "FLD_D  [ESP]\n\t"
10682             "ADD    ESP, 8\n\t"
10683             "CALL   d2i_wrapper\n"
10684       "fast:" %}
10685   ins_encode %{
10686     Label fast;
10687     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10688     __ cmpl($dst$$Register, 0x80000000);
10689     __ jccb(Assembler::notEqual, fast);
10690     __ subptr(rsp, 8);
10691     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10692     __ fld_d(Address(rsp, 0));
10693     __ addptr(rsp, 8);
10694     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10695     __ bind(fast);
10696   %}
10697   ins_pipe( pipe_slow );
10698 %}
10699 
10700 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10701   predicate(UseSSE<=1);
10702   match(Set dst (ConvD2L src));
10703   effect( KILL cr );
10704   format %{ "FLD    $src\t# Convert double to long\n\t"
10705             "FLDCW  trunc mode\n\t"
10706             "SUB    ESP,8\n\t"
10707             "FISTp  [ESP + #0]\n\t"
10708             "FLDCW  std/24-bit mode\n\t"
10709             "POP    EAX\n\t"
10710             "POP    EDX\n\t"
10711             "CMP    EDX,0x80000000\n\t"
10712             "JNE,s  fast\n\t"
10713             "TEST   EAX,EAX\n\t"
10714             "JNE,s  fast\n\t"
10715             "FLD    $src\n\t"
10716             "CALL   d2l_wrapper\n"
10717       "fast:" %}
10718   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10719   ins_pipe( pipe_slow );
10720 %}
10721 
10722 // XMM lacks a float/double->long conversion, so use the old FPU stack.
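// (In 32-bit mode CVTTSS2SI/CVTTSD2SI can only produce a 32-bit result, so the
//  value is bounced through the stack and converted with x87 FISTP m64int.)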
10723 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10724   predicate (UseSSE>=2);
10725   match(Set dst (ConvD2L src));
10726   effect( KILL cr );
10727   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10728             "MOVSD  [ESP],$src\n\t"
10729             "FLD_D  [ESP]\n\t"
10730             "FLDCW  trunc mode\n\t"
10731             "FISTp  [ESP + #0]\n\t"
10732             "FLDCW  std/24-bit mode\n\t"
10733             "POP    EAX\n\t"
10734             "POP    EDX\n\t"
10735             "CMP    EDX,0x80000000\n\t"
10736             "JNE,s  fast\n\t"
10737             "TEST   EAX,EAX\n\t"
10738             "JNE,s  fast\n\t"
10739             "SUB    ESP,8\n\t"
10740             "MOVSD  [ESP],$src\n\t"
10741             "FLD_D  [ESP]\n\t"
10742             "ADD    ESP,8\n\t"
10743             "CALL   d2l_wrapper\n"
10744       "fast:" %}
10745   ins_encode %{
10746     Label fast;
10747     __ subptr(rsp, 8);
10748     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10749     __ fld_d(Address(rsp, 0));
10750     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10751     __ fistp_d(Address(rsp, 0));
10752     // Restore the rounding mode, mask the exception
10753     if (Compile::current()->in_24_bit_fp_mode()) {
10754       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10755     } else {
10756       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10757     }
10758     // Load the converted long, adjust CPU stack
10759     __ pop(rax);
10760     __ pop(rdx);
10761     __ cmpl(rdx, 0x80000000);
10762     __ jccb(Assembler::notEqual, fast);
10763     __ testl(rax, rax);
10764     __ jccb(Assembler::notEqual, fast);
10765     __ subptr(rsp, 8);
10766     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10767     __ fld_d(Address(rsp, 0));
10768     __ addptr(rsp, 8);
10769     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10770     __ bind(fast);
10771   %}
10772   ins_pipe( pipe_slow );
10773 %}
10774 
// Convert a double to an int.  Java semantics require we do complex
// manglings in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a sentinel value
// if we overflowed or converted a NaN; we check for this and go the
// slow path if needed.
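// For reference, the Java-level corner cases that end up in d2i_wrapper are
// (per the language spec; the hardware reports every one of them, as well as a
// legitimate -2^31, with the 0x80000000 sentinel tested below):
//   (int)Double.NaN        == 0
//   (int)anything >=  2^31 == Integer.MAX_VALUE
//   (int)anything <  -2^31 == Integer.MIN_VALUE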
10781 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10782   predicate(UseSSE==0);
10783   match(Set dst (ConvF2I src));
10784   effect( KILL tmp, KILL cr );
10785   format %{ "FLD    $src\t# Convert float to int \n\t"
10786             "FLDCW  trunc mode\n\t"
10787             "SUB    ESP,4\n\t"
10788             "FISTp  [ESP + #0]\n\t"
10789             "FLDCW  std/24-bit mode\n\t"
10790             "POP    EAX\n\t"
10791             "CMP    EAX,0x80000000\n\t"
10792             "JNE,s  fast\n\t"
10793             "FLD    $src\n\t"
10794             "CALL   d2i_wrapper\n"
10795       "fast:" %}
10796   // DPR2I_encoding works for FPR2I
10797   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 // Convert a float in xmm to an int reg.
10802 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10803   predicate(UseSSE>=1);
10804   match(Set dst (ConvF2I src));
10805   effect( KILL tmp, KILL cr );
10806   format %{ "CVTTSS2SI $dst, $src\n\t"
10807             "CMP    $dst,0x80000000\n\t"
10808             "JNE,s  fast\n\t"
10809             "SUB    ESP, 4\n\t"
10810             "MOVSS  [ESP], $src\n\t"
10811             "FLD    [ESP]\n\t"
10812             "ADD    ESP, 4\n\t"
10813             "CALL   d2i_wrapper\n"
10814       "fast:" %}
10815   ins_encode %{
10816     Label fast;
10817     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10818     __ cmpl($dst$$Register, 0x80000000);
10819     __ jccb(Assembler::notEqual, fast);
10820     __ subptr(rsp, 4);
10821     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10822     __ fld_s(Address(rsp, 0));
10823     __ addptr(rsp, 4);
10824     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10825     __ bind(fast);
10826   %}
10827   ins_pipe( pipe_slow );
10828 %}
10829 
10830 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10831   predicate(UseSSE==0);
10832   match(Set dst (ConvF2L src));
10833   effect( KILL cr );
10834   format %{ "FLD    $src\t# Convert float to long\n\t"
10835             "FLDCW  trunc mode\n\t"
10836             "SUB    ESP,8\n\t"
10837             "FISTp  [ESP + #0]\n\t"
10838             "FLDCW  std/24-bit mode\n\t"
10839             "POP    EAX\n\t"
10840             "POP    EDX\n\t"
10841             "CMP    EDX,0x80000000\n\t"
10842             "JNE,s  fast\n\t"
10843             "TEST   EAX,EAX\n\t"
10844             "JNE,s  fast\n\t"
10845             "FLD    $src\n\t"
10846             "CALL   d2l_wrapper\n"
10847       "fast:" %}
10848   // DPR2L_encoding works for FPR2L
10849   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10850   ins_pipe( pipe_slow );
10851 %}
10852 
10853 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10854 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10855   predicate (UseSSE>=1);
10856   match(Set dst (ConvF2L src));
10857   effect( KILL cr );
10858   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10859             "MOVSS  [ESP],$src\n\t"
10860             "FLD_S  [ESP]\n\t"
10861             "FLDCW  trunc mode\n\t"
10862             "FISTp  [ESP + #0]\n\t"
10863             "FLDCW  std/24-bit mode\n\t"
10864             "POP    EAX\n\t"
10865             "POP    EDX\n\t"
10866             "CMP    EDX,0x80000000\n\t"
10867             "JNE,s  fast\n\t"
10868             "TEST   EAX,EAX\n\t"
10869             "JNE,s  fast\n\t"
10870             "SUB    ESP,4\t# Convert float to long\n\t"
10871             "MOVSS  [ESP],$src\n\t"
10872             "FLD_S  [ESP]\n\t"
10873             "ADD    ESP,4\n\t"
10874             "CALL   d2l_wrapper\n"
10875       "fast:" %}
10876   ins_encode %{
10877     Label fast;
10878     __ subptr(rsp, 8);
10879     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10880     __ fld_s(Address(rsp, 0));
10881     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10882     __ fistp_d(Address(rsp, 0));
10883     // Restore the rounding mode, mask the exception
10884     if (Compile::current()->in_24_bit_fp_mode()) {
10885       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10886     } else {
10887       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10888     }
10889     // Load the converted long, adjust CPU stack
10890     __ pop(rax);
10891     __ pop(rdx);
10892     __ cmpl(rdx, 0x80000000);
10893     __ jccb(Assembler::notEqual, fast);
10894     __ testl(rax, rax);
10895     __ jccb(Assembler::notEqual, fast);
10896     __ subptr(rsp, 4);
10897     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10898     __ fld_s(Address(rsp, 0));
10899     __ addptr(rsp, 4);
10900     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10901     __ bind(fast);
10902   %}
10903   ins_pipe( pipe_slow );
10904 %}
10905 
10906 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10907   predicate( UseSSE<=1 );
10908   match(Set dst (ConvI2D src));
10909   format %{ "FILD   $src\n\t"
10910             "FSTP   $dst" %}
10911   opcode(0xDB, 0x0);  /* DB /0 */
10912   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10913   ins_pipe( fpu_reg_mem );
10914 %}
10915 
10916 instruct convI2D_reg(regD dst, rRegI src) %{
10917   predicate( UseSSE>=2 && !UseXmmI2D );
10918   match(Set dst (ConvI2D src));
10919   format %{ "CVTSI2SD $dst,$src" %}
10920   ins_encode %{
10921     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10922   %}
10923   ins_pipe( pipe_slow );
10924 %}
10925 
10926 instruct convI2D_mem(regD dst, memory mem) %{
10927   predicate( UseSSE>=2 );
10928   match(Set dst (ConvI2D (LoadI mem)));
10929   format %{ "CVTSI2SD $dst,$mem" %}
10930   ins_encode %{
10931     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10932   %}
10933   ins_pipe( pipe_slow );
10934 %}
10935 
10936 instruct convXI2D_reg(regD dst, rRegI src)
10937 %{
10938   predicate( UseSSE>=2 && UseXmmI2D );
10939   match(Set dst (ConvI2D src));
10940 
10941   format %{ "MOVD  $dst,$src\n\t"
10942             "CVTDQ2PD $dst,$dst\t# i2d" %}
10943   ins_encode %{
10944     __ movdl($dst$$XMMRegister, $src$$Register);
10945     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10946   %}
10947   ins_pipe(pipe_slow); // XXX
10948 %}
10949 
10950 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10951   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10952   match(Set dst (ConvI2D (LoadI mem)));
10953   format %{ "FILD   $mem\n\t"
10954             "FSTP   $dst" %}
10955   opcode(0xDB);      /* DB /0 */
10956   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10957               Pop_Reg_DPR(dst));
10958   ins_pipe( fpu_reg_mem );
10959 %}
10960 
10961 // Convert a byte to a float; no rounding step needed.
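// (Every value masked to 0..255 is far below 2^24, hence exactly representable
//  in a single-precision significand, so the usual 24-bit rounding spill is
//  unnecessary here.)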
10962 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10963   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10964   match(Set dst (ConvI2F src));
10965   format %{ "FILD   $src\n\t"
10966             "FSTP   $dst" %}
10967 
10968   opcode(0xDB, 0x0);  /* DB /0 */
10969   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10970   ins_pipe( fpu_reg_mem );
10971 %}
10972 
10973 // In 24-bit mode, force exponent rounding by storing back out
10974 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10975   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10976   match(Set dst (ConvI2F src));
10977   ins_cost(200);
10978   format %{ "FILD   $src\n\t"
10979             "FSTP_S $dst" %}
10980   opcode(0xDB, 0x0);  /* DB /0 */
10981   ins_encode( Push_Mem_I(src),
10982               Pop_Mem_FPR(dst));
10983   ins_pipe( fpu_mem_mem );
10984 %}
10985 
10986 // In 24-bit mode, force exponent rounding by storing back out
10987 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10988   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10989   match(Set dst (ConvI2F (LoadI mem)));
10990   ins_cost(200);
10991   format %{ "FILD   $mem\n\t"
10992             "FSTP_S $dst" %}
10993   opcode(0xDB);  /* DB /0 */
10994   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10995               Pop_Mem_FPR(dst));
10996   ins_pipe( fpu_mem_mem );
10997 %}
10998 
10999 // This instruction does not round to 24-bits
11000 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11001   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11002   match(Set dst (ConvI2F src));
11003   format %{ "FILD   $src\n\t"
11004             "FSTP   $dst" %}
11005   opcode(0xDB, 0x0);  /* DB /0 */
11006   ins_encode( Push_Mem_I(src),
11007               Pop_Reg_FPR(dst));
11008   ins_pipe( fpu_reg_mem );
11009 %}
11010 
11011 // This instruction does not round to 24-bits
11012 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11013   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11014   match(Set dst (ConvI2F (LoadI mem)));
11015   format %{ "FILD   $mem\n\t"
11016             "FSTP   $dst" %}
11017   opcode(0xDB);      /* DB /0 */
11018   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11019               Pop_Reg_FPR(dst));
11020   ins_pipe( fpu_reg_mem );
11021 %}
11022 
11023 // Convert an int to a float in xmm; no rounding step needed.
11024 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11026   match(Set dst (ConvI2F src));
11027   format %{ "CVTSI2SS $dst, $src" %}
11028   ins_encode %{
11029     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11030   %}
11031   ins_pipe( pipe_slow );
11032 %}
11033 
instruct convXI2F_reg(regF dst, rRegI src)
11035 %{
11036   predicate( UseSSE>=2 && UseXmmI2F );
11037   match(Set dst (ConvI2F src));
11038 
11039   format %{ "MOVD  $dst,$src\n\t"
11040             "CVTDQ2PS $dst,$dst\t# i2f" %}
11041   ins_encode %{
11042     __ movdl($dst$$XMMRegister, $src$$Register);
11043     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11044   %}
11045   ins_pipe(pipe_slow); // XXX
11046 %}
11047 
11048 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11049   match(Set dst (ConvI2L src));
11050   effect(KILL cr);
11051   ins_cost(375);
11052   format %{ "MOV    $dst.lo,$src\n\t"
11053             "MOV    $dst.hi,$src\n\t"
11054             "SAR    $dst.hi,31" %}
11055   ins_encode(convert_int_long(dst,src));
11056   ins_pipe( ialu_reg_reg_long );
11057 %}
11058 
11059 // Zero-extend convert int to long
11060 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11061   match(Set dst (AndL (ConvI2L src) mask) );
11062   effect( KILL flags );
11063   ins_cost(250);
11064   format %{ "MOV    $dst.lo,$src\n\t"
11065             "XOR    $dst.hi,$dst.hi" %}
11066   opcode(0x33); // XOR
11067   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11068   ins_pipe( ialu_reg_reg_long );
11069 %}
11070 
11071 // Zero-extend long
11072 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11073   match(Set dst (AndL src mask) );
11074   effect( KILL flags );
11075   ins_cost(250);
11076   format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
11078   opcode(0x33); // XOR
11079   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11080   ins_pipe( ialu_reg_reg_long );
11081 %}
11082 
11083 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11084   predicate (UseSSE<=1);
11085   match(Set dst (ConvL2D src));
11086   effect( KILL cr );
11087   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11088             "PUSH   $src.lo\n\t"
11089             "FILD   ST,[ESP + #0]\n\t"
11090             "ADD    ESP,8\n\t"
11091             "FSTP_D $dst\t# D-round" %}
11092   opcode(0xDF, 0x5);  /* DF /5 */
11093   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11094   ins_pipe( pipe_slow );
11095 %}
11096 
11097 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11098   predicate (UseSSE>=2);
11099   match(Set dst (ConvL2D src));
11100   effect( KILL cr );
11101   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11102             "PUSH   $src.lo\n\t"
11103             "FILD_D [ESP]\n\t"
11104             "FSTP_D [ESP]\n\t"
11105             "MOVSD  $dst,[ESP]\n\t"
11106             "ADD    ESP,8" %}
11107   opcode(0xDF, 0x5);  /* DF /5 */
11108   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11109   ins_pipe( pipe_slow );
11110 %}
11111 
11112 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11113   predicate (UseSSE>=1);
11114   match(Set dst (ConvL2F src));
11115   effect( KILL cr );
11116   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11117             "PUSH   $src.lo\n\t"
11118             "FILD_D [ESP]\n\t"
11119             "FSTP_S [ESP]\n\t"
11120             "MOVSS  $dst,[ESP]\n\t"
11121             "ADD    ESP,8" %}
11122   opcode(0xDF, 0x5);  /* DF /5 */
11123   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11124   ins_pipe( pipe_slow );
11125 %}
11126 
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  predicate( UseSSE==0 );
  match(Set dst (ConvL2F src));
11129   effect( KILL cr );
11130   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11131             "PUSH   $src.lo\n\t"
11132             "FILD   ST,[ESP + #0]\n\t"
11133             "ADD    ESP,8\n\t"
11134             "FSTP_S $dst\t# F-round" %}
11135   opcode(0xDF, 0x5);  /* DF /5 */
11136   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11137   ins_pipe( pipe_slow );
11138 %}
11139 
11140 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11141   match(Set dst (ConvL2I src));
11142   effect( DEF dst, USE src );
11143   format %{ "MOV    $dst,$src.lo" %}
11144   ins_encode(enc_CopyL_Lo(dst,src));
11145   ins_pipe( ialu_reg_reg );
11146 %}
11147 
11148 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11149   match(Set dst (MoveF2I src));
11150   effect( DEF dst, USE src );
11151   ins_cost(100);
11152   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11153   ins_encode %{
11154     __ movl($dst$$Register, Address(rsp, $src$$disp));
11155   %}
11156   ins_pipe( ialu_reg_mem );
11157 %}
11158 
11159 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11160   predicate(UseSSE==0);
11161   match(Set dst (MoveF2I src));
11162   effect( DEF dst, USE src );
11163 
11164   ins_cost(125);
11165   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11166   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11167   ins_pipe( fpu_mem_reg );
11168 %}
11169 
11170 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11171   predicate(UseSSE>=1);
11172   match(Set dst (MoveF2I src));
11173   effect( DEF dst, USE src );
11174 
11175   ins_cost(95);
11176   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11177   ins_encode %{
11178     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11179   %}
11180   ins_pipe( pipe_slow );
11181 %}
11182 
11183 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11184   predicate(UseSSE>=2);
11185   match(Set dst (MoveF2I src));
11186   effect( DEF dst, USE src );
11187   ins_cost(85);
11188   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11189   ins_encode %{
11190     __ movdl($dst$$Register, $src$$XMMRegister);
11191   %}
11192   ins_pipe( pipe_slow );
11193 %}
11194 
11195 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11196   match(Set dst (MoveI2F src));
11197   effect( DEF dst, USE src );
11198 
11199   ins_cost(100);
11200   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11201   ins_encode %{
11202     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11203   %}
11204   ins_pipe( ialu_mem_reg );
11205 %}
11206 
11207 
11208 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11209   predicate(UseSSE==0);
11210   match(Set dst (MoveI2F src));
11211   effect(DEF dst, USE src);
11212 
11213   ins_cost(125);
11214   format %{ "FLD_S  $src\n\t"
11215             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11216   opcode(0xD9);               /* D9 /0, FLD m32real */
11217   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11218               Pop_Reg_FPR(dst) );
11219   ins_pipe( fpu_reg_mem );
11220 %}
11221 
11222 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11223   predicate(UseSSE>=1);
11224   match(Set dst (MoveI2F src));
11225   effect( DEF dst, USE src );
11226 
11227   ins_cost(95);
11228   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11229   ins_encode %{
11230     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11231   %}
11232   ins_pipe( pipe_slow );
11233 %}
11234 
11235 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11236   predicate(UseSSE>=2);
11237   match(Set dst (MoveI2F src));
11238   effect( DEF dst, USE src );
11239 
11240   ins_cost(85);
11241   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11242   ins_encode %{
11243     __ movdl($dst$$XMMRegister, $src$$Register);
11244   %}
11245   ins_pipe( pipe_slow );
11246 %}
11247 
11248 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11249   match(Set dst (MoveD2L src));
11250   effect(DEF dst, USE src);
11251 
11252   ins_cost(250);
11253   format %{ "MOV    $dst.lo,$src\n\t"
11254             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11255   opcode(0x8B, 0x8B);
11256   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11257   ins_pipe( ialu_mem_long_reg );
11258 %}
11259 
11260 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11261   predicate(UseSSE<=1);
11262   match(Set dst (MoveD2L src));
11263   effect(DEF dst, USE src);
11264 
11265   ins_cost(125);
11266   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11267   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11268   ins_pipe( fpu_mem_reg );
11269 %}
11270 
11271 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11272   predicate(UseSSE>=2);
11273   match(Set dst (MoveD2L src));
11274   effect(DEF dst, USE src);
11275   ins_cost(95);
11276   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11277   ins_encode %{
11278     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11279   %}
11280   ins_pipe( pipe_slow );
11281 %}
11282 
11283 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11284   predicate(UseSSE>=2);
11285   match(Set dst (MoveD2L src));
11286   effect(DEF dst, USE src, TEMP tmp);
11287   ins_cost(85);
11288   format %{ "MOVD   $dst.lo,$src\n\t"
11289             "PSHUFLW $tmp,$src,0x4E\n\t"
11290             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11291   ins_encode %{
11292     __ movdl($dst$$Register, $src$$XMMRegister);
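    // 0x4E swaps the two 32-bit halves of the low quadword, so the movdl below
    // picks up bits 63..32 of the double into $dst.hi.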
11293     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11294     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11295   %}
11296   ins_pipe( pipe_slow );
11297 %}
11298 
11299 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11300   match(Set dst (MoveL2D src));
11301   effect(DEF dst, USE src);
11302 
11303   ins_cost(200);
11304   format %{ "MOV    $dst,$src.lo\n\t"
11305             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11306   opcode(0x89, 0x89);
11307   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11308   ins_pipe( ialu_mem_long_reg );
11309 %}
11310 
11311 
11312 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11313   predicate(UseSSE<=1);
11314   match(Set dst (MoveL2D src));
11315   effect(DEF dst, USE src);
11316   ins_cost(125);
11317 
11318   format %{ "FLD_D  $src\n\t"
11319             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11320   opcode(0xDD);               /* DD /0, FLD m64real */
11321   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11322               Pop_Reg_DPR(dst) );
11323   ins_pipe( fpu_reg_mem );
11324 %}
11325 
11326 
11327 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11328   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11329   match(Set dst (MoveL2D src));
11330   effect(DEF dst, USE src);
11331 
11332   ins_cost(95);
11333   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11334   ins_encode %{
11335     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11336   %}
11337   ins_pipe( pipe_slow );
11338 %}
11339 
11340 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11341   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11342   match(Set dst (MoveL2D src));
11343   effect(DEF dst, USE src);
11344 
11345   ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11347   ins_encode %{
11348     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11349   %}
11350   ins_pipe( pipe_slow );
11351 %}
11352 
11353 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11354   predicate(UseSSE>=2);
11355   match(Set dst (MoveL2D src));
11356   effect(TEMP dst, USE src, TEMP tmp);
11357   ins_cost(85);
11358   format %{ "MOVD   $dst,$src.lo\n\t"
11359             "MOVD   $tmp,$src.hi\n\t"
11360             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11361   ins_encode %{
11362     __ movdl($dst$$XMMRegister, $src$$Register);
11363     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
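    // punpckldq interleaves the low dwords: result[31:0] = $src.lo and
    // result[63:32] = $src.hi, reassembling the 64-bit long in $dst.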
11364     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11365   %}
11366   ins_pipe( pipe_slow );
11367 %}
11368 
11369 
11370 // =======================================================================
11371 // fast clearing of an array
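// rep_stos handles the general case and branches to a REP STOS path once the
// length exceeds InitArrayShortSize; rep_stos_large is selected up front when the
// ClearArrayNode is already known to be large.  Both expand to a single
// clear_mem() call, whose last argument selects the large-array path.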
11372 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11373   predicate(!((ClearArrayNode*)n)->is_large());
11374   match(Set dummy (ClearArray cnt base));
11375   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11376 
11377   format %{ $$template
11378     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    ECX,InitArrayShortSize\n\t"
11380     $$emit$$"JG     LARGE\n\t"
11381     $$emit$$"SHL    ECX, 1\n\t"
11382     $$emit$$"DEC    ECX\n\t"
11383     $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    [EDI+ECX*4],EAX\t# LOOP\n\t"
11385     $$emit$$"DEC    ECX\n\t"
11386     $$emit$$"JGE    LOOP\n\t"
11387     $$emit$$"JMP    DONE\n\t"
11388     $$emit$$"# LARGE:\n\t"
11389     if (UseFastStosb) {
11390        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11391        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11392     } else {
11393        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11394        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11395     }
11396     $$emit$$"# DONE"
11397   %}
11398   ins_encode %{
11399     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11400   %}
11401   ins_pipe( pipe_slow );
11402 %}
11403 
11404 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11405   predicate(((ClearArrayNode*)n)->is_large());
11406   match(Set dummy (ClearArray cnt base));
11407   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11408   format %{ $$template
11409     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11410     if (UseFastStosb) {
11411        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11412        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11413     } else {
11414        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11415        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11416     }
11417     $$emit$$"# DONE"
11418   %}
11419   ins_encode %{
11420     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11421   %}
11422   ins_pipe( pipe_slow );
11423 %}
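
// String compare intrinsics.  The LL/UU/LU/UL suffixes name the encodings of the
// two arguments (L = Latin-1 byte[], U = UTF-16 char[]); note that the UL flavor
// passes its operands to string_compare() swapped (see its ins_encode below).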
11424 
11425 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11426                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11427   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11428   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11429   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11430 
11431   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11432   ins_encode %{
11433     __ string_compare($str1$$Register, $str2$$Register,
11434                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11435                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11436   %}
11437   ins_pipe( pipe_slow );
11438 %}
11439 
11440 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11441                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11442   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11443   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11444   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11445 
11446   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11447   ins_encode %{
11448     __ string_compare($str1$$Register, $str2$$Register,
11449                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11450                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11451   %}
11452   ins_pipe( pipe_slow );
11453 %}
11454 
11455 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11456                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11457   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11458   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11459   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11460 
11461   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11462   ins_encode %{
11463     __ string_compare($str1$$Register, $str2$$Register,
11464                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11465                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11466   %}
11467   ins_pipe( pipe_slow );
11468 %}
11469 
11470 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11471                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11472   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11473   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11474   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11475 
11476   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11477   ins_encode %{
11478     __ string_compare($str2$$Register, $str1$$Register,
11479                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11480                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11481   %}
11482   ins_pipe( pipe_slow );
11483 %}
11484 
11485 // fast string equals
11486 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11487                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11488   match(Set result (StrEquals (Binary str1 str2) cnt));
11489   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11490 
11491   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11492   ins_encode %{
11493     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11494                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11495                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11496   %}
11497 
11498   ins_pipe( pipe_slow );
11499 %}
11500 
11501 // fast search of substring with known size.
11502 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11503                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11504   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11505   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11506   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11507 
11508   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11509   ins_encode %{
11510     int icnt2 = (int)$int_cnt2$$constant;
11511     if (icnt2 >= 16) {
11512       // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
11514       __ string_indexofC8($str1$$Register, $str2$$Register,
11515                           $cnt1$$Register, $cnt2$$Register,
11516                           icnt2, $result$$Register,
11517                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11518     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11520       __ string_indexof($str1$$Register, $str2$$Register,
11521                         $cnt1$$Register, $cnt2$$Register,
11522                         icnt2, $result$$Register,
11523                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11524     }
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
11528 
11529 // fast search of substring with known size.
11530 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11531                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11532   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11533   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11534   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11535 
11536   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11537   ins_encode %{
11538     int icnt2 = (int)$int_cnt2$$constant;
11539     if (icnt2 >= 8) {
11540       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11542       __ string_indexofC8($str1$$Register, $str2$$Register,
11543                           $cnt1$$Register, $cnt2$$Register,
11544                           icnt2, $result$$Register,
11545                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11546     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11548       __ string_indexof($str1$$Register, $str2$$Register,
11549                         $cnt1$$Register, $cnt2$$Register,
11550                         icnt2, $result$$Register,
11551                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11552     }
11553   %}
11554   ins_pipe( pipe_slow );
11555 %}
11556 
11557 // fast search of substring with known size.
11558 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11559                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11560   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11561   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11562   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11563 
11564   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11565   ins_encode %{
11566     int icnt2 = (int)$int_cnt2$$constant;
11567     if (icnt2 >= 8) {
11568       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11570       __ string_indexofC8($str1$$Register, $str2$$Register,
11571                           $cnt1$$Register, $cnt2$$Register,
11572                           icnt2, $result$$Register,
11573                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11574     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11576       __ string_indexof($str1$$Register, $str2$$Register,
11577                         $cnt1$$Register, $cnt2$$Register,
11578                         icnt2, $result$$Register,
11579                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11580     }
11581   %}
11582   ins_pipe( pipe_slow );
11583 %}
11584 
11585 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11586                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11587   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11588   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11589   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11590 
11591   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11592   ins_encode %{
11593     __ string_indexof($str1$$Register, $str2$$Register,
11594                       $cnt1$$Register, $cnt2$$Register,
11595                       (-1), $result$$Register,
11596                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11597   %}
11598   ins_pipe( pipe_slow );
11599 %}
11600 
11601 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11602                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11603   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11604   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11605   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11606 
11607   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11608   ins_encode %{
11609     __ string_indexof($str1$$Register, $str2$$Register,
11610                       $cnt1$$Register, $cnt2$$Register,
11611                       (-1), $result$$Register,
11612                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11613   %}
11614   ins_pipe( pipe_slow );
11615 %}
11616 
11617 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11618                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11619   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11620   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11621   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11622 
11623   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11624   ins_encode %{
11625     __ string_indexof($str1$$Register, $str2$$Register,
11626                       $cnt1$$Register, $cnt2$$Register,
11627                       (-1), $result$$Register,
11628                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11629   %}
11630   ins_pipe( pipe_slow );
11631 %}
11632 
11633 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11634                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11635   predicate(UseSSE42Intrinsics);
11636   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11637   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11638   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11639   ins_encode %{
11640     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11641                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11642   %}
11643   ins_pipe( pipe_slow );
11644 %}
11645 
11646 // fast array equals
11647 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11648                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11649 %{
11650   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11651   match(Set result (AryEq ary1 ary2));
11652   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11653   //ins_cost(300);
11654 
11655   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11656   ins_encode %{
11657     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11658                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11659                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11660   %}
11661   ins_pipe( pipe_slow );
11662 %}
11663 
11664 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11665                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11666 %{
11667   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11668   match(Set result (AryEq ary1 ary2));
11669   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11670   //ins_cost(300);
11671 
11672   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11673   ins_encode %{
11674     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11675                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11676                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11677   %}
11678   ins_pipe( pipe_slow );
11679 %}
11680 
11681 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11682                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11683 %{
11684   match(Set result (HasNegatives ary1 len));
11685   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11686 
11687   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11688   ins_encode %{
11689     __ has_negatives($ary1$$Register, $len$$Register,
11690                      $result$$Register, $tmp3$$Register,
11691                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11692   %}
11693   ins_pipe( pipe_slow );
11694 %}
11695 
11696 // fast char[] to byte[] compression
11697 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11698                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11699   match(Set result (StrCompressedCopy src (Binary dst len)));
11700   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11701 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11703   ins_encode %{
11704     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11705                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11706                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11707   %}
11708   ins_pipe( pipe_slow );
11709 %}
11710 
11711 // fast byte[] to char[] inflation
11712 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11713                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11714   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11715   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11716 
11717   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11718   ins_encode %{
11719     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11720                           $tmp1$$XMMRegister, $tmp2$$Register);
11721   %}
11722   ins_pipe( pipe_slow );
11723 %}
11724 
11725 // encode char[] to byte[] in ISO_8859_1
11726 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11727                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11728                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11729   match(Set result (EncodeISOArray src (Binary dst len)));
11730   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11731 
11732   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11733   ins_encode %{
11734     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11735                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11736                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11737   %}
11738   ins_pipe( pipe_slow );
11739 %}
11740 
11741 
11742 //----------Control Flow Instructions------------------------------------------
11743 // Signed compare Instructions
11744 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11745   match(Set cr (CmpI op1 op2));
11746   effect( DEF cr, USE op1, USE op2 );
11747   format %{ "CMP    $op1,$op2" %}
11748   opcode(0x3B);  /* Opcode 3B /r */
11749   ins_encode( OpcP, RegReg( op1, op2) );
11750   ins_pipe( ialu_cr_reg_reg );
11751 %}
11752 
11753 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11754   match(Set cr (CmpI op1 op2));
11755   effect( DEF cr, USE op1 );
11756   format %{ "CMP    $op1,$op2" %}
11757   opcode(0x81,0x07);  /* Opcode 81 /7 */
11758   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11759   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11760   ins_pipe( ialu_cr_reg_imm );
11761 %}
11762 
11763 // Cisc-spilled version of cmpI_eReg
11764 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11765   match(Set cr (CmpI op1 (LoadI op2)));
11766 
11767   format %{ "CMP    $op1,$op2" %}
11768   ins_cost(500);
11769   opcode(0x3B);  /* Opcode 3B /r */
11770   ins_encode( OpcP, RegMem( op1, op2) );
11771   ins_pipe( ialu_cr_reg_mem );
11772 %}
11773 
11774 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11775   match(Set cr (CmpI src zero));
11776   effect( DEF cr, USE src );
11777 
11778   format %{ "TEST   $src,$src" %}
11779   opcode(0x85);
11780   ins_encode( OpcP, RegReg( src, src ) );
11781   ins_pipe( ialu_cr_reg_imm );
11782 %}
11783 
11784 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11785   match(Set cr (CmpI (AndI src con) zero));
11786 
11787   format %{ "TEST   $src,$con" %}
11788   opcode(0xF7,0x00);
11789   ins_encode( OpcP, RegOpc(src), Con32(con) );
11790   ins_pipe( ialu_cr_reg_imm );
11791 %}
11792 
11793 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11794   match(Set cr (CmpI (AndI src mem) zero));
11795 
11796   format %{ "TEST   $src,$mem" %}
11797   opcode(0x85);
11798   ins_encode( OpcP, RegMem( src, mem ) );
11799   ins_pipe( ialu_cr_reg_mem );
11800 %}
11801 
11802 // Unsigned compare Instructions; really, same as signed except they
11803 // produce an eFlagsRegU instead of eFlagsReg.
11804 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11805   match(Set cr (CmpU op1 op2));
11806 
11807   format %{ "CMPu   $op1,$op2" %}
11808   opcode(0x3B);  /* Opcode 3B /r */
11809   ins_encode( OpcP, RegReg( op1, op2) );
11810   ins_pipe( ialu_cr_reg_reg );
11811 %}
11812 
11813 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11814   match(Set cr (CmpU op1 op2));
11815 
11816   format %{ "CMPu   $op1,$op2" %}
11817   opcode(0x81,0x07);  /* Opcode 81 /7 */
11818   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11819   ins_pipe( ialu_cr_reg_imm );
11820 %}
11821 
// Cisc-spilled version of cmpU_eReg
11823 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11824   match(Set cr (CmpU op1 (LoadI op2)));
11825 
11826   format %{ "CMPu   $op1,$op2" %}
11827   ins_cost(500);
11828   opcode(0x3B);  /* Opcode 3B /r */
11829   ins_encode( OpcP, RegMem( op1, op2) );
11830   ins_pipe( ialu_cr_reg_mem );
11831 %}
11832 
11833 // // Cisc-spilled version of cmpU_eReg
11834 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11835 //  match(Set cr (CmpU (LoadI op1) op2));
11836 //
11837 //  format %{ "CMPu   $op1,$op2" %}
11838 //  ins_cost(500);
11839 //  opcode(0x39);  /* Opcode 39 /r */
11840 //  ins_encode( OpcP, RegMem( op1, op2) );
11841 //%}
11842 
11843 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11844   match(Set cr (CmpU src zero));
11845 
11846   format %{ "TESTu  $src,$src" %}
11847   opcode(0x85);
11848   ins_encode( OpcP, RegReg( src, src ) );
11849   ins_pipe( ialu_cr_reg_imm );
11850 %}
11851 
11852 // Unsigned pointer compare Instructions
11853 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11854   match(Set cr (CmpP op1 op2));
11855 
11856   format %{ "CMPu   $op1,$op2" %}
11857   opcode(0x3B);  /* Opcode 3B /r */
11858   ins_encode( OpcP, RegReg( op1, op2) );
11859   ins_pipe( ialu_cr_reg_reg );
11860 %}
11861 
11862 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11863   match(Set cr (CmpP op1 op2));
11864 
11865   format %{ "CMPu   $op1,$op2" %}
11866   opcode(0x81,0x07);  /* Opcode 81 /7 */
11867   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11868   ins_pipe( ialu_cr_reg_imm );
11869 %}
11870 
// Cisc-spilled version of cmpP_eReg
11872 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11873   match(Set cr (CmpP op1 (LoadP op2)));
11874 
11875   format %{ "CMPu   $op1,$op2" %}
11876   ins_cost(500);
11877   opcode(0x3B);  /* Opcode 3B /r */
11878   ins_encode( OpcP, RegMem( op1, op2) );
11879   ins_pipe( ialu_cr_reg_mem );
11880 %}
11881 
11882 // // Cisc-spilled version of cmpP_eReg
11883 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11884 //  match(Set cr (CmpP (LoadP op1) op2));
11885 //
11886 //  format %{ "CMPu   $op1,$op2" %}
11887 //  ins_cost(500);
11888 //  opcode(0x39);  /* Opcode 39 /r */
11889 //  ins_encode( OpcP, RegMem( op1, op2) );
11890 //%}
11891 
11892 // Compare raw pointer (used in out-of-heap check).
11893 // Only works because non-oop pointers must be raw pointers
11894 // and raw pointers have no anti-dependencies.
11895 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11896   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11897   match(Set cr (CmpP op1 (LoadP op2)));
11898 
11899   format %{ "CMPu   $op1,$op2" %}
11900   opcode(0x3B);  /* Opcode 3B /r */
11901   ins_encode( OpcP, RegMem( op1, op2) );
11902   ins_pipe( ialu_cr_reg_mem );
11903 %}
11904 
11905 //
11906 // This will generate a signed flags result. This should be ok
11907 // since any compare to a zero should be eq/neq.
11908 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11909   match(Set cr (CmpP src zero));
11910 
11911   format %{ "TEST   $src,$src" %}
11912   opcode(0x85);
11913   ins_encode( OpcP, RegReg( src, src ) );
11914   ins_pipe( ialu_cr_reg_imm );
11915 %}
11916 
11917 // Cisc-spilled version of testP_reg
11918 // This will generate a signed flags result. This should be ok
11919 // since any compare to a zero should be eq/neq.
11920 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11921   match(Set cr (CmpP (LoadP op) zero));
11922 
11923   format %{ "TEST   $op,0xFFFFFFFF" %}
11924   ins_cost(500);
11925   opcode(0xF7);               /* Opcode F7 /0 */
11926   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11927   ins_pipe( ialu_cr_reg_imm );
11928 %}
11929 
11930 // Yanked all unsigned pointer compare operations.
11931 // Pointer compares are done with CmpP which is already unsigned.
11932 
11933 //----------Max and Min--------------------------------------------------------
11934 // Min Instructions
11935 ////
11936 //   *** Min and Max using the conditional move are slower than the
11937 //   *** branch version on a Pentium III.
11938 // // Conditional move for min
11939 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11940 //  effect( USE_DEF op2, USE op1, USE cr );
11941 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11942 //  opcode(0x4C,0x0F);
11943 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11944 //  ins_pipe( pipe_cmov_reg );
11945 //%}
11946 //
11947 //// Min Register with Register (P6 version)
11948 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11949 //  predicate(VM_Version::supports_cmov() );
11950 //  match(Set op2 (MinI op1 op2));
11951 //  ins_cost(200);
11952 //  expand %{
11953 //    eFlagsReg cr;
11954 //    compI_eReg(cr,op1,op2);
11955 //    cmovI_reg_lt(op2,op1,cr);
11956 //  %}
11957 //%}
11958 
11959 // Min Register with Register (generic version)
11960 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11961   match(Set dst (MinI dst src));
11962   effect(KILL flags);
11963   ins_cost(300);
11964 
11965   format %{ "MIN    $dst,$src" %}
11966   opcode(0xCC);
11967   ins_encode( min_enc(dst,src) );
11968   ins_pipe( pipe_slow );
11969 %}
11970 
11971 // Max Register with Register
11972 //   *** Min and Max using the conditional move are slower than the
11973 //   *** branch version on a Pentium III.
11974 // // Conditional move for max
11975 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11976 //  effect( USE_DEF op2, USE op1, USE cr );
11977 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11978 //  opcode(0x4F,0x0F);
11979 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11980 //  ins_pipe( pipe_cmov_reg );
11981 //%}
11982 //
11983 // // Max Register with Register (P6 version)
11984 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11985 //  predicate(VM_Version::supports_cmov() );
11986 //  match(Set op2 (MaxI op1 op2));
11987 //  ins_cost(200);
11988 //  expand %{
11989 //    eFlagsReg cr;
11990 //    compI_eReg(cr,op1,op2);
11991 //    cmovI_reg_gt(op2,op1,cr);
11992 //  %}
11993 //%}
11994 
11995 // Max Register with Register (generic version)
11996 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11997   match(Set dst (MaxI dst src));
11998   effect(KILL flags);
11999   ins_cost(300);
12000 
12001   format %{ "MAX    $dst,$src" %}
12002   opcode(0xCC);
12003   ins_encode( max_enc(dst,src) );
12004   ins_pipe( pipe_slow );
12005 %}
12006 
12007 // ============================================================================
12008 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range, since
// counted loops have a limit check that guards against overflow.
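//
// Worked example (illustrative only): with init = 0, limit = 10 and stride = 3,
// the formula in the format string below gives 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 12;
// the loop body runs for i = 0, 3, 6, 9 and the trip counter exits holding 12.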
12011 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12012   match(Set limit (LoopLimit (Binary init limit) stride));
12013   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12014   ins_cost(300);
12015 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12017   ins_encode %{
12018     int strd = (int)$stride$$constant;
12019     assert(strd != 1 && strd != -1, "sanity");
12020     int m1 = (strd > 0) ? 1 : -1;
12021     // Convert limit to long (EAX:EDX)
12022     __ cdql();
12023     // Convert init to long (init:tmp)
12024     __ movl($tmp$$Register, $init$$Register);
12025     __ sarl($tmp$$Register, 31);
12026     // $limit - $init
12027     __ subl($limit$$Register, $init$$Register);
12028     __ sbbl($limit_hi$$Register, $tmp$$Register);
12029     // + ($stride - 1)
12030     if (strd > 0) {
12031       __ addl($limit$$Register, (strd - 1));
12032       __ adcl($limit_hi$$Register, 0);
12033       __ movl($tmp$$Register, strd);
12034     } else {
12035       __ addl($limit$$Register, (strd + 1));
12036       __ adcl($limit_hi$$Register, -1);
12037       __ lneg($limit_hi$$Register, $limit$$Register);
12038       __ movl($tmp$$Register, -strd);
12039     }
    // signed division: (EAX:EDX) / pos_stride
12041     __ idivl($tmp$$Register);
12042     if (strd < 0) {
12043       // restore sign
12044       __ negl($tmp$$Register);
12045     }
12046     // (EAX) * stride
12047     __ mull($tmp$$Register);
12048     // + init (ignore upper bits)
12049     __ addl($limit$$Register, $init$$Register);
12050   %}
12051   ins_pipe( pipe_slow );
12052 %}
12053 
12054 // ============================================================================
12055 // Branch Instructions
12056 // Jump Table
12057 instruct jumpXtnd(rRegI switch_val) %{
12058   match(Jump switch_val);
12059   ins_cost(350);
12060   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12061   ins_encode %{
12062     // Jump to Address(table_base + switch_reg)
12063     Address index(noreg, $switch_val$$Register, Address::times_1);
12064     __ jump(ArrayAddress($constantaddress, index));
12065   %}
12066   ins_pipe(pipe_jmp);
12067 %}
12068 
12069 // Jump Direct - Label defines a relative address from JMP+1
12070 instruct jmpDir(label labl) %{
12071   match(Goto);
12072   effect(USE labl);
12073 
12074   ins_cost(300);
12075   format %{ "JMP    $labl" %}
12076   size(5);
12077   ins_encode %{
12078     Label* L = $labl$$label;
12079     __ jmp(*L, false); // Always long jump
12080   %}
12081   ins_pipe( pipe_jmp );
12082 %}
12083 
12084 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12085 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12086   match(If cop cr);
12087   effect(USE labl);
12088 
12089   ins_cost(300);
12090   format %{ "J$cop    $labl" %}
12091   size(6);
12092   ins_encode %{
12093     Label* L = $labl$$label;
12094     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12095   %}
12096   ins_pipe( pipe_jcc );
12097 %}
12098 
12099 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12100 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12101   match(CountedLoopEnd cop cr);
12102   effect(USE labl);
12103 
12104   ins_cost(300);
12105   format %{ "J$cop    $labl\t# Loop end" %}
12106   size(6);
12107   ins_encode %{
12108     Label* L = $labl$$label;
12109     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12110   %}
12111   ins_pipe( pipe_jcc );
12112 %}
12113 
12114 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12115 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12116   match(CountedLoopEnd cop cmp);
12117   effect(USE labl);
12118 
12119   ins_cost(300);
12120   format %{ "J$cop,u  $labl\t# Loop end" %}
12121   size(6);
12122   ins_encode %{
12123     Label* L = $labl$$label;
12124     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12125   %}
12126   ins_pipe( pipe_jcc );
12127 %}
12128 
12129 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12130   match(CountedLoopEnd cop cmp);
12131   effect(USE labl);
12132 
12133   ins_cost(200);
12134   format %{ "J$cop,u  $labl\t# Loop end" %}
12135   size(6);
12136   ins_encode %{
12137     Label* L = $labl$$label;
12138     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12139   %}
12140   ins_pipe( pipe_jcc );
12141 %}
12142 
12143 // Jump Direct Conditional - using unsigned comparison
12144 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12145   match(If cop cmp);
12146   effect(USE labl);
12147 
12148   ins_cost(300);
12149   format %{ "J$cop,u  $labl" %}
12150   size(6);
12151   ins_encode %{
12152     Label* L = $labl$$label;
12153     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12154   %}
12155   ins_pipe(pipe_jcc);
12156 %}
12157 
12158 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12159   match(If cop cmp);
12160   effect(USE labl);
12161 
12162   ins_cost(200);
12163   format %{ "J$cop,u  $labl" %}
12164   size(6);
12165   ins_encode %{
12166     Label* L = $labl$$label;
12167     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12168   %}
12169   ins_pipe(pipe_jcc);
12170 %}
12171 
12172 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12173   match(If cop cmp);
12174   effect(USE labl);
12175 
12176   ins_cost(200);
12177   format %{ $$template
12178     if ($cop$$cmpcode == Assembler::notEqual) {
12179       $$emit$$"JP,u   $labl\n\t"
12180       $$emit$$"J$cop,u   $labl"
12181     } else {
12182       $$emit$$"JP,u   done\n\t"
12183       $$emit$$"J$cop,u   $labl\n\t"
12184       $$emit$$"done:"
12185     }
12186   %}
12187   ins_encode %{
12188     Label* l = $labl$$label;
12189     if ($cop$$cmpcode == Assembler::notEqual) {
12190       __ jcc(Assembler::parity, *l, false);
12191       __ jcc(Assembler::notEqual, *l, false);
12192     } else if ($cop$$cmpcode == Assembler::equal) {
12193       Label done;
12194       __ jccb(Assembler::parity, done);
12195       __ jcc(Assembler::equal, *l, false);
12196       __ bind(done);
12197     } else {
12198        ShouldNotReachHere();
12199     }
12200   %}
12201   ins_pipe(pipe_jcc);
12202 %}
12203 
12204 // ============================================================================
12205 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12206 // array for an instance of the superklass.  Set a hidden internal cache on a
12207 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12208 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
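//
// Roughly equivalent logic (an illustrative sketch only, not the emitted code):
//
//   for (int i = 0; i < length(sub->secondary_supers); i++) {
//     if (sub->secondary_supers[i] == super) {
//       sub->secondary_super_cache = super;   // hit: update the cache ...
//       return 0;                             // ... and return zero
//     }
//   }
//   return not_zero;                          // miss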
12209 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12210   match(Set result (PartialSubtypeCheck sub super));
12211   effect( KILL rcx, KILL cr );
12212 
12213   ins_cost(1100);  // slightly larger than the next version
12214   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12215             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12216             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12217             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12218             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12219             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12221      "miss:\t" %}
12222 
12223   opcode(0x1); // Force a XOR of EDI
12224   ins_encode( enc_PartialSubtypeCheck() );
12225   ins_pipe( pipe_slow );
12226 %}
12227 
12228 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12229   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12230   effect( KILL rcx, KILL result );
12231 
12232   ins_cost(1000);
12233   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12234             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12235             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12236             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12237             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12238             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12239      "miss:\t" %}
12240 
12241   opcode(0x0);  // No need to XOR EDI
12242   ins_encode( enc_PartialSubtypeCheck() );
12243   ins_pipe( pipe_slow );
12244 %}
12245 
12246 // ============================================================================
12247 // Branch Instructions -- short offset versions
12248 //
12249 // These instructions are used to replace jumps of a long offset (the default
12250 // match) with jumps of a shorter offset.  These instructions are all tagged
12251 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12252 // match rules in general matching.  Instead, the ADLC generates a conversion
12253 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether a
// branch can use the short form via the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
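//
// Note that the short forms below encode an 8-bit displacement and declare
// size(2), versus size(5) for the long JMP and size(6) for the long Jcc
// forms above.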
12257 
12258 // Jump Direct - Label defines a relative address from JMP+1
12259 instruct jmpDir_short(label labl) %{
12260   match(Goto);
12261   effect(USE labl);
12262 
12263   ins_cost(300);
12264   format %{ "JMP,s  $labl" %}
12265   size(2);
12266   ins_encode %{
12267     Label* L = $labl$$label;
12268     __ jmpb(*L);
12269   %}
12270   ins_pipe( pipe_jmp );
12271   ins_short_branch(1);
12272 %}
12273 
12274 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12275 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12276   match(If cop cr);
12277   effect(USE labl);
12278 
12279   ins_cost(300);
12280   format %{ "J$cop,s  $labl" %}
12281   size(2);
12282   ins_encode %{
12283     Label* L = $labl$$label;
12284     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12285   %}
12286   ins_pipe( pipe_jcc );
12287   ins_short_branch(1);
12288 %}
12289 
12290 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12291 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12292   match(CountedLoopEnd cop cr);
12293   effect(USE labl);
12294 
12295   ins_cost(300);
12296   format %{ "J$cop,s  $labl\t# Loop end" %}
12297   size(2);
12298   ins_encode %{
12299     Label* L = $labl$$label;
12300     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12301   %}
12302   ins_pipe( pipe_jcc );
12303   ins_short_branch(1);
12304 %}
12305 
12306 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12307 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12308   match(CountedLoopEnd cop cmp);
12309   effect(USE labl);
12310 
12311   ins_cost(300);
12312   format %{ "J$cop,us $labl\t# Loop end" %}
12313   size(2);
12314   ins_encode %{
12315     Label* L = $labl$$label;
12316     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12317   %}
12318   ins_pipe( pipe_jcc );
12319   ins_short_branch(1);
12320 %}
12321 
12322 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12323   match(CountedLoopEnd cop cmp);
12324   effect(USE labl);
12325 
12326   ins_cost(300);
12327   format %{ "J$cop,us $labl\t# Loop end" %}
12328   size(2);
12329   ins_encode %{
12330     Label* L = $labl$$label;
12331     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12332   %}
12333   ins_pipe( pipe_jcc );
12334   ins_short_branch(1);
12335 %}
12336 
12337 // Jump Direct Conditional - using unsigned comparison
12338 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12339   match(If cop cmp);
12340   effect(USE labl);
12341 
12342   ins_cost(300);
12343   format %{ "J$cop,us $labl" %}
12344   size(2);
12345   ins_encode %{
12346     Label* L = $labl$$label;
12347     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12348   %}
12349   ins_pipe( pipe_jcc );
12350   ins_short_branch(1);
12351 %}
12352 
12353 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12354   match(If cop cmp);
12355   effect(USE labl);
12356 
12357   ins_cost(300);
12358   format %{ "J$cop,us $labl" %}
12359   size(2);
12360   ins_encode %{
12361     Label* L = $labl$$label;
12362     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12363   %}
12364   ins_pipe( pipe_jcc );
12365   ins_short_branch(1);
12366 %}
12367 
12368 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12369   match(If cop cmp);
12370   effect(USE labl);
12371 
12372   ins_cost(300);
12373   format %{ $$template
12374     if ($cop$$cmpcode == Assembler::notEqual) {
12375       $$emit$$"JP,u,s   $labl\n\t"
12376       $$emit$$"J$cop,u,s   $labl"
12377     } else {
12378       $$emit$$"JP,u,s   done\n\t"
12379       $$emit$$"J$cop,u,s  $labl\n\t"
12380       $$emit$$"done:"
12381     }
12382   %}
12383   size(4);
12384   ins_encode %{
12385     Label* l = $labl$$label;
12386     if ($cop$$cmpcode == Assembler::notEqual) {
12387       __ jccb(Assembler::parity, *l);
12388       __ jccb(Assembler::notEqual, *l);
12389     } else if ($cop$$cmpcode == Assembler::equal) {
12390       Label done;
12391       __ jccb(Assembler::parity, done);
12392       __ jccb(Assembler::equal, *l);
12393       __ bind(done);
12394     } else {
12395        ShouldNotReachHere();
12396     }
12397   %}
12398   ins_pipe(pipe_jcc);
12399   ins_short_branch(1);
12400 %}
12401 
12402 // ============================================================================
12403 // Long Compare
12404 //
12405 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12406 // is tricky.  The flavor of compare used depends on whether we are testing
12407 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12408 // The GE test is the negated LT test.  The LE test can be had by commuting
12409 // the operands (yielding a GE test) and then negating; negate again for the
12410 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12411 // NE test is negated from that.
12412 
12413 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12414 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12415 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12416 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12417 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12418 // foo match ends up with the wrong leaf.  One fix is to not match both
12419 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12420 // both forms beat the trinary form of long-compare and both are very useful
// on Intel, which has so few registers.
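//
// Illustrative sketch of the tests described above (not generated code):
//   LT(x, 0):  sign bit of x.hi                      (cmpL_zero_flags_LTGE)
//   EQ(x, 0):  (x.lo | x.hi) == 0
//   LT(x, y):  CMP x.lo,y.lo then SBB of the high halves; the resulting
//              sign/overflow flags answer LT or GE   (cmpL_reg_flags_LTGE)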
12422 
12423 // Manifest a CmpL result in an integer register.  Very painful.
12424 // This is the test to avoid.
12425 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12426   match(Set dst (CmpL3 src1 src2));
12427   effect( KILL flags );
12428   ins_cost(1000);
12429   format %{ "XOR    $dst,$dst\n\t"
12430             "CMP    $src1.hi,$src2.hi\n\t"
12431             "JLT,s  m_one\n\t"
12432             "JGT,s  p_one\n\t"
12433             "CMP    $src1.lo,$src2.lo\n\t"
12434             "JB,s   m_one\n\t"
12435             "JEQ,s  done\n"
12436     "p_one:\tINC    $dst\n\t"
12437             "JMP,s  done\n"
12438     "m_one:\tDEC    $dst\n"
12439      "done:" %}
12440   ins_encode %{
12441     Label p_one, m_one, done;
12442     __ xorptr($dst$$Register, $dst$$Register);
12443     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12444     __ jccb(Assembler::less,    m_one);
12445     __ jccb(Assembler::greater, p_one);
12446     __ cmpl($src1$$Register, $src2$$Register);
12447     __ jccb(Assembler::below,   m_one);
12448     __ jccb(Assembler::equal,   done);
12449     __ bind(p_one);
12450     __ incrementl($dst$$Register);
12451     __ jmpb(done);
12452     __ bind(m_one);
12453     __ decrementl($dst$$Register);
12454     __ bind(done);
12455   %}
12456   ins_pipe( pipe_slow );
12457 %}
12458 
12459 //======
12460 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12461 // compares.  Can be used for LE or GT compares by reversing arguments.
12462 // NOT GOOD FOR EQ/NE tests.
12463 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12464   match( Set flags (CmpL src zero ));
12465   ins_cost(100);
12466   format %{ "TEST   $src.hi,$src.hi" %}
12467   opcode(0x85);
12468   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12469   ins_pipe( ialu_cr_reg_reg );
12470 %}
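// (For a compare against zero, LT/GE depend only on the sign of the value, and
//  the sign of a two-register long lives in the high half, so a single TEST of
//  $src.hi is sufficient here.)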
12471 
12472 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12473 // compares.  Can be used for LE or GT compares by reversing arguments.
12474 // NOT GOOD FOR EQ/NE tests.
12475 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12476   match( Set flags (CmpL src1 src2 ));
12477   effect( TEMP tmp );
12478   ins_cost(300);
12479   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12480             "MOV    $tmp,$src1.hi\n\t"
12481             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12482   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12483   ins_pipe( ialu_cr_reg_reg );
12484 %}
12485 
12486 // Long compares reg < zero/reg OR reg >= zero/reg.
12487 // Just a wrapper for a normal branch, plus the predicate test.
12488 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12489   match(If cmp flags);
12490   effect(USE labl);
12491   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12492   expand %{
12493     jmpCon(cmp,flags,labl);    // JLT or JGE...
12494   %}
12495 %}
12496 
12497 // Compare 2 longs and CMOVE longs.
12498 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12499   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12500   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12501   ins_cost(400);
12502   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12503             "CMOV$cmp $dst.hi,$src.hi" %}
12504   opcode(0x0F,0x40);
12505   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12506   ins_pipe( pipe_cmov_reg_long );
12507 %}
12508 
12509 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12510   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12511   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12512   ins_cost(500);
12513   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12514             "CMOV$cmp $dst.hi,$src.hi" %}
12515   opcode(0x0F,0x40);
12516   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12517   ins_pipe( pipe_cmov_reg_long );
12518 %}
12519 
12520 // Compare 2 longs and CMOVE ints.
12521 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12522   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12523   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12524   ins_cost(200);
12525   format %{ "CMOV$cmp $dst,$src" %}
12526   opcode(0x0F,0x40);
12527   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12528   ins_pipe( pipe_cmov_reg );
12529 %}
12530 
12531 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12532   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12533   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12534   ins_cost(250);
12535   format %{ "CMOV$cmp $dst,$src" %}
12536   opcode(0x0F,0x40);
12537   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12538   ins_pipe( pipe_cmov_mem );
12539 %}
12540 
12541 // Compare 2 longs and CMOVE ptrs.
12542 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12543   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12544   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12545   ins_cost(200);
12546   format %{ "CMOV$cmp $dst,$src" %}
12547   opcode(0x0F,0x40);
12548   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12549   ins_pipe( pipe_cmov_reg );
12550 %}
12551 
12552 // Compare 2 longs and CMOVE doubles
12553 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12554   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12555   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12556   ins_cost(200);
12557   expand %{
12558     fcmovDPR_regS(cmp,flags,dst,src);
12559   %}
12560 %}
12561 
12562 // Compare 2 longs and CMOVE doubles
12563 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12564   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12565   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12566   ins_cost(200);
12567   expand %{
12568     fcmovD_regS(cmp,flags,dst,src);
12569   %}
12570 %}
12571 
12572 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12573   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12574   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12575   ins_cost(200);
12576   expand %{
12577     fcmovFPR_regS(cmp,flags,dst,src);
12578   %}
12579 %}
12580 
12581 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12582   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12583   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12584   ins_cost(200);
12585   expand %{
12586     fcmovF_regS(cmp,flags,dst,src);
12587   %}
12588 %}
12589 
12590 //======
12591 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12592 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12593   match( Set flags (CmpL src zero ));
12594   effect(TEMP tmp);
12595   ins_cost(200);
12596   format %{ "MOV    $tmp,$src.lo\n\t"
12597             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12598   ins_encode( long_cmp_flags0( src, tmp ) );
12599   ins_pipe( ialu_reg_reg_long );
12600 %}
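// (The MOV/OR above leaves ZF set exactly when both halves, and therefore the
//  whole long, are zero; the remaining flags carry no useful ordering
//  information, which is why this flags register is only good for EQ/NE.)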
12601 
12602 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12603 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12604   match( Set flags (CmpL src1 src2 ));
12605   ins_cost(200+300);
12606   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12607             "JNE,s  skip\n\t"
12608             "CMP    $src1.hi,$src2.hi\n\t"
12609      "skip:\t" %}
12610   ins_encode( long_cmp_flags1( src1, src2 ) );
12611   ins_pipe( ialu_cr_reg_reg );
12612 %}
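// (Here ZF ends up set only if both halves compare equal: when the low halves
//  differ we skip with ZF clear, otherwise ZF comes from the high-half CMP.
//  The other flags describe whichever CMP ran last, so no ordered test can be
//  built on them.)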
12613 
12614 // Long compare reg == zero/reg OR reg != zero/reg
12615 // Just a wrapper for a normal branch, plus the predicate test.
12616 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12617   match(If cmp flags);
12618   effect(USE labl);
12619   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12620   expand %{
12621     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12622   %}
12623 %}
12624 
12625 // Compare 2 longs and CMOVE longs.
12626 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12627   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12628   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12629   ins_cost(400);
12630   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12631             "CMOV$cmp $dst.hi,$src.hi" %}
12632   opcode(0x0F,0x40);
12633   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12634   ins_pipe( pipe_cmov_reg_long );
12635 %}
12636 
12637 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12638   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12639   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12640   ins_cost(500);
12641   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12642             "CMOV$cmp $dst.hi,$src.hi" %}
12643   opcode(0x0F,0x40);
12644   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12645   ins_pipe( pipe_cmov_reg_long );
12646 %}
12647 
12648 // Compare 2 longs and CMOVE ints.
12649 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12650   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12651   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12652   ins_cost(200);
12653   format %{ "CMOV$cmp $dst,$src" %}
12654   opcode(0x0F,0x40);
12655   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12656   ins_pipe( pipe_cmov_reg );
12657 %}
12658 
12659 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12660   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12661   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12662   ins_cost(250);
12663   format %{ "CMOV$cmp $dst,$src" %}
12664   opcode(0x0F,0x40);
12665   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12666   ins_pipe( pipe_cmov_mem );
12667 %}
12668 
12669 // Compare 2 longs and CMOVE ptrs.
12670 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12671   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12672   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12673   ins_cost(200);
12674   format %{ "CMOV$cmp $dst,$src" %}
12675   opcode(0x0F,0x40);
12676   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12677   ins_pipe( pipe_cmov_reg );
12678 %}
12679 
12680 // Compare 2 longs and CMOVE doubles
12681 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12682   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12683   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12684   ins_cost(200);
12685   expand %{
12686     fcmovDPR_regS(cmp,flags,dst,src);
12687   %}
12688 %}
12689 
12690 // Compare 2 longs and CMOVE doubles
12691 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12692   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12693   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12694   ins_cost(200);
12695   expand %{
12696     fcmovD_regS(cmp,flags,dst,src);
12697   %}
12698 %}
12699 
12700 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12701   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12702   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12703   ins_cost(200);
12704   expand %{
12705     fcmovFPR_regS(cmp,flags,dst,src);
12706   %}
12707 %}
12708 
12709 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12710   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12711   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12712   ins_cost(200);
12713   expand %{
12714     fcmovF_regS(cmp,flags,dst,src);
12715   %}
12716 %}
12717 
12718 //======
12719 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12720 // Same as cmpL_reg_flags_LEGT below, except the swapped subtraction is 0 - src, i.e. we must negate src.
12721 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12722   match( Set flags (CmpL src zero ));
12723   effect( TEMP tmp );
12724   ins_cost(300);
12725   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12726             "CMP    $tmp,$src.lo\n\t"
12727             "SBB    $tmp,$src.hi\n\t" %}
12728   ins_encode( long_cmp_flags3(src, tmp) );
12729   ins_pipe( ialu_reg_reg_long );
12730 %}
12731 
12732 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12733 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12734 // requires a commuted test to get the same result.
12735 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12736   match( Set flags (CmpL src1 src2 ));
12737   effect( TEMP tmp );
12738   ins_cost(300);
12739   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12740             "MOV    $tmp,$src2.hi\n\t"
12741             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12742   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12743   ins_pipe( ialu_cr_reg_reg );
12744 %}
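// (With the operands swapped, the flags above describe src2 - src1, so the
//  original LE and GT tests become GE and LT tests on these flags:
//  x <= y iff y >= x, and x > y iff y < x.  Hence the cmpOp_commute operand
//  used by the LEGT branch and cmove rules below.)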
12745 
12746 // Long compares reg <= zero/reg OR reg > zero/reg.
12747 // Just a wrapper for a normal branch, plus the predicate test
12748 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12749   match(If cmp flags);
12750   effect(USE labl);
12751   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12752   ins_cost(300);
12753   expand %{
12754     jmpCon(cmp,flags,labl);    // JGT or JLE...
12755   %}
12756 %}
12757 
12758 // Compare 2 longs and CMOVE longs.
12759 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12760   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12761   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12762   ins_cost(400);
12763   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12764             "CMOV$cmp $dst.hi,$src.hi" %}
12765   opcode(0x0F,0x40);
12766   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12767   ins_pipe( pipe_cmov_reg_long );
12768 %}
12769 
12770 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12771   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12772   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12773   ins_cost(500);
12774   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12775             "CMOV$cmp $dst.hi,$src.hi+4" %}
12776   opcode(0x0F,0x40);
12777   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12778   ins_pipe( pipe_cmov_reg_long );
12779 %}
12780 
12781 // Compare 2 longs and CMOVE ints.
12782 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12783   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12784   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12785   ins_cost(200);
12786   format %{ "CMOV$cmp $dst,$src" %}
12787   opcode(0x0F,0x40);
12788   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12789   ins_pipe( pipe_cmov_reg );
12790 %}
12791 
12792 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12793   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12794   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12795   ins_cost(250);
12796   format %{ "CMOV$cmp $dst,$src" %}
12797   opcode(0x0F,0x40);
12798   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12799   ins_pipe( pipe_cmov_mem );
12800 %}
12801 
12802 // Compare 2 longs and CMOVE ptrs.
12803 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12804   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12805   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12806   ins_cost(200);
12807   format %{ "CMOV$cmp $dst,$src" %}
12808   opcode(0x0F,0x40);
12809   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12810   ins_pipe( pipe_cmov_reg );
12811 %}
12812 
12813 // Compare 2 longs and CMOVE doubles
12814 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12815   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12816   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12817   ins_cost(200);
12818   expand %{
12819     fcmovDPR_regS(cmp,flags,dst,src);
12820   %}
12821 %}
12822 
12823 // Compare 2 longs and CMOVE doubles
12824 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12825   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12826   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12827   ins_cost(200);
12828   expand %{
12829     fcmovD_regS(cmp,flags,dst,src);
12830   %}
12831 %}
12832 
12833 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12834   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12835   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12836   ins_cost(200);
12837   expand %{
12838     fcmovFPR_regS(cmp,flags,dst,src);
12839   %}
12840 %}
12841 
12842 
12843 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12844   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12845   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12846   ins_cost(200);
12847   expand %{
12848     fcmovF_regS(cmp,flags,dst,src);
12849   %}
12850 %}
12851 
12852 
12853 // ============================================================================
12854 // Procedure Call/Return Instructions
12855 // Call Java Static Instruction
12856 // Note: If this code changes, the corresponding ret_addr_offset() and
12857 //       compute_padding() functions will have to be adjusted.
12858 instruct CallStaticJavaDirect(method meth) %{
12859   match(CallStaticJava);
12860   effect(USE meth);
12861 
12862   ins_cost(300);
12863   format %{ "CALL,static " %}
12864   opcode(0xE8); /* E8 cd */
12865   ins_encode( pre_call_resets,
12866               Java_Static_Call( meth ),
12867               call_epilog,
12868               post_call_FPU );
12869   ins_pipe( pipe_slow );
12870   ins_alignment(4);
12871 %}
12872 
12873 // Call Java Dynamic Instruction
12874 // Note: If this code changes, the corresponding ret_addr_offset() and
12875 //       compute_padding() functions will have to be adjusted.
12876 instruct CallDynamicJavaDirect(method meth) %{
12877   match(CallDynamicJava);
12878   effect(USE meth);
12879 
12880   ins_cost(300);
12881   format %{ "MOV    EAX,(oop)-1\n\t"
12882             "CALL,dynamic" %}
12883   opcode(0xE8); /* E8 cd */
12884   ins_encode( pre_call_resets,
12885               Java_Dynamic_Call( meth ),
12886               call_epilog,
12887               post_call_FPU );
12888   ins_pipe( pipe_slow );
12889   ins_alignment(4);
12890 %}
12891 
12892 // Call Runtime Instruction
12893 instruct CallRuntimeDirect(method meth) %{
12894   match(CallRuntime );
12895   effect(USE meth);
12896 
12897   ins_cost(300);
12898   format %{ "CALL,runtime " %}
12899   opcode(0xE8); /* E8 cd */
12900   // Use FFREEs to clear entries in float stack
12901   ins_encode( pre_call_resets,
12902               FFree_Float_Stack_All,
12903               Java_To_Runtime( meth ),
12904               post_call_FPU );
12905   ins_pipe( pipe_slow );
12906 %}
12907 
12908 // Call runtime without safepoint
12909 instruct CallLeafDirect(method meth) %{
12910   match(CallLeaf);
12911   effect(USE meth);
12912 
12913   ins_cost(300);
12914   format %{ "CALL_LEAF,runtime " %}
12915   opcode(0xE8); /* E8 cd */
12916   ins_encode( pre_call_resets,
12917               FFree_Float_Stack_All,
12918               Java_To_Runtime( meth ),
12919               Verify_FPU_For_Leaf, post_call_FPU );
12920   ins_pipe( pipe_slow );
12921 %}
12922 
12923 instruct CallLeafNoFPDirect(method meth) %{
12924   match(CallLeafNoFP);
12925   effect(USE meth);
12926 
12927   ins_cost(300);
12928   format %{ "CALL_LEAF_NOFP,runtime " %}
12929   opcode(0xE8); /* E8 cd */
12930   ins_encode(Java_To_Runtime(meth));
12931   ins_pipe( pipe_slow );
12932 %}
12933 
12934 
12935 // Return Instruction
12936 // Remove the return address & jump to it.
12937 instruct Ret() %{
12938   match(Return);
12939   format %{ "RET" %}
12940   opcode(0xC3);
12941   ins_encode(OpcP);
12942   ins_pipe( pipe_jmp );
12943 %}
12944 
12945 // Tail Call; Jump from runtime stub to Java code.
12946 // Also known as an 'interprocedural jump'.
12947 // Target of jump will eventually return to caller.
12948 // TailJump below removes the return address.
12949 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12950   match(TailCall jump_target method_oop );
12951   ins_cost(300);
12952   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12953   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12954   ins_encode( OpcP, RegOpc(jump_target) );
12955   ins_pipe( pipe_jmp );
12956 %}
12957 
12958 
12959 // Tail Jump; remove the return address; jump to target.
12960 // TailCall above leaves the return address around.
12961 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12962   match( TailJump jump_target ex_oop );
12963   ins_cost(300);
12964   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12965             "JMP    $jump_target " %}
12966   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12967   ins_encode( enc_pop_rdx,
12968               OpcP, RegOpc(jump_target) );
12969   ins_pipe( pipe_jmp );
12970 %}
12971 
12972 // Create exception oop: created by stack-crawling runtime code.
12973 // Created exception is now available to this handler, and is set up
12974 // just prior to jumping to this handler.  No code emitted.
12975 instruct CreateException( eAXRegP ex_oop )
12976 %{
12977   match(Set ex_oop (CreateEx));
12978 
12979   size(0);
12980   // use the following format syntax
12981   format %{ "# exception oop is in EAX; no code emitted" %}
12982   ins_encode();
12983   ins_pipe( empty );
12984 %}
12985 
12986 
12987 // Rethrow exception:
12988 // The exception oop will come in the first argument position.
12989 // Then JUMP (not call) to the rethrow stub code.
12990 instruct RethrowException()
12991 %{
12992   match(Rethrow);
12993 
12994   // use the following format syntax
12995   format %{ "JMP    rethrow_stub" %}
12996   ins_encode(enc_rethrow);
12997   ins_pipe( pipe_jmp );
12998 %}
12999 
13000 // inlined locking and unlocking
13001 
13002 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13003   predicate(Compile::current()->use_rtm());
13004   match(Set cr (FastLock object box));
13005   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13006   ins_cost(300);
13007   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13008   ins_encode %{
13009     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13010                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13011                  _counters, _rtm_counters, _stack_rtm_counters,
13012                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13013                  true, ra_->C->profile_rtm());
13014   %}
13015   ins_pipe(pipe_slow);
13016 %}
13017 
13018 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13019   predicate(!Compile::current()->use_rtm());
13020   match(Set cr (FastLock object box));
13021   effect(TEMP tmp, TEMP scr, USE_KILL box);
13022   ins_cost(300);
13023   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13024   ins_encode %{
13025     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13026                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13027   %}
13028   ins_pipe(pipe_slow);
13029 %}
13030 
13031 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13032   match(Set cr (FastUnlock object box));
13033   effect(TEMP tmp, USE_KILL box);
13034   ins_cost(300);
13035   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13036   ins_encode %{
13037     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13038   %}
13039   ins_pipe(pipe_slow);
13040 %}
13041 
13042 
13043 
13044 // ============================================================================
13045 // Safepoint Instruction
13046 instruct safePoint_poll(eFlagsReg cr) %{
13047   match(SafePoint);
13048   effect(KILL cr);
13049 
13050   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13051   // On SPARC that might be acceptable as we can generate the address with
13052   // just a sethi, saving an or.  But polling at offset 0 puts additional
13053   // pressure on cache index 0 of the D$: because of alignment (as in this
13054   // very case), the lower cache indices tend to see more traffic.  It would
13055   // be better to move the polling address to offset 0 of the last cache
13056   // line in the polling page.
13057 
13058   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13059   ins_cost(125);
13060   size(6);
13061   ins_encode( Safepoint_Poll() );
13062   ins_pipe( ialu_reg_mem );
13063 %}
13064 
13065 
13066 // ============================================================================
13067 // This name is KNOWN by the ADLC and cannot be changed.
13068 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13069 // for this guy.
13070 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13071   match(Set dst (ThreadLocal));
13072   effect(DEF dst, KILL cr);
13073 
13074   format %{ "MOV    $dst, Thread::current()" %}
13075   ins_encode %{
13076     Register dstReg = as_Register($dst$$reg);
13077     __ get_thread(dstReg);
13078   %}
13079   ins_pipe( ialu_reg_fat );
13080 %}
13081 
13082 
13083 
13084 //----------PEEPHOLE RULES-----------------------------------------------------
13085 // These must follow all instruction definitions as they use the names
13086 // defined in the instruction definitions.
13087 //
13088 // peepmatch ( root_instr_name [preceding_instruction]* );
13089 //
13090 // peepconstraint %{
13091 // (instruction_number.operand_name relational_op instruction_number.operand_name
13092 //  [, ...] );
13093 // // instruction numbers are zero-based using left to right order in peepmatch
13094 //
13095 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13096 // // provide an instruction_number.operand_name for each operand that appears
13097 // // in the replacement instruction's match rule
13098 //
13099 // ---------VM FLAGS---------------------------------------------------------
13100 //
13101 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13102 //
13103 // Each peephole rule is given an identifying number starting with zero and
13104 // increasing by one in the order seen by the parser.  An individual peephole
13105 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13106 // on the command-line.
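//
// For example, -XX:-OptoPeephole turns every rule off, while
// -XX:OptoPeepholeAt=3 keeps only the rule numbered 3 (the numbering follows
// parse order, as described above).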
13107 //
13108 // ---------CURRENT LIMITATIONS----------------------------------------------
13109 //
13110 // Only match adjacent instructions in same basic block
13111 // Only equality constraints
13112 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13113 // Only one replacement instruction
13114 //
13115 // ---------EXAMPLE----------------------------------------------------------
13116 //
13117 // // pertinent parts of existing instructions in architecture description
13118 // instruct movI(rRegI dst, rRegI src) %{
13119 //   match(Set dst (CopyI src));
13120 // %}
13121 //
13122 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13123 //   match(Set dst (AddI dst src));
13124 //   effect(KILL cr);
13125 // %}
13126 //
13127 // // Change (inc mov) to lea
13128 // peephole %{
13129 //   // increment preceded by register-register move
13130 //   peepmatch ( incI_eReg movI );
13131 //   // require that the destination register of the increment
13132 //   // match the destination register of the move
13133 //   peepconstraint ( 0.dst == 1.dst );
13134 //   // construct a replacement instruction that sets
13135 //   // the destination to ( move's source register + one )
13136 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13137 // %}
13138 //
13139 // Implementation no longer uses movX instructions since the
13140 // machine-independent system no longer uses CopyX nodes.
13141 //
13142 // peephole %{
13143 //   peepmatch ( incI_eReg movI );
13144 //   peepconstraint ( 0.dst == 1.dst );
13145 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13146 // %}
13147 //
13148 // peephole %{
13149 //   peepmatch ( decI_eReg movI );
13150 //   peepconstraint ( 0.dst == 1.dst );
13151 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13152 // %}
13153 //
13154 // peephole %{
13155 //   peepmatch ( addI_eReg_imm movI );
13156 //   peepconstraint ( 0.dst == 1.dst );
13157 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13158 // %}
13159 //
13160 // peephole %{
13161 //   peepmatch ( addP_eReg_imm movP );
13162 //   peepconstraint ( 0.dst == 1.dst );
13163 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13164 // %}
13165 
13166 // // Change load of spilled value to only a spill
13167 // instruct storeI(memory mem, rRegI src) %{
13168 //   match(Set mem (StoreI mem src));
13169 // %}
13170 //
13171 // instruct loadI(rRegI dst, memory mem) %{
13172 //   match(Set dst (LoadI mem));
13173 // %}
13174 //
13175 peephole %{
13176   peepmatch ( loadI storeI );
13177   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13178   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13179 %}
13180 
13181 //----------SMARTSPILL RULES---------------------------------------------------
13182 // These must follow all instruction definitions as they use the names
13183 // defined in the instruction definitions.