1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously save-on-entry for Java code, but SOE was
  64 // turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Here's the trick: FPR1 is really st(0), except in the midst of emitting
  82 // assembly for a machnode. During emission the FPU stack is pushed, making
  83 // FPR1 == st(1) temporarily. However, at any safepoint the stack will not
  84 // have this extra element, so FPR1 == st(0) from the oopMap viewpoint.
  85 // This numbering quirk forces the instruction encoding to play games with
  86 // the register encode to correct for the 0/1 issue. See
  87 // MachSpillCopyNode::implementation, where it does flt->flt moves, for an
  88 // example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used but supply alignment for the xmm registers.
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
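// For example, the long pair EDX:EAX has low half EAX (encoding 0) and high half
// EDX (encoding 2); likewise EBX (3) is two above ECX (1), and EDI (7) two above
// EBP (5), so adding 2 to the low half's hardware encoding yields the high half
// for each of the pairs listed in the alloc_class chunk0 comment above.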
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for the 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
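// A sketch of how these masks are meant to be used (per the comment above):
// AbsF/AbsD AND the value with a signmask pool entry (clearing the IEEE-754 sign
// bit), while NegF/NegD XOR it with a signflip pool entry (flipping the sign bit).
// The 16-byte alignment produced by double_quadword lets the pools serve directly
// as 128-bit memory operands for ANDPS/ANDPD and XORPS/XORPD.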
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
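// (For reference: CALL rel32 is one opcode byte, 0xE8, plus a 4-byte displacement,
// hence the 5; the dynamic call is additionally preceded by a 5-byte MOV that
// preloads the inline-cache constant, hence the 10 -- the same MOV skipped in
// CallDynamicJavaDirectNode::compute_padding below.)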
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
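// Worked example (a sketch): suppose the pre-call resets, the MOV, and the call
// opcode byte bring current_offset to 30.  round_to(30, 4) - 30 = 2, so two bytes
// of padding are requested and the 4-byte call displacement then starts on a
// 4-byte boundary, which keeps it within a single cache line for patching.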
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
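// The ModR/M byte packs mod (2 bits), reg (3 bits) and r/m (3 bits).  For example,
// emit_rm(cbuf, 0x3, 0x1, 0x0) emits 11 001 000b = 0xC8; preceded by opcode 0x8B
// this is MOV ECX,EAX (register-direct mode, reg = ECX's encoding, r/m = EAX's).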
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
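// For example, FILD DWORD [ESP+8] (opcode 0xDB, rm_field 0) comes out as:
// 0xDB, ModR/M 0x44 (mod=01, reg=000, r/m=100 -> SIB follows), SIB 0x24
// (index=ESP meaning "no index", base=ESP), then the 8-bit displacement 0x08.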
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
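// Example: an absolute address (base == -1, no index) is encoded as mod=00,
// r/m=101 followed by a 32-bit displacement -- the plain [disp32] form on 32-bit
// x86.  MachEpilogNode::emit below emits the same form by hand for the safepoint
// polling TEST.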
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
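// Materialize a three-way floating-point compare result in dst:
// -1 if the compare was unordered or 'below', 0 if equal, +1 if above
// (flags as left by a comiss/ucomiss-style compare; see emit_cmpfp_fixup above).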
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because constant table users
 628     //       might be emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp,
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp,
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 725   // Remove two words for return addr and rbp,
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp,
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     if (reg_lo+1 == reg_hi) { // double move?
 799       if (is_load) {
 800         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     } else {
 805       if (is_load) {
 806         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 807       } else {
 808         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 809       }
 810     }
 811 #ifndef PRODUCT
 812   } else if (!do_size) {
 813     if (size != 0) st->print("\n\t");
 814     if (reg_lo+1 == reg_hi) { // double move?
 815       if (is_load) st->print("%s %s,[ESP + #%d]",
 816                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 817                               Matcher::regName[reg_lo], offset);
 818       else         st->print("MOVSD  [ESP + #%d],%s",
 819                               offset, Matcher::regName[reg_lo]);
 820     } else {
 821       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 822                               Matcher::regName[reg_lo], offset);
 823       else         st->print("MOVSS  [ESP + #%d],%s",
 824                               offset, Matcher::regName[reg_lo]);
 825     }
 826 #endif
 827   }
 828   bool is_single_byte = false;
 829   if ((UseAVX > 2) && (offset != 0)) {
 830     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 831   }
 832   int offset_size = 0;
 833   if (UseAVX > 2 ) {
 834     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 835   } else {
 836     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 837   }
 838   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 839   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 840   return size+5+offset_size;
 841 }
 842 
 843 
 844 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 845                             int src_hi, int dst_hi, int size, outputStream* st ) {
 846   if (cbuf) {
 847     MacroAssembler _masm(cbuf);
 848     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 849       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 851     } else {
 852       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 854     }
 855 #ifndef PRODUCT
 856   } else if (!do_size) {
 857     if (size != 0) st->print("\n\t");
 858     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 859       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 860         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     } else {
 865       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 866         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 867       } else {
 868         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 869       }
 870     }
 871 #endif
 872   }
 873   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 874   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 875   int sz = (UseAVX > 2) ? 6 : 4;
 876   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 877       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 878   return size + sz;
 879 }
 880 
 881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 882                             int src_hi, int dst_hi, int size, outputStream* st ) {
 883   // 32-bit
 884   if (cbuf) {
 885     MacroAssembler _masm(cbuf);
 886     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 887              as_Register(Matcher::_regEncode[src_lo]));
 888 #ifndef PRODUCT
 889   } else if (!do_size) {
 890     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 891 #endif
 892   }
 893   return (UseAVX > 2) ? 6 : 4;
 894 }
 895 
 896 
 897 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 898                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 899   // 32-bit
 900   if (cbuf) {
 901     MacroAssembler _masm(cbuf);
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX > 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
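  // (The "register" passed as st_op matters only for its hardware encoding, which
  // impl_helper places in the ModR/M reg field as the /r opcode extension:
  // EBX encodes as 3 (FSTP, store and pop) and EDX as 2 (FST, store without pop),
  // matching the 0xD9/0xDD opcodes chosen below.)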
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }   
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1016       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popl    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
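     // The single implementation() routine above serves three clients: format() calls it with
     // no CodeBuffer just to pretty-print, emit() writes the actual bytes, and size() passes
     // do_size so the routine merely counts the bytes it would emit.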
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
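       // LEA reg,[ESP+offset] needs a SIB byte: 7 bytes with a 32-bit displacement,
       // 4 bytes with an 8-bit one (see BoxLockNode::size() below).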
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING: these NOPs are critical so that the verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
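       // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 is 6 bytes, plus the 2 or 3 NOPs emitted above.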
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
1393 // Wording is a holdover from the UltraSPARC port; true just means we have a fast l2f conversion.
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to the address of the branch.
1404   // On x86 a branch displacement is calculated relative to the address
1405   // of the next instruction.
1406   offset -= br_size;
1407 
1408   // The short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly shorter.
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
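     // Example: a 2-byte short jump back to its own address is passed offset 0, which becomes
     // a displacement of -2 after subtracting br_size and is comfortably within rel8 range.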
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
1416   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Needs 2 CMOV's for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Should the Matcher clone shifts on addressing modes, expecting them to
1433 // be subsumed into complex addressing expressions or compute them into
1434 // registers?  True for Intel but false for most RISCs
1435 const bool Matcher::clone_shift_expressions = true;
1436 
1437 // Do we need to mask the count passed to shift instructions or does
1438 // the cpu only look at the lower 5/6 bits anyway?
1439 const bool Matcher::need_masked_shift_count = false;
1440 
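     // Compressed oops/klass pointers exist only on 64-bit VMs, so these two hooks should
     // never be reached on x86_32 (hence the ShouldNotCallThis below).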
1441 bool Matcher::narrow_oop_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::narrow_klass_use_complex_address() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 
1452 // Is it better to copy float constants, or load them directly from memory?
1453 // Intel can load a float constant from a direct address, requiring no
1454 // extra registers.  Most RISCs will have to materialize an address into a
1455 // register first, so they would do better to copy the constant from stack.
1456 const bool Matcher::rematerialize_float_constants = true;
1457 
1458 // If CPU can load and store mis-aligned doubles directly then no fixup is
1459 // needed.  Else we split the double into 2 integer pieces and move it
1460 // piece-by-piece.  Only happens when passing doubles into C code as the
1461 // Java calling convention forces doubles to be aligned.
1462 const bool Matcher::misaligned_doubles_ok = true;
1463 
1464 
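     // For an implicit null check, swap the instruction's memory operand for its *_win95_safe
     // variant; operand kinds that are already safe (the DIRECT and LOAD_LONG cases below) are
     // left untouched.  The loop walks _opnds, skipping leaves, until it finds the operand
     // that owns input edge 'idx'.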
1465 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1466   // Get the memory operand from the node
1467   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1468   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1469   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1470   uint opcnt     = 1;                 // First operand
1471   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1472   while( idx >= skipped+num_edges ) {
1473     skipped += num_edges;
1474     opcnt++;                          // Bump operand count
1475     assert( opcnt < numopnds, "Accessing non-existent operand" );
1476     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1477   }
1478 
1479   MachOper *memory = node->_opnds[opcnt];
1480   MachOper *new_memory = NULL;
1481   switch (memory->opcode()) {
1482   case DIRECT:
1483   case INDOFFSET32X:
1484     // No transformation necessary.
1485     return;
1486   case INDIRECT:
1487     new_memory = new indirect_win95_safeOper( );
1488     break;
1489   case INDOFFSET8:
1490     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1491     break;
1492   case INDOFFSET32:
1493     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1494     break;
1495   case INDINDEXOFFSET:
1496     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1497     break;
1498   case INDINDEXSCALE:
1499     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1500     break;
1501   case INDINDEXSCALEOFFSET:
1502     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1503     break;
1504   case LOAD_LONG_INDIRECT:
1505   case LOAD_LONG_INDOFFSET32:
1506     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1507     return;
1508   default:
1509     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1510     return;
1511   }
1512   node->_opnds[opcnt] = new_memory;
1513 }
1514 
1515 // Advertise here if the CPU requires explicit rounding operations
1516 // to implement the UseStrictFP mode.
1517 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1518 
1519 // Are floats converted to doubles when stored to the stack during deoptimization?
1520 // On x86_32 they are stored with conversion only when the FPU is used for floats.
1521 bool Matcher::float_in_double() { return (UseSSE == 0); }
1522 
1523 // Do ints take an entire long register or just half?
1524 const bool Matcher::int_in_long = false;
1525 
1526 // Return whether or not this register is ever used as an argument.  This
1527 // function is used on startup to build the trampoline stubs in generateOptoStub.
1528 // Registers not mentioned will be killed by the VM call in the trampoline, and
1529 // arguments in those registers will not be available to the callee.
1530 bool Matcher::can_be_java_arg( int reg ) {
1531   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1532   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1533   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1534   return false;
1535 }
1536 
1537 bool Matcher::is_spillable_arg( int reg ) {
1538   return can_be_java_arg(reg);
1539 }
1540 
1541 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1542   // Use the hardware integer DIV instruction when
1543   // it is faster than code which uses a multiply.
1544   // Only when the constant divisor fits into 32 bits
1545   // (min_jint is excluded because it has no correct
1546   // positive 32-bit counterpart when negated).
1547   return VM_Version::has_fast_idiv() &&
1548          (divisor == (int)divisor && divisor != min_jint);
1549 }
1550 
1551 // Register for DIVI projection of divmodI
1552 RegMask Matcher::divI_proj_mask() {
1553   return EAX_REG_mask();
1554 }
1555 
1556 // Register for MODI projection of divmodI
1557 RegMask Matcher::modI_proj_mask() {
1558   return EDX_REG_mask();
1559 }
1560 
1561 // Register for DIVL projection of divmodL
1562 RegMask Matcher::divL_proj_mask() {
1563   ShouldNotReachHere();
1564   return RegMask();
1565 }
1566 
1567 // Register for MODL projection of divmodL
1568 RegMask Matcher::modL_proj_mask() {
1569   ShouldNotReachHere();
1570   return RegMask();
1571 }
1572 
1573 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1574   return NO_REG_mask();
1575 }
1576 
1577 // Returns true if the high 32 bits of the value is known to be zero.
1578 bool is_operand_hi32_zero(Node* n) {
1579   int opc = n->Opcode();
1580   if (opc == Op_AndL) {
1581     Node* o2 = n->in(2);
1582     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1583       return true;
1584     }
1585   }
1586   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587     return true;
1588   }
1589   return false;
1590 }
1591 
1592 %}
1593 
1594 //----------ENCODING BLOCK-----------------------------------------------------
1595 // This block specifies the encoding classes used by the compiler to output
1596 // byte streams.  Encoding classes generate functions which are called by
1597 // Machine Instruction Nodes in order to generate the bit encoding of the
1598 // instruction.  Operands specify their base encoding interface with the
1599 // interface keyword.  Four interfaces are currently supported:
1600 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1601 // operand to generate a function which returns its register number when
1602 // queried.   CONST_INTER causes an operand to generate a function which
1603 // returns the value of the constant when queried.  MEMORY_INTER causes an
1604 // operand to generate four functions which return the Base Register, the
1605 // Index Register, the Scale Value, and the Offset Value of the operand when
1606 // queried.  COND_INTER causes an operand to generate six functions which
1607 // return the encoding code (i.e. the encoding bits for the instruction)
1608 // associated with each basic boolean condition for a conditional instruction.
1609 // Instructions specify two basic values for encoding.  They use the
1610 // ins_encode keyword to specify their encoding class (which must be one of
1611 // the class names specified in the encoding block), and they use the
1612 // opcode keyword to specify, in order, their primary, secondary, and
1613 // tertiary opcode.  Only the opcode sections which a particular instruction
1614 // needs for encoding need to be specified.
1615 encode %{
1616   // Build emit functions for each basic byte or larger field in the intel
1617   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1618   // code in the enc_class source block.  Emit functions will live in the
1619   // main source block for now.  In future, we can generalize this by
1620   // adding a syntax that specifies the sizes of fields in an order,
1621   // so that the adlc can build the emit functions automagically
1622 
1623   // Emit primary opcode
1624   enc_class OpcP %{
1625     emit_opcode(cbuf, $primary);
1626   %}
1627 
1628   // Emit secondary opcode
1629   enc_class OpcS %{
1630     emit_opcode(cbuf, $secondary);
1631   %}
1632 
1633   // Emit opcode directly
1634   enc_class Opcode(immI d8) %{
1635     emit_opcode(cbuf, $d8$$constant);
1636   %}
1637 
1638   enc_class SizePrefix %{
1639     emit_opcode(cbuf,0x66);
1640   %}
1641 
1642   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1643     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1644   %}
1645 
1646   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1647     emit_opcode(cbuf,$opcode$$constant);
1648     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1649   %}
1650 
1651   enc_class mov_r32_imm0( rRegI dst ) %{
1652     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1653     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1654   %}
1655 
1656   enc_class cdq_enc %{
1657     // Full implementation of Java idiv and irem; checks for
1658     // special case as described in JVM spec., p.243 & p.271.
1659     //
1660     //         normal case                           special case
1661     //
1662     // input : rax: dividend                          min_int
1663     //         reg: divisor                           -1
1664     //
1665     // output: rax: quotient  (= rax idiv reg)         min_int
1666     //         rdx: remainder (= rax irem reg)         0
1667     //
1668     //  Code sequence:
1669     //
1670     //  81 F8 00 00 00 80    cmp         rax,80000000h
1671     //  0F 85 0B 00 00 00    jne         normal_case
1672     //  33 D2                xor         rdx,edx
1673     //  83 F9 FF             cmp         rcx,-1
1674     //  0F 84 03 00 00 00    je          done
1675     //                  normal_case:
1676     //  99                   cdq
1677     //  F7 F9                idiv        rax,ecx
1678     //                  done:
1679     //
1680     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1681     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1682     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1683     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1684     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1686     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1687     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1
1688     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1689     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1690     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1691     // normal_case:
1692     emit_opcode(cbuf,0x99);                                         // cdq
1693     // idiv (note: must be emitted by the user of this rule)
1694     // normal:
1695   %}
1696 
1697   // Dense encoding for older common ops
1698   enc_class Opc_plus(immI opcode, rRegI reg) %{
1699     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1700   %}
1701 
1702 
1703   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1704   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1705     // Check for 8-bit immediate, and set sign extend bit in opcode
1706     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1707       emit_opcode(cbuf, $primary | 0x02);
1708     }
1709     else {                          // If 32-bit immediate
1710       emit_opcode(cbuf, $primary);
1711     }
1712   %}
1713 
1714   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1715     // Emit primary opcode and set sign-extend bit
1716     // Check for 8-bit immediate, and set sign extend bit in opcode
1717     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1718       emit_opcode(cbuf, $primary | 0x02);    }
1719     else {                          // If 32-bit immediate
1720       emit_opcode(cbuf, $primary);
1721     }
1722     // Emit r/m byte with secondary opcode, after primary opcode.
1723     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1724   %}
1725 
1726   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1727     // Check for 8-bit immediate, and set sign extend bit in opcode
1728     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1729       $$$emit8$imm$$constant;
1730     }
1731     else {                          // If 32-bit immediate
1732       // Output immediate
1733       $$$emit32$imm$$constant;
1734     }
1735   %}
1736 
1737   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1738     // Emit primary opcode and set sign-extend bit
1739     // Check for 8-bit immediate, and set sign extend bit in opcode
1740     int con = (int)$imm$$constant; // Throw away top bits
1741     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1742     // Emit r/m byte with secondary opcode, after primary opcode.
1743     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1744     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1745     else                               emit_d32(cbuf,con);
1746   %}
1747 
1748   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1749     // Emit primary opcode and set sign-extend bit
1750     // Check for 8-bit immediate, and set sign extend bit in opcode
1751     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1752     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1753     // Emit r/m byte with tertiary opcode, after primary opcode.
1754     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1755     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1756     else                               emit_d32(cbuf,con);
1757   %}
1758 
1759   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1760     emit_cc(cbuf, $secondary, $dst$$reg );
1761   %}
1762 
1763   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1764     int destlo = $dst$$reg;
1765     int desthi = HIGH_FROM_LOW(destlo);
1766     // bswap lo
1767     emit_opcode(cbuf, 0x0F);
1768     emit_cc(cbuf, 0xC8, destlo);
1769     // bswap hi
1770     emit_opcode(cbuf, 0x0F);
1771     emit_cc(cbuf, 0xC8, desthi);
1772     // xchg lo and hi
1773     emit_opcode(cbuf, 0x87);
1774     emit_rm(cbuf, 0x3, destlo, desthi);
1775   %}
1776 
1777   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1778     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1779   %}
1780 
1781   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782     $$$emit8$primary;
1783     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784   %}
1785 
1786   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1787     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788     emit_d8(cbuf, op >> 8 );
1789     emit_d8(cbuf, op & 255);
1790   %}
1791 
1792   // emulate a CMOV with a conditional branch around a MOV
1793   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794     // Invert sense of branch from sense of CMOV
1795     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796     emit_d8( cbuf, $brOffs$$constant );
1797   %}
1798 
1799   enc_class enc_PartialSubtypeCheck( ) %{
1800     Register Redi = as_Register(EDI_enc); // result register
1801     Register Reax = as_Register(EAX_enc); // super class
1802     Register Recx = as_Register(ECX_enc); // killed
1803     Register Resi = as_Register(ESI_enc); // sub class
1804     Label miss;
1805 
1806     MacroAssembler _masm(&cbuf);
1807     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808                                      NULL, &miss,
1809                                      /*set_cond_codes:*/ true);
1810     if ($primary) {
1811       __ xorptr(Redi, Redi);
1812     }
1813     __ bind(miss);
1814   %}
1815 
1816   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1817     MacroAssembler masm(&cbuf);
1818     int start = masm.offset();
1819     if (UseSSE >= 2) {
1820       if (VerifyFPU) {
1821         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822       }
1823     } else {
1824       // External c_calling_convention expects the FPU stack to be 'clean'.
1825       // Compiled code leaves it dirty.  Do cleanup now.
1826       masm.empty_FPU_stack();
1827     }
1828     if (sizeof_FFree_Float_Stack_All == -1) {
1829       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830     } else {
1831       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832     }
1833   %}
1834 
1835   enc_class Verify_FPU_For_Leaf %{
1836     if( VerifyFPU ) {
1837       MacroAssembler masm(&cbuf);
1838       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839     }
1840   %}
1841 
1842   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843     // This is the instruction starting address for relocation info.
1844     cbuf.set_insts_mark();
1845     $$$emit8$primary;
1846     // CALL directly to the runtime
1847     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1849 
1850     if (UseSSE >= 2) {
1851       MacroAssembler _masm(&cbuf);
1852       BasicType rt = tf()->return_type();
1853 
1854       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855         // A C runtime call where the return value is unused.  In SSE2+
1856         // mode the result needs to be removed from the FPU stack.  It's
1857         // likely that this function call could be removed by the
1858         // optimizer if the C function is a pure function.
1859         __ ffree(0);
1860       } else if (rt == T_FLOAT) {
1861         __ lea(rsp, Address(rsp, -4));
1862         __ fstp_s(Address(rsp, 0));
1863         __ movflt(xmm0, Address(rsp, 0));
1864         __ lea(rsp, Address(rsp,  4));
1865       } else if (rt == T_DOUBLE) {
1866         __ lea(rsp, Address(rsp, -8));
1867         __ fstp_d(Address(rsp, 0));
1868         __ movdbl(xmm0, Address(rsp, 0));
1869         __ lea(rsp, Address(rsp,  8));
1870       }
1871     }
1872   %}
1873 
1874 
1875   enc_class pre_call_resets %{
1876     // If method sets FPU control word restore it here
1877     debug_only(int off0 = cbuf.insts_size());
1878     if (ra_->C->in_24_bit_fp_mode()) {
1879       MacroAssembler _masm(&cbuf);
1880       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881     }
1882     if (ra_->C->max_vector_size() > 16) {
1883       // Clear upper bits of YMM registers when current compiled code uses
1884       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1885       MacroAssembler _masm(&cbuf);
1886       __ vzeroupper();
1887     }
1888     debug_only(int off1 = cbuf.insts_size());
1889     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1890   %}
1891 
1892   enc_class post_call_FPU %{
1893     // If method sets FPU control word do it here also
1894     if (Compile::current()->in_24_bit_fp_mode()) {
1895       MacroAssembler masm(&cbuf);
1896       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1897     }
1898   %}
1899 
1900   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1901     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1902     // who we intended to call.
1903     cbuf.set_insts_mark();
1904     $$$emit8$primary;
1905 
1906     if (!_method) {
1907       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1908                      runtime_call_Relocation::spec(),
1909                      RELOC_IMM32);
1910     } else {
1911       int method_index = resolved_method_index(cbuf);
1912       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1913                                                   : static_call_Relocation::spec(method_index);
1914       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1915                      rspec, RELOC_DISP32);
1916       // Emit stubs for static call.
1917       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1918       if (stub == NULL) {
1919         ciEnv::current()->record_failure("CodeCache is full");
1920         return;
1921       }
1922     }
1923   %}
1924 
1925   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1926     MacroAssembler _masm(&cbuf);
1927     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1928   %}
1929 
1930   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1931     int disp = in_bytes(Method::from_compiled_offset());
1932     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1933 
1934     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1935     cbuf.set_insts_mark();
1936     $$$emit8$primary;
1937     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1938     emit_d8(cbuf, disp);             // Displacement
1939 
1940   %}
1941 
1942 //   Following encoding is no longer used, but may be restored if calling
1943 //   convention changes significantly.
1944 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1945 //
1946 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1947 //     // int ic_reg     = Matcher::inline_cache_reg();
1948 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1949 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1950 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1951 //
1952 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1953 //     // // so we load it immediately before the call
1954 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1955 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1956 //
1957 //     // xor rbp,ebp
1958 //     emit_opcode(cbuf, 0x33);
1959 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1960 //
1961 //     // CALL to interpreter.
1962 //     cbuf.set_insts_mark();
1963 //     $$$emit8$primary;
1964 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1965 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1966 //   %}
1967 
1968   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1969     $$$emit8$primary;
1970     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1971     $$$emit8$shift$$constant;
1972   %}
1973 
1974   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1975     // Load immediate does not have a zero or sign extended version
1976     // for 8-bit immediates
1977     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1978     $$$emit32$src$$constant;
1979   %}
1980 
1981   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1982     // Load immediate does not have a zero or sign extended version
1983     // for 8-bit immediates
1984     emit_opcode(cbuf, $primary + $dst$$reg);
1985     $$$emit32$src$$constant;
1986   %}
1987 
1988   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1989     // Load immediate does not have a zero or sign extended version
1990     // for 8-bit immediates
1991     int dst_enc = $dst$$reg;
1992     int src_con = $src$$constant & 0x0FFFFFFFFL;
1993     if (src_con == 0) {
1994       // xor dst, dst
1995       emit_opcode(cbuf, 0x33);
1996       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1997     } else {
1998       emit_opcode(cbuf, $primary + dst_enc);
1999       emit_d32(cbuf, src_con);
2000     }
2001   %}
2002 
2003   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2004     // Load immediate does not have a zero or sign extended version
2005     // for 8-bit immediates
2006     int dst_enc = $dst$$reg + 2;
2007     int src_con = ((julong)($src$$constant)) >> 32;
2008     if (src_con == 0) {
2009       // xor dst, dst
2010       emit_opcode(cbuf, 0x33);
2011       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2012     } else {
2013       emit_opcode(cbuf, $primary + dst_enc);
2014       emit_d32(cbuf, src_con);
2015     }
2016   %}
2017 
2018 
2019   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2020   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2021     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2022   %}
2023 
2024   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2025     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2026   %}
2027 
2028   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2029     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2030   %}
2031 
2032   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2033     $$$emit8$primary;
2034     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2035   %}
2036 
2037   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2038     $$$emit8$secondary;
2039     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2040   %}
2041 
2042   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2043     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2044   %}
2045 
2046   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2047     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2048   %}
2049 
2050   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2051     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2052   %}
2053 
2054   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2055     // Output immediate
2056     $$$emit32$src$$constant;
2057   %}
2058 
2059   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2060     // Output Float immediate bits
2061     jfloat jf = $src$$constant;
2062     int    jf_as_bits = jint_cast( jf );
2063     emit_d32(cbuf, jf_as_bits);
2064   %}
2065 
2066   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2067     // Output Float immediate bits
2068     jfloat jf = $src$$constant;
2069     int    jf_as_bits = jint_cast( jf );
2070     emit_d32(cbuf, jf_as_bits);
2071   %}
2072 
2073   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2074     // Output immediate
2075     $$$emit16$src$$constant;
2076   %}
2077 
2078   enc_class Con_d32(immI src) %{
2079     emit_d32(cbuf,$src$$constant);
2080   %}
2081 
2082   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2083     // Output immediate memory reference
2084     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2085     emit_d32(cbuf, 0x00);
2086   %}
2087 
2088   enc_class lock_prefix( ) %{
2089     if( os::is_MP() )
2090       emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap rbx and rcx before and after the
2095   //       cmpxchg8 instruction because the instruction uses
2096   //       rcx as the high order word of the new value to store but
2097   //       our register encoding uses rbx.
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  rbx,ecx
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     if( os::is_MP() )
2105       emit_opcode(cbuf,0xF0);
2106     // CMPXCHG8 [Eptr]
2107     emit_opcode(cbuf,0x0F);
2108     emit_opcode(cbuf,0xC7);
2109     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2110     // XCHG  rbx,ecx
2111     emit_opcode(cbuf,0x87);
2112     emit_opcode(cbuf,0xD9);
2113   %}
2114 
2115   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2116     // [Lock]
2117     if( os::is_MP() )
2118       emit_opcode(cbuf,0xF0);
2119 
2120     // CMPXCHG [Eptr]
2121     emit_opcode(cbuf,0x0F);
2122     emit_opcode(cbuf,0xB1);
2123     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2124   %}
2125 
2126   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2127     int res_encoding = $res$$reg;
2128 
2129     // MOV  res,0
2130     emit_opcode( cbuf, 0xB8 + res_encoding);
2131     emit_d32( cbuf, 0 );
2132     // JNE,s  fail
2133     emit_opcode(cbuf,0x75);
2134     emit_d8(cbuf, 5 );
2135     // MOV  res,1
2136     emit_opcode( cbuf, 0xB8 + res_encoding);
2137     emit_d32( cbuf, 1 );
2138     // fail:
2139   %}
2140 
2141   enc_class set_instruction_start( ) %{
2142     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2143   %}
2144 
2145   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2146     int reg_encoding = $ereg$$reg;
2147     int base  = $mem$$base;
2148     int index = $mem$$index;
2149     int scale = $mem$$scale;
2150     int displace = $mem$$disp;
2151     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2152     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2153   %}
2154 
2155   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2156     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2157     int base  = $mem$$base;
2158     int index = $mem$$index;
2159     int scale = $mem$$scale;
2160     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2161     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2162     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2163   %}
2164 
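       // Shift a long by a constant count of 1..31: SHLD/SHRD (selected via $tertiary) first
       // moves bits across the 32-bit word boundary, then the remaining half is shifted by the
       // same count.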
2165   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2166     int r1, r2;
2167     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2168     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2169     emit_opcode(cbuf,0x0F);
2170     emit_opcode(cbuf,$tertiary);
2171     emit_rm(cbuf, 0x3, r1, r2);
2172     emit_d8(cbuf,$cnt$$constant);
2173     emit_d8(cbuf,$primary);
2174     emit_rm(cbuf, 0x3, $secondary, r1);
2175     emit_d8(cbuf,$cnt$$constant);
2176   %}
2177 
2178   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2179     emit_opcode( cbuf, 0x8B ); // Move
2180     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2181     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2182       emit_d8(cbuf,$primary);
2183       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2184       emit_d8(cbuf,$cnt$$constant-32);
2185     }
2186     emit_d8(cbuf,$primary);
2187     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2188     emit_d8(cbuf,31);
2189   %}
2190 
2191   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2192     int r1, r2;
2193     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2194     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2195 
2196     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2197     emit_rm(cbuf, 0x3, r1, r2);
2198     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2199       emit_opcode(cbuf,$primary);
2200       emit_rm(cbuf, 0x3, $secondary, r1);
2201       emit_d8(cbuf,$cnt$$constant-32);
2202     }
2203     emit_opcode(cbuf,0x33);  // XOR r2,r2
2204     emit_rm(cbuf, 0x3, r2, r2);
2205   %}
2206 
2207   // Clone of RegMem but accepts an extra parameter to access each
2208   // half of a double in memory; it never needs relocation info.
2209   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2210     emit_opcode(cbuf,$opcode$$constant);
2211     int reg_encoding = $rm_reg$$reg;
2212     int base     = $mem$$base;
2213     int index    = $mem$$index;
2214     int scale    = $mem$$scale;
2215     int displace = $mem$$disp + $disp_for_half$$constant;
2216     relocInfo::relocType disp_reloc = relocInfo::none;
2217     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2218   %}
2219 
2220   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2221   //
2222   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2223   // and it never needs relocation information.
2224   // Frequently used to move data between FPU's Stack Top and memory.
2225   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2226     int rm_byte_opcode = $rm_opcode$$constant;
2227     int base     = $mem$$base;
2228     int index    = $mem$$index;
2229     int scale    = $mem$$scale;
2230     int displace = $mem$$disp;
2231     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2232     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2233   %}
2234 
2235   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2236     int rm_byte_opcode = $rm_opcode$$constant;
2237     int base     = $mem$$base;
2238     int index    = $mem$$index;
2239     int scale    = $mem$$scale;
2240     int displace = $mem$$disp;
2241     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2242     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2243   %}
2244 
2245   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2246     int reg_encoding = $dst$$reg;
2247     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2248     int index        = 0x04;            // 0x04 indicates no index
2249     int scale        = 0x00;            // 0x00 indicates no scale
2250     int displace     = $src1$$constant; // 0x00 indicates no displacement
2251     relocInfo::relocType disp_reloc = relocInfo::none;
2252     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2253   %}
2254 
2255   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2256     // Compare dst,src
2257     emit_opcode(cbuf,0x3B);
2258     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2259     // jmp dst < src around move
2260     emit_opcode(cbuf,0x7C);
2261     emit_d8(cbuf,2);
2262     // move dst,src
2263     emit_opcode(cbuf,0x8B);
2264     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2265   %}
2266 
2267   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2268     // Compare dst,src
2269     emit_opcode(cbuf,0x3B);
2270     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2271     // jmp dst > src around move
2272     emit_opcode(cbuf,0x7F);
2273     emit_d8(cbuf,2);
2274     // move dst,src
2275     emit_opcode(cbuf,0x8B);
2276     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2277   %}
2278 
2279   enc_class enc_FPR_store(memory mem, regDPR src) %{
2280     // If src is FPR1, we can just FST to store it.
2281     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2282     int reg_encoding = 0x2; // Just store
2283     int base  = $mem$$base;
2284     int index = $mem$$index;
2285     int scale = $mem$$scale;
2286     int displace = $mem$$disp;
2287     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2288     if( $src$$reg != FPR1L_enc ) {
2289       reg_encoding = 0x3;  // Store & pop
2290       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2291       emit_d8( cbuf, 0xC0-1+$src$$reg );
2292     }
2293     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2294     emit_opcode(cbuf,$primary);
2295     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2296   %}
2297 
2298   enc_class neg_reg(rRegI dst) %{
2299     // NEG $dst
2300     emit_opcode(cbuf,0xF7);
2301     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2302   %}
2303 
2304   enc_class setLT_reg(eCXRegI dst) %{
2305     // SETLT $dst
2306     emit_opcode(cbuf,0x0F);
2307     emit_opcode(cbuf,0x9C);
2308     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2309   %}
2310 
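       // Branchless conditional add (cadd_cmpLT): after SUB $p,$q the SBB materializes the
       // borrow as 0 or -1 in $tmp, which masks $y before it is added back into $p.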
2311   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2312     int tmpReg = $tmp$$reg;
2313 
2314     // SUB $p,$q
2315     emit_opcode(cbuf,0x2B);
2316     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2317     // SBB $tmp,$tmp
2318     emit_opcode(cbuf,0x1B);
2319     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2320     // AND $tmp,$y
2321     emit_opcode(cbuf,0x23);
2322     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2323     // ADD $p,$tmp
2324     emit_opcode(cbuf,0x03);
2325     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2326   %}
2327 
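       // Variable 64-bit left shift with the count in ECX.  For counts of 32..63 the low word
       // is first moved into the high word and the low word cleared; SHLD/SHL then apply the
       // count mod 32.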
2328   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2329     // TEST shift,32
2330     emit_opcode(cbuf,0xF7);
2331     emit_rm(cbuf, 0x3, 0, ECX_enc);
2332     emit_d32(cbuf,0x20);
2333     // JEQ,s small
2334     emit_opcode(cbuf, 0x74);
2335     emit_d8(cbuf, 0x04);
2336     // MOV    $dst.hi,$dst.lo
2337     emit_opcode( cbuf, 0x8B );
2338     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2339     // CLR    $dst.lo
2340     emit_opcode(cbuf, 0x33);
2341     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2342 // small:
2343     // SHLD   $dst.hi,$dst.lo,$shift
2344     emit_opcode(cbuf,0x0F);
2345     emit_opcode(cbuf,0xA5);
2346     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2347     // SHL    $dst.lo,$shift"
2348     emit_opcode(cbuf,0xD3);
2349     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2350   %}
2351 
2352   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2353     // TEST shift,32
2354     emit_opcode(cbuf,0xF7);
2355     emit_rm(cbuf, 0x3, 0, ECX_enc);
2356     emit_d32(cbuf,0x20);
2357     // JEQ,s small
2358     emit_opcode(cbuf, 0x74);
2359     emit_d8(cbuf, 0x04);
2360     // MOV    $dst.lo,$dst.hi
2361     emit_opcode( cbuf, 0x8B );
2362     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2363     // CLR    $dst.hi
2364     emit_opcode(cbuf, 0x33);
2365     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2366 // small:
2367     // SHRD   $dst.lo,$dst.hi,$shift
2368     emit_opcode(cbuf,0x0F);
2369     emit_opcode(cbuf,0xAD);
2370     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2371     // SHR    $dst.hi,$shift"
2372     emit_opcode(cbuf,0xD3);
2373     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2374   %}
2375 
2376   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2377     // TEST shift,32
2378     emit_opcode(cbuf,0xF7);
2379     emit_rm(cbuf, 0x3, 0, ECX_enc);
2380     emit_d32(cbuf,0x20);
2381     // JEQ,s small
2382     emit_opcode(cbuf, 0x74);
2383     emit_d8(cbuf, 0x05);
2384     // MOV    $dst.lo,$dst.hi
2385     emit_opcode( cbuf, 0x8B );
2386     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2387     // SAR    $dst.hi,31
2388     emit_opcode(cbuf, 0xC1);
2389     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2390     emit_d8(cbuf, 0x1F );
2391 // small:
2392     // SHRD   $dst.lo,$dst.hi,$shift
2393     emit_opcode(cbuf,0x0F);
2394     emit_opcode(cbuf,0xAD);
2395     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2396     // SAR    $dst.hi,$shift"
2397     emit_opcode(cbuf,0xD3);
2398     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2399   %}
2400 
2401 
2402   // ----------------- Encodings for floating point unit -----------------
2403   // May leave result in FPU-TOS or FPU reg depending on opcodes
2404   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2405     $$$emit8$primary;
2406     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2407   %}
2408 
2409   // Pop argument in FPR0 with FSTP ST(0)
2410   enc_class PopFPU() %{
2411     emit_opcode( cbuf, 0xDD );
2412     emit_d8( cbuf, 0xD8 );
2413   %}
2414 
2415   // !!!!! equivalent to Pop_Reg_F
2416   enc_class Pop_Reg_DPR( regDPR dst ) %{
2417     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2418     emit_d8( cbuf, 0xD8+$dst$$reg );
2419   %}
2420 
2421   enc_class Push_Reg_DPR( regDPR dst ) %{
2422     emit_opcode( cbuf, 0xD9 );
2423     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2424   %}
2425 
2426   enc_class strictfp_bias1( regDPR dst ) %{
2427     emit_opcode( cbuf, 0xDB );           // FLD m80real
2428     emit_opcode( cbuf, 0x2D );
2429     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2430     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2431     emit_opcode( cbuf, 0xC8+$dst$$reg );
2432   %}
2433 
2434   enc_class strictfp_bias2( regDPR dst ) %{
2435     emit_opcode( cbuf, 0xDB );           // FLD m80real
2436     emit_opcode( cbuf, 0x2D );
2437     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2438     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2439     emit_opcode( cbuf, 0xC8+$dst$$reg );
2440   %}
2441 
2442   // Special case for moving an integer register to a stack slot.
2443   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2444     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2445   %}
2446 
2447   // Special case for moving a register to a stack slot.
2448   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2449     // Opcode already emitted
2450     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2451     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2452     emit_d32(cbuf, $dst$$disp);   // Displacement
2453   %}
2454 
2455   // Push the integer in stackSlot 'src' onto FP-stack
2456   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2457     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2458   %}
2459 
2460   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2461   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2462     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2463   %}
2464 
2465   // Same as Pop_Mem_F except for opcode
2466   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2467   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2468     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2469   %}
2470 
2471   enc_class Pop_Reg_FPR( regFPR dst ) %{
2472     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2473     emit_d8( cbuf, 0xD8+$dst$$reg );
2474   %}
2475 
2476   enc_class Push_Reg_FPR( regFPR dst ) %{
2477     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2478     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2479   %}
2480 
2481   // Push FPU's float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2483     int pop = 0x02;
2484     if ($src$$reg != FPR1L_enc) {
2485       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2486       emit_d8( cbuf, 0xC0-1+$src$$reg );
2487       pop = 0x03;
2488     }
2489     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2490   %}
2491 
2492   // Push FPU's double to a stack-slot, and pop FPU-stack
2493   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2494     int pop = 0x02;
2495     if ($src$$reg != FPR1L_enc) {
2496       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2497       emit_d8( cbuf, 0xC0-1+$src$$reg );
2498       pop = 0x03;
2499     }
2500     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2501   %}
2502 
2503   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2504   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2505     int pop = 0xD0 - 1; // -1 since we skip FLD
2506     if ($src$$reg != FPR1L_enc) {
2507       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2508       emit_d8( cbuf, 0xC0-1+$src$$reg );
2509       pop = 0xD8;
2510     }
2511     emit_opcode( cbuf, 0xDD );
2512     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2513   %}
2514 
2515 
2516   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2517     // load dst in FPR0
2518     emit_opcode( cbuf, 0xD9 );
2519     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2520     if ($src$$reg != FPR1L_enc) {
2521       // fincstp
2522       emit_opcode (cbuf, 0xD9);
2523       emit_opcode (cbuf, 0xF7);
2524       // swap src with FPR1:
2525       // FXCH FPR1 with src
2526       emit_opcode(cbuf, 0xD9);
2527       emit_d8(cbuf, 0xC8-1+$src$$reg );
2528       // fdecstp
2529       emit_opcode (cbuf, 0xD9);
2530       emit_opcode (cbuf, 0xF6);
2531     }
2532   %}
2533 
2534   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2535     MacroAssembler _masm(&cbuf);
2536     __ subptr(rsp, 8);
2537     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2538     __ fld_d(Address(rsp, 0));
2539     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2540     __ fld_d(Address(rsp, 0));
2541   %}
2542 
2543   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2544     MacroAssembler _masm(&cbuf);
2545     __ subptr(rsp, 4);
2546     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2547     __ fld_s(Address(rsp, 0));
2548     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2549     __ fld_s(Address(rsp, 0));
2550   %}
2551 
2552   enc_class Push_ResultD(regD dst) %{
2553     MacroAssembler _masm(&cbuf);
2554     __ fstp_d(Address(rsp, 0));
2555     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2556     __ addptr(rsp, 8);
2557   %}
2558 
2559   enc_class Push_ResultF(regF dst, immI d8) %{
2560     MacroAssembler _masm(&cbuf);
2561     __ fstp_s(Address(rsp, 0));
2562     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2563     __ addptr(rsp, $d8$$constant);
2564   %}
2565 
2566   enc_class Push_SrcD(regD src) %{
2567     MacroAssembler _masm(&cbuf);
2568     __ subptr(rsp, 8);
2569     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2570     __ fld_d(Address(rsp, 0));
2571   %}
2572 
2573   enc_class push_stack_temp_qword() %{
2574     MacroAssembler _masm(&cbuf);
2575     __ subptr(rsp, 8);
2576   %}
2577 
2578   enc_class pop_stack_temp_qword() %{
2579     MacroAssembler _masm(&cbuf);
2580     __ addptr(rsp, 8);
2581   %}
2582 
2583   enc_class push_xmm_to_fpr1(regD src) %{
2584     MacroAssembler _masm(&cbuf);
2585     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2586     __ fld_d(Address(rsp, 0));
2587   %}
2588 
2589   enc_class Push_Result_Mod_DPR( regDPR src) %{
2590     if ($src$$reg != FPR1L_enc) {
2591       // fincstp
2592       emit_opcode (cbuf, 0xD9);
2593       emit_opcode (cbuf, 0xF7);
2594       // FXCH FPR1 with src
2595       emit_opcode(cbuf, 0xD9);
2596       emit_d8(cbuf, 0xC8-1+$src$$reg );
2597       // fdecstp
2598       emit_opcode (cbuf, 0xD9);
2599       emit_opcode (cbuf, 0xF6);
2600     }
2601     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2602     // // FSTP   FPR$dst$$reg
2603     // emit_opcode( cbuf, 0xDD );
2604     // emit_d8( cbuf, 0xD8+$dst$$reg );
2605   %}
2606 
2607   enc_class fnstsw_sahf_skip_parity() %{
2608     // fnstsw ax
2609     emit_opcode( cbuf, 0xDF );
2610     emit_opcode( cbuf, 0xE0 );
2611     // sahf
2612     emit_opcode( cbuf, 0x9E );
2613     // jnp  ::skip
2614     emit_opcode( cbuf, 0x7B );
2615     emit_opcode( cbuf, 0x05 );
2616   %}
2617 
2618   enc_class emitModDPR() %{
2619     // fprem must be iterative
2620     // :: loop
2621     // fprem
2622     emit_opcode( cbuf, 0xD9 );
2623     emit_opcode( cbuf, 0xF8 );
2624     // wait
2625     emit_opcode( cbuf, 0x9b );
2626     // fnstsw ax
2627     emit_opcode( cbuf, 0xDF );
2628     emit_opcode( cbuf, 0xE0 );
2629     // sahf
2630     emit_opcode( cbuf, 0x9E );
2631     // jp  ::loop
2632     emit_opcode( cbuf, 0x0F );
2633     emit_opcode( cbuf, 0x8A );
2634     emit_opcode( cbuf, 0xF4 );
2635     emit_opcode( cbuf, 0xFF );
2636     emit_opcode( cbuf, 0xFF );
2637     emit_opcode( cbuf, 0xFF );
2638   %}
2639 
2640   enc_class fpu_flags() %{
2641     // fnstsw_ax
2642     emit_opcode( cbuf, 0xDF);
2643     emit_opcode( cbuf, 0xE0);
2644     // test ax,0x0400
2645     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2646     emit_opcode( cbuf, 0xA9 );
2647     emit_d16   ( cbuf, 0x0400 );
2648     // // // This sequence works, but stalls for 12-16 cycles on PPro
2649     // // test rax,0x0400
2650     // emit_opcode( cbuf, 0xA9 );
2651     // emit_d32   ( cbuf, 0x00000400 );
2652     //
2653     // jz exit (no unordered comparison)
2654     emit_opcode( cbuf, 0x74 );
2655     emit_d8    ( cbuf, 0x02 );
2656     // mov ah,1 - treat as LT case (set carry flag)
2657     emit_opcode( cbuf, 0xB4 );
2658     emit_d8    ( cbuf, 0x01 );
2659     // sahf
2660     emit_opcode( cbuf, 0x9E);
2661   %}
2662 
2663   enc_class cmpF_P6_fixup() %{
2664     // Fixup the integer flags in case comparison involved a NaN
2665     //
2666     // JNP exit (no unordered comparison, P-flag is set by NaN)
2667     emit_opcode( cbuf, 0x7B );
2668     emit_d8    ( cbuf, 0x03 );
2669     // MOV AH,1 - treat as LT case (set carry flag)
2670     emit_opcode( cbuf, 0xB4 );
2671     emit_d8    ( cbuf, 0x01 );
2672     // SAHF
2673     emit_opcode( cbuf, 0x9E);
2674     // NOP     // target for branch to avoid branch to branch
2675     emit_opcode( cbuf, 0x90);
2676   %}
2677 
2678 //     fnstsw_ax();
2679 //     sahf();
2680 //     movl(dst, nan_result);
2681 //     jcc(Assembler::parity, exit);
2682 //     movl(dst, less_result);
2683 //     jcc(Assembler::below, exit);
2684 //     movl(dst, equal_result);
2685 //     jcc(Assembler::equal, exit);
2686 //     movl(dst, greater_result);
2687 
2688 // less_result     =  1;
2689 // greater_result  = -1;
2690 // equal_result    = 0;
2691 // nan_result      = -1;
2692 
2693   enc_class CmpF_Result(rRegI dst) %{
2694     // fnstsw_ax();
2695     emit_opcode( cbuf, 0xDF);
2696     emit_opcode( cbuf, 0xE0);
2697     // sahf
2698     emit_opcode( cbuf, 0x9E);
2699     // movl(dst, nan_result);
2700     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2701     emit_d32( cbuf, -1 );
2702     // jcc(Assembler::parity, exit);
2703     emit_opcode( cbuf, 0x7A );
2704     emit_d8    ( cbuf, 0x13 );
2705     // movl(dst, less_result);
2706     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2707     emit_d32( cbuf, -1 );
2708     // jcc(Assembler::below, exit);
2709     emit_opcode( cbuf, 0x72 );
2710     emit_d8    ( cbuf, 0x0C );
2711     // movl(dst, equal_result);
2712     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2713     emit_d32( cbuf, 0 );
2714     // jcc(Assembler::equal, exit);
2715     emit_opcode( cbuf, 0x74 );
2716     emit_d8    ( cbuf, 0x05 );
2717     // movl(dst, greater_result);
2718     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2719     emit_d32( cbuf, 1 );
2720   %}
2721 
2722 
2723   // Compare the longs and set flags
2724   // BROKEN!  Do Not use as-is
2725   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2726     // CMP    $src1.hi,$src2.hi
2727     emit_opcode( cbuf, 0x3B );
2728     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2729     // JNE,s  done
2730     emit_opcode(cbuf,0x75);
2731     emit_d8(cbuf, 2 );
2732     // CMP    $src1.lo,$src2.lo
2733     emit_opcode( cbuf, 0x3B );
2734     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2735 // done:
2736   %}
2737 
2738   enc_class convert_int_long( regL dst, rRegI src ) %{
2739     // mov $dst.lo,$src
2740     int dst_encoding = $dst$$reg;
2741     int src_encoding = $src$$reg;
2742     encode_Copy( cbuf, dst_encoding  , src_encoding );
2743     // mov $dst.hi,$src
2744     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2745     // sar $dst.hi,31
2746     emit_opcode( cbuf, 0xC1 );
2747     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2748     emit_d8(cbuf, 0x1F );
2749   %}
2750 
2751   enc_class convert_long_double( eRegL src ) %{
2752     // push $src.hi
2753     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2754     // push $src.lo
2755     emit_opcode(cbuf, 0x50+$src$$reg  );
2756     // fild 64-bits at [SP]
2757     emit_opcode(cbuf,0xdf);
2758     emit_d8(cbuf, 0x6C);
2759     emit_d8(cbuf, 0x24);
2760     emit_d8(cbuf, 0x00);
2761     // pop stack
2762     emit_opcode(cbuf, 0x83); // add  SP, #8
2763     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2764     emit_d8(cbuf, 0x8);
2765   %}
2766 
2767   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2768     // IMUL   EDX:EAX,$src1
2769     emit_opcode( cbuf, 0xF7 );
2770     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2771     // SAR    EDX,$cnt-32
2772     int shift_count = ((int)$cnt$$constant) - 32;
2773     if (shift_count > 0) {
2774       emit_opcode(cbuf, 0xC1);
2775       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2776       emit_d8(cbuf, shift_count);
2777     }
2778   %}
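  // This encoding backs divide-by-constant strength reduction: multiply by a
  // 32-bit "magic" reciprocal, keep the high half (EDX), then shift right by
  // $cnt-32.  As a hedged illustration, signed division by 10 is commonly
  // lowered (up to a final sign-correction add) as roughly
  //   q = (int)(((long long)x * 0x66666667LL) >> 34);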
2779 
2780   // This version doesn't have the trailing ADD ESP,8
2781   enc_class convert_long_double2( eRegL src ) %{
2782     // push $src.hi
2783     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2784     // push $src.lo
2785     emit_opcode(cbuf, 0x50+$src$$reg  );
2786     // fild 64-bits at [SP]
2787     emit_opcode(cbuf,0xdf);
2788     emit_d8(cbuf, 0x6C);
2789     emit_d8(cbuf, 0x24);
2790     emit_d8(cbuf, 0x00);
2791   %}
2792 
2793   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2794     // Basic idea: long = (long)int * (long)int
2795     // IMUL EDX:EAX, src
2796     emit_opcode( cbuf, 0xF7 );
2797     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2798   %}
2799 
2800   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2801     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2802     // MUL EDX:EAX, src
2803     emit_opcode( cbuf, 0xF7 );
2804     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2805   %}
2806 
2807   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2808     // Basic idea: lo(result) = lo(x_lo * y_lo)
2809     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2810     // MOV    $tmp,$src.lo
2811     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2812     // IMUL   $tmp,EDX
2813     emit_opcode( cbuf, 0x0F );
2814     emit_opcode( cbuf, 0xAF );
2815     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2816     // MOV    EDX,$src.hi
2817     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2818     // IMUL   EDX,EAX
2819     emit_opcode( cbuf, 0x0F );
2820     emit_opcode( cbuf, 0xAF );
2821     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2822     // ADD    $tmp,EDX
2823     emit_opcode( cbuf, 0x03 );
2824     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2825     // MUL   EDX:EAX,$src.lo
2826     emit_opcode( cbuf, 0xF7 );
2827     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2828     // ADD    EDX,$tmp
2829     emit_opcode( cbuf, 0x03 );
2830     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2831   %}
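  // Rough C sketch of the sequence above (illustration, not emitted code):
  //   uint64_t p   = (uint64_t)x_lo * y_lo;              // MUL EDX:EAX,$src.lo
  //   uint32_t hi  = (uint32_t)(p >> 32)
  //                + x_hi * y_lo + x_lo * y_hi;           // the two IMULs + ADDs
  //   uint64_t res = ((uint64_t)hi << 32) | (uint32_t)p;  // 64x64->64 low product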
2832 
2833   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2834     // Basic idea: lo(result) = lo(src * y_lo)
2835     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2836     // IMUL   $tmp,EDX,$src
2837     emit_opcode( cbuf, 0x6B );
2838     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2839     emit_d8( cbuf, (int)$src$$constant );
2840     // MOV    EDX,$src
2841     emit_opcode(cbuf, 0xB8 + EDX_enc);
2842     emit_d32( cbuf, (int)$src$$constant );
2843     // MUL   EDX:EAX,EDX
2844     emit_opcode( cbuf, 0xF7 );
2845     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2846     // ADD    EDX,$tmp
2847     emit_opcode( cbuf, 0x03 );
2848     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2849   %}
2850 
2851   enc_class long_div( eRegL src1, eRegL src2 ) %{
2852     // PUSH src1.hi
2853     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2854     // PUSH src1.lo
2855     emit_opcode(cbuf,               0x50+$src1$$reg  );
2856     // PUSH src2.hi
2857     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2858     // PUSH src2.lo
2859     emit_opcode(cbuf,               0x50+$src2$$reg  );
2860     // CALL directly to the runtime
2861     cbuf.set_insts_mark();
2862     emit_opcode(cbuf,0xE8);       // Call into runtime
2863     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2864     // Restore stack
2865     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2866     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2867     emit_d8(cbuf, 4*4);
2868   %}
2869 
2870   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2871     // PUSH src1.hi
2872     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2873     // PUSH src1.lo
2874     emit_opcode(cbuf,               0x50+$src1$$reg  );
2875     // PUSH src2.hi
2876     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2877     // PUSH src2.lo
2878     emit_opcode(cbuf,               0x50+$src2$$reg  );
2879     // CALL directly to the runtime
2880     cbuf.set_insts_mark();
2881     emit_opcode(cbuf,0xE8);       // Call into runtime
2882     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2883     // Restore stack
2884     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2885     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2886     emit_d8(cbuf, 4*4);
2887   %}
2888 
2889   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2890     // MOV   $tmp,$src.lo
2891     emit_opcode(cbuf, 0x8B);
2892     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2893     // OR    $tmp,$src.hi
2894     emit_opcode(cbuf, 0x0B);
2895     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2896   %}
2897 
2898   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2899     // CMP    $src1.lo,$src2.lo
2900     emit_opcode( cbuf, 0x3B );
2901     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2902     // JNE,s  skip
2903     emit_cc(cbuf, 0x70, 0x5);
2904     emit_d8(cbuf,2);
2905     // CMP    $src1.hi,$src2.hi
2906     emit_opcode( cbuf, 0x3B );
2907     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2908   %}
2909 
2910   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2911     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2912     emit_opcode( cbuf, 0x3B );
2913     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2914     // MOV    $tmp,$src1.hi
2915     emit_opcode( cbuf, 0x8B );
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2917     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2918     emit_opcode( cbuf, 0x1B );
2919     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2920   %}
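  // Why long_cmp_flags2 works: CMP of the low words produces the borrow, and
  // SBB $tmp,$src2.hi then computes  src1.hi - src2.hi - borrow, so the
  // resulting sign/overflow flags describe the full 64-bit subtraction
  // src1 - src2 (suitable for signed LT/GE style tests only).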
2921 
2922   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2923     // XOR    $tmp,$tmp
2924     emit_opcode(cbuf,0x33);  // XOR
2925     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2926     // CMP    $tmp,$src.lo
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2929     // SBB    $tmp,$src.hi
2930     emit_opcode( cbuf, 0x1B );
2931     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2932   %}
2933 
2934  // Sniff, sniff... smells like Gnu Superoptimizer
2935   enc_class neg_long( eRegL dst ) %{
2936     emit_opcode(cbuf,0xF7);    // NEG hi
2937     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2938     emit_opcode(cbuf,0xF7);    // NEG lo
2939     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2940     emit_opcode(cbuf,0x83);    // SBB hi,0
2941     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2942     emit_d8    (cbuf,0 );
2943   %}
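  // Worked example of the NEG/NEG/SBB idiom: for dst = 0x00000000_00000001
  //   NEG hi   -> 0x00000000
  //   NEG lo   -> 0xFFFFFFFF, CF=1   (lo was non-zero)
  //   SBB hi,0 -> 0xFFFFFFFF
  // giving 0xFFFFFFFF_FFFFFFFF, i.e. a correct two's-complement 64-bit negate.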
2944 
2945   enc_class enc_pop_rdx() %{
2946     emit_opcode(cbuf,0x5A);
2947   %}
2948 
2949   enc_class enc_rethrow() %{
2950     cbuf.set_insts_mark();
2951     emit_opcode(cbuf, 0xE9);        // jmp    entry
2952     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2953                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2954   %}
2955 
2956 
2957   // Convert a double to an int.  Java semantics require we do complex
2958   // manipulations in the corner cases.  So we set the rounding mode to
2959   // 'zero', store the darned double down as an int, and reset the
2960   // rounding mode to 'nearest'.  FIST stores 0x80000000 (integer indefinite)
2961   // for NaN and out-of-range inputs; we test for it and call the runtime to fix up.
2962   enc_class DPR2I_encoding( regDPR src ) %{
2963     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2964     // exceptions here, so that a NaN or other corner-case value will
2965     // throw an exception (but normal values get converted at full speed).
2966     // However, I2C adapters and other float-stack manglers leave pending
2967     // invalid-op exceptions hanging.  We would have to clear them before
2968     // enabling them and that is more expensive than just testing for the
2969     // invalid value Intel stores down in the corner cases.
2970     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2971     emit_opcode(cbuf,0x2D);
2972     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2973     // Allocate a word
2974     emit_opcode(cbuf,0x83);            // SUB ESP,4
2975     emit_opcode(cbuf,0xEC);
2976     emit_d8(cbuf,0x04);
2977     // Encoding assumes a double has been pushed into FPR0.
2978     // Store down the double as an int, popping the FPU stack
2979     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2980     emit_opcode(cbuf,0x1C);
2981     emit_d8(cbuf,0x24);
2982     // Restore the rounding mode; mask the exception
2983     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2984     emit_opcode(cbuf,0x2D);
2985     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2986         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2987         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2988 
2989     // Load the converted int; adjust CPU stack
2990     emit_opcode(cbuf,0x58);       // POP EAX
2991     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2992     emit_d32   (cbuf,0x80000000); //         0x80000000
2993     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2994     emit_d8    (cbuf,0x07);       // Size of slow_call
2995     // Push src onto stack slow-path
2996     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2997     emit_d8    (cbuf,0xC0-1+$src$$reg );
2998     // CALL directly to the runtime
2999     cbuf.set_insts_mark();
3000     emit_opcode(cbuf,0xE8);       // Call into runtime
3001     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3002     // Carry on here...
3003   %}
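  // The slow path is needed because FIST stores 0x80000000 (integer
  // indefinite) for NaN and out-of-range inputs, while Java requires,
  // roughly (illustrative sketch of what the d2i wrapper must compute):
  //   int d2i(double d) {
  //     if (d != d)              return 0;            // NaN -> 0
  //     if (d >=  2147483647.0)  return 0x7FFFFFFF;   // clamp to MAX_VALUE
  //     if (d <= -2147483648.0)  return 0x80000000;   // clamp to MIN_VALUE
  //     return (int)d;                                // ordinary truncation
  //   }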
3004 
3005   enc_class DPR2L_encoding( regDPR src ) %{
3006     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3007     emit_opcode(cbuf,0x2D);
3008     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3009     // Allocate a word
3010     emit_opcode(cbuf,0x83);            // SUB ESP,8
3011     emit_opcode(cbuf,0xEC);
3012     emit_d8(cbuf,0x08);
3013     // Encoding assumes a double has been pushed into FPR0.
3014     // Store down the double as a long, popping the FPU stack
3015     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3016     emit_opcode(cbuf,0x3C);
3017     emit_d8(cbuf,0x24);
3018     // Restore the rounding mode; mask the exception
3019     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3020     emit_opcode(cbuf,0x2D);
3021     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3022         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3023         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3024 
3025     // Load the converted long; adjust CPU stack
3026     emit_opcode(cbuf,0x58);       // POP EAX
3027     emit_opcode(cbuf,0x5A);       // POP EDX
3028     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3029     emit_d8    (cbuf,0xFA);       // rdx
3030     emit_d32   (cbuf,0x80000000); //         0x80000000
3031     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3032     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3033     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3034     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3035     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3036     emit_d8    (cbuf,0x07);       // Size of slow_call
3037     // Push src onto stack slow-path
3038     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3039     emit_d8    (cbuf,0xC0-1+$src$$reg );
3040     // CALL directly to the runtime
3041     cbuf.set_insts_mark();
3042     emit_opcode(cbuf,0xE8);       // Call into runtime
3043     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3044     // Carry on here...
3045   %}
3046 
3047   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3048     // Operand was loaded from memory into fp ST (stack top)
3049     // FMUL   ST,$src1  /* D8 C8+i */
3050     emit_opcode(cbuf, 0xD8);
3051     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3052   %}
3053 
3054   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3055     // FADD   ST,src2  /* D8 C0+i */
3056     emit_opcode(cbuf, 0xD8);
3057     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3058     // could use FADDP  src2,fpST  /* DE C0+i */
3059   %}
3060 
3061   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3062     // FADDP  src2,ST  /* DE C0+i */
3063     emit_opcode(cbuf, 0xDE);
3064     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3065   %}
3066 
3067   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3068     // Operand has been loaded into fp ST (stack top)
3069       // FSUB   ST,$src1
3070       emit_opcode(cbuf, 0xD8);
3071       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3072 
3073       // FDIV
3074       emit_opcode(cbuf, 0xD8);
3075       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3076   %}
3077 
3078   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3079     // Operand was loaded from memory into fp ST (stack top)
3080     // FADD   ST,$src1  /* D8 C0+i */
3081     emit_opcode(cbuf, 0xD8);
3082     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3083 
3084     // FMUL   ST,src2  /* D8 C8+i */
3085     emit_opcode(cbuf, 0xD8);
3086     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3087   %}
3088 
3089 
3090   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3091     // Operand was loaded from memory into fp ST (stack top)
3092     // FADD   ST,$src1  /* D8 C0+i */
3093     emit_opcode(cbuf, 0xD8);
3094     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3095 
3096     // FMULP  src2,ST  /* DE C8+i */
3097     emit_opcode(cbuf, 0xDE);
3098     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3099   %}
3100 
3101   // Atomically load the volatile long
3102   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3103     emit_opcode(cbuf,0xDF);
3104     int rm_byte_opcode = 0x05;
3105     int base     = $mem$$base;
3106     int index    = $mem$$index;
3107     int scale    = $mem$$scale;
3108     int displace = $mem$$disp;
3109     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3110     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3111     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3112   %}
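  // In effect (illustrative): FILD qword [mem]; FISTP qword [ESP+dst] -- the
  // single 64-bit FPU load/store pair is what makes the volatile long load
  // atomic on 32-bit x86.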
3113 
3114   // Volatile Store Long.  Must be atomic, so move it into
3115   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3116   // target address before the store (for null-ptr checks)
3117   // so the memory operand is used twice in the encoding.
3118   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3119     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3120     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3121     emit_opcode(cbuf,0xDF);
3122     int rm_byte_opcode = 0x07;
3123     int base     = $mem$$base;
3124     int index    = $mem$$index;
3125     int scale    = $mem$$scale;
3126     int displace = $mem$$disp;
3127     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3128     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3129   %}
3130 
3131   // Safepoint Poll.  This polls the safepoint page, and causes an
3132   // exception if it is not readable.  Unfortunately, it kills the condition code
3133   // in the process.
3134   // We currently use TESTL [spp],EDI
3135   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3136 
3137   enc_class Safepoint_Poll() %{
3138     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3139     emit_opcode(cbuf,0x85);
3140     emit_rm (cbuf, 0x0, 0x7, 0x5);
3141     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3142   %}
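  // The bytes above decode to  TEST [polling_page], EDI : opcode 0x85 with
  // mod=00, reg=EDI(7), r/m=101 selects a plain disp32 address, so the poll
  // is a read of the polling page that faults when the page is protected.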
3143 %}
3144 
3145 
3146 //----------FRAME--------------------------------------------------------------
3147 // Definition of frame structure and management information.
3148 //
3149 //  S T A C K   L A Y O U T    Allocators stack-slot number
3150 //                             |   (to get allocators register number
3151 //  G  Owned by    |        |  v    add OptoReg::stack0())
3152 //  r   CALLER     |        |
3153 //  o     |        +--------+      pad to even-align allocators stack-slot
3154 //  w     V        |  pad0  |        numbers; owned by CALLER
3155 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3156 //  h     ^        |   in   |  5
3157 //        |        |  args  |  4   Holes in incoming args owned by SELF
3158 //  |     |        |        |  3
3159 //  |     |        +--------+
3160 //  V     |        | old out|      Empty on Intel, window on Sparc
3161 //        |    old |preserve|      Must be even aligned.
3162 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3163 //        |        |   in   |  3   area for Intel ret address
3164 //     Owned by    |preserve|      Empty on Sparc.
3165 //       SELF      +--------+
3166 //        |        |  pad2  |  2   pad to align old SP
3167 //        |        +--------+  1
3168 //        |        | locks  |  0
3169 //        |        +--------+----> OptoReg::stack0(), even aligned
3170 //        |        |  pad1  | 11   pad to align new SP
3171 //        |        +--------+
3172 //        |        |        | 10
3173 //        |        | spills |  9   spills
3174 //        V        |        |  8   (pad0 slot for callee)
3175 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3176 //        ^        |  out   |  7
3177 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3178 //     Owned by    +--------+
3179 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3180 //        |    new |preserve|      Must be even-aligned.
3181 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3182 //        |        |        |
3183 //
3184 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3185 //         known from SELF's arguments and the Java calling convention.
3186 //         Region 6-7 is determined per call site.
3187 // Note 2: If the calling convention leaves holes in the incoming argument
3188 //         area, those holes are owned by SELF.  Holes in the outgoing area
3189 //         are owned by the CALLEE.  Holes should not be necessary in the
3190 //         incoming area, as the Java calling convention is completely under
3191 //         the control of the AD file.  Doubles can be sorted and packed to
3192 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3193 //         varargs C calling conventions.
3194 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3195 //         even aligned with pad0 as needed.
3196 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3197 //         region 6-11 is even aligned; it may be padded out more so that
3198 //         the region from SP to FP meets the minimum stack alignment.
3199 
3200 frame %{
3201   // What direction does stack grow in (assumed to be same for C & Java)
3202   stack_direction(TOWARDS_LOW);
3203 
3204   // These three registers define part of the calling convention
3205   // between compiled code and the interpreter.
3206   inline_cache_reg(EAX);                // Inline Cache Register
3207   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3208 
3209   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3210   cisc_spilling_operand_name(indOffset32);
3211 
3212   // Number of stack slots consumed by locking an object
3213   sync_stack_slots(1);
3214 
3215   // Compiled code's Frame Pointer
3216   frame_pointer(ESP);
3217   // Interpreter stores its frame pointer in a register which is
3218   // stored to the stack by I2CAdaptors.
3219   // I2CAdaptors convert from interpreted java to compiled java.
3220   interpreter_frame_pointer(EBP);
3221 
3222   // Stack alignment requirement
3223   // Alignment size in bytes (128-bit -> 16 bytes)
3224   stack_alignment(StackAlignmentInBytes);
3225 
3226   // Number of stack slots between incoming argument block and the start of
3227   // a new frame.  The PROLOG must add this many slots to the stack.  The
3228   // EPILOG must remove this many slots.  Intel needs one slot for
3229   // return address and one for rbp, (must save rbp)
3230   in_preserve_stack_slots(2+VerifyStackAtCalls);
3231 
3232   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3233   // for calls to C.  Supports the var-args backing area for register parms.
3234   varargs_C_out_slots_killed(0);
3235 
3236   // The after-PROLOG location of the return address.  Location of
3237   // return address specifies a type (REG or STACK) and a number
3238   // representing the register number (i.e. - use a register name) or
3239   // stack slot.
3240   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3241   // Otherwise, it is above the locks and verification slot and the alignment word.
3242   return_addr(STACK - 1 +
3243               round_to((Compile::current()->in_preserve_stack_slots() +
3244                         Compile::current()->fixed_slots()),
3245                        stack_alignment_in_slots()));
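  // Illustrative arithmetic with hypothetical values: 2 preserve slots,
  // 1 fixed (lock) slot and a 4-slot (16-byte) alignment give
  // round_to(2 + 1, 4) = 4, so the return address is reported at
  // STACK - 1 + 4, i.e. stack slot 3.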
3246 
3247   // Body of function which returns an integer array locating
3248   // arguments either in registers or in stack slots.  Passed an array
3249   // of ideal registers called "sig" and a "length" count.  Stack-slot
3250   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3251   // arguments for a CALLEE.  Incoming stack arguments are
3252   // automatically biased by the preserve_stack_slots field above.
3253   calling_convention %{
3254     // No difference between incoming/outgoing, just pass false
3255     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3256   %}
3257 
3258 
3259   // Body of function which returns an integer array locating
3260   // arguments either in registers or in stack slots.  Passed an array
3261   // of ideal registers called "sig" and a "length" count.  Stack-slot
3262   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3263   // arguments for a CALLEE.  Incoming stack arguments are
3264   // automatically biased by the preserve_stack_slots field above.
3265   c_calling_convention %{
3266     // This is obviously always outgoing
3267     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3268   %}
3269 
3270   // Location of C & interpreter return values
3271   c_return_value %{
3272     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3273     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3274     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3275 
3276     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3277     // that C functions return float and double results in XMM0.
3278     if( ideal_reg == Op_RegD && UseSSE>=2 )
3279       return OptoRegPair(XMM0b_num,XMM0_num);
3280     if( ideal_reg == Op_RegF && UseSSE>=2 )
3281       return OptoRegPair(OptoReg::Bad,XMM0_num);
3282 
3283     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3284   %}
3285 
3286   // Location of return values
3287   return_value %{
3288     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3289     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3290     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3291     if( ideal_reg == Op_RegD && UseSSE>=2 )
3292       return OptoRegPair(XMM0b_num,XMM0_num);
3293     if( ideal_reg == Op_RegF && UseSSE>=1 )
3294       return OptoRegPair(OptoReg::Bad,XMM0_num);
3295     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3296   %}
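  // Example: ideal_reg == Op_RegL selects (hi, lo) = (EDX_num, EAX_num), so a
  // long comes back in EDX:EAX; with UseSSE >= 2 an Op_RegD result is instead
  // reported as OptoRegPair(XMM0b_num, XMM0_num).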
3297 
3298 %}
3299 
3300 //----------ATTRIBUTES---------------------------------------------------------
3301 //----------Operand Attributes-------------------------------------------------
3302 op_attrib op_cost(0);        // Required cost attribute
3303 
3304 //----------Instruction Attributes---------------------------------------------
3305 ins_attrib ins_cost(100);       // Required cost attribute
3306 ins_attrib ins_size(8);         // Required size attribute (in bits)
3307 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3308                                 // non-matching short branch variant of some
3309                                 // long branch?
3310 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3311                                 // specifies the alignment that some part of the instruction (not
3312                                 // necessarily the start) requires.  If > 1, a compute_padding()
3313                                 // function must be provided for the instruction
3314 
3315 //----------OPERANDS-----------------------------------------------------------
3316 // Operand definitions must precede instruction definitions for correct parsing
3317 // in the ADLC because operands constitute user defined types which are used in
3318 // instruction definitions.
3319 
3320 //----------Simple Operands----------------------------------------------------
3321 // Immediate Operands
3322 // Integer Immediate
3323 operand immI() %{
3324   match(ConI);
3325 
3326   op_cost(10);
3327   format %{ %}
3328   interface(CONST_INTER);
3329 %}
3330 
3331 // Constant for test vs zero
3332 operand immI0() %{
3333   predicate(n->get_int() == 0);
3334   match(ConI);
3335 
3336   op_cost(0);
3337   format %{ %}
3338   interface(CONST_INTER);
3339 %}
3340 
3341 // Constant for increment
3342 operand immI1() %{
3343   predicate(n->get_int() == 1);
3344   match(ConI);
3345 
3346   op_cost(0);
3347   format %{ %}
3348   interface(CONST_INTER);
3349 %}
3350 
3351 // Constant for decrement
3352 operand immI_M1() %{
3353   predicate(n->get_int() == -1);
3354   match(ConI);
3355 
3356   op_cost(0);
3357   format %{ %}
3358   interface(CONST_INTER);
3359 %}
3360 
3361 // Valid scale values for addressing modes
3362 operand immI2() %{
3363   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3364   match(ConI);
3365 
3366   format %{ %}
3367   interface(CONST_INTER);
3368 %}
3369 
3370 operand immI8() %{
3371   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3372   match(ConI);
3373 
3374   op_cost(5);
3375   format %{ %}
3376   interface(CONST_INTER);
3377 %}
3378 
3379 operand immI16() %{
3380   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3381   match(ConI);
3382 
3383   op_cost(10);
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 // Int Immediate non-negative
3389 operand immU31()
3390 %{
3391   predicate(n->get_int() >= 0);
3392   match(ConI);
3393 
3394   op_cost(0);
3395   format %{ %}
3396   interface(CONST_INTER);
3397 %}
3398 
3399 // Constant for long shifts
3400 operand immI_32() %{
3401   predicate( n->get_int() == 32 );
3402   match(ConI);
3403 
3404   op_cost(0);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 operand immI_1_31() %{
3410   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3411   match(ConI);
3412 
3413   op_cost(0);
3414   format %{ %}
3415   interface(CONST_INTER);
3416 %}
3417 
3418 operand immI_32_63() %{
3419   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3420   match(ConI);
3421   op_cost(0);
3422 
3423   format %{ %}
3424   interface(CONST_INTER);
3425 %}
3426 
3427 operand immI_1() %{
3428   predicate( n->get_int() == 1 );
3429   match(ConI);
3430 
3431   op_cost(0);
3432   format %{ %}
3433   interface(CONST_INTER);
3434 %}
3435 
3436 operand immI_2() %{
3437   predicate( n->get_int() == 2 );
3438   match(ConI);
3439 
3440   op_cost(0);
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 operand immI_3() %{
3446   predicate( n->get_int() == 3 );
3447   match(ConI);
3448 
3449   op_cost(0);
3450   format %{ %}
3451   interface(CONST_INTER);
3452 %}
3453 
3454 // Pointer Immediate
3455 operand immP() %{
3456   match(ConP);
3457 
3458   op_cost(10);
3459   format %{ %}
3460   interface(CONST_INTER);
3461 %}
3462 
3463 // NULL Pointer Immediate
3464 operand immP0() %{
3465   predicate( n->get_ptr() == 0 );
3466   match(ConP);
3467   op_cost(0);
3468 
3469   format %{ %}
3470   interface(CONST_INTER);
3471 %}
3472 
3473 // Long Immediate
3474 operand immL() %{
3475   match(ConL);
3476 
3477   op_cost(20);
3478   format %{ %}
3479   interface(CONST_INTER);
3480 %}
3481 
3482 // Long Immediate zero
3483 operand immL0() %{
3484   predicate( n->get_long() == 0L );
3485   match(ConL);
3486   op_cost(0);
3487 
3488   format %{ %}
3489   interface(CONST_INTER);
3490 %}
3491 
3492 // Long Immediate -1
3493 operand immL_M1() %{
3494   predicate( n->get_long() == -1L );
3495   match(ConL);
3496   op_cost(0);
3497 
3498   format %{ %}
3499   interface(CONST_INTER);
3500 %}
3501 
3502 // Long immediate from 0 to 127.
3503 // Used for a shorter form of long mul by 10.
3504 operand immL_127() %{
3505   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
3513 // Long Immediate: low 32-bit mask
3514 operand immL_32bits() %{
3515   predicate(n->get_long() == 0xFFFFFFFFL);
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long Immediate: fits in a signed 32-bit immediate
3524 operand immL32() %{
3525   predicate(n->get_long() == (int)(n->get_long()));
3526   match(ConL);
3527   op_cost(20);
3528 
3529   format %{ %}
3530   interface(CONST_INTER);
3531 %}
3532 
3533 // Double Immediate zero
3534 operand immDPR0() %{
3535   // Do additional (and counter-intuitive) test against NaN to work around VC++
3536   // bug that generates code such that NaNs compare equal to 0.0
3537   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3538   match(ConD);
3539 
3540   op_cost(5);
3541   format %{ %}
3542   interface(CONST_INTER);
3543 %}
3544 
3545 // Double Immediate one
3546 operand immDPR1() %{
3547   predicate( UseSSE<=1 && n->getd() == 1.0 );
3548   match(ConD);
3549 
3550   op_cost(5);
3551   format %{ %}
3552   interface(CONST_INTER);
3553 %}
3554 
3555 // Double Immediate
3556 operand immDPR() %{
3557   predicate(UseSSE<=1);
3558   match(ConD);
3559 
3560   op_cost(5);
3561   format %{ %}
3562   interface(CONST_INTER);
3563 %}
3564 
3565 operand immD() %{
3566   predicate(UseSSE>=2);
3567   match(ConD);
3568 
3569   op_cost(5);
3570   format %{ %}
3571   interface(CONST_INTER);
3572 %}
3573 
3574 // Double Immediate zero
3575 operand immD0() %{
3576   // Do additional (and counter-intuitive) test against NaN to work around VC++
3577   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3578   // compare equal to -0.0.
3579   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3580   match(ConD);
3581 
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 // Float Immediate zero
3587 operand immFPR0() %{
3588   predicate(UseSSE == 0 && n->getf() == 0.0F);
3589   match(ConF);
3590 
3591   op_cost(5);
3592   format %{ %}
3593   interface(CONST_INTER);
3594 %}
3595 
3596 // Float Immediate one
3597 operand immFPR1() %{
3598   predicate(UseSSE == 0 && n->getf() == 1.0F);
3599   match(ConF);
3600 
3601   op_cost(5);
3602   format %{ %}
3603   interface(CONST_INTER);
3604 %}
3605 
3606 // Float Immediate
3607 operand immFPR() %{
3608   predicate( UseSSE == 0 );
3609   match(ConF);
3610 
3611   op_cost(5);
3612   format %{ %}
3613   interface(CONST_INTER);
3614 %}
3615 
3616 // Float Immediate
3617 operand immF() %{
3618   predicate(UseSSE >= 1);
3619   match(ConF);
3620 
3621   op_cost(5);
3622   format %{ %}
3623   interface(CONST_INTER);
3624 %}
3625 
3626 // Float Immediate zero.  Zero and not -0.0
3627 operand immF0() %{
3628   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3629   match(ConF);
3630 
3631   op_cost(5);
3632   format %{ %}
3633   interface(CONST_INTER);
3634 %}
3635 
3636 // Immediates for special shifts (sign extend)
3637 
3638 // Shift counts for sign extension
3639 operand immI_16() %{
3640   predicate( n->get_int() == 16 );
3641   match(ConI);
3642 
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 operand immI_24() %{
3648   predicate( n->get_int() == 24 );
3649   match(ConI);
3650 
3651   format %{ %}
3652   interface(CONST_INTER);
3653 %}
3654 
3655 // Constant for byte-wide masking
3656 operand immI_255() %{
3657   predicate( n->get_int() == 255 );
3658   match(ConI);
3659 
3660   format %{ %}
3661   interface(CONST_INTER);
3662 %}
3663 
3664 // Constant for short-wide masking
3665 operand immI_65535() %{
3666   predicate(n->get_int() == 65535);
3667   match(ConI);
3668 
3669   format %{ %}
3670   interface(CONST_INTER);
3671 %}
3672 
3673 // Register Operands
3674 // Integer Register
3675 operand rRegI() %{
3676   constraint(ALLOC_IN_RC(int_reg));
3677   match(RegI);
3678   match(xRegI);
3679   match(eAXRegI);
3680   match(eBXRegI);
3681   match(eCXRegI);
3682   match(eDXRegI);
3683   match(eDIRegI);
3684   match(eSIRegI);
3685 
3686   format %{ %}
3687   interface(REG_INTER);
3688 %}
3689 
3690 // Subset of Integer Register
3691 operand xRegI(rRegI reg) %{
3692   constraint(ALLOC_IN_RC(int_x_reg));
3693   match(reg);
3694   match(eAXRegI);
3695   match(eBXRegI);
3696   match(eCXRegI);
3697   match(eDXRegI);
3698 
3699   format %{ %}
3700   interface(REG_INTER);
3701 %}
3702 
3703 // Special Registers
3704 operand eAXRegI(xRegI reg) %{
3705   constraint(ALLOC_IN_RC(eax_reg));
3706   match(reg);
3707   match(rRegI);
3708 
3709   format %{ "EAX" %}
3710   interface(REG_INTER);
3711 %}
3712 
3713 // Special Registers
3714 operand eBXRegI(xRegI reg) %{
3715   constraint(ALLOC_IN_RC(ebx_reg));
3716   match(reg);
3717   match(rRegI);
3718 
3719   format %{ "EBX" %}
3720   interface(REG_INTER);
3721 %}
3722 
3723 operand eCXRegI(xRegI reg) %{
3724   constraint(ALLOC_IN_RC(ecx_reg));
3725   match(reg);
3726   match(rRegI);
3727 
3728   format %{ "ECX" %}
3729   interface(REG_INTER);
3730 %}
3731 
3732 operand eDXRegI(xRegI reg) %{
3733   constraint(ALLOC_IN_RC(edx_reg));
3734   match(reg);
3735   match(rRegI);
3736 
3737   format %{ "EDX" %}
3738   interface(REG_INTER);
3739 %}
3740 
3741 operand eDIRegI(xRegI reg) %{
3742   constraint(ALLOC_IN_RC(edi_reg));
3743   match(reg);
3744   match(rRegI);
3745 
3746   format %{ "EDI" %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 operand naxRegI() %{
3751   constraint(ALLOC_IN_RC(nax_reg));
3752   match(RegI);
3753   match(eCXRegI);
3754   match(eDXRegI);
3755   match(eSIRegI);
3756   match(eDIRegI);
3757 
3758   format %{ %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand nadxRegI() %{
3763   constraint(ALLOC_IN_RC(nadx_reg));
3764   match(RegI);
3765   match(eBXRegI);
3766   match(eCXRegI);
3767   match(eSIRegI);
3768   match(eDIRegI);
3769 
3770   format %{ %}
3771   interface(REG_INTER);
3772 %}
3773 
3774 operand ncxRegI() %{
3775   constraint(ALLOC_IN_RC(ncx_reg));
3776   match(RegI);
3777   match(eAXRegI);
3778   match(eDXRegI);
3779   match(eSIRegI);
3780   match(eDIRegI);
3781 
3782   format %{ %}
3783   interface(REG_INTER);
3784 %}
3785 
3786 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3787 // //
3788 operand eSIRegI(xRegI reg) %{
3789    constraint(ALLOC_IN_RC(esi_reg));
3790    match(reg);
3791    match(rRegI);
3792 
3793    format %{ "ESI" %}
3794    interface(REG_INTER);
3795 %}
3796 
3797 // Pointer Register
3798 operand anyRegP() %{
3799   constraint(ALLOC_IN_RC(any_reg));
3800   match(RegP);
3801   match(eAXRegP);
3802   match(eBXRegP);
3803   match(eCXRegP);
3804   match(eDIRegP);
3805   match(eRegP);
3806 
3807   format %{ %}
3808   interface(REG_INTER);
3809 %}
3810 
3811 operand eRegP() %{
3812   constraint(ALLOC_IN_RC(int_reg));
3813   match(RegP);
3814   match(eAXRegP);
3815   match(eBXRegP);
3816   match(eCXRegP);
3817   match(eDIRegP);
3818 
3819   format %{ %}
3820   interface(REG_INTER);
3821 %}
3822 
3823 // On Windows 95, EBP is not safe to use for implicit null tests.
3824 operand eRegP_no_EBP() %{
3825   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3826   match(RegP);
3827   match(eAXRegP);
3828   match(eBXRegP);
3829   match(eCXRegP);
3830   match(eDIRegP);
3831 
3832   op_cost(100);
3833   format %{ %}
3834   interface(REG_INTER);
3835 %}
3836 
3837 operand naxRegP() %{
3838   constraint(ALLOC_IN_RC(nax_reg));
3839   match(RegP);
3840   match(eBXRegP);
3841   match(eDXRegP);
3842   match(eCXRegP);
3843   match(eSIRegP);
3844   match(eDIRegP);
3845 
3846   format %{ %}
3847   interface(REG_INTER);
3848 %}
3849 
3850 operand nabxRegP() %{
3851   constraint(ALLOC_IN_RC(nabx_reg));
3852   match(RegP);
3853   match(eCXRegP);
3854   match(eDXRegP);
3855   match(eSIRegP);
3856   match(eDIRegP);
3857 
3858   format %{ %}
3859   interface(REG_INTER);
3860 %}
3861 
3862 operand pRegP() %{
3863   constraint(ALLOC_IN_RC(p_reg));
3864   match(RegP);
3865   match(eBXRegP);
3866   match(eDXRegP);
3867   match(eSIRegP);
3868   match(eDIRegP);
3869 
3870   format %{ %}
3871   interface(REG_INTER);
3872 %}
3873 
3874 // Special Registers
3875 // Return a pointer value
3876 operand eAXRegP(eRegP reg) %{
3877   constraint(ALLOC_IN_RC(eax_reg));
3878   match(reg);
3879   format %{ "EAX" %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 // Used in AtomicAdd
3884 operand eBXRegP(eRegP reg) %{
3885   constraint(ALLOC_IN_RC(ebx_reg));
3886   match(reg);
3887   format %{ "EBX" %}
3888   interface(REG_INTER);
3889 %}
3890 
3891 // Tail-call (interprocedural jump) to interpreter
3892 operand eCXRegP(eRegP reg) %{
3893   constraint(ALLOC_IN_RC(ecx_reg));
3894   match(reg);
3895   format %{ "ECX" %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 operand eSIRegP(eRegP reg) %{
3900   constraint(ALLOC_IN_RC(esi_reg));
3901   match(reg);
3902   format %{ "ESI" %}
3903   interface(REG_INTER);
3904 %}
3905 
3906 // Used in rep stosw
3907 operand eDIRegP(eRegP reg) %{
3908   constraint(ALLOC_IN_RC(edi_reg));
3909   match(reg);
3910   format %{ "EDI" %}
3911   interface(REG_INTER);
3912 %}
3913 
3914 operand eRegL() %{
3915   constraint(ALLOC_IN_RC(long_reg));
3916   match(RegL);
3917   match(eADXRegL);
3918 
3919   format %{ %}
3920   interface(REG_INTER);
3921 %}
3922 
3923 operand eADXRegL( eRegL reg ) %{
3924   constraint(ALLOC_IN_RC(eadx_reg));
3925   match(reg);
3926 
3927   format %{ "EDX:EAX" %}
3928   interface(REG_INTER);
3929 %}
3930 
3931 operand eBCXRegL( eRegL reg ) %{
3932   constraint(ALLOC_IN_RC(ebcx_reg));
3933   match(reg);
3934 
3935   format %{ "EBX:ECX" %}
3936   interface(REG_INTER);
3937 %}
3938 
3939 // Special case for integer high multiply
3940 operand eADXRegL_low_only() %{
3941   constraint(ALLOC_IN_RC(eadx_reg));
3942   match(RegL);
3943 
3944   format %{ "EAX" %}
3945   interface(REG_INTER);
3946 %}
3947 
3948 // Flags register, used as output of compare instructions
3949 operand eFlagsReg() %{
3950   constraint(ALLOC_IN_RC(int_flags));
3951   match(RegFlags);
3952 
3953   format %{ "EFLAGS" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Flags register, used as output of FLOATING POINT compare instructions
3958 operand eFlagsRegU() %{
3959   constraint(ALLOC_IN_RC(int_flags));
3960   match(RegFlags);
3961 
3962   format %{ "EFLAGS_U" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 operand eFlagsRegUCF() %{
3967   constraint(ALLOC_IN_RC(int_flags));
3968   match(RegFlags);
3969   predicate(false);
3970 
3971   format %{ "EFLAGS_U_CF" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 // Condition Code Register used by long compare
3976 operand flagsReg_long_LTGE() %{
3977   constraint(ALLOC_IN_RC(int_flags));
3978   match(RegFlags);
3979   format %{ "FLAGS_LTGE" %}
3980   interface(REG_INTER);
3981 %}
3982 operand flagsReg_long_EQNE() %{
3983   constraint(ALLOC_IN_RC(int_flags));
3984   match(RegFlags);
3985   format %{ "FLAGS_EQNE" %}
3986   interface(REG_INTER);
3987 %}
3988 operand flagsReg_long_LEGT() %{
3989   constraint(ALLOC_IN_RC(int_flags));
3990   match(RegFlags);
3991   format %{ "FLAGS_LEGT" %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 // Float register operands
3996 operand regDPR() %{
3997   predicate( UseSSE < 2 );
3998   constraint(ALLOC_IN_RC(fp_dbl_reg));
3999   match(RegD);
4000   match(regDPR1);
4001   match(regDPR2);
4002   format %{ %}
4003   interface(REG_INTER);
4004 %}
4005 
4006 operand regDPR1(regDPR reg) %{
4007   predicate( UseSSE < 2 );
4008   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4009   match(reg);
4010   format %{ "FPR1" %}
4011   interface(REG_INTER);
4012 %}
4013 
4014 operand regDPR2(regDPR reg) %{
4015   predicate( UseSSE < 2 );
4016   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4017   match(reg);
4018   format %{ "FPR2" %}
4019   interface(REG_INTER);
4020 %}
4021 
4022 operand regnotDPR1(regDPR reg) %{
4023   predicate( UseSSE < 2 );
4024   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4025   match(reg);
4026   format %{ %}
4027   interface(REG_INTER);
4028 %}
4029 
4030 // Float register operands
4031 operand regFPR() %{
4032   predicate( UseSSE < 2 );
4033   constraint(ALLOC_IN_RC(fp_flt_reg));
4034   match(RegF);
4035   match(regFPR1);
4036   format %{ %}
4037   interface(REG_INTER);
4038 %}
4039 
4040 // Float register operands
4041 operand regFPR1(regFPR reg) %{
4042   predicate( UseSSE < 2 );
4043   constraint(ALLOC_IN_RC(fp_flt_reg0));
4044   match(reg);
4045   format %{ "FPR1" %}
4046   interface(REG_INTER);
4047 %}
4048 
4049 // XMM Float register operands
4050 operand regF() %{
4051   predicate( UseSSE>=1 );
4052   constraint(ALLOC_IN_RC(float_reg_legacy));
4053   match(RegF);
4054   format %{ %}
4055   interface(REG_INTER);
4056 %}
4057 
4058 // XMM Double register operands
4059 operand regD() %{
4060   predicate( UseSSE>=2 );
4061   constraint(ALLOC_IN_RC(double_reg_legacy));
4062   match(RegD);
4063   format %{ %}
4064   interface(REG_INTER);
4065 %}
4066 
4067 // Vectors: we use legacy registers here to avoid the extra runtime code
4068 // generation via reg_class_dynamic, which is not needed in the 32-bit VM.
4069 operand vecS() %{
4070   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4071   match(VecS);
4072 
4073   format %{ %}
4074   interface(REG_INTER);
4075 %}
4076 
4077 operand vecD() %{
4078   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4079   match(VecD);
4080 
4081   format %{ %}
4082   interface(REG_INTER);
4083 %}
4084 
4085 operand vecX() %{
4086   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4087   match(VecX);
4088 
4089   format %{ %}
4090   interface(REG_INTER);
4091 %}
4092 
4093 operand vecY() %{
4094   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4095   match(VecY);
4096 
4097   format %{ %}
4098   interface(REG_INTER);
4099 %}
4100 
4101 //----------Memory Operands----------------------------------------------------
4102 // Direct Memory Operand
4103 operand direct(immP addr) %{
4104   match(addr);
4105 
4106   format %{ "[$addr]" %}
4107   interface(MEMORY_INTER) %{
4108     base(0xFFFFFFFF);
4109     index(0x4);
4110     scale(0x0);
4111     disp($addr);
4112   %}
4113 %}
4114 
4115 // Indirect Memory Operand
4116 operand indirect(eRegP reg) %{
4117   constraint(ALLOC_IN_RC(int_reg));
4118   match(reg);
4119 
4120   format %{ "[$reg]" %}
4121   interface(MEMORY_INTER) %{
4122     base($reg);
4123     index(0x4);
4124     scale(0x0);
4125     disp(0x0);
4126   %}
4127 %}
4128 
4129 // Indirect Memory Plus Short Offset Operand
4130 operand indOffset8(eRegP reg, immI8 off) %{
4131   match(AddP reg off);
4132 
4133   format %{ "[$reg + $off]" %}
4134   interface(MEMORY_INTER) %{
4135     base($reg);
4136     index(0x4);
4137     scale(0x0);
4138     disp($off);
4139   %}
4140 %}
4141 
4142 // Indirect Memory Plus Long Offset Operand
4143 operand indOffset32(eRegP reg, immI off) %{
4144   match(AddP reg off);
4145 
4146   format %{ "[$reg + $off]" %}
4147   interface(MEMORY_INTER) %{
4148     base($reg);
4149     index(0x4);
4150     scale(0x0);
4151     disp($off);
4152   %}
4153 %}
4154 
4155 // Indirect Memory Plus Long Offset Operand
4156 operand indOffset32X(rRegI reg, immP off) %{
4157   match(AddP off reg);
4158 
4159   format %{ "[$reg + $off]" %}
4160   interface(MEMORY_INTER) %{
4161     base($reg);
4162     index(0x4);
4163     scale(0x0);
4164     disp($off);
4165   %}
4166 %}
4167 
4168 // Indirect Memory Plus Index Register Plus Offset Operand
4169 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4170   match(AddP (AddP reg ireg) off);
4171 
4172   op_cost(10);
4173   format %{"[$reg + $off + $ireg]" %}
4174   interface(MEMORY_INTER) %{
4175     base($reg);
4176     index($ireg);
4177     scale(0x0);
4178     disp($off);
4179   %}
4180 %}
4181 
4182 // Indirect Memory Plus Index Register Plus Offset Operand
4183 operand indIndex(eRegP reg, rRegI ireg) %{
4184   match(AddP reg ireg);
4185 
4186   op_cost(10);
4187   format %{"[$reg + $ireg]" %}
4188   interface(MEMORY_INTER) %{
4189     base($reg);
4190     index($ireg);
4191     scale(0x0);
4192     disp(0x0);
4193   %}
4194 %}
4195 
4196 // // -------------------------------------------------------------------------
4197 // // 486 architecture doesn't support "scale * index + offset" without a base
4198 // // -------------------------------------------------------------------------
4199 // // Scaled Memory Operands
4200 // // Indirect Memory Times Scale Plus Offset Operand
4201 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4202 //   match(AddP off (LShiftI ireg scale));
4203 //
4204 //   op_cost(10);
4205 //   format %{"[$off + $ireg << $scale]" %}
4206 //   interface(MEMORY_INTER) %{
4207 //     base(0x4);
4208 //     index($ireg);
4209 //     scale($scale);
4210 //     disp($off);
4211 //   %}
4212 // %}
4213 
4214 // Indirect Memory Times Scale Plus Index Register
4215 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4216   match(AddP reg (LShiftI ireg scale));
4217 
4218   op_cost(10);
4219   format %{"[$reg + $ireg << $scale]" %}
4220   interface(MEMORY_INTER) %{
4221     base($reg);
4222     index($ireg);
4223     scale($scale);
4224     disp(0x0);
4225   %}
4226 %}
4227 
4228 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4229 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4230   match(AddP (AddP reg (LShiftI ireg scale)) off);
4231 
4232   op_cost(10);
4233   format %{"[$reg + $off + $ireg << $scale]" %}
4234   interface(MEMORY_INTER) %{
4235     base($reg);
4236     index($ireg);
4237     scale($scale);
4238     disp($off);
4239   %}
4240 %}
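// Example of the shape this operand matches (illustrative assembly):
//   mov eax, [ebx + 16 + esi*4]   ; base=EBX, disp=16, index=ESI, scale=2
// i.e. exactly the base + disp + (index << scale) form encoded above.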
4241 
4242 //----------Load Long Memory Operands------------------------------------------
4243 // The load-long idiom will use its address expression again after loading
4244 // the first word of the long.  If the load-long destination overlaps with
4245 // registers used in the addressing expression, the 2nd half will be loaded
4246 // from a clobbered address.  Fix this by requiring that load-long use
4247 // address registers that do not overlap with the load-long target.
4248 
4249 // load-long support
4250 operand load_long_RegP() %{
4251   constraint(ALLOC_IN_RC(esi_reg));
4252   match(RegP);
4253   match(eSIRegP);
4254   op_cost(100);
4255   format %{  %}
4256   interface(REG_INTER);
4257 %}
4258 
4259 // Indirect Memory Operand Long
4260 operand load_long_indirect(load_long_RegP reg) %{
4261   constraint(ALLOC_IN_RC(esi_reg));
4262   match(reg);
4263 
4264   format %{ "[$reg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index(0x4);
4268     scale(0x0);
4269     disp(0x0);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Long Offset Operand
4274 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4275   match(AddP reg off);
4276 
4277   format %{ "[$reg + $off]" %}
4278   interface(MEMORY_INTER) %{
4279     base($reg);
4280     index(0x4);
4281     scale(0x0);
4282     disp($off);
4283   %}
4284 %}
4285 
4286 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4287 
4288 
4289 //----------Special Memory Operands--------------------------------------------
4290 // Stack Slot Operand - This operand is used for loading and storing temporary
4291 //                      values on the stack where a match requires a value to
4292 //                      flow through memory.
4293 operand stackSlotP(sRegP reg) %{
4294   constraint(ALLOC_IN_RC(stack_slots));
4295   // No match rule because this operand is only generated in matching
4296   format %{ "[$reg]" %}
4297   interface(MEMORY_INTER) %{
4298     base(0x4);   // ESP
4299     index(0x4);  // No Index
4300     scale(0x0);  // No Scale
4301     disp($reg);  // Stack Offset
4302   %}
4303 %}
4304 
4305 operand stackSlotI(sRegI reg) %{
4306   constraint(ALLOC_IN_RC(stack_slots));
4307   // No match rule because this operand is only generated in matching
4308   format %{ "[$reg]" %}
4309   interface(MEMORY_INTER) %{
4310     base(0x4);   // ESP
4311     index(0x4);  // No Index
4312     scale(0x0);  // No Scale
4313     disp($reg);  // Stack Offset
4314   %}
4315 %}
4316 
4317 operand stackSlotF(sRegF reg) %{
4318   constraint(ALLOC_IN_RC(stack_slots));
4319   // No match rule because this operand is only generated in matching
4320   format %{ "[$reg]" %}
4321   interface(MEMORY_INTER) %{
4322     base(0x4);   // ESP
4323     index(0x4);  // No Index
4324     scale(0x0);  // No Scale
4325     disp($reg);  // Stack Offset
4326   %}
4327 %}
4328 
4329 operand stackSlotD(sRegD reg) %{
4330   constraint(ALLOC_IN_RC(stack_slots));
4331   // No match rule because this operand is only generated in matching
4332   format %{ "[$reg]" %}
4333   interface(MEMORY_INTER) %{
4334     base(0x4);   // ESP
4335     index(0x4);  // No Index
4336     scale(0x0);  // No Scale
4337     disp($reg);  // Stack Offset
4338   %}
4339 %}
4340 
4341 operand stackSlotL(sRegL reg) %{
4342   constraint(ALLOC_IN_RC(stack_slots));
4343   // No match rule because this operand is only generated in matching
4344   format %{ "[$reg]" %}
4345   interface(MEMORY_INTER) %{
4346     base(0x4);   // ESP
4347     index(0x4);  // No Index
4348     scale(0x0);  // No Scale
4349     disp($reg);  // Stack Offset
4350   %}
4351 %}
4352 
4353 //----------Memory Operands - Win95 Implicit Null Variants----------------
4354 // Indirect Memory Operand
4355 operand indirect_win95_safe(eRegP_no_EBP reg)
4356 %{
4357   constraint(ALLOC_IN_RC(int_reg));
4358   match(reg);
4359 
4360   op_cost(100);
4361   format %{ "[$reg]" %}
4362   interface(MEMORY_INTER) %{
4363     base($reg);
4364     index(0x4);
4365     scale(0x0);
4366     disp(0x0);
4367   %}
4368 %}
4369 
4370 // Indirect Memory Plus Short Offset Operand
4371 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4372 %{
4373   match(AddP reg off);
4374 
4375   op_cost(100);
4376   format %{ "[$reg + $off]" %}
4377   interface(MEMORY_INTER) %{
4378     base($reg);
4379     index(0x4);
4380     scale(0x0);
4381     disp($off);
4382   %}
4383 %}
4384 
4385 // Indirect Memory Plus Long Offset Operand
4386 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4387 %{
4388   match(AddP reg off);
4389 
4390   op_cost(100);
4391   format %{ "[$reg + $off]" %}
4392   interface(MEMORY_INTER) %{
4393     base($reg);
4394     index(0x4);
4395     scale(0x0);
4396     disp($off);
4397   %}
4398 %}
4399 
4400 // Indirect Memory Plus Index Register Plus Offset Operand
4401 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4402 %{
4403   match(AddP (AddP reg ireg) off);
4404 
4405   op_cost(100);
4406   format %{"[$reg + $off + $ireg]" %}
4407   interface(MEMORY_INTER) %{
4408     base($reg);
4409     index($ireg);
4410     scale(0x0);
4411     disp($off);
4412   %}
4413 %}
4414 
4415 // Indirect Memory Times Scale Plus Index Register
4416 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4417 %{
4418   match(AddP reg (LShiftI ireg scale));
4419 
4420   op_cost(100);
4421   format %{"[$reg + $ireg << $scale]" %}
4422   interface(MEMORY_INTER) %{
4423     base($reg);
4424     index($ireg);
4425     scale($scale);
4426     disp(0x0);
4427   %}
4428 %}
4429 
4430 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4431 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4432 %{
4433   match(AddP (AddP reg (LShiftI ireg scale)) off);
4434 
4435   op_cost(100);
4436   format %{"[$reg + $off + $ireg << $scale]" %}
4437   interface(MEMORY_INTER) %{
4438     base($reg);
4439     index($ireg);
4440     scale($scale);
4441     disp($off);
4442   %}
4443 %}
4444 
4445 //----------Conditional Branch Operands----------------------------------------
4446 // Comparison Op  - This is the operation of the comparison, and is limited to
4447 //                  the following set of codes:
4448 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4449 //
4450 // Other attributes of the comparison, such as unsignedness, are specified
4451 // by the comparison instruction that sets a condition code flags register.
4452 // That result is represented by a flags operand whose subtype is appropriate
4453 // to the unsignedness (etc.) of the comparison.
4454 //
4455 // Later, the instruction which matches both the Comparison Op (a Bool) and
4456 // the flags (produced by the Cmp) specifies the coding of the comparison op
4457 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4458 
4459 // Comparison Code
4460 operand cmpOp() %{
4461   match(Bool);
4462 
4463   format %{ "" %}
4464   interface(COND_INTER) %{
4465     equal(0x4, "e");
4466     not_equal(0x5, "ne");
4467     less(0xC, "l");
4468     greater_equal(0xD, "ge");
4469     less_equal(0xE, "le");
4470     greater(0xF, "g");
4471     overflow(0x0, "o");
4472     no_overflow(0x1, "no");
4473   %}
4474 %}
4475 
4476 // Comparison Code, unsigned compare.  Used by FP also, with
4477 // C2 (unordered) turned into GT or LT already.  The other bits
4478 // C0 and C3 are turned into Carry & Zero flags.
4479 operand cmpOpU() %{
4480   match(Bool);
4481 
4482   format %{ "" %}
4483   interface(COND_INTER) %{
4484     equal(0x4, "e");
4485     not_equal(0x5, "ne");
4486     less(0x2, "b");
4487     greater_equal(0x3, "nb");
4488     less_equal(0x6, "be");
4489     greater(0x7, "nbe");
4490     overflow(0x0, "o");
4491     no_overflow(0x1, "no");
4492   %}
4493 %}
4494 
4495 // Floating comparisons that don't require any fixup for the unordered case
4496 operand cmpOpUCF() %{
4497   match(Bool);
4498   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4499             n->as_Bool()->_test._test == BoolTest::ge ||
4500             n->as_Bool()->_test._test == BoolTest::le ||
4501             n->as_Bool()->_test._test == BoolTest::gt);
4502   format %{ "" %}
4503   interface(COND_INTER) %{
4504     equal(0x4, "e");
4505     not_equal(0x5, "ne");
4506     less(0x2, "b");
4507     greater_equal(0x3, "nb");
4508     less_equal(0x6, "be");
4509     greater(0x7, "nbe");
4510     overflow(0x0, "o");
4511     no_overflow(0x1, "no");
4512   %}
4513 %}
4514 
4515 
4516 // Floating comparisons that can be fixed up with extra conditional jumps
4517 operand cmpOpUCF2() %{
4518   match(Bool);
4519   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4520             n->as_Bool()->_test._test == BoolTest::eq);
4521   format %{ "" %}
4522   interface(COND_INTER) %{
4523     equal(0x4, "e");
4524     not_equal(0x5, "ne");
4525     less(0x2, "b");
4526     greater_equal(0x3, "nb");
4527     less_equal(0x6, "be");
4528     greater(0x7, "nbe");
4529     overflow(0x0, "o");
4530     no_overflow(0x1, "no");
4531   %}
4532 %}
4533 
4534 // Comparison Code for FP conditional move
4535 operand cmpOp_fcmov() %{
4536   match(Bool);
4537 
4538   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4539             n->as_Bool()->_test._test != BoolTest::no_overflow);
4540   format %{ "" %}
4541   interface(COND_INTER) %{
4542     equal        (0x0C8);
4543     not_equal    (0x1C8);
4544     less         (0x0C0);
4545     greater_equal(0x1C0);
4546     less_equal   (0x0D0);
4547     greater      (0x1D0);
4548     overflow(0x0, "o"); // not really supported by the instruction
4549     no_overflow(0x1, "no"); // not really supported by the instruction
4550   %}
4551 %}
4552 
4553 // Comparison Code used in long compares (reversed vs. cmpOp: the compare's operands have been commuted)
4554 operand cmpOp_commute() %{
4555   match(Bool);
4556 
4557   format %{ "" %}
4558   interface(COND_INTER) %{
4559     equal(0x4, "e");
4560     not_equal(0x5, "ne");
4561     less(0xF, "g");
4562     greater_equal(0xE, "le");
4563     less_equal(0xD, "ge");
4564     greater(0xC, "l");
4565     overflow(0x0, "o");
4566     no_overflow(0x1, "no");
4567   %}
4568 %}
4569 
4570 //----------OPERAND CLASSES----------------------------------------------------
4571 // Operand Classes are groups of operands that are used to simplify
4572 // instruction definitions by not requiring the AD writer to specify separate
4573 // instructions for every form of operand when the instruction accepts
4574 // multiple operand types with the same basic encoding and format.  The
4575 // classic case of this is memory operands (usage sketch below the opclasses).
4576 
4577 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4578                indIndex, indIndexScale, indIndexScaleOffset);
4579 
4580 // Long memory operations are encoded in 2 instructions and a +4 offset.
4581 // This means some kind of offset is always required and you cannot use
4582 // an oop as the offset (as is done when working on static globals).
4583 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4584                     indIndex, indIndexScale, indIndexScaleOffset);
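
// Usage sketch: a rule written against the "memory" opclass matches every
// addressing form listed above with a single definition; for example the
// loadI rule further below,
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));  // mem: indirect, ind+off, ind+index*scale+off, ...
//   %}
//
// needs no per-addressing-mode variants.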
4585 
4586 
4587 //----------PIPELINE-----------------------------------------------------------
4588 // Rules which define the behavior of the target architecture's pipeline.
4589 pipeline %{
4590 
4591 //----------ATTRIBUTES---------------------------------------------------------
4592 attributes %{
4593   variable_size_instructions;        // Variable-sized instructions
4594   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4595   instruction_unit_size = 1;         // An instruction is 1 byte long
4596   instruction_fetch_unit_size = 16;  // The processor fetches one line
4597   instruction_fetch_units = 1;       // of 16 bytes
4598 
4599   // List of nop instructions
4600   nops( MachNop );
4601 %}
4602 
4603 //----------RESOURCES----------------------------------------------------------
4604 // Resources are the functional units available to the machine
4605 
4606 // Generic P2/P3 pipeline
4607 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4608 // 3 instructions decoded per cycle.
4609 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4610 // 2 ALU ops, only ALU0 handles mul/div instructions.
4611 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4612            MS0, MS1, MEM = MS0 | MS1,
4613            BR, FPU,
4614            ALU0, ALU1, ALU = ALU0 | ALU1 );
4615 
4616 //----------PIPELINE DESCRIPTION-----------------------------------------------
4617 // Pipeline Description specifies the stages in the machine's pipeline
4618 
4619 // Generic P2/P3 pipeline
4620 pipe_desc(S0, S1, S2, S3, S4, S5);
4621 
4622 //----------PIPELINE CLASSES---------------------------------------------------
4623 // Pipeline Classes describe the stages in which input and output are
4624 // referenced by the hardware pipeline.
4625 
4626 // Naming convention: ialu or fpu
4627 // Then: _reg
4628 // Then: _reg if there is a 2nd register
4629 // Then: _long if it's a pair of instructions implementing a long
4630 // Then: _fat if it requires the big decoder
4631 //   Or: _mem if it requires the big decoder and a memory unit.
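//
// Example readings under this convention: "ialu_reg_mem" is an integer ALU op
// with a register destination and a memory source (so it claims the big
// decoder plus a memory unit), while "ialu_reg_reg_long" is a register-register
// integer op expanded into the two instructions of a long operation.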
4632 
4633 // Integer ALU reg operation
4634 pipe_class ialu_reg(rRegI dst) %{
4635     single_instruction;
4636     dst    : S4(write);
4637     dst    : S3(read);
4638     DECODE : S0;        // any decoder
4639     ALU    : S3;        // any alu
4640 %}
4641 
4642 // Long ALU reg operation
4643 pipe_class ialu_reg_long(eRegL dst) %{
4644     instruction_count(2);
4645     dst    : S4(write);
4646     dst    : S3(read);
4647     DECODE : S0(2);     // any 2 decoders
4648     ALU    : S3(2);     // both alus
4649 %}
4650 
4651 // Integer ALU reg operation using big decoder
4652 pipe_class ialu_reg_fat(rRegI dst) %{
4653     single_instruction;
4654     dst    : S4(write);
4655     dst    : S3(read);
4656     D0     : S0;        // big decoder only
4657     ALU    : S3;        // any alu
4658 %}
4659 
4660 // Long ALU reg operation using big decoder
4661 pipe_class ialu_reg_long_fat(eRegL dst) %{
4662     instruction_count(2);
4663     dst    : S4(write);
4664     dst    : S3(read);
4665     D0     : S0(2);     // big decoder only; twice
4666     ALU    : S3(2);     // any 2 alus
4667 %}
4668 
4669 // Integer ALU reg-reg operation
4670 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4671     single_instruction;
4672     dst    : S4(write);
4673     src    : S3(read);
4674     DECODE : S0;        // any decoder
4675     ALU    : S3;        // any alu
4676 %}
4677 
4678 // Long ALU reg-reg operation
4679 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4680     instruction_count(2);
4681     dst    : S4(write);
4682     src    : S3(read);
4683     DECODE : S0(2);     // any 2 decoders
4684     ALU    : S3(2);     // both alus
4685 %}
4686 
4687 // Integer ALU reg-reg operation using big decoder
4688 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4689     single_instruction;
4690     dst    : S4(write);
4691     src    : S3(read);
4692     D0     : S0;        // big decoder only
4693     ALU    : S3;        // any alu
4694 %}
4695 
4696 // Long ALU reg-reg operation using big decoder
4697 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4698     instruction_count(2);
4699     dst    : S4(write);
4700     src    : S3(read);
4701     D0     : S0(2);     // big decoder only; twice
4702     ALU    : S3(2);     // both alus
4703 %}
4704 
4705 // Integer ALU reg-mem operation
4706 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4707     single_instruction;
4708     dst    : S5(write);
4709     mem    : S3(read);
4710     D0     : S0;        // big decoder only
4711     ALU    : S4;        // any alu
4712     MEM    : S3;        // any mem
4713 %}
4714 
4715 // Long ALU reg-mem operation
4716 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4717     instruction_count(2);
4718     dst    : S5(write);
4719     mem    : S3(read);
4720     D0     : S0(2);     // big decoder only; twice
4721     ALU    : S4(2);     // any 2 alus
4722     MEM    : S3(2);     // both mems
4723 %}
4724 
4725 // Integer mem operation (prefetch)
4726 pipe_class ialu_mem(memory mem)
4727 %{
4728     single_instruction;
4729     mem    : S3(read);
4730     D0     : S0;        // big decoder only
4731     MEM    : S3;        // any mem
4732 %}
4733 
4734 // Integer Store to Memory
4735 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4736     single_instruction;
4737     mem    : S3(read);
4738     src    : S5(read);
4739     D0     : S0;        // big decoder only
4740     ALU    : S4;        // any alu
4741     MEM    : S3;
4742 %}
4743 
4744 // Long Store to Memory
4745 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4746     instruction_count(2);
4747     mem    : S3(read);
4748     src    : S5(read);
4749     D0     : S0(2);     // big decoder only; twice
4750     ALU    : S4(2);     // any 2 alus
4751     MEM    : S3(2);     // Both mems
4752 %}
4753 
4754 // Integer Store to Memory
4755 pipe_class ialu_mem_imm(memory mem) %{
4756     single_instruction;
4757     mem    : S3(read);
4758     D0     : S0;        // big decoder only
4759     ALU    : S4;        // any alu
4760     MEM    : S3;
4761 %}
4762 
4763 // Integer ALU0 reg-reg operation
4764 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4765     single_instruction;
4766     dst    : S4(write);
4767     src    : S3(read);
4768     D0     : S0;        // Big decoder only
4769     ALU0   : S3;        // only alu0
4770 %}
4771 
4772 // Integer ALU0 reg-mem operation
4773 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4774     single_instruction;
4775     dst    : S5(write);
4776     mem    : S3(read);
4777     D0     : S0;        // big decoder only
4778     ALU0   : S4;        // ALU0 only
4779     MEM    : S3;        // any mem
4780 %}
4781 
4782 // Integer ALU reg-reg operation
4783 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4784     single_instruction;
4785     cr     : S4(write);
4786     src1   : S3(read);
4787     src2   : S3(read);
4788     DECODE : S0;        // any decoder
4789     ALU    : S3;        // any alu
4790 %}
4791 
4792 // Integer ALU reg-imm operation
4793 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4794     single_instruction;
4795     cr     : S4(write);
4796     src1   : S3(read);
4797     DECODE : S0;        // any decoder
4798     ALU    : S3;        // any alu
4799 %}
4800 
4801 // Integer ALU reg-mem operation
4802 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4803     single_instruction;
4804     cr     : S4(write);
4805     src1   : S3(read);
4806     src2   : S3(read);
4807     D0     : S0;        // big decoder only
4808     ALU    : S4;        // any alu
4809     MEM    : S3;
4810 %}
4811 
4812 // Conditional move reg-reg
4813 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4814     instruction_count(4);
4815     y      : S4(read);
4816     q      : S3(read);
4817     p      : S3(read);
4818     DECODE : S0(4);     // any decoder
4819 %}
4820 
4821 // Conditional move reg-reg
4822 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4823     single_instruction;
4824     dst    : S4(write);
4825     src    : S3(read);
4826     cr     : S3(read);
4827     DECODE : S0;        // any decoder
4828 %}
4829 
4830 // Conditional move reg-mem
4831 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4832     single_instruction;
4833     dst    : S4(write);
4834     src    : S3(read);
4835     cr     : S3(read);
4836     DECODE : S0;        // any decoder
4837     MEM    : S3;
4838 %}
4839 
4840 // Conditional move reg-reg long
4841 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4842     single_instruction;
4843     dst    : S4(write);
4844     src    : S3(read);
4845     cr     : S3(read);
4846     DECODE : S0(2);     // any 2 decoders
4847 %}
4848 
4849 // Conditional move double reg-reg
4850 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4851     single_instruction;
4852     dst    : S4(write);
4853     src    : S3(read);
4854     cr     : S3(read);
4855     DECODE : S0;        // any decoder
4856 %}
4857 
4858 // Float reg-reg operation
4859 pipe_class fpu_reg(regDPR dst) %{
4860     instruction_count(2);
4861     dst    : S3(read);
4862     DECODE : S0(2);     // any 2 decoders
4863     FPU    : S3;
4864 %}
4865 
4866 // Float reg-reg operation
4867 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4868     instruction_count(2);
4869     dst    : S4(write);
4870     src    : S3(read);
4871     DECODE : S0(2);     // any 2 decoders
4872     FPU    : S3;
4873 %}
4874 
4875 // Float reg-reg operation
4876 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4877     instruction_count(3);
4878     dst    : S4(write);
4879     src1   : S3(read);
4880     src2   : S3(read);
4881     DECODE : S0(3);     // any 3 decoders
4882     FPU    : S3(2);
4883 %}
4884 
4885 // Float reg-reg operation
4886 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4887     instruction_count(4);
4888     dst    : S4(write);
4889     src1   : S3(read);
4890     src2   : S3(read);
4891     src3   : S3(read);
4892     DECODE : S0(4);     // any 4 decoder slots
4893     FPU    : S3(2);
4894 %}
4895 
4896 // Float reg-reg operation
4897 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4898     instruction_count(4);
4899     dst    : S4(write);
4900     src1   : S3(read);
4901     src2   : S3(read);
4902     src3   : S3(read);
4903     DECODE : S1(3);     // any 3 decoders
4904     D0     : S0;        // Big decoder only
4905     FPU    : S3(2);
4906     MEM    : S3;
4907 %}
4908 
4909 // Float reg-mem operation
4910 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4911     instruction_count(2);
4912     dst    : S5(write);
4913     mem    : S3(read);
4914     D0     : S0;        // big decoder only
4915     DECODE : S1;        // any decoder for FPU POP
4916     FPU    : S4;
4917     MEM    : S3;        // any mem
4918 %}
4919 
4920 // Float reg-mem operation
4921 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4922     instruction_count(3);
4923     dst    : S5(write);
4924     src1   : S3(read);
4925     mem    : S3(read);
4926     D0     : S0;        // big decoder only
4927     DECODE : S1(2);     // any decoder for FPU POP
4928     FPU    : S4;
4929     MEM    : S3;        // any mem
4930 %}
4931 
4932 // Float mem-reg operation
4933 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4934     instruction_count(2);
4935     src    : S5(read);
4936     mem    : S3(read);
4937     DECODE : S0;        // any decoder for FPU PUSH
4938     D0     : S1;        // big decoder only
4939     FPU    : S4;
4940     MEM    : S3;        // any mem
4941 %}
4942 
4943 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4944     instruction_count(3);
4945     src1   : S3(read);
4946     src2   : S3(read);
4947     mem    : S3(read);
4948     DECODE : S0(2);     // any decoder for FPU PUSH
4949     D0     : S1;        // big decoder only
4950     FPU    : S4;
4951     MEM    : S3;        // any mem
4952 %}
4953 
4954 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4955     instruction_count(3);
4956     src1   : S3(read);
4957     src2   : S3(read);
4958     mem    : S4(read);
4959     DECODE : S0;        // any decoder for FPU PUSH
4960     D0     : S0(2);     // big decoder only
4961     FPU    : S4;
4962     MEM    : S3(2);     // any mem
4963 %}
4964 
4965 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4966     instruction_count(2);
4967     src1   : S3(read);
4968     dst    : S4(read);
4969     D0     : S0(2);     // big decoder only
4970     MEM    : S3(2);     // any mem
4971 %}
4972 
4973 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4974     instruction_count(3);
4975     src1   : S3(read);
4976     src2   : S3(read);
4977     dst    : S4(read);
4978     D0     : S0(3);     // big decoder only
4979     FPU    : S4;
4980     MEM    : S3(3);     // any mem
4981 %}
4982 
4983 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4984     instruction_count(3);
4985     src1   : S4(read);
4986     mem    : S4(read);
4987     DECODE : S0;        // any decoder for FPU PUSH
4988     D0     : S0(2);     // big decoder only
4989     FPU    : S4;
4990     MEM    : S3(2);     // any mem
4991 %}
4992 
4993 // Float load constant
4994 pipe_class fpu_reg_con(regDPR dst) %{
4995     instruction_count(2);
4996     dst    : S5(write);
4997     D0     : S0;        // big decoder only for the load
4998     DECODE : S1;        // any decoder for FPU POP
4999     FPU    : S4;
5000     MEM    : S3;        // any mem
5001 %}
5002 
5003 // Float load constant
5004 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5005     instruction_count(3);
5006     dst    : S5(write);
5007     src    : S3(read);
5008     D0     : S0;        // big decoder only for the load
5009     DECODE : S1(2);     // any decoder for FPU POP
5010     FPU    : S4;
5011     MEM    : S3;        // any mem
5012 %}
5013 
5014 // Unconditional branch
5015 pipe_class pipe_jmp( label labl ) %{
5016     single_instruction;
5017     BR   : S3;
5018 %}
5019 
5020 // Conditional branch
5021 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5022     single_instruction;
5023     cr    : S1(read);
5024     BR    : S3;
5025 %}
5026 
5027 // Allocation idiom
5028 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5029     instruction_count(1); force_serialization;
5030     fixed_latency(6);
5031     heap_ptr : S3(read);
5032     DECODE   : S0(3);
5033     D0       : S2;
5034     MEM      : S3;
5035     ALU      : S3(2);
5036     dst      : S5(write);
5037     BR       : S5;
5038 %}
5039 
5040 // Generic big/slow expanded idiom
5041 pipe_class pipe_slow(  ) %{
5042     instruction_count(10); multiple_bundles; force_serialization;
5043     fixed_latency(100);
5044     D0  : S0(2);
5045     MEM : S3(2);
5046 %}
5047 
5048 // The real do-nothing guy
5049 pipe_class empty( ) %{
5050     instruction_count(0);
5051 %}
5052 
5053 // Define the class for the Nop node
5054 define %{
5055    MachNop = empty;
5056 %}
5057 
5058 %}
5059 
5060 //----------INSTRUCTIONS-------------------------------------------------------
5061 //
5062 // match      -- States which machine-independent subtree may be replaced
5063 //               by this instruction.
5064 // ins_cost   -- The estimated cost of this instruction is used by instruction
5065 //               selection to identify a minimum cost tree of machine
5066 //               instructions that matches a tree of machine-independent
5067 //               instructions.
5068 // format     -- A string providing the disassembly for this instruction.
5069 //               The value of an instruction's operand may be inserted
5070 //               by referring to it with a '$' prefix.
5071 // opcode     -- Three instruction opcodes may be provided.  These are referred
5072 //               to within an encode class as $primary, $secondary, and $tertiary
5073 //               respectively.  The primary opcode is commonly used to
5074 //               indicate the type of machine instruction, while secondary
5075 //               and tertiary are often used for prefix options or addressing
5076 //               modes.
5077 // ins_encode -- A list of encode classes with parameters. The encode class
5078 //               name must have been defined in an 'enc_class' specification
5079 //               in the encode section of the architecture description.
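//
// Worked example tying these together (the bytes_reverse_int rule just below):
//
//   opcode(0x0F, 0xC8);                 // $primary, $secondary
//   ins_encode( OpcP, OpcSReg(dst) );   // emits 0F, then C8+reg
//
// i.e. the BSWAP encoding with the allocated register folded into the second
// opcode byte.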
5080 
5081 //----------BSWAP-Instruction--------------------------------------------------
5082 instruct bytes_reverse_int(rRegI dst) %{
5083   match(Set dst (ReverseBytesI dst));
5084 
5085   format %{ "BSWAP  $dst" %}
5086   opcode(0x0F, 0xC8);
5087   ins_encode( OpcP, OpcSReg(dst) );
5088   ins_pipe( ialu_reg );
5089 %}
5090 
5091 instruct bytes_reverse_long(eRegL dst) %{
5092   match(Set dst (ReverseBytesL dst));
5093 
5094   format %{ "BSWAP  $dst.lo\n\t"
5095             "BSWAP  $dst.hi\n\t"
5096             "XCHG   $dst.lo $dst.hi" %}
5097 
5098   ins_cost(125);
5099   ins_encode( bswap_long_bytes(dst) );
5100   ins_pipe( ialu_reg_reg);
5101 %}
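
// (Reversing the bytes of a 64-bit value is equivalent to byte-reversing each
// 32-bit half and then exchanging the halves, which is exactly the three
// instructions above.)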
5102 
5103 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5104   match(Set dst (ReverseBytesUS dst));
5105   effect(KILL cr);
5106 
5107   format %{ "BSWAP  $dst\n\t"
5108             "SHR    $dst,16\n\t" %}
5109   ins_encode %{
5110     __ bswapl($dst$$Register);
5111     __ shrl($dst$$Register, 16);
5112   %}
5113   ins_pipe( ialu_reg );
5114 %}
5115 
5116 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5117   match(Set dst (ReverseBytesS dst));
5118   effect(KILL cr);
5119 
5120   format %{ "BSWAP  $dst\n\t"
5121             "SAR    $dst,16\n\t" %}
5122   ins_encode %{
5123     __ bswapl($dst$$Register);
5124     __ sarl($dst$$Register, 16);
5125   %}
5126   ins_pipe( ialu_reg );
5127 %}
5128 
5129 
5130 //---------- Zeros Count Instructions ------------------------------------------
5131 
5132 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5133   predicate(UseCountLeadingZerosInstruction);
5134   match(Set dst (CountLeadingZerosI src));
5135   effect(KILL cr);
5136 
5137   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5138   ins_encode %{
5139     __ lzcntl($dst$$Register, $src$$Register);
5140   %}
5141   ins_pipe(ialu_reg);
5142 %}
5143 
5144 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5145   predicate(!UseCountLeadingZerosInstruction);
5146   match(Set dst (CountLeadingZerosI src));
5147   effect(KILL cr);
5148 
5149   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5150             "JNZ    skip\n\t"
5151             "MOV    $dst, -1\n"
5152       "skip:\n\t"
5153             "NEG    $dst\n\t"
5154             "ADD    $dst, 31" %}
5155   ins_encode %{
5156     Register Rdst = $dst$$Register;
5157     Register Rsrc = $src$$Register;
5158     Label skip;
5159     __ bsrl(Rdst, Rsrc);
5160     __ jccb(Assembler::notZero, skip);
5161     __ movl(Rdst, -1);
5162     __ bind(skip);
5163     __ negl(Rdst);
5164     __ addl(Rdst, BitsPerInt - 1);
5165   %}
5166   ins_pipe(ialu_reg);
5167 %}
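
// Worked example for the BSR fallback above: BSR returns the index of the
// highest set bit, so for src = 0x00001000 it yields 12 and -12 + 31 = 19
// leading zeros; for src = 0 the JNZ falls through, dst is forced to -1, and
// -(-1) + 31 = 32 as required.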
5168 
5169 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5170   predicate(UseCountLeadingZerosInstruction);
5171   match(Set dst (CountLeadingZerosL src));
5172   effect(TEMP dst, KILL cr);
5173 
5174   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5175             "JNC    done\n\t"
5176             "LZCNT  $dst, $src.lo\n\t"
5177             "ADD    $dst, 32\n"
5178       "done:" %}
5179   ins_encode %{
5180     Register Rdst = $dst$$Register;
5181     Register Rsrc = $src$$Register;
5182     Label done;
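    // LZCNT sets CF when its source operand is zero.  Carry clear below means
    // the high word was non-zero, so its count (0..31) is already the final
    // answer; otherwise count the low word and add 32.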
5183     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5184     __ jccb(Assembler::carryClear, done);
5185     __ lzcntl(Rdst, Rsrc);
5186     __ addl(Rdst, BitsPerInt);
5187     __ bind(done);
5188   %}
5189   ins_pipe(ialu_reg);
5190 %}
5191 
5192 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5193   predicate(!UseCountLeadingZerosInstruction);
5194   match(Set dst (CountLeadingZerosL src));
5195   effect(TEMP dst, KILL cr);
5196 
5197   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5198             "JZ     msw_is_zero\n\t"
5199             "ADD    $dst, 32\n\t"
5200             "JMP    not_zero\n"
5201       "msw_is_zero:\n\t"
5202             "BSR    $dst, $src.lo\n\t"
5203             "JNZ    not_zero\n\t"
5204             "MOV    $dst, -1\n"
5205       "not_zero:\n\t"
5206             "NEG    $dst\n\t"
5207             "ADD    $dst, 63\n" %}
5208   ins_encode %{
5209     Register Rdst = $dst$$Register;
5210     Register Rsrc = $src$$Register;
5211     Label msw_is_zero;
5212     Label not_zero;
5213     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5214     __ jccb(Assembler::zero, msw_is_zero);
5215     __ addl(Rdst, BitsPerInt);
5216     __ jmpb(not_zero);
5217     __ bind(msw_is_zero);
5218     __ bsrl(Rdst, Rsrc);
5219     __ jccb(Assembler::notZero, not_zero);
5220     __ movl(Rdst, -1);
5221     __ bind(not_zero);
5222     __ negl(Rdst);
5223     __ addl(Rdst, BitsPerLong - 1);
5224   %}
5225   ins_pipe(ialu_reg);
5226 %}
5227 
5228 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5229   predicate(UseCountTrailingZerosInstruction);
5230   match(Set dst (CountTrailingZerosI src));
5231   effect(KILL cr);
5232 
5233   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5234   ins_encode %{
5235     __ tzcntl($dst$$Register, $src$$Register);
5236   %}
5237   ins_pipe(ialu_reg);
5238 %}
5239 
5240 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5241   predicate(!UseCountTrailingZerosInstruction);
5242   match(Set dst (CountTrailingZerosI src));
5243   effect(KILL cr);
5244 
5245   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5246             "JNZ    done\n\t"
5247             "MOV    $dst, 32\n"
5248       "done:" %}
5249   ins_encode %{
5250     Register Rdst = $dst$$Register;
5251     Label done;
5252     __ bsfl(Rdst, $src$$Register);
5253     __ jccb(Assembler::notZero, done);
5254     __ movl(Rdst, BitsPerInt);
5255     __ bind(done);
5256   %}
5257   ins_pipe(ialu_reg);
5258 %}
5259 
5260 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5261   predicate(UseCountTrailingZerosInstruction);
5262   match(Set dst (CountTrailingZerosL src));
5263   effect(TEMP dst, KILL cr);
5264 
5265   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5266             "JNC    done\n\t"
5267             "TZCNT  $dst, $src.hi\n\t"
5268             "ADD    $dst, 32\n"
5269             "done:" %}
5270   ins_encode %{
5271     Register Rdst = $dst$$Register;
5272     Register Rsrc = $src$$Register;
5273     Label done;
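    // TZCNT sets CF when its source operand is zero.  Carry clear below means
    // the low word had a set bit, so its count is final; otherwise count the
    // high word and add 32.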
5274     __ tzcntl(Rdst, Rsrc);
5275     __ jccb(Assembler::carryClear, done);
5276     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5277     __ addl(Rdst, BitsPerInt);
5278     __ bind(done);
5279   %}
5280   ins_pipe(ialu_reg);
5281 %}
5282 
5283 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5284   predicate(!UseCountTrailingZerosInstruction);
5285   match(Set dst (CountTrailingZerosL src));
5286   effect(TEMP dst, KILL cr);
5287 
5288   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5289             "JNZ    done\n\t"
5290             "BSF    $dst, $src.hi\n\t"
5291             "JNZ    msw_not_zero\n\t"
5292             "MOV    $dst, 32\n"
5293       "msw_not_zero:\n\t"
5294             "ADD    $dst, 32\n"
5295       "done:" %}
5296   ins_encode %{
5297     Register Rdst = $dst$$Register;
5298     Register Rsrc = $src$$Register;
5299     Label msw_not_zero;
5300     Label done;
5301     __ bsfl(Rdst, Rsrc);
5302     __ jccb(Assembler::notZero, done);
5303     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5304     __ jccb(Assembler::notZero, msw_not_zero);
5305     __ movl(Rdst, BitsPerInt);
5306     __ bind(msw_not_zero);
5307     __ addl(Rdst, BitsPerInt);
5308     __ bind(done);
5309   %}
5310   ins_pipe(ialu_reg);
5311 %}
5312 
5313 
5314 //---------- Population Count Instructions -------------------------------------
5315 
5316 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5317   predicate(UsePopCountInstruction);
5318   match(Set dst (PopCountI src));
5319   effect(KILL cr);
5320 
5321   format %{ "POPCNT $dst, $src" %}
5322   ins_encode %{
5323     __ popcntl($dst$$Register, $src$$Register);
5324   %}
5325   ins_pipe(ialu_reg);
5326 %}
5327 
5328 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5329   predicate(UsePopCountInstruction);
5330   match(Set dst (PopCountI (LoadI mem)));
5331   effect(KILL cr);
5332 
5333   format %{ "POPCNT $dst, $mem" %}
5334   ins_encode %{
5335     __ popcntl($dst$$Register, $mem$$Address);
5336   %}
5337   ins_pipe(ialu_reg);
5338 %}
5339 
5340 // Note: Long.bitCount(long) returns an int.
5341 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5342   predicate(UsePopCountInstruction);
5343   match(Set dst (PopCountL src));
5344   effect(KILL cr, TEMP tmp, TEMP dst);
5345 
5346   format %{ "POPCNT $dst, $src.lo\n\t"
5347             "POPCNT $tmp, $src.hi\n\t"
5348             "ADD    $dst, $tmp" %}
5349   ins_encode %{
5350     __ popcntl($dst$$Register, $src$$Register);
5351     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5352     __ addl($dst$$Register, $tmp$$Register);
5353   %}
5354   ins_pipe(ialu_reg);
5355 %}
5356 
5357 // Note: Long.bitCount(long) returns an int.
5358 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5359   predicate(UsePopCountInstruction);
5360   match(Set dst (PopCountL (LoadL mem)));
5361   effect(KILL cr, TEMP tmp, TEMP dst);
5362 
5363   format %{ "POPCNT $dst, $mem\n\t"
5364             "POPCNT $tmp, $mem+4\n\t"
5365             "ADD    $dst, $tmp" %}
5366   ins_encode %{
5367     //__ popcntl($dst$$Register, $mem$$Address$$first);
5368     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5369     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5370     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5371     __ addl($dst$$Register, $tmp$$Register);
5372   %}
5373   ins_pipe(ialu_reg);
5374 %}
5375 
5376 
5377 //----------Load/Store/Move Instructions---------------------------------------
5378 //----------Load Instructions--------------------------------------------------
5379 // Load Byte (8bit signed)
5380 instruct loadB(xRegI dst, memory mem) %{
5381   match(Set dst (LoadB mem));
5382 
5383   ins_cost(125);
5384   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5385 
5386   ins_encode %{
5387     __ movsbl($dst$$Register, $mem$$Address);
5388   %}
5389 
5390   ins_pipe(ialu_reg_mem);
5391 %}
5392 
5393 // Load Byte (8bit signed) into Long Register
5394 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5395   match(Set dst (ConvI2L (LoadB mem)));
5396   effect(KILL cr);
5397 
5398   ins_cost(375);
5399   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5400             "MOV    $dst.hi,$dst.lo\n\t"
5401             "SAR    $dst.hi,7" %}
5402 
5403   ins_encode %{
5404     __ movsbl($dst$$Register, $mem$$Address);
5405     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5406     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 MSB are already sign-extended.
5407   %}
5408 
5409   ins_pipe(ialu_reg_mem);
5410 %}
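
// Note on the SAR-by-7 above: after MOVSX8 the low register already holds
// copies of the sign bit in bits 7..31, so an arithmetic shift of the copied
// high word by 7 (rather than 31) is enough to fill it with the sign.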
5411 
5412 // Load Unsigned Byte (8bit UNsigned)
5413 instruct loadUB(xRegI dst, memory mem) %{
5414   match(Set dst (LoadUB mem));
5415 
5416   ins_cost(125);
5417   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5418 
5419   ins_encode %{
5420     __ movzbl($dst$$Register, $mem$$Address);
5421   %}
5422 
5423   ins_pipe(ialu_reg_mem);
5424 %}
5425 
5426 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5427 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5428   match(Set dst (ConvI2L (LoadUB mem)));
5429   effect(KILL cr);
5430 
5431   ins_cost(250);
5432   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5433             "XOR    $dst.hi,$dst.hi" %}
5434 
5435   ins_encode %{
5436     Register Rdst = $dst$$Register;
5437     __ movzbl(Rdst, $mem$$Address);
5438     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5439   %}
5440 
5441   ins_pipe(ialu_reg_mem);
5442 %}
5443 
5444 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5445 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5446   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5447   effect(KILL cr);
5448 
5449   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5450             "XOR    $dst.hi,$dst.hi\n\t"
5451             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5452   ins_encode %{
5453     Register Rdst = $dst$$Register;
5454     __ movzbl(Rdst, $mem$$Address);
5455     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5456     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5457   %}
5458   ins_pipe(ialu_reg_mem);
5459 %}
5460 
5461 // Load Short (16bit signed)
5462 instruct loadS(rRegI dst, memory mem) %{
5463   match(Set dst (LoadS mem));
5464 
5465   ins_cost(125);
5466   format %{ "MOVSX  $dst,$mem\t# short" %}
5467 
5468   ins_encode %{
5469     __ movswl($dst$$Register, $mem$$Address);
5470   %}
5471 
5472   ins_pipe(ialu_reg_mem);
5473 %}
5474 
5475 // Load Short (16 bit signed) to Byte (8 bit signed)
5476 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5477   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5478 
5479   ins_cost(125);
5480   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5481   ins_encode %{
5482     __ movsbl($dst$$Register, $mem$$Address);
5483   %}
5484   ins_pipe(ialu_reg_mem);
5485 %}
5486 
5487 // Load Short (16bit signed) into Long Register
5488 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5489   match(Set dst (ConvI2L (LoadS mem)));
5490   effect(KILL cr);
5491 
5492   ins_cost(375);
5493   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5494             "MOV    $dst.hi,$dst.lo\n\t"
5495             "SAR    $dst.hi,15" %}
5496 
5497   ins_encode %{
5498     __ movswl($dst$$Register, $mem$$Address);
5499     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5500     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 MSB are already sign-extended.
5501   %}
5502 
5503   ins_pipe(ialu_reg_mem);
5504 %}
5505 
5506 // Load Unsigned Short/Char (16bit unsigned)
5507 instruct loadUS(rRegI dst, memory mem) %{
5508   match(Set dst (LoadUS mem));
5509 
5510   ins_cost(125);
5511   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5512 
5513   ins_encode %{
5514     __ movzwl($dst$$Register, $mem$$Address);
5515   %}
5516 
5517   ins_pipe(ialu_reg_mem);
5518 %}
5519 
5520 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5521 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5522   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5523 
5524   ins_cost(125);
5525   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5526   ins_encode %{
5527     __ movsbl($dst$$Register, $mem$$Address);
5528   %}
5529   ins_pipe(ialu_reg_mem);
5530 %}
5531 
5532 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5533 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5534   match(Set dst (ConvI2L (LoadUS mem)));
5535   effect(KILL cr);
5536 
5537   ins_cost(250);
5538   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5539             "XOR    $dst.hi,$dst.hi" %}
5540 
5541   ins_encode %{
5542     __ movzwl($dst$$Register, $mem$$Address);
5543     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5544   %}
5545 
5546   ins_pipe(ialu_reg_mem);
5547 %}
5548 
5549 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5550 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5551   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5552   effect(KILL cr);
5553 
5554   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5555             "XOR    $dst.hi,$dst.hi" %}
5556   ins_encode %{
5557     Register Rdst = $dst$$Register;
5558     __ movzbl(Rdst, $mem$$Address);
5559     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5560   %}
5561   ins_pipe(ialu_reg_mem);
5562 %}
5563 
5564 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5565 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5566   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5567   effect(KILL cr);
5568 
5569   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5570             "XOR    $dst.hi,$dst.hi\n\t"
5571             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5572   ins_encode %{
5573     Register Rdst = $dst$$Register;
5574     __ movzwl(Rdst, $mem$$Address);
5575     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5576     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5577   %}
5578   ins_pipe(ialu_reg_mem);
5579 %}
5580 
5581 // Load Integer
5582 instruct loadI(rRegI dst, memory mem) %{
5583   match(Set dst (LoadI mem));
5584 
5585   ins_cost(125);
5586   format %{ "MOV    $dst,$mem\t# int" %}
5587 
5588   ins_encode %{
5589     __ movl($dst$$Register, $mem$$Address);
5590   %}
5591 
5592   ins_pipe(ialu_reg_mem);
5593 %}
5594 
5595 // Load Integer (32 bit signed) to Byte (8 bit signed)
5596 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5597   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5598 
5599   ins_cost(125);
5600   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5601   ins_encode %{
5602     __ movsbl($dst$$Register, $mem$$Address);
5603   %}
5604   ins_pipe(ialu_reg_mem);
5605 %}
5606 
5607 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5608 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5609   match(Set dst (AndI (LoadI mem) mask));
5610 
5611   ins_cost(125);
5612   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5613   ins_encode %{
5614     __ movzbl($dst$$Register, $mem$$Address);
5615   %}
5616   ins_pipe(ialu_reg_mem);
5617 %}
5618 
5619 // Load Integer (32 bit signed) to Short (16 bit signed)
5620 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5621   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5622 
5623   ins_cost(125);
5624   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5625   ins_encode %{
5626     __ movswl($dst$$Register, $mem$$Address);
5627   %}
5628   ins_pipe(ialu_reg_mem);
5629 %}
5630 
5631 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5632 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5633   match(Set dst (AndI (LoadI mem) mask));
5634 
5635   ins_cost(125);
5636   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5637   ins_encode %{
5638     __ movzwl($dst$$Register, $mem$$Address);
5639   %}
5640   ins_pipe(ialu_reg_mem);
5641 %}
5642 
5643 // Load Integer into Long Register
5644 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5645   match(Set dst (ConvI2L (LoadI mem)));
5646   effect(KILL cr);
5647 
5648   ins_cost(375);
5649   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5650             "MOV    $dst.hi,$dst.lo\n\t"
5651             "SAR    $dst.hi,31" %}
5652 
5653   ins_encode %{
5654     __ movl($dst$$Register, $mem$$Address);
5655     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5656     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5657   %}
5658 
5659   ins_pipe(ialu_reg_mem);
5660 %}
5661 
5662 // Load Integer with mask 0xFF into Long Register
5663 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5664   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5665   effect(KILL cr);
5666 
5667   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5668             "XOR    $dst.hi,$dst.hi" %}
5669   ins_encode %{
5670     Register Rdst = $dst$$Register;
5671     __ movzbl(Rdst, $mem$$Address);
5672     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5673   %}
5674   ins_pipe(ialu_reg_mem);
5675 %}
5676 
5677 // Load Integer with mask 0xFFFF into Long Register
5678 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5679   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5680   effect(KILL cr);
5681 
5682   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5683             "XOR    $dst.hi,$dst.hi" %}
5684   ins_encode %{
5685     Register Rdst = $dst$$Register;
5686     __ movzwl(Rdst, $mem$$Address);
5687     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5688   %}
5689   ins_pipe(ialu_reg_mem);
5690 %}
5691 
5692 // Load Integer with 31-bit mask into Long Register
5693 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5694   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5695   effect(KILL cr);
5696 
5697   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5698             "XOR    $dst.hi,$dst.hi\n\t"
5699             "AND    $dst.lo,$mask" %}
5700   ins_encode %{
5701     Register Rdst = $dst$$Register;
5702     __ movl(Rdst, $mem$$Address);
5703     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5704     __ andl(Rdst, $mask$$constant);
5705   %}
5706   ins_pipe(ialu_reg_mem);
5707 %}
5708 
5709 // Load Unsigned Integer into Long Register
5710 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5711   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5712   effect(KILL cr);
5713 
5714   ins_cost(250);
5715   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5716             "XOR    $dst.hi,$dst.hi" %}
5717 
5718   ins_encode %{
5719     __ movl($dst$$Register, $mem$$Address);
5720     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5721   %}
5722 
5723   ins_pipe(ialu_reg_mem);
5724 %}
5725 
5726 // Load Long.  Cannot clobber address while loading, so restrict address
5727 // register to ESI
5728 instruct loadL(eRegL dst, load_long_memory mem) %{
5729   predicate(!((LoadLNode*)n)->require_atomic_access());
5730   match(Set dst (LoadL mem));
5731 
5732   ins_cost(250);
5733   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5734             "MOV    $dst.hi,$mem+4" %}
5735 
5736   ins_encode %{
5737     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5738     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5739     __ movl($dst$$Register, Amemlo);
5740     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5741   %}
5742 
5743   ins_pipe(ialu_reg_long_mem);
5744 %}
5745 
5746 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5747 // then store it down to the stack and reload on the int
5748 // side.
5749 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5750   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5751   match(Set dst (LoadL mem));
5752 
5753   ins_cost(200);
5754   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5755             "FISTp  $dst" %}
5756   ins_encode(enc_loadL_volatile(mem,dst));
5757   ins_pipe( fpu_reg_mem );
5758 %}
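
// (FILD performs a single 64-bit memory read, so the value is fetched
// atomically; FISTP then parks it in a stack slot from which the two integer
// halves can be read without tearing.)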
5759 
5760 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5761   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5762   match(Set dst (LoadL mem));
5763   effect(TEMP tmp);
5764   ins_cost(180);
5765   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5766             "MOVSD  $dst,$tmp" %}
5767   ins_encode %{
5768     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5769     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5770   %}
5771   ins_pipe( pipe_slow );
5772 %}
5773 
5774 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5775   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5776   match(Set dst (LoadL mem));
5777   effect(TEMP tmp);
5778   ins_cost(160);
5779   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5780             "MOVD   $dst.lo,$tmp\n\t"
5781             "PSRLQ  $tmp,32\n\t"
5782             "MOVD   $dst.hi,$tmp" %}
5783   ins_encode %{
5784     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5785     __ movdl($dst$$Register, $tmp$$XMMRegister);
5786     __ psrlq($tmp$$XMMRegister, 32);
5787     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5788   %}
5789   ins_pipe( pipe_slow );
5790 %}
5791 
5792 // Load Range
5793 instruct loadRange(rRegI dst, memory mem) %{
5794   match(Set dst (LoadRange mem));
5795 
5796   ins_cost(125);
5797   format %{ "MOV    $dst,$mem" %}
5798   opcode(0x8B);
5799   ins_encode( OpcP, RegMem(dst,mem));
5800   ins_pipe( ialu_reg_mem );
5801 %}
5802 
5803 
5804 // Load Pointer
5805 instruct loadP(eRegP dst, memory mem) %{
5806   match(Set dst (LoadP mem));
5807 
5808   ins_cost(125);
5809   format %{ "MOV    $dst,$mem" %}
5810   opcode(0x8B);
5811   ins_encode( OpcP, RegMem(dst,mem));
5812   ins_pipe( ialu_reg_mem );
5813 %}
5814 
5815 // Load Klass Pointer
5816 instruct loadKlass(eRegP dst, memory mem) %{
5817   match(Set dst (LoadKlass mem));
5818 
5819   ins_cost(125);
5820   format %{ "MOV    $dst,$mem" %}
5821   opcode(0x8B);
5822   ins_encode( OpcP, RegMem(dst,mem));
5823   ins_pipe( ialu_reg_mem );
5824 %}
5825 
5826 // Load Double
5827 instruct loadDPR(regDPR dst, memory mem) %{
5828   predicate(UseSSE<=1);
5829   match(Set dst (LoadD mem));
5830 
5831   ins_cost(150);
5832   format %{ "FLD_D  ST,$mem\n\t"
5833             "FSTP   $dst" %}
5834   opcode(0xDD);               /* DD /0 */
5835   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5836               Pop_Reg_DPR(dst) );
5837   ins_pipe( fpu_reg_mem );
5838 %}
5839 
5840 // Load Double to XMM
5841 instruct loadD(regD dst, memory mem) %{
5842   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5843   match(Set dst (LoadD mem));
5844   ins_cost(145);
5845   format %{ "MOVSD  $dst,$mem" %}
5846   ins_encode %{
5847     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5848   %}
5849   ins_pipe( pipe_slow );
5850 %}
5851 
5852 instruct loadD_partial(regD dst, memory mem) %{
5853   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5854   match(Set dst (LoadD mem));
5855   ins_cost(145);
5856   format %{ "MOVLPD $dst,$mem" %}
5857   ins_encode %{
5858     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5859   %}
5860   ins_pipe( pipe_slow );
5861 %}
5862 
5863 // Load to XMM register (single-precision floating point)
5864 // MOVSS instruction
5865 instruct loadF(regF dst, memory mem) %{
5866   predicate(UseSSE>=1);
5867   match(Set dst (LoadF mem));
5868   ins_cost(145);
5869   format %{ "MOVSS  $dst,$mem" %}
5870   ins_encode %{
5871     __ movflt ($dst$$XMMRegister, $mem$$Address);
5872   %}
5873   ins_pipe( pipe_slow );
5874 %}
5875 
5876 // Load Float
5877 instruct loadFPR(regFPR dst, memory mem) %{
5878   predicate(UseSSE==0);
5879   match(Set dst (LoadF mem));
5880 
5881   ins_cost(150);
5882   format %{ "FLD_S  ST,$mem\n\t"
5883             "FSTP   $dst" %}
5884   opcode(0xD9);               /* D9 /0 */
5885   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5886               Pop_Reg_FPR(dst) );
5887   ins_pipe( fpu_reg_mem );
5888 %}
5889 
5890 // Load Effective Address
5891 instruct leaP8(eRegP dst, indOffset8 mem) %{
5892   match(Set dst mem);
5893 
5894   ins_cost(110);
5895   format %{ "LEA    $dst,$mem" %}
5896   opcode(0x8D);
5897   ins_encode( OpcP, RegMem(dst,mem));
5898   ins_pipe( ialu_reg_reg_fat );
5899 %}
5900 
5901 instruct leaP32(eRegP dst, indOffset32 mem) %{
5902   match(Set dst mem);
5903 
5904   ins_cost(110);
5905   format %{ "LEA    $dst,$mem" %}
5906   opcode(0x8D);
5907   ins_encode( OpcP, RegMem(dst,mem));
5908   ins_pipe( ialu_reg_reg_fat );
5909 %}
5910 
5911 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5912   match(Set dst mem);
5913 
5914   ins_cost(110);
5915   format %{ "LEA    $dst,$mem" %}
5916   opcode(0x8D);
5917   ins_encode( OpcP, RegMem(dst,mem));
5918   ins_pipe( ialu_reg_reg_fat );
5919 %}
5920 
5921 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5922   match(Set dst mem);
5923 
5924   ins_cost(110);
5925   format %{ "LEA    $dst,$mem" %}
5926   opcode(0x8D);
5927   ins_encode( OpcP, RegMem(dst,mem));
5928   ins_pipe( ialu_reg_reg_fat );
5929 %}
5930 
5931 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5932   match(Set dst mem);
5933 
5934   ins_cost(110);
5935   format %{ "LEA    $dst,$mem" %}
5936   opcode(0x8D);
5937   ins_encode( OpcP, RegMem(dst,mem));
5938   ins_pipe( ialu_reg_reg_fat );
5939 %}
5940 
5941 // Load Constant
5942 instruct loadConI(rRegI dst, immI src) %{
5943   match(Set dst src);
5944 
5945   format %{ "MOV    $dst,$src" %}
5946   ins_encode( LdImmI(dst, src) );
5947   ins_pipe( ialu_reg_fat );
5948 %}
5949 
5950 // Load Constant zero
5951 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5952   match(Set dst src);
5953   effect(KILL cr);
5954 
5955   ins_cost(50);
5956   format %{ "XOR    $dst,$dst" %}
5957   opcode(0x33);  /* XOR r32,r/m32 */
5958   ins_encode( OpcP, RegReg( dst, dst ) );
5959   ins_pipe( ialu_reg );
5960 %}
5961 
5962 instruct loadConP(eRegP dst, immP src) %{
5963   match(Set dst src);
5964 
5965   format %{ "MOV    $dst,$src" %}
5966   opcode(0xB8);  /* + rd */
5967   ins_encode( LdImmP(dst, src) );
5968   ins_pipe( ialu_reg_fat );
5969 %}
5970 
5971 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5972   match(Set dst src);
5973   effect(KILL cr);
5974   ins_cost(200);
5975   format %{ "MOV    $dst.lo,$src.lo\n\t"
5976             "MOV    $dst.hi,$src.hi" %}
5977   opcode(0xB8);
5978   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5979   ins_pipe( ialu_reg_long_fat );
5980 %}
5981 
5982 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5983   match(Set dst src);
5984   effect(KILL cr);
5985   ins_cost(150);
5986   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5987             "XOR    $dst.hi,$dst.hi" %}
5988   opcode(0x33,0x33);
5989   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5990   ins_pipe( ialu_reg_long );
5991 %}
5992 
5993 // The instruction usage is guarded by predicate in operand immFPR().
5994 instruct loadConFPR(regFPR dst, immFPR con) %{
5995   match(Set dst con);
5996   ins_cost(125);
5997   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5998             "FSTP   $dst" %}
5999   ins_encode %{
6000     __ fld_s($constantaddress($con));
6001     __ fstp_d($dst$$reg);
6002   %}
6003   ins_pipe(fpu_reg_con);
6004 %}
6005 
6006 // The instruction usage is guarded by predicate in operand immFPR0().
6007 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6008   match(Set dst con);
6009   ins_cost(125);
6010   format %{ "FLDZ   ST\n\t"
6011             "FSTP   $dst" %}
6012   ins_encode %{
6013     __ fldz();
6014     __ fstp_d($dst$$reg);
6015   %}
6016   ins_pipe(fpu_reg_con);
6017 %}
6018 
6019 // The instruction usage is guarded by predicate in operand immFPR1().
6020 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6021   match(Set dst con);
6022   ins_cost(125);
6023   format %{ "FLD1   ST\n\t"
6024             "FSTP   $dst" %}
6025   ins_encode %{
6026     __ fld1();
6027     __ fstp_d($dst$$reg);
6028   %}
6029   ins_pipe(fpu_reg_con);
6030 %}
6031 
6032 // The instruction usage is guarded by predicate in operand immF().
6033 instruct loadConF(regF dst, immF con) %{
6034   match(Set dst con);
6035   ins_cost(125);
6036   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6037   ins_encode %{
6038     __ movflt($dst$$XMMRegister, $constantaddress($con));
6039   %}
6040   ins_pipe(pipe_slow);
6041 %}
6042 
6043 // The instruction usage is guarded by predicate in operand immF0().
6044 instruct loadConF0(regF dst, immF0 src) %{
6045   match(Set dst src);
6046   ins_cost(100);
6047   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6048   ins_encode %{
6049     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6050   %}
6051   ins_pipe(pipe_slow);
6052 %}
6053 
6054 // The instruction usage is guarded by predicate in operand immDPR().
6055 instruct loadConDPR(regDPR dst, immDPR con) %{
6056   match(Set dst con);
6057   ins_cost(125);
6058 
6059   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6060             "FSTP   $dst" %}
6061   ins_encode %{
6062     __ fld_d($constantaddress($con));
6063     __ fstp_d($dst$$reg);
6064   %}
6065   ins_pipe(fpu_reg_con);
6066 %}
6067 
6068 // The instruction usage is guarded by predicate in operand immDPR0().
6069 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6070   match(Set dst con);
6071   ins_cost(125);
6072 
6073   format %{ "FLDZ   ST\n\t"
6074             "FSTP   $dst" %}
6075   ins_encode %{
6076     __ fldz();
6077     __ fstp_d($dst$$reg);
6078   %}
6079   ins_pipe(fpu_reg_con);
6080 %}
6081 
6082 // The instruction usage is guarded by predicate in operand immDPR1().
6083 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6084   match(Set dst con);
6085   ins_cost(125);
6086 
6087   format %{ "FLD1   ST\n\t"
6088             "FSTP   $dst" %}
6089   ins_encode %{
6090     __ fld1();
6091     __ fstp_d($dst$$reg);
6092   %}
6093   ins_pipe(fpu_reg_con);
6094 %}
6095 
6096 // The instruction usage is guarded by predicate in operand immD().
6097 instruct loadConD(regD dst, immD con) %{
6098   match(Set dst con);
6099   ins_cost(125);
6100   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6101   ins_encode %{
6102     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6103   %}
6104   ins_pipe(pipe_slow);
6105 %}
6106 
6107 // The instruction usage is guarded by predicate in operand immD0().
6108 instruct loadConD0(regD dst, immD0 src) %{
6109   match(Set dst src);
6110   ins_cost(100);
6111   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6112   ins_encode %{
6113     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6114   %}
6115   ins_pipe( pipe_slow );
6116 %}
6117 
6118 // Load Stack Slot
6119 instruct loadSSI(rRegI dst, stackSlotI src) %{
6120   match(Set dst src);
6121   ins_cost(125);
6122 
6123   format %{ "MOV    $dst,$src" %}
6124   opcode(0x8B);
6125   ins_encode( OpcP, RegMem(dst,src));
6126   ins_pipe( ialu_reg_mem );
6127 %}
6128 
6129 instruct loadSSL(eRegL dst, stackSlotL src) %{
6130   match(Set dst src);
6131 
6132   ins_cost(200);
6133   format %{ "MOV    $dst,$src.lo\n\t"
6134             "MOV    $dst+4,$src.hi" %}
6135   opcode(0x8B, 0x8B);
6136   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6137   ins_pipe( ialu_mem_long_reg );
6138 %}
6139 
6140 // Load Stack Slot
6141 instruct loadSSP(eRegP dst, stackSlotP src) %{
6142   match(Set dst src);
6143   ins_cost(125);
6144 
6145   format %{ "MOV    $dst,$src" %}
6146   opcode(0x8B);
6147   ins_encode( OpcP, RegMem(dst,src));
6148   ins_pipe( ialu_reg_mem );
6149 %}
6150 
6151 // Load Stack Slot
6152 instruct loadSSF(regFPR dst, stackSlotF src) %{
6153   match(Set dst src);
6154   ins_cost(125);
6155 
6156   format %{ "FLD_S  $src\n\t"
6157             "FSTP   $dst" %}
6158   opcode(0xD9);               /* D9 /0, FLD m32real */
6159   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6160               Pop_Reg_FPR(dst) );
6161   ins_pipe( fpu_reg_mem );
6162 %}
6163 
6164 // Load Stack Slot
6165 instruct loadSSD(regDPR dst, stackSlotD src) %{
6166   match(Set dst src);
6167   ins_cost(125);
6168 
6169   format %{ "FLD_D  $src\n\t"
6170             "FSTP   $dst" %}
6171   opcode(0xDD);               /* DD /0, FLD m64real */
6172   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6173               Pop_Reg_DPR(dst) );
6174   ins_pipe( fpu_reg_mem );
6175 %}
6176 
6177 // Prefetch instructions for allocation.
6178 // Must be safe to execute with invalid address (cannot fault).
6179 
6180 instruct prefetchAlloc0( memory mem ) %{
6181   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6182   match(PrefetchAllocation mem);
6183   ins_cost(0);
6184   size(0);
6185   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6186   ins_encode();
6187   ins_pipe(empty);
6188 %}
6189 
6190 instruct prefetchAlloc( memory mem ) %{
6191   predicate(AllocatePrefetchInstr==3);
6192   match( PrefetchAllocation mem );
6193   ins_cost(100);
6194 
6195   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6196   ins_encode %{
6197     __ prefetchw($mem$$Address);
6198   %}
6199   ins_pipe(ialu_mem);
6200 %}
6201 
6202 instruct prefetchAllocNTA( memory mem ) %{
6203   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6204   match(PrefetchAllocation mem);
6205   ins_cost(100);
6206 
6207   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6208   ins_encode %{
6209     __ prefetchnta($mem$$Address);
6210   %}
6211   ins_pipe(ialu_mem);
6212 %}
6213 
6214 instruct prefetchAllocT0( memory mem ) %{
6215   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6216   match(PrefetchAllocation mem);
6217   ins_cost(100);
6218 
6219   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6220   ins_encode %{
6221     __ prefetcht0($mem$$Address);
6222   %}
6223   ins_pipe(ialu_mem);
6224 %}
6225 
6226 instruct prefetchAllocT2( memory mem ) %{
6227   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6228   match(PrefetchAllocation mem);
6229   ins_cost(100);
6230 
6231   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6232   ins_encode %{
6233     __ prefetcht2($mem$$Address);
6234   %}
6235   ins_pipe(ialu_mem);
6236 %}
6237 
6238 //----------Store Instructions-------------------------------------------------
6239 
6240 // Store Byte
6241 instruct storeB(memory mem, xRegI src) %{
6242   match(Set mem (StoreB mem src));
6243 
6244   ins_cost(125);
6245   format %{ "MOV8   $mem,$src" %}
6246   opcode(0x88);
6247   ins_encode( OpcP, RegMem( src, mem ) );
6248   ins_pipe( ialu_mem_reg );
6249 %}
6250 
6251 // Store Char/Short
6252 instruct storeC(memory mem, rRegI src) %{
6253   match(Set mem (StoreC mem src));
6254 
6255   ins_cost(125);
6256   format %{ "MOV16  $mem,$src" %}
6257   opcode(0x89, 0x66);
6258   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6259   ins_pipe( ialu_mem_reg );
6260 %}
6261 
6262 // Store Integer
6263 instruct storeI(memory mem, rRegI src) %{
6264   match(Set mem (StoreI mem src));
6265 
6266   ins_cost(125);
6267   format %{ "MOV    $mem,$src" %}
6268   opcode(0x89);
6269   ins_encode( OpcP, RegMem( src, mem ) );
6270   ins_pipe( ialu_mem_reg );
6271 %}
6272 
6273 // Store Long
6274 instruct storeL(long_memory mem, eRegL src) %{
6275   predicate(!((StoreLNode*)n)->require_atomic_access());
6276   match(Set mem (StoreL mem src));
6277 
6278   ins_cost(200);
6279   format %{ "MOV    $mem,$src.lo\n\t"
6280             "MOV    $mem+4,$src.hi" %}
6281   opcode(0x89, 0x89);
6282   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6283   ins_pipe( ialu_mem_long_reg );
6284 %}
6285 
6286 // Store Long to Integer
6287 instruct storeL2I(memory mem, eRegL src) %{
6288   match(Set mem (StoreI mem (ConvL2I src)));
6289 
6290   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6291   ins_encode %{
6292     __ movl($mem$$Address, $src$$Register);
6293   %}
6294   ins_pipe(ialu_mem_reg);
6295 %}
6296 
6297 // Volatile Store Long.  Must be atomic, so move it into
6298 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6299 // target address before the store (for null-ptr checks)
6300 // so the memory operand is used twice in the encoding.
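     // The FILD/FISTp pair gives atomicity because the x87 unit moves the
     // 64-bit value with single memory accesses; the leading CMP against EAX
     // merely touches the address, so the implicit null check is taken before
     // any FP state is modified.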
6301 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6302   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6303   match(Set mem (StoreL mem src));
6304   effect( KILL cr );
6305   ins_cost(400);
6306   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6307             "FILD   $src\n\t"
6308             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6309   opcode(0x3B);
6310   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6311   ins_pipe( fpu_reg_mem );
6312 %}
6313 
6314 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6315   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6316   match(Set mem (StoreL mem src));
6317   effect( TEMP tmp, KILL cr );
6318   ins_cost(380);
6319   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6320             "MOVSD  $tmp,$src\n\t"
6321             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6322   ins_encode %{
6323     __ cmpl(rax, $mem$$Address);
6324     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6325     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6326   %}
6327   ins_pipe( pipe_slow );
6328 %}
6329 
6330 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6331   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6332   match(Set mem (StoreL mem src));
6333   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6334   ins_cost(360);
6335   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6336             "MOVD   $tmp,$src.lo\n\t"
6337             "MOVD   $tmp2,$src.hi\n\t"
6338             "PUNPCKLDQ $tmp,$tmp2\n\t"
6339             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6340   ins_encode %{
6341     __ cmpl(rax, $mem$$Address);
6342     __ movdl($tmp$$XMMRegister, $src$$Register);
6343     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6344     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6345     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6346   %}
6347   ins_pipe( pipe_slow );
6348 %}
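     // In the register form above, the two 32-bit halves of $src are moved
     // into XMM registers and glued together with PUNPCKLDQ so that a single
     // 64-bit MOVSD performs the atomic store, avoiding the x87 path used by
     // storeL_volatile.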
6349 
6350 // Store Pointer; for storing unknown oops and raw pointers
6351 instruct storeP(memory mem, anyRegP src) %{
6352   match(Set mem (StoreP mem src));
6353 
6354   ins_cost(125);
6355   format %{ "MOV    $mem,$src" %}
6356   opcode(0x89);
6357   ins_encode( OpcP, RegMem( src, mem ) );
6358   ins_pipe( ialu_mem_reg );
6359 %}
6360 
6361 // Store Integer Immediate
6362 instruct storeImmI(memory mem, immI src) %{
6363   match(Set mem (StoreI mem src));
6364 
6365   ins_cost(150);
6366   format %{ "MOV    $mem,$src" %}
6367   opcode(0xC7);               /* C7 /0 */
6368   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6369   ins_pipe( ialu_mem_imm );
6370 %}
6371 
6372 // Store Short/Char Immediate
6373 instruct storeImmI16(memory mem, immI16 src) %{
6374   predicate(UseStoreImmI16);
6375   match(Set mem (StoreC mem src));
6376 
6377   ins_cost(150);
6378   format %{ "MOV16  $mem,$src" %}
6379   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6380   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6381   ins_pipe( ialu_mem_imm );
6382 %}
6383 
6384 // Store Pointer Immediate; null pointers or constant oops that do not
6385 // need card-mark barriers.
6386 instruct storeImmP(memory mem, immP src) %{
6387   match(Set mem (StoreP mem src));
6388 
6389   ins_cost(150);
6390   format %{ "MOV    $mem,$src" %}
6391   opcode(0xC7);               /* C7 /0 */
6392   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6393   ins_pipe( ialu_mem_imm );
6394 %}
6395 
6396 // Store Byte Immediate
6397 instruct storeImmB(memory mem, immI8 src) %{
6398   match(Set mem (StoreB mem src));
6399 
6400   ins_cost(150);
6401   format %{ "MOV8   $mem,$src" %}
6402   opcode(0xC6);               /* C6 /0 */
6403   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6404   ins_pipe( ialu_mem_imm );
6405 %}
6406 
6407 // Store CMS card-mark Immediate
6408 instruct storeImmCM(memory mem, immI8 src) %{
6409   match(Set mem (StoreCM mem src));
6410 
6411   ins_cost(150);
6412   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6413   opcode(0xC6);               /* C6 /0 */
6414   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6415   ins_pipe( ialu_mem_imm );
6416 %}
6417 
6418 // Store Double
6419 instruct storeDPR( memory mem, regDPR1 src) %{
6420   predicate(UseSSE<=1);
6421   match(Set mem (StoreD mem src));
6422 
6423   ins_cost(100);
6424   format %{ "FST_D  $mem,$src" %}
6425   opcode(0xDD);       /* DD /2 */
6426   ins_encode( enc_FPR_store(mem,src) );
6427   ins_pipe( fpu_mem_reg );
6428 %}
6429 
6430 // Store double does rounding on x86
6431 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6432   predicate(UseSSE<=1);
6433   match(Set mem (StoreD mem (RoundDouble src)));
6434 
6435   ins_cost(100);
6436   format %{ "FST_D  $mem,$src\t# round" %}
6437   opcode(0xDD);       /* DD /2 */
6438   ins_encode( enc_FPR_store(mem,src) );
6439   ins_pipe( fpu_mem_reg );
6440 %}
6441 
6442 // Store XMM register to memory (double-precision floating point)
6443 // MOVSD instruction
6444 instruct storeD(memory mem, regD src) %{
6445   predicate(UseSSE>=2);
6446   match(Set mem (StoreD mem src));
6447   ins_cost(95);
6448   format %{ "MOVSD  $mem,$src" %}
6449   ins_encode %{
6450     __ movdbl($mem$$Address, $src$$XMMRegister);
6451   %}
6452   ins_pipe( pipe_slow );
6453 %}
6454 
6455 // Store XMM register to memory (single-precision floating point)
6456 // MOVSS instruction
6457 instruct storeF(memory mem, regF src) %{
6458   predicate(UseSSE>=1);
6459   match(Set mem (StoreF mem src));
6460   ins_cost(95);
6461   format %{ "MOVSS  $mem,$src" %}
6462   ins_encode %{
6463     __ movflt($mem$$Address, $src$$XMMRegister);
6464   %}
6465   ins_pipe( pipe_slow );
6466 %}
6467 
6468 // Store Float
6469 instruct storeFPR( memory mem, regFPR1 src) %{
6470   predicate(UseSSE==0);
6471   match(Set mem (StoreF mem src));
6472 
6473   ins_cost(100);
6474   format %{ "FST_S  $mem,$src" %}
6475   opcode(0xD9);       /* D9 /2 */
6476   ins_encode( enc_FPR_store(mem,src) );
6477   ins_pipe( fpu_mem_reg );
6478 %}
6479 
6480 // Store Float does rounding on x86
6481 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6482   predicate(UseSSE==0);
6483   match(Set mem (StoreF mem (RoundFloat src)));
6484 
6485   ins_cost(100);
6486   format %{ "FST_S  $mem,$src\t# round" %}
6487   opcode(0xD9);       /* D9 /2 */
6488   ins_encode( enc_FPR_store(mem,src) );
6489   ins_pipe( fpu_mem_reg );
6490 %}
6491 
6492 // Store Float converted from Double; does rounding on x86
6493 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6494   predicate(UseSSE<=1);
6495   match(Set mem (StoreF mem (ConvD2F src)));
6496 
6497   ins_cost(100);
6498   format %{ "FST_S  $mem,$src\t# D-round" %}
6499   opcode(0xD9);       /* D9 /2 */
6500   ins_encode( enc_FPR_store(mem,src) );
6501   ins_pipe( fpu_mem_reg );
6502 %}
6503 
6504 // Store immediate Float value (it is faster than store from FPU register)
6505 // The instruction usage is guarded by predicate in operand immFPR().
6506 instruct storeFPR_imm( memory mem, immFPR src) %{
6507   match(Set mem (StoreF mem src));
6508 
6509   ins_cost(50);
6510   format %{ "MOV    $mem,$src\t# store float" %}
6511   opcode(0xC7);               /* C7 /0 */
6512   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6513   ins_pipe( ialu_mem_imm );
6514 %}
6515 
6516 // Store immediate Float value (it is faster than store from XMM register)
6517 // The instruction usage is guarded by predicate in operand immF().
6518 instruct storeF_imm( memory mem, immF src) %{
6519   match(Set mem (StoreF mem src));
6520 
6521   ins_cost(50);
6522   format %{ "MOV    $mem,$src\t# store float" %}
6523   opcode(0xC7);               /* C7 /0 */
6524   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6525   ins_pipe( ialu_mem_imm );
6526 %}
6527 
6528 // Store Integer to stack slot
6529 instruct storeSSI(stackSlotI dst, rRegI src) %{
6530   match(Set dst src);
6531 
6532   ins_cost(100);
6533   format %{ "MOV    $dst,$src" %}
6534   opcode(0x89);
6535   ins_encode( OpcPRegSS( dst, src ) );
6536   ins_pipe( ialu_mem_reg );
6537 %}
6538 
6539 // Store Pointer to stack slot
6540 instruct storeSSP(stackSlotP dst, eRegP src) %{
6541   match(Set dst src);
6542 
6543   ins_cost(100);
6544   format %{ "MOV    $dst,$src" %}
6545   opcode(0x89);
6546   ins_encode( OpcPRegSS( dst, src ) );
6547   ins_pipe( ialu_mem_reg );
6548 %}
6549 
6550 // Store Long to stack slot
6551 instruct storeSSL(stackSlotL dst, eRegL src) %{
6552   match(Set dst src);
6553 
6554   ins_cost(200);
6555   format %{ "MOV    $dst,$src.lo\n\t"
6556             "MOV    $dst+4,$src.hi" %}
6557   opcode(0x89, 0x89);
6558   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6559   ins_pipe( ialu_mem_long_reg );
6560 %}
6561 
6562 //----------MemBar Instructions-----------------------------------------------
6563 // Memory barrier flavors
6564 
6565 instruct membar_acquire() %{
6566   match(MemBarAcquire);
6567   match(LoadFence);
6568   ins_cost(400);
6569 
6570   size(0);
6571   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6572   ins_encode();
6573   ins_pipe(empty);
6574 %}
6575 
6576 instruct membar_acquire_lock() %{
6577   match(MemBarAcquireLock);
6578   ins_cost(0);
6579 
6580   size(0);
6581   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6582   ins_encode( );
6583   ins_pipe(empty);
6584 %}
6585 
6586 instruct membar_release() %{
6587   match(MemBarRelease);
6588   match(StoreFence);
6589   ins_cost(400);
6590 
6591   size(0);
6592   format %{ "MEMBAR-release ! (empty encoding)" %}
6593   ins_encode( );
6594   ins_pipe(empty);
6595 %}
6596 
6597 instruct membar_release_lock() %{
6598   match(MemBarReleaseLock);
6599   ins_cost(0);
6600 
6601   size(0);
6602   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6603   ins_encode( );
6604   ins_pipe(empty);
6605 %}
6606 
6607 instruct membar_volatile(eFlagsReg cr) %{
6608   match(MemBarVolatile);
6609   effect(KILL cr);
6610   ins_cost(400);
6611 
6612   format %{
6613     $$template
6614     if (os::is_MP()) {
6615       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6616     } else {
6617       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6618     }
6619   %}
6620   ins_encode %{
6621     __ membar(Assembler::StoreLoad);
6622   %}
6623   ins_pipe(pipe_slow);
6624 %}
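     // A LOCK-ed ADD of zero to a dead stack location acts as a full
     // StoreLoad fence and is typically cheaper than MFENCE on x86, which is
     // why MacroAssembler::membar(StoreLoad) expands to the LOCK ADDL shown
     // in the format above when running on MP systems.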
6625 
6626 instruct unnecessary_membar_volatile() %{
6627   match(MemBarVolatile);
6628   predicate(Matcher::post_store_load_barrier(n));
6629   ins_cost(0);
6630 
6631   size(0);
6632   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6633   ins_encode( );
6634   ins_pipe(empty);
6635 %}
6636 
6637 instruct membar_storestore() %{
6638   match(MemBarStoreStore);
6639   ins_cost(0);
6640 
6641   size(0);
6642   format %{ "MEMBAR-storestore (empty encoding)" %}
6643   ins_encode( );
6644   ins_pipe(empty);
6645 %}
6646 
6647 //----------Move Instructions--------------------------------------------------
6648 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6649   match(Set dst (CastX2P src));
6650   format %{ "# X2P  $dst, $src" %}
6651   ins_encode( /*empty encoding*/ );
6652   ins_cost(0);
6653   ins_pipe(empty);
6654 %}
6655 
6656 instruct castP2X(rRegI dst, eRegP src ) %{
6657   match(Set dst (CastP2X src));
6658   ins_cost(50);
6659   format %{ "MOV    $dst, $src\t# CastP2X" %}
6660   ins_encode( enc_Copy( dst, src) );
6661   ins_pipe( ialu_reg_reg );
6662 %}
6663 
6664 //----------Conditional Move---------------------------------------------------
6665 // Conditional move
6666 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6667   predicate(!VM_Version::supports_cmov() );
6668   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6669   ins_cost(200);
6670   format %{ "J$cop,us skip\t# signed cmove\n\t"
6671             "MOV    $dst,$src\n"
6672       "skip:" %}
6673   ins_encode %{
6674     Label Lskip;
6675     // Invert sense of branch from sense of CMOV
6676     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6677     __ movl($dst$$Register, $src$$Register);
6678     __ bind(Lskip);
6679   %}
6680   ins_pipe( pipe_cmov_reg );
6681 %}
6682 
6683 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6684   predicate(!VM_Version::supports_cmov() );
6685   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6686   ins_cost(200);
6687   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6688             "MOV    $dst,$src\n"
6689       "skip:" %}
6690   ins_encode %{
6691     Label Lskip;
6692     // Invert sense of branch from sense of CMOV
6693     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6694     __ movl($dst$$Register, $src$$Register);
6695     __ bind(Lskip);
6696   %}
6697   ins_pipe( pipe_cmov_reg );
6698 %}
6699 
6700 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6701   predicate(VM_Version::supports_cmov() );
6702   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6703   ins_cost(200);
6704   format %{ "CMOV$cop $dst,$src" %}
6705   opcode(0x0F,0x40);
6706   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6707   ins_pipe( pipe_cmov_reg );
6708 %}
6709 
6710 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6711   predicate(VM_Version::supports_cmov() );
6712   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6713   ins_cost(200);
6714   format %{ "CMOV$cop $dst,$src" %}
6715   opcode(0x0F,0x40);
6716   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6717   ins_pipe( pipe_cmov_reg );
6718 %}
6719 
6720 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6721   predicate(VM_Version::supports_cmov() );
6722   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6723   ins_cost(200);
6724   expand %{
6725     cmovI_regU(cop, cr, dst, src);
6726   %}
6727 %}
6728 
6729 // Conditional move
6730 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6731   predicate(VM_Version::supports_cmov() );
6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6733   ins_cost(250);
6734   format %{ "CMOV$cop $dst,$src" %}
6735   opcode(0x0F,0x40);
6736   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6737   ins_pipe( pipe_cmov_mem );
6738 %}
6739 
6740 // Conditional move
6741 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6742   predicate(VM_Version::supports_cmov() );
6743   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6744   ins_cost(250);
6745   format %{ "CMOV$cop $dst,$src" %}
6746   opcode(0x0F,0x40);
6747   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6748   ins_pipe( pipe_cmov_mem );
6749 %}
6750 
6751 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6752   predicate(VM_Version::supports_cmov() );
6753   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6754   ins_cost(250);
6755   expand %{
6756     cmovI_memU(cop, cr, dst, src);
6757   %}
6758 %}
6759 
6760 // Conditional move
6761 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6762   predicate(VM_Version::supports_cmov() );
6763   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6764   ins_cost(200);
6765   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6766   opcode(0x0F,0x40);
6767   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6768   ins_pipe( pipe_cmov_reg );
6769 %}
6770 
6771 // Conditional move (non-P6 version)
6772 // Note: a CMoveP is generated for stubs and native wrappers
6773 //        regardless of whether we are on a P6, so we
6774 //        emulate a cmov here
6775 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6776   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6777   ins_cost(300);
6778   format %{ "Jn$cop   skip\n\t"
6779           "MOV    $dst,$src\t# pointer\n"
6780       "skip:" %}
6781   opcode(0x8b);
6782   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6783   ins_pipe( pipe_cmov_reg );
6784 %}
6785 
6786 // Conditional move
6787 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6788   predicate(VM_Version::supports_cmov() );
6789   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6790   ins_cost(200);
6791   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6792   opcode(0x0F,0x40);
6793   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6794   ins_pipe( pipe_cmov_reg );
6795 %}
6796 
6797 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6798   predicate(VM_Version::supports_cmov() );
6799   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6800   ins_cost(200);
6801   expand %{
6802     cmovP_regU(cop, cr, dst, src);
6803   %}
6804 %}
6805 
6806 // DISABLED: Requires the ADLC to emit a bottom_type call that
6807 // correctly meets the two pointer arguments; one is an incoming
6808 // register but the other is a memory operand.  ALSO appears to
6809 // be buggy with implicit null checks.
6810 //
6811 //// Conditional move
6812 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6813 //  predicate(VM_Version::supports_cmov() );
6814 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6815 //  ins_cost(250);
6816 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6817 //  opcode(0x0F,0x40);
6818 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6819 //  ins_pipe( pipe_cmov_mem );
6820 //%}
6821 //
6822 //// Conditional move
6823 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6824 //  predicate(VM_Version::supports_cmov() );
6825 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6826 //  ins_cost(250);
6827 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6828 //  opcode(0x0F,0x40);
6829 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6830 //  ins_pipe( pipe_cmov_mem );
6831 //%}
6832 
6833 // Conditional move
6834 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6835   predicate(UseSSE<=1);
6836   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6837   ins_cost(200);
6838   format %{ "FCMOV$cop $dst,$src\t# double" %}
6839   opcode(0xDA);
6840   ins_encode( enc_cmov_dpr(cop,src) );
6841   ins_pipe( pipe_cmovDPR_reg );
6842 %}
6843 
6844 // Conditional move
6845 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6846   predicate(UseSSE==0);
6847   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6848   ins_cost(200);
6849   format %{ "FCMOV$cop $dst,$src\t# float" %}
6850   opcode(0xDA);
6851   ins_encode( enc_cmov_dpr(cop,src) );
6852   ins_pipe( pipe_cmovDPR_reg );
6853 %}
6854 
6855 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6856 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6857   predicate(UseSSE<=1);
6858   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6859   ins_cost(200);
6860   format %{ "Jn$cop   skip\n\t"
6861             "MOV    $dst,$src\t# double\n"
6862       "skip:" %}
6863   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6864   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6865   ins_pipe( pipe_cmovDPR_reg );
6866 %}
6867 
6868 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6869 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6870   predicate(UseSSE==0);
6871   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6872   ins_cost(200);
6873   format %{ "Jn$cop    skip\n\t"
6874             "MOV    $dst,$src\t# float\n"
6875       "skip:" %}
6876   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6877   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6878   ins_pipe( pipe_cmovDPR_reg );
6879 %}
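     // x87 FCMOVcc only tests CF/ZF/PF (the "unsigned" condition codes), so
     // signed compares cannot feed it directly; the two instructs above
     // therefore emulate the conditional move with an inverted Jcc around an
     // FPU register copy.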
6880 
6881 // There is no conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6882 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6883   predicate (UseSSE>=1);
6884   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6885   ins_cost(200);
6886   format %{ "Jn$cop   skip\n\t"
6887             "MOVSS  $dst,$src\t# float\n"
6888       "skip:" %}
6889   ins_encode %{
6890     Label skip;
6891     // Invert sense of branch from sense of CMOV
6892     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6893     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6894     __ bind(skip);
6895   %}
6896   ins_pipe( pipe_slow );
6897 %}
6898 
6899 // There is no conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6900 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6901   predicate (UseSSE>=2);
6902   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6903   ins_cost(200);
6904   format %{ "Jn$cop   skip\n\t"
6905             "MOVSD  $dst,$src\t# double\n"
6906       "skip:" %}
6907   ins_encode %{
6908     Label skip;
6909     // Invert sense of branch from sense of CMOV
6910     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6911     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6912     __ bind(skip);
6913   %}
6914   ins_pipe( pipe_slow );
6915 %}
6916 
6917 // unsigned version
6918 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6919   predicate (UseSSE>=1);
6920   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6921   ins_cost(200);
6922   format %{ "Jn$cop   skip\n\t"
6923             "MOVSS  $dst,$src\t# float\n"
6924       "skip:" %}
6925   ins_encode %{
6926     Label skip;
6927     // Invert sense of branch from sense of CMOV
6928     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6929     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6930     __ bind(skip);
6931   %}
6932   ins_pipe( pipe_slow );
6933 %}
6934 
6935 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6936   predicate (UseSSE>=1);
6937   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6938   ins_cost(200);
6939   expand %{
6940     fcmovF_regU(cop, cr, dst, src);
6941   %}
6942 %}
6943 
6944 // unsigned version
6945 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6946   predicate (UseSSE>=2);
6947   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6948   ins_cost(200);
6949   format %{ "Jn$cop   skip\n\t"
6950             "MOVSD  $dst,$src\t# double\n"
6951       "skip:" %}
6952   ins_encode %{
6953     Label skip;
6954     // Invert sense of branch from sense of CMOV
6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6956     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6957     __ bind(skip);
6958   %}
6959   ins_pipe( pipe_slow );
6960 %}
6961 
6962 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6963   predicate (UseSSE>=2);
6964   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6965   ins_cost(200);
6966   expand %{
6967     fcmovD_regU(cop, cr, dst, src);
6968   %}
6969 %}
6970 
6971 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6972   predicate(VM_Version::supports_cmov() );
6973   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6974   ins_cost(200);
6975   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6976             "CMOV$cop $dst.hi,$src.hi" %}
6977   opcode(0x0F,0x40);
6978   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6979   ins_pipe( pipe_cmov_reg_long );
6980 %}
6981 
6982 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6983   predicate(VM_Version::supports_cmov() );
6984   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6985   ins_cost(200);
6986   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6987             "CMOV$cop $dst.hi,$src.hi" %}
6988   opcode(0x0F,0x40);
6989   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6990   ins_pipe( pipe_cmov_reg_long );
6991 %}
6992 
6993 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6994   predicate(VM_Version::supports_cmov() );
6995   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6996   ins_cost(200);
6997   expand %{
6998     cmovL_regU(cop, cr, dst, src);
6999   %}
7000 %}
7001 
7002 //----------Arithmetic Instructions--------------------------------------------
7003 //----------Addition Instructions----------------------------------------------
7004 
7005 // Integer Addition Instructions
7006 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7007   match(Set dst (AddI dst src));
7008   effect(KILL cr);
7009 
7010   size(2);
7011   format %{ "ADD    $dst,$src" %}
7012   opcode(0x03);
7013   ins_encode( OpcP, RegReg( dst, src) );
7014   ins_pipe( ialu_reg_reg );
7015 %}
7016 
7017 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7018   match(Set dst (AddI dst src));
7019   effect(KILL cr);
7020 
7021   format %{ "ADD    $dst,$src" %}
7022   opcode(0x81, 0x00); /* /0 id */
7023   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7024   ins_pipe( ialu_reg );
7025 %}
7026 
7027 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7028   predicate(UseIncDec);
7029   match(Set dst (AddI dst src));
7030   effect(KILL cr);
7031 
7032   size(1);
7033   format %{ "INC    $dst" %}
7034   opcode(0x40); /* 0x40 + rd => INC r32 */
7035   ins_encode( Opc_plus( primary, dst ) );
7036   ins_pipe( ialu_reg );
7037 %}
7038 
7039 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7040   match(Set dst (AddI src0 src1));
7041   ins_cost(110);
7042 
7043   format %{ "LEA    $dst,[$src0 + $src1]" %}
7044   opcode(0x8D); /* 0x8D /r */
7045   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7046   ins_pipe( ialu_reg_reg );
7047 %}
7048 
7049 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7050   match(Set dst (AddP src0 src1));
7051   ins_cost(110);
7052 
7053   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7054   opcode(0x8D); /* 0x8D /r */
7055   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7056   ins_pipe( ialu_reg_reg );
7057 %}
7058 
7059 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7060   predicate(UseIncDec);
7061   match(Set dst (AddI dst src));
7062   effect(KILL cr);
7063 
7064   size(1);
7065   format %{ "DEC    $dst" %}
7066   opcode(0x48); /* 0x48 + rd => DEC r32 */
7067   ins_encode( Opc_plus( primary, dst ) );
7068   ins_pipe( ialu_reg );
7069 %}
7070 
7071 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7072   match(Set dst (AddP dst src));
7073   effect(KILL cr);
7074 
7075   size(2);
7076   format %{ "ADD    $dst,$src" %}
7077   opcode(0x03);
7078   ins_encode( OpcP, RegReg( dst, src) );
7079   ins_pipe( ialu_reg_reg );
7080 %}
7081 
7082 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7083   match(Set dst (AddP dst src));
7084   effect(KILL cr);
7085 
7086   format %{ "ADD    $dst,$src" %}
7087   opcode(0x81,0x00); /* Opcode 81 /0 id */
7088   // ins_encode( RegImm( dst, src) );
7089   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7090   ins_pipe( ialu_reg );
7091 %}
7092 
7093 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7094   match(Set dst (AddI dst (LoadI src)));
7095   effect(KILL cr);
7096 
7097   ins_cost(125);
7098   format %{ "ADD    $dst,$src" %}
7099   opcode(0x03);
7100   ins_encode( OpcP, RegMem( dst, src) );
7101   ins_pipe( ialu_reg_mem );
7102 %}
7103 
7104 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7105   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7106   effect(KILL cr);
7107 
7108   ins_cost(150);
7109   format %{ "ADD    $dst,$src" %}
7110   opcode(0x01);  /* Opcode 01 /r */
7111   ins_encode( OpcP, RegMem( src, dst ) );
7112   ins_pipe( ialu_mem_reg );
7113 %}
7114 
7115 // Add Memory with Immediate
7116 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7117   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7118   effect(KILL cr);
7119 
7120   ins_cost(125);
7121   format %{ "ADD    $dst,$src" %}
7122   opcode(0x81);               /* Opcode 81 /0 id */
7123   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7124   ins_pipe( ialu_mem_imm );
7125 %}
7126 
7127 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7128   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7129   effect(KILL cr);
7130 
7131   ins_cost(125);
7132   format %{ "INC    $dst" %}
7133   opcode(0xFF);               /* Opcode FF /0 */
7134   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7135   ins_pipe( ialu_mem_imm );
7136 %}
7137 
7138 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7139   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7140   effect(KILL cr);
7141 
7142   ins_cost(125);
7143   format %{ "DEC    $dst" %}
7144   opcode(0xFF);               /* Opcode FF /1 */
7145   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7146   ins_pipe( ialu_mem_imm );
7147 %}
7148 
7149 
7150 instruct checkCastPP( eRegP dst ) %{
7151   match(Set dst (CheckCastPP dst));
7152 
7153   size(0);
7154   format %{ "#checkcastPP of $dst" %}
7155   ins_encode( /*empty encoding*/ );
7156   ins_pipe( empty );
7157 %}
7158 
7159 instruct castPP( eRegP dst ) %{
7160   match(Set dst (CastPP dst));
7161   format %{ "#castPP of $dst" %}
7162   ins_encode( /*empty encoding*/ );
7163   ins_pipe( empty );
7164 %}
7165 
7166 instruct castII( rRegI dst ) %{
7167   match(Set dst (CastII dst));
7168   format %{ "#castII of $dst" %}
7169   ins_encode( /*empty encoding*/ );
7170   ins_cost(0);
7171   ins_pipe( empty );
7172 %}
7173 
7174 
7175 // Load-locked - same as a regular pointer load when used with compare-swap
7176 instruct loadPLocked(eRegP dst, memory mem) %{
7177   match(Set dst (LoadPLocked mem));
7178 
7179   ins_cost(125);
7180   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7181   opcode(0x8B);
7182   ins_encode( OpcP, RegMem(dst,mem));
7183   ins_pipe( ialu_reg_mem );
7184 %}
7185 
7186 // Conditional-store of the updated heap-top.
7187 // Used during allocation of the shared heap.
7188 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7189 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7190   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7191   // EAX is killed if there is contention, but then it's also unused.
7192   // In the common case of no contention, EAX holds the new oop address.
7193   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7194   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7195   ins_pipe( pipe_cmpxchg );
7196 %}
7197 
7198 // Conditional-store of an int value.
7199 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7200 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7201   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7202   effect(KILL oldval);
7203   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7204   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7205   ins_pipe( pipe_cmpxchg );
7206 %}
7207 
7208 // Conditional-store of a long value.
7209 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7210 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7211   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7212   effect(KILL oldval);
7213   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7214             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7215             "XCHG   EBX,ECX"
7216   %}
7217   ins_encode %{
7218     // Note: we need to swap rbx and rcx before and after the
7219     //       cmpxchg8 instruction because the instruction uses
7220     //       rcx as the high order word of the new value to store but
7221     //       our register encoding uses rbx.
7222     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7223     if( os::is_MP() )
7224       __ lock();
7225     __ cmpxchg8($mem$$Address);
7226     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7227   %}
7228   ins_pipe( pipe_cmpxchg );
7229 %}
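     // CMPXCHG8B implicitly compares EDX:EAX against memory and stores
     // ECX:EBX on success, which is why oldval and newval are pinned to those
     // register pairs; the XCHG of EBX and ECX above adapts this instruct's
     // eBCXRegL encoding to that fixed convention.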
7230 
7231 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7232 
7233 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7234   predicate(VM_Version::supports_cx8());
7235   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7236   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7237   effect(KILL cr, KILL oldval);
7238   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7239             "MOV    $res,0\n\t"
7240             "JNE,s  fail\n\t"
7241             "MOV    $res,1\n"
7242           "fail:" %}
7243   ins_encode( enc_cmpxchg8(mem_ptr),
7244               enc_flags_ne_to_boolean(res) );
7245   ins_pipe( pipe_cmpxchg );
7246 %}
7247 
7248 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7249   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7250   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7251   effect(KILL cr, KILL oldval);
7252   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7253             "MOV    $res,0\n\t"
7254             "JNE,s  fail\n\t"
7255             "MOV    $res,1\n"
7256           "fail:" %}
7257   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7258   ins_pipe( pipe_cmpxchg );
7259 %}
7260 
7261 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7262   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7263   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7264   effect(KILL cr, KILL oldval);
7265   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7266             "MOV    $res,0\n\t"
7267             "JNE,s  fail\n\t"
7268             "MOV    $res,1\n"
7269           "fail:" %}
7270   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7271   ins_pipe( pipe_cmpxchg );
7272 %}
7273 
7274 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7275   predicate(VM_Version::supports_cx8());
7276   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7277   effect(KILL cr);
7278   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7279   ins_encode( enc_cmpxchg8(mem_ptr) );
7280   ins_pipe( pipe_cmpxchg );
7281 %}
7282 
7283 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7284   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7285   effect(KILL cr);
7286   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7287   ins_encode( enc_cmpxchg(mem_ptr) );
7288   ins_pipe( pipe_cmpxchg );
7289 %}
7290 
7291 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7292   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7293   effect(KILL cr);
7294   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7295   ins_encode( enc_cmpxchg(mem_ptr) );
7296   ins_pipe( pipe_cmpxchg );
7297 %}
7298 
7299 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7300   predicate(n->as_LoadStore()->result_not_used());
7301   match(Set dummy (GetAndAddI mem add));
7302   effect(KILL cr);
7303   format %{ "ADDL  [$mem],$add" %}
7304   ins_encode %{
7305     if (os::is_MP()) { __ lock(); }
7306     __ addl($mem$$Address, $add$$constant);
7307   %}
7308   ins_pipe( pipe_cmpxchg );
7309 %}
7310 
7311 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7312   match(Set newval (GetAndAddI mem newval));
7313   effect(KILL cr);
7314   format %{ "XADDL  [$mem],$newval" %}
7315   ins_encode %{
7316     if (os::is_MP()) { __ lock(); }
7317     __ xaddl($mem$$Address, $newval$$Register);
7318   %}
7319   ins_pipe( pipe_cmpxchg );
7320 %}
7321 
7322 instruct xchgI( memory mem, rRegI newval) %{
7323   match(Set newval (GetAndSetI mem newval));
7324   format %{ "XCHGL  $newval,[$mem]" %}
7325   ins_encode %{
7326     __ xchgl($newval$$Register, $mem$$Address);
7327   %}
7328   ins_pipe( pipe_cmpxchg );
7329 %}
7330 
7331 instruct xchgP( memory mem, pRegP newval) %{
7332   match(Set newval (GetAndSetP mem newval));
7333   format %{ "XCHGL  $newval,[$mem]" %}
7334   ins_encode %{
7335     __ xchgl($newval$$Register, $mem$$Address);
7336   %}
7337   ins_pipe( pipe_cmpxchg );
7338 %}
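     // XCHG with a memory operand is implicitly locked on x86, so the
     // GetAndSet forms above need no explicit LOCK prefix, unlike the
     // XADD/ADD forms which must emit one on MP systems.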
7339 
7340 //----------Subtraction Instructions-------------------------------------------
7341 
7342 // Integer Subtraction Instructions
7343 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7344   match(Set dst (SubI dst src));
7345   effect(KILL cr);
7346 
7347   size(2);
7348   format %{ "SUB    $dst,$src" %}
7349   opcode(0x2B);
7350   ins_encode( OpcP, RegReg( dst, src) );
7351   ins_pipe( ialu_reg_reg );
7352 %}
7353 
7354 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7355   match(Set dst (SubI dst src));
7356   effect(KILL cr);
7357 
7358   format %{ "SUB    $dst,$src" %}
7359   opcode(0x81,0x05);  /* Opcode 81 /5 */
7360   // ins_encode( RegImm( dst, src) );
7361   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7362   ins_pipe( ialu_reg );
7363 %}
7364 
7365 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7366   match(Set dst (SubI dst (LoadI src)));
7367   effect(KILL cr);
7368 
7369   ins_cost(125);
7370   format %{ "SUB    $dst,$src" %}
7371   opcode(0x2B);
7372   ins_encode( OpcP, RegMem( dst, src) );
7373   ins_pipe( ialu_reg_mem );
7374 %}
7375 
7376 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7377   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7378   effect(KILL cr);
7379 
7380   ins_cost(150);
7381   format %{ "SUB    $dst,$src" %}
7382   opcode(0x29);  /* Opcode 29 /r */
7383   ins_encode( OpcP, RegMem( src, dst ) );
7384   ins_pipe( ialu_mem_reg );
7385 %}
7386 
7387 // Subtract from a pointer
7388 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7389   match(Set dst (AddP dst (SubI zero src)));
7390   effect(KILL cr);
7391 
7392   size(2);
7393   format %{ "SUB    $dst,$src" %}
7394   opcode(0x2B);
7395   ins_encode( OpcP, RegReg( dst, src) );
7396   ins_pipe( ialu_reg_reg );
7397 %}
7398 
7399 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7400   match(Set dst (SubI zero dst));
7401   effect(KILL cr);
7402 
7403   size(2);
7404   format %{ "NEG    $dst" %}
7405   opcode(0xF7,0x03);  // Opcode F7 /3
7406   ins_encode( OpcP, RegOpc( dst ) );
7407   ins_pipe( ialu_reg );
7408 %}
7409 
7410 //----------Multiplication/Division Instructions-------------------------------
7411 // Integer Multiplication Instructions
7412 // Multiply Register
7413 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7414   match(Set dst (MulI dst src));
7415   effect(KILL cr);
7416 
7417   size(3);
7418   ins_cost(300);
7419   format %{ "IMUL   $dst,$src" %}
7420   opcode(0xAF, 0x0F);
7421   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7422   ins_pipe( ialu_reg_reg_alu0 );
7423 %}
7424 
7425 // Multiply 32-bit Immediate
7426 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7427   match(Set dst (MulI src imm));
7428   effect(KILL cr);
7429 
7430   ins_cost(300);
7431   format %{ "IMUL   $dst,$src,$imm" %}
7432   opcode(0x69);  /* 69 /r id */
7433   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7434   ins_pipe( ialu_reg_reg_alu0 );
7435 %}
7436 
7437 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7438   match(Set dst src);
7439   effect(KILL cr);
7440 
7441   // Note that this is artificially increased to make it more expensive than loadConL
7442   ins_cost(250);
7443   format %{ "MOV    EAX,$src\t// low word only" %}
7444   opcode(0xB8);
7445   ins_encode( LdImmL_Lo(dst, src) );
7446   ins_pipe( ialu_reg_fat );
7447 %}
7448 
7449 // Multiply by 32-bit Immediate, taking the shifted high order results
7450 //  (special case for shift by 32)
7451 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7452   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7453   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7454              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7455              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7456   effect(USE src1, KILL cr);
7457 
7458   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7459   ins_cost(0*100 + 1*400 - 150);
7460   format %{ "IMUL   EDX:EAX,$src1" %}
7461   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7462   ins_pipe( pipe_slow );
7463 %}
7464 
7465 // Multiply by 32-bit Immediate, taking the shifted high order results
7466 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7467   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7468   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7469              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7470              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7471   effect(USE src1, KILL cr);
7472 
7473   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7474   ins_cost(1*100 + 1*400 - 150);
7475   format %{ "IMUL   EDX:EAX,$src1\n\t"
7476             "SAR    EDX,$cnt-32" %}
7477   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7478   ins_pipe( pipe_slow );
7479 %}
7480 
7481 // Multiply Memory 32-bit Immediate
7482 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7483   match(Set dst (MulI (LoadI src) imm));
7484   effect(KILL cr);
7485 
7486   ins_cost(300);
7487   format %{ "IMUL   $dst,$src,$imm" %}
7488   opcode(0x69);  /* 69 /r id */
7489   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7490   ins_pipe( ialu_reg_mem_alu0 );
7491 %}
7492 
7493 // Multiply Memory
7494 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7495   match(Set dst (MulI dst (LoadI src)));
7496   effect(KILL cr);
7497 
7498   ins_cost(350);
7499   format %{ "IMUL   $dst,$src" %}
7500   opcode(0xAF, 0x0F);
7501   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7502   ins_pipe( ialu_reg_mem_alu0 );
7503 %}
7504 
7505 // Multiply Register Int to Long
7506 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7507   // Basic Idea: long = (long)int * (long)int
7508   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7509   effect(DEF dst, USE src, USE src1, KILL flags);
7510 
7511   ins_cost(300);
7512   format %{ "IMUL   $dst,$src1" %}
7513 
7514   ins_encode( long_int_multiply( dst, src1 ) );
7515   ins_pipe( ialu_reg_reg_alu0 );
7516 %}
7517 
7518 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7519   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7520   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7521   effect(KILL flags);
7522 
7523   ins_cost(300);
7524   format %{ "MUL    $dst,$src1" %}
7525 
7526   ins_encode( long_uint_multiply(dst, src1) );
7527   ins_pipe( ialu_reg_reg_alu0 );
7528 %}
7529 
7530 // Multiply Register Long
7531 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7532   match(Set dst (MulL dst src));
7533   effect(KILL cr, TEMP tmp);
7534   ins_cost(4*100+3*400);
7535 // Basic idea: lo(result) = lo(x_lo * y_lo)
7536 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7537   format %{ "MOV    $tmp,$src.lo\n\t"
7538             "IMUL   $tmp,EDX\n\t"
7539             "MOV    EDX,$src.hi\n\t"
7540             "IMUL   EDX,EAX\n\t"
7541             "ADD    $tmp,EDX\n\t"
7542             "MUL    EDX:EAX,$src.lo\n\t"
7543             "ADD    EDX,$tmp" %}
7544   ins_encode( long_multiply( dst, src, tmp ) );
7545   ins_pipe( pipe_slow );
7546 %}
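     // Derivation for the instruct above: writing x = x_hi*2^32 + x_lo and
     // y = y_hi*2^32 + y_lo,
     //   x*y mod 2^64 = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32)
     // so one full 32x32->64 MUL of the low halves suffices, with the two
     // cross products (truncated to 32 bits by IMUL) added into the high word.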
7547 
7548 // Multiply Register Long where the left operand's high 32 bits are zero
7549 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7550   predicate(is_operand_hi32_zero(n->in(1)));
7551   match(Set dst (MulL dst src));
7552   effect(KILL cr, TEMP tmp);
7553   ins_cost(2*100+2*400);
7554 // Basic idea: lo(result) = lo(x_lo * y_lo)
7555 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7556   format %{ "MOV    $tmp,$src.hi\n\t"
7557             "IMUL   $tmp,EAX\n\t"
7558             "MUL    EDX:EAX,$src.lo\n\t"
7559             "ADD    EDX,$tmp" %}
7560   ins_encode %{
7561     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7562     __ imull($tmp$$Register, rax);
7563     __ mull($src$$Register);
7564     __ addl(rdx, $tmp$$Register);
7565   %}
7566   ins_pipe( pipe_slow );
7567 %}
7568 
7569 // Multiply Register Long where the right operand's high 32 bits are zero
7570 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7571   predicate(is_operand_hi32_zero(n->in(2)));
7572   match(Set dst (MulL dst src));
7573   effect(KILL cr, TEMP tmp);
7574   ins_cost(2*100+2*400);
7575 // Basic idea: lo(result) = lo(x_lo * y_lo)
7576 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7577   format %{ "MOV    $tmp,$src.lo\n\t"
7578             "IMUL   $tmp,EDX\n\t"
7579             "MUL    EDX:EAX,$src.lo\n\t"
7580             "ADD    EDX,$tmp" %}
7581   ins_encode %{
7582     __ movl($tmp$$Register, $src$$Register);
7583     __ imull($tmp$$Register, rdx);
7584     __ mull($src$$Register);
7585     __ addl(rdx, $tmp$$Register);
7586   %}
7587   ins_pipe( pipe_slow );
7588 %}
7589 
7590 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7591 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7592   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7593   match(Set dst (MulL dst src));
7594   effect(KILL cr);
7595   ins_cost(1*400);
7596 // Basic idea: lo(result) = lo(x_lo * y_lo)
7597 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7598   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7599   ins_encode %{
7600     __ mull($src$$Register);
7601   %}
7602   ins_pipe( pipe_slow );
7603 %}
7604 
7605 // Multiply Register Long by small constant
7606 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7607   match(Set dst (MulL dst src));
7608   effect(KILL cr, TEMP tmp);
7609   ins_cost(2*100+2*400);
7610   size(12);
7611 // Basic idea: lo(result) = lo(src * EAX)
7612 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7613   format %{ "IMUL   $tmp,EDX,$src\n\t"
7614             "MOV    EDX,$src\n\t"
7615             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7616             "ADD    EDX,$tmp" %}
7617   ins_encode( long_multiply_con( dst, src, tmp ) );
7618   ins_pipe( pipe_slow );
7619 %}
7620 
7621 // Integer DIV with Register
7622 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7623   match(Set rax (DivI rax div));
7624   effect(KILL rdx, KILL cr);
7625   size(26);
7626   ins_cost(30*100+10*100);
7627   format %{ "CMP    EAX,0x80000000\n\t"
7628             "JNE,s  normal\n\t"
7629             "XOR    EDX,EDX\n\t"
7630             "CMP    ECX,-1\n\t"
7631             "JE,s   done\n"
7632     "normal: CDQ\n\t"
7633             "IDIV   $div\n\t"
7634     "done:"        %}
7635   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7636   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7637   ins_pipe( ialu_reg_reg_alu0 );
7638 %}
7639 
7640 // Divide Register Long
7641 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7642   match(Set dst (DivL src1 src2));
7643   effect( KILL cr, KILL cx, KILL bx );
7644   ins_cost(10000);
7645   format %{ "PUSH   $src1.hi\n\t"
7646             "PUSH   $src1.lo\n\t"
7647             "PUSH   $src2.hi\n\t"
7648             "PUSH   $src2.lo\n\t"
7649             "CALL   SharedRuntime::ldiv\n\t"
7650             "ADD    ESP,16" %}
7651   ins_encode( long_div(src1,src2) );
7652   ins_pipe( pipe_slow );
7653 %}
7654 
7655 // Integer DIVMOD with Register, both quotient and mod results
7656 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7657   match(DivModI rax div);
7658   effect(KILL cr);
7659   size(26);
7660   ins_cost(30*100+10*100);
7661   format %{ "CMP    EAX,0x80000000\n\t"
7662             "JNE,s  normal\n\t"
7663             "XOR    EDX,EDX\n\t"
7664             "CMP    ECX,-1\n\t"
7665             "JE,s   done\n"
7666     "normal: CDQ\n\t"
7667             "IDIV   $div\n\t"
7668     "done:"        %}
7669   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7670   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7671   ins_pipe( pipe_slow );
7672 %}
7673 
7674 // Integer MOD with Register
7675 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7676   match(Set rdx (ModI rax div));
7677   effect(KILL rax, KILL cr);
7678 
7679   size(26);
7680   ins_cost(300);
7681   format %{ "CDQ\n\t"
7682             "IDIV   $div" %}
7683   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7684   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7685   ins_pipe( ialu_reg_reg_alu0 );
7686 %}
7687 
7688 // Remainder Register Long
7689 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7690   match(Set dst (ModL src1 src2));
7691   effect( KILL cr, KILL cx, KILL bx );
7692   ins_cost(10000);
7693   format %{ "PUSH   $src1.hi\n\t"
7694             "PUSH   $src1.lo\n\t"
7695             "PUSH   $src2.hi\n\t"
7696             "PUSH   $src2.lo\n\t"
7697             "CALL   SharedRuntime::lrem\n\t"
7698             "ADD    ESP,16" %}
7699   ins_encode( long_mod(src1,src2) );
7700   ins_pipe( pipe_slow );
7701 %}
7702 
7703 // Divide Register Long (no special case since divisor != -1)
7704 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7705   match(Set dst (DivL dst imm));
7706   effect( TEMP tmp, TEMP tmp2, KILL cr );
7707   ins_cost(1000);
7708   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7709             "XOR    $tmp2,$tmp2\n\t"
7710             "CMP    $tmp,EDX\n\t"
7711             "JA,s   fast\n\t"
7712             "MOV    $tmp2,EAX\n\t"
7713             "MOV    EAX,EDX\n\t"
7714             "MOV    EDX,0\n\t"
7715             "JLE,s  pos\n\t"
7716             "LNEG   EAX : $tmp2\n\t"
7717             "DIV    $tmp # unsigned division\n\t"
7718             "XCHG   EAX,$tmp2\n\t"
7719             "DIV    $tmp\n\t"
7720             "LNEG   $tmp2 : EAX\n\t"
7721             "JMP,s  done\n"
7722     "pos:\n\t"
7723             "DIV    $tmp\n\t"
7724             "XCHG   EAX,$tmp2\n"
7725     "fast:\n\t"
7726             "DIV    $tmp\n"
7727     "done:\n\t"
7728             "MOV    EDX,$tmp2\n\t"
7729             "NEG    EDX:EAX # if $imm < 0" %}
7730   ins_encode %{
7731     int con = (int)$imm$$constant;
7732     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7733     int pcon = (con > 0) ? con : -con;
7734     Label Lfast, Lpos, Ldone;
7735 
7736     __ movl($tmp$$Register, pcon);
7737     __ xorl($tmp2$$Register,$tmp2$$Register);
7738     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7739     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7740 
7741     __ movl($tmp2$$Register, $dst$$Register); // save
7742     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7743     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7744     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7745 
7746     // Negative dividend.
7747     // convert value to positive to use unsigned division
7748     __ lneg($dst$$Register, $tmp2$$Register);
7749     __ divl($tmp$$Register);
7750     __ xchgl($dst$$Register, $tmp2$$Register);
7751     __ divl($tmp$$Register);
7752     // revert result back to negative
7753     __ lneg($tmp2$$Register, $dst$$Register);
7754     __ jmpb(Ldone);
7755 
7756     __ bind(Lpos);
7757     __ divl($tmp$$Register); // Use unsigned division
7758     __ xchgl($dst$$Register, $tmp2$$Register);
7759     // Fall through for the final divide; tmp2 has the 32-bit hi result
7760 
7761     __ bind(Lfast);
7762     // fast path: src is positive
7763     __ divl($tmp$$Register); // Use unsigned division
7764 
7765     __ bind(Ldone);
7766     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7767     if (con < 0) {
7768       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7769     }
7770   %}
7771   ins_pipe( pipe_slow );
7772 %}
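     // The two-DIV sequence above is schoolbook long division in base 2^32:
     // after the dividend is made non-negative, the high word is divided
     // first and its remainder becomes the high part of the low-word divide.
     // When the high word is already below the divisor the quotient fits in
     // 32 bits and only the single DIV on the fast path is executed.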
7773 
7774 // Remainder Register Long (remainder fits into 32 bits)
7775 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7776   match(Set dst (ModL dst imm));
7777   effect( TEMP tmp, TEMP tmp2, KILL cr );
7778   ins_cost(1000);
7779   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7780             "CMP    $tmp,EDX\n\t"
7781             "JA,s   fast\n\t"
7782             "MOV    $tmp2,EAX\n\t"
7783             "MOV    EAX,EDX\n\t"
7784             "MOV    EDX,0\n\t"
7785             "JLE,s  pos\n\t"
7786             "LNEG   EAX : $tmp2\n\t"
7787             "DIV    $tmp # unsigned division\n\t"
7788             "MOV    EAX,$tmp2\n\t"
7789             "DIV    $tmp\n\t"
7790             "NEG    EDX\n\t"
7791             "JMP,s  done\n"
7792     "pos:\n\t"
7793             "DIV    $tmp\n\t"
7794             "MOV    EAX,$tmp2\n"
7795     "fast:\n\t"
7796             "DIV    $tmp\n"
7797     "done:\n\t"
7798             "MOV    EAX,EDX\n\t"
7799             "SAR    EDX,31\n\t" %}
7800   ins_encode %{
7801     int con = (int)$imm$$constant;
7802     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7803     int pcon = (con > 0) ? con : -con;
7804     Label  Lfast, Lpos, Ldone;
7805 
7806     __ movl($tmp$$Register, pcon);
7807     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7808     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7809 
7810     __ movl($tmp2$$Register, $dst$$Register); // save
7811     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7812     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7813     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7814 
7815     // Negative dividend.
7816     // convert value to positive to use unsigned division
7817     __ lneg($dst$$Register, $tmp2$$Register);
7818     __ divl($tmp$$Register);
7819     __ movl($dst$$Register, $tmp2$$Register);
7820     __ divl($tmp$$Register);
7821     // revert remainder back to negative
7822     __ negl(HIGH_FROM_LOW($dst$$Register));
7823     __ jmpb(Ldone);
7824 
7825     __ bind(Lpos);
7826     __ divl($tmp$$Register);
7827     __ movl($dst$$Register, $tmp2$$Register);
7828 
7829     __ bind(Lfast);
7830     // fast path: src is positive
7831     __ divl($tmp$$Register);
7832 
7833     __ bind(Ldone);
7834     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7835     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7836 
7837   %}
7838   ins_pipe( pipe_slow );
7839 %}
7840 
7841 // Integer Shift Instructions
7842 // Shift Left by one
7843 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7844   match(Set dst (LShiftI dst shift));
7845   effect(KILL cr);
7846 
7847   size(2);
7848   format %{ "SHL    $dst,$shift" %}
7849   opcode(0xD1, 0x4);  /* D1 /4 */
7850   ins_encode( OpcP, RegOpc( dst ) );
7851   ins_pipe( ialu_reg );
7852 %}
7853 
7854 // Shift Left by 8-bit immediate
7855 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7856   match(Set dst (LShiftI dst shift));
7857   effect(KILL cr);
7858 
7859   size(3);
7860   format %{ "SHL    $dst,$shift" %}
7861   opcode(0xC1, 0x4);  /* C1 /4 ib */
7862   ins_encode( RegOpcImm( dst, shift) );
7863   ins_pipe( ialu_reg );
7864 %}
7865 
7866 // Shift Left by variable
7867 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7868   match(Set dst (LShiftI dst shift));
7869   effect(KILL cr);
7870 
7871   size(2);
7872   format %{ "SHL    $dst,$shift" %}
7873   opcode(0xD3, 0x4);  /* D3 /4 */
7874   ins_encode( OpcP, RegOpc( dst ) );
7875   ins_pipe( ialu_reg_reg );
7876 %}
7877 
7878 // Arithmetic shift right by one
7879 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7880   match(Set dst (RShiftI dst shift));
7881   effect(KILL cr);
7882 
7883   size(2);
7884   format %{ "SAR    $dst,$shift" %}
7885   opcode(0xD1, 0x7);  /* D1 /7 */
7886   ins_encode( OpcP, RegOpc( dst ) );
7887   ins_pipe( ialu_reg );
7888 %}
7889 
7890 // Arithmetic shift right by one, memory operand
7891 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7892   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7893   effect(KILL cr);
7894   format %{ "SAR    $dst,$shift" %}
7895   opcode(0xD1, 0x7);  /* D1 /7 */
7896   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7897   ins_pipe( ialu_mem_imm );
7898 %}
7899 
7900 // Arithmetic Shift Right by 8-bit immediate
7901 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7902   match(Set dst (RShiftI dst shift));
7903   effect(KILL cr);
7904 
7905   size(3);
7906   format %{ "SAR    $dst,$shift" %}
7907   opcode(0xC1, 0x7);  /* C1 /7 ib */
7908   ins_encode( RegOpcImm( dst, shift ) );
7909   ins_pipe( ialu_mem_imm );
7910 %}
7911 
7912 // Arithmetic Shift Right by 8-bit immediate, memory operand
7913 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7914   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7915   effect(KILL cr);
7916 
7917   format %{ "SAR    $dst,$shift" %}
7918   opcode(0xC1, 0x7);  /* C1 /7 ib */
7919   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7920   ins_pipe( ialu_mem_imm );
7921 %}
7922 
7923 // Arithmetic Shift Right by variable
7924 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7925   match(Set dst (RShiftI dst shift));
7926   effect(KILL cr);
7927 
7928   size(2);
7929   format %{ "SAR    $dst,$shift" %}
7930   opcode(0xD3, 0x7);  /* D3 /7 */
7931   ins_encode( OpcP, RegOpc( dst ) );
7932   ins_pipe( ialu_reg_reg );
7933 %}
7934 
7935 // Logical shift right by one
7936 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7937   match(Set dst (URShiftI dst shift));
7938   effect(KILL cr);
7939 
7940   size(2);
7941   format %{ "SHR    $dst,$shift" %}
7942   opcode(0xD1, 0x5);  /* D1 /5 */
7943   ins_encode( OpcP, RegOpc( dst ) );
7944   ins_pipe( ialu_reg );
7945 %}
7946 
7947 // Logical Shift Right by 8-bit immediate
7948 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7949   match(Set dst (URShiftI dst shift));
7950   effect(KILL cr);
7951 
7952   size(3);
7953   format %{ "SHR    $dst,$shift" %}
7954   opcode(0xC1, 0x5);  /* C1 /5 ib */
7955   ins_encode( RegOpcImm( dst, shift) );
7956   ins_pipe( ialu_reg );
7957 %}
7958 
7959 
7960 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7961 // This idiom is used by the compiler for the i2b bytecode.
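     // In Java-source terms this is the pattern produced for a narrowing cast
     // to byte, e.g.  byte b = (byte) x;  which reaches the matcher as
     // (x << 24) >> 24 and is implemented with a single MOVSX.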
7962 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7963   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7964 
7965   size(3);
7966   format %{ "MOVSX  $dst,$src :8" %}
7967   ins_encode %{
7968     __ movsbl($dst$$Register, $src$$Register);
7969   %}
7970   ins_pipe(ialu_reg_reg);
7971 %}
7972 
7973 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7974 // This idiom is used by the compiler for the i2s bytecode.
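     // e.g.  short s = (short) x;  reaches the matcher as (x << 16) >> 16.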
7975 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7976   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7977 
7978   size(3);
7979   format %{ "MOVSX  $dst,$src :16" %}
7980   ins_encode %{
7981     __ movswl($dst$$Register, $src$$Register);
7982   %}
7983   ins_pipe(ialu_reg_reg);
7984 %}
7985 
7986 
7987 // Logical Shift Right by variable
7988 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7989   match(Set dst (URShiftI dst shift));
7990   effect(KILL cr);
7991 
7992   size(2);
7993   format %{ "SHR    $dst,$shift" %}
7994   opcode(0xD3, 0x5);  /* D3 /5 */
7995   ins_encode( OpcP, RegOpc( dst ) );
7996   ins_pipe( ialu_reg_reg );
7997 %}
7998 
7999 
8000 //----------Logical Instructions-----------------------------------------------
8001 //----------Integer Logical Instructions---------------------------------------
8002 // And Instructions
8003 // And Register with Register
8004 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8005   match(Set dst (AndI dst src));
8006   effect(KILL cr);
8007 
8008   size(2);
8009   format %{ "AND    $dst,$src" %}
8010   opcode(0x23);
8011   ins_encode( OpcP, RegReg( dst, src) );
8012   ins_pipe( ialu_reg_reg );
8013 %}
8014 
8015 // And Register with Immediate
8016 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8017   match(Set dst (AndI dst src));
8018   effect(KILL cr);
8019 
8020   format %{ "AND    $dst,$src" %}
8021   opcode(0x81,0x04);  /* Opcode 81 /4 */
8022   // ins_encode( RegImm( dst, src) );
8023   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8024   ins_pipe( ialu_reg );
8025 %}
8026 
8027 // And Register with Memory
8028 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8029   match(Set dst (AndI dst (LoadI src)));
8030   effect(KILL cr);
8031 
8032   ins_cost(125);
8033   format %{ "AND    $dst,$src" %}
8034   opcode(0x23);
8035   ins_encode( OpcP, RegMem( dst, src) );
8036   ins_pipe( ialu_reg_mem );
8037 %}
8038 
8039 // And Memory with Register
8040 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8041   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8042   effect(KILL cr);
8043 
8044   ins_cost(150);
8045   format %{ "AND    $dst,$src" %}
8046   opcode(0x21);  /* Opcode 21 /r */
8047   ins_encode( OpcP, RegMem( src, dst ) );
8048   ins_pipe( ialu_mem_reg );
8049 %}
8050 
8051 // And Memory with Immediate
8052 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8053   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8054   effect(KILL cr);
8055 
8056   ins_cost(125);
8057   format %{ "AND    $dst,$src" %}
8058   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8059   // ins_encode( MemImm( dst, src) );
8060   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8061   ins_pipe( ialu_mem_imm );
8062 %}
8063 
8064 // BMI1 instructions
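     // The match rules below recognize the canonical bit-trick shapes and map
     // them onto single BMI1 instructions when UseBMI1Instructions is set:
     //   ANDN:   ~src1 & src2       from (AndI (XorI src1 -1) src2)
     //   BLSI:   src & -src         isolate the lowest set bit
     //   BLSMSK: src ^ (src - 1)    mask up to and including the lowest set bit
     //   BLSR:   src & (src - 1)    clear the lowest set bit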
8065 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8066   match(Set dst (AndI (XorI src1 minus_1) src2));
8067   predicate(UseBMI1Instructions);
8068   effect(KILL cr);
8069 
8070   format %{ "ANDNL  $dst, $src1, $src2" %}
8071 
8072   ins_encode %{
8073     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8074   %}
8075   ins_pipe(ialu_reg);
8076 %}
8077 
8078 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8079   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8080   predicate(UseBMI1Instructions);
8081   effect(KILL cr);
8082 
8083   ins_cost(125);
8084   format %{ "ANDNL  $dst, $src1, $src2" %}
8085 
8086   ins_encode %{
8087     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8088   %}
8089   ins_pipe(ialu_reg_mem);
8090 %}
8091 
8092 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8093   match(Set dst (AndI (SubI imm_zero src) src));
8094   predicate(UseBMI1Instructions);
8095   effect(KILL cr);
8096 
8097   format %{ "BLSIL  $dst, $src" %}
8098 
8099   ins_encode %{
8100     __ blsil($dst$$Register, $src$$Register);
8101   %}
8102   ins_pipe(ialu_reg);
8103 %}
8104 
8105 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8106   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8107   predicate(UseBMI1Instructions);
8108   effect(KILL cr);
8109 
8110   ins_cost(125);
8111   format %{ "BLSIL  $dst, $src" %}
8112 
8113   ins_encode %{
8114     __ blsil($dst$$Register, $src$$Address);
8115   %}
8116   ins_pipe(ialu_reg_mem);
8117 %}
8118 
8119 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8120 %{
8121   match(Set dst (XorI (AddI src minus_1) src));
8122   predicate(UseBMI1Instructions);
8123   effect(KILL cr);
8124 
8125   format %{ "BLSMSKL $dst, $src" %}
8126 
8127   ins_encode %{
8128     __ blsmskl($dst$$Register, $src$$Register);
8129   %}
8130 
8131   ins_pipe(ialu_reg);
8132 %}
8133 
8134 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8135 %{
8136   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8137   predicate(UseBMI1Instructions);
8138   effect(KILL cr);
8139 
8140   ins_cost(125);
8141   format %{ "BLSMSKL $dst, $src" %}
8142 
8143   ins_encode %{
8144     __ blsmskl($dst$$Register, $src$$Address);
8145   %}
8146 
8147   ins_pipe(ialu_reg_mem);
8148 %}
8149 
8150 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8151 %{
8152   match(Set dst (AndI (AddI src minus_1) src) );
8153   predicate(UseBMI1Instructions);
8154   effect(KILL cr);
8155 
8156   format %{ "BLSRL  $dst, $src" %}
8157 
8158   ins_encode %{
8159     __ blsrl($dst$$Register, $src$$Register);
8160   %}
8161 
8162   ins_pipe(ialu_reg);
8163 %}
8164 
8165 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8166 %{
8167   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8168   predicate(UseBMI1Instructions);
8169   effect(KILL cr);
8170 
8171   ins_cost(125);
8172   format %{ "BLSRL  $dst, $src" %}
8173 
8174   ins_encode %{
8175     __ blsrl($dst$$Register, $src$$Address);
8176   %}
8177 
8178   ins_pipe(ialu_reg_mem);
8179 %}
8180 
8181 // Or Instructions
8182 // Or Register with Register
8183 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8184   match(Set dst (OrI dst src));
8185   effect(KILL cr);
8186 
8187   size(2);
8188   format %{ "OR     $dst,$src" %}
8189   opcode(0x0B);
8190   ins_encode( OpcP, RegReg( dst, src) );
8191   ins_pipe( ialu_reg_reg );
8192 %}
8193 
8194 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8195   match(Set dst (OrI dst (CastP2X src)));
8196   effect(KILL cr);
8197 
8198   size(2);
8199   format %{ "OR     $dst,$src" %}
8200   opcode(0x0B);
8201   ins_encode( OpcP, RegReg( dst, src) );
8202   ins_pipe( ialu_reg_reg );
8203 %}
8204 
8205 
8206 // Or Register with Immediate
8207 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8208   match(Set dst (OrI dst src));
8209   effect(KILL cr);
8210 
8211   format %{ "OR     $dst,$src" %}
8212   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8213   // ins_encode( RegImm( dst, src) );
8214   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215   ins_pipe( ialu_reg );
8216 %}
8217 
8218 // Or Register with Memory
8219 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8220   match(Set dst (OrI dst (LoadI src)));
8221   effect(KILL cr);
8222 
8223   ins_cost(125);
8224   format %{ "OR     $dst,$src" %}
8225   opcode(0x0B);
8226   ins_encode( OpcP, RegMem( dst, src) );
8227   ins_pipe( ialu_reg_mem );
8228 %}
8229 
8230 // Or Memory with Register
8231 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8232   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8233   effect(KILL cr);
8234 
8235   ins_cost(150);
8236   format %{ "OR     $dst,$src" %}
8237   opcode(0x09);  /* Opcode 09 /r */
8238   ins_encode( OpcP, RegMem( src, dst ) );
8239   ins_pipe( ialu_mem_reg );
8240 %}
8241 
8242 // Or Memory with Immediate
8243 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8244   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8245   effect(KILL cr);
8246 
8247   ins_cost(125);
8248   format %{ "OR     $dst,$src" %}
8249   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8250   // ins_encode( MemImm( dst, src) );
8251   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8252   ins_pipe( ialu_mem_imm );
8253 %}
8254 
8255 // ROL/ROR
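     // The rol/ror "expand" instructs below carry no match rule of their own;
     // they are only reached through the expand blocks of the pattern instructs
     // that recognize the rotate idiom written as a pair of opposite shifts
     // OR'd together, e.g. (x << 3) | (x >>> 29) for a rotate-left by 3.  For
     // the imm8 forms the predicate checks that the two shift counts sum to a
     // multiple of 32.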
8256 // ROL expand
8257 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8258   effect(USE_DEF dst, USE shift, KILL cr);
8259 
8260   format %{ "ROL    $dst, $shift" %}
8261   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8262   ins_encode( OpcP, RegOpc( dst ));
8263   ins_pipe( ialu_reg );
8264 %}
8265 
8266 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8267   effect(USE_DEF dst, USE shift, KILL cr);
8268 
8269   format %{ "ROL    $dst, $shift" %}
8270   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8271   ins_encode( RegOpcImm(dst, shift) );
8272   ins_pipe(ialu_reg);
8273 %}
8274 
8275 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8276   effect(USE_DEF dst, USE shift, KILL cr);
8277 
8278   format %{ "ROL    $dst, $shift" %}
8279   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8280   ins_encode(OpcP, RegOpc(dst));
8281   ins_pipe( ialu_reg_reg );
8282 %}
8283 // end of ROL expand
8284 
8285 // ROL 32bit by one once
8286 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8287   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8288 
8289   expand %{
8290     rolI_eReg_imm1(dst, lshift, cr);
8291   %}
8292 %}
8293 
8294 // ROL 32bit var by imm8 once
8295 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8296   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8297   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8298 
8299   expand %{
8300     rolI_eReg_imm8(dst, lshift, cr);
8301   %}
8302 %}
8303 
8304 // ROL 32bit var by var once
8305 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8306   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8307 
8308   expand %{
8309     rolI_eReg_CL(dst, shift, cr);
8310   %}
8311 %}
8312 
8313 // ROL 32bit var by var once
8314 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8315   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8316 
8317   expand %{
8318     rolI_eReg_CL(dst, shift, cr);
8319   %}
8320 %}
8321 
8322 // ROR expand
8323 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8324   effect(USE_DEF dst, USE shift, KILL cr);
8325 
8326   format %{ "ROR    $dst, $shift" %}
8327   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8328   ins_encode( OpcP, RegOpc( dst ) );
8329   ins_pipe( ialu_reg );
8330 %}
8331 
8332 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8333   effect (USE_DEF dst, USE shift, KILL cr);
8334 
8335   format %{ "ROR    $dst, $shift" %}
8336   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8337   ins_encode( RegOpcImm(dst, shift) );
8338   ins_pipe( ialu_reg );
8339 %}
8340 
8341 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8342   effect(USE_DEF dst, USE shift, KILL cr);
8343 
8344   format %{ "ROR    $dst, $shift" %}
8345   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8346   ins_encode(OpcP, RegOpc(dst));
8347   ins_pipe( ialu_reg_reg );
8348 %}
8349 // end of ROR expand
8350 
8351 // ROR 32bit by one once
8352 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8353   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8354 
8355   expand %{
8356     rorI_eReg_imm1(dst, rshift, cr);
8357   %}
8358 %}
8359 
8360 // ROR 32bit by immI8 once
8361 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8362   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8363   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8364 
8365   expand %{
8366     rorI_eReg_imm8(dst, rshift, cr);
8367   %}
8368 %}
8369 
8370 // ROR 32bit var by var once
8371 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8372   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8373 
8374   expand %{
8375     rorI_eReg_CL(dst, shift, cr);
8376   %}
8377 %}
8378 
8379 // ROR 32bit var by var once
8380 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8381   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8382 
8383   expand %{
8384     rorI_eReg_CL(dst, shift, cr);
8385   %}
8386 %}
8387 
8388 // Xor Instructions
8389 // Xor Register with Register
8390 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8391   match(Set dst (XorI dst src));
8392   effect(KILL cr);
8393 
8394   size(2);
8395   format %{ "XOR    $dst,$src" %}
8396   opcode(0x33);
8397   ins_encode( OpcP, RegReg( dst, src) );
8398   ins_pipe( ialu_reg_reg );
8399 %}
8400 
8401 // Xor Register with Immediate -1
8402 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8403   match(Set dst (XorI dst imm));
8404 
8405   size(2);
8406   format %{ "NOT    $dst" %}
8407   ins_encode %{
8408      __ notl($dst$$Register);
8409   %}
8410   ins_pipe( ialu_reg );
8411 %}
8412 
8413 // Xor Register with Immediate
8414 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8415   match(Set dst (XorI dst src));
8416   effect(KILL cr);
8417 
8418   format %{ "XOR    $dst,$src" %}
8419   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8420   // ins_encode( RegImm( dst, src) );
8421   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8422   ins_pipe( ialu_reg );
8423 %}
8424 
8425 // Xor Register with Memory
8426 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8427   match(Set dst (XorI dst (LoadI src)));
8428   effect(KILL cr);
8429 
8430   ins_cost(125);
8431   format %{ "XOR    $dst,$src" %}
8432   opcode(0x33);
8433   ins_encode( OpcP, RegMem(dst, src) );
8434   ins_pipe( ialu_reg_mem );
8435 %}
8436 
8437 // Xor Memory with Register
8438 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8439   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8440   effect(KILL cr);
8441 
8442   ins_cost(150);
8443   format %{ "XOR    $dst,$src" %}
8444   opcode(0x31);  /* Opcode 31 /r */
8445   ins_encode( OpcP, RegMem( src, dst ) );
8446   ins_pipe( ialu_mem_reg );
8447 %}
8448 
8449 // Xor Memory with Immediate
8450 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8451   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8452   effect(KILL cr);
8453 
8454   ins_cost(125);
8455   format %{ "XOR    $dst,$src" %}
8456   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8457   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8458   ins_pipe( ialu_mem_imm );
8459 %}
8460 
8461 //----------Convert Int to Boolean---------------------------------------------
8462 
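     // Conv2B produces dst = (src != 0) ? 1 : 0.  The expansion copies src to
     // dst, then NEG dst sets CF exactly when src is non-zero, and ADC dst,src
     // computes (-src) + src + CF, i.e. just the carry bit.  The same trick is
     // used for both the int and the pointer flavor.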
8463 instruct movI_nocopy(rRegI dst, rRegI src) %{
8464   effect( DEF dst, USE src );
8465   format %{ "MOV    $dst,$src" %}
8466   ins_encode( enc_Copy( dst, src) );
8467   ins_pipe( ialu_reg_reg );
8468 %}
8469 
8470 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8471   effect( USE_DEF dst, USE src, KILL cr );
8472 
8473   size(4);
8474   format %{ "NEG    $dst\n\t"
8475             "ADC    $dst,$src" %}
8476   ins_encode( neg_reg(dst),
8477               OpcRegReg(0x13,dst,src) );
8478   ins_pipe( ialu_reg_reg_long );
8479 %}
8480 
8481 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8482   match(Set dst (Conv2B src));
8483 
8484   expand %{
8485     movI_nocopy(dst,src);
8486     ci2b(dst,src,cr);
8487   %}
8488 %}
8489 
8490 instruct movP_nocopy(rRegI dst, eRegP src) %{
8491   effect( DEF dst, USE src );
8492   format %{ "MOV    $dst,$src" %}
8493   ins_encode( enc_Copy( dst, src) );
8494   ins_pipe( ialu_reg_reg );
8495 %}
8496 
8497 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8498   effect( USE_DEF dst, USE src, KILL cr );
8499   format %{ "NEG    $dst\n\t"
8500             "ADC    $dst,$src" %}
8501   ins_encode( neg_reg(dst),
8502               OpcRegReg(0x13,dst,src) );
8503   ins_pipe( ialu_reg_reg_long );
8504 %}
8505 
8506 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8507   match(Set dst (Conv2B src));
8508 
8509   expand %{
8510     movP_nocopy(dst,src);
8511     cp2b(dst,src,cr);
8512   %}
8513 %}
8514 
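     // CmpLTMask produces an all-ones mask when p < q (signed) and zero
     // otherwise: SETlt leaves 0 or 1 in the low byte and NEG turns the 1 into
     // -1.  The cmpLTMask0 form, comparing against zero, reduces to an
     // arithmetic shift of the sign bit across the whole register.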
8515 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8516   match(Set dst (CmpLTMask p q));
8517   effect(KILL cr);
8518   ins_cost(400);
8519 
8520   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8521   format %{ "XOR    $dst,$dst\n\t"
8522             "CMP    $p,$q\n\t"
8523             "SETlt  $dst\n\t"
8524             "NEG    $dst" %}
8525   ins_encode %{
8526     Register Rp = $p$$Register;
8527     Register Rq = $q$$Register;
8528     Register Rd = $dst$$Register;
8529     Label done;
8530     __ xorl(Rd, Rd);
8531     __ cmpl(Rp, Rq);
8532     __ setb(Assembler::less, Rd);
8533     __ negl(Rd);
8534   %}
8535 
8536   ins_pipe(pipe_slow);
8537 %}
8538 
8539 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8540   match(Set dst (CmpLTMask dst zero));
8541   effect(DEF dst, KILL cr);
8542   ins_cost(100);
8543 
8544   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8545   ins_encode %{
8546     __ sarl($dst$$Register, 31);
8547   %}
8548   ins_pipe(ialu_reg);
8549 %}
8550 
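     // The two instructs below fold the mask back into the expression instead
     // of materializing it: cadd_cmpLTMask computes p = (p - q) + (p < q ? y : 0)
     // and and_cmpLTMask computes y = (p < q ? y : 0), each with a short
     // forward branch rather than the SETlt/NEG sequence.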
8551 /* better to save a register than avoid a branch */
8552 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8553   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8554   effect(KILL cr);
8555   ins_cost(400);
8556   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8557             "JGE    done\n\t"
8558             "ADD    $p,$y\n"
8559             "done:  " %}
8560   ins_encode %{
8561     Register Rp = $p$$Register;
8562     Register Rq = $q$$Register;
8563     Register Ry = $y$$Register;
8564     Label done;
8565     __ subl(Rp, Rq);
8566     __ jccb(Assembler::greaterEqual, done);
8567     __ addl(Rp, Ry);
8568     __ bind(done);
8569   %}
8570 
8571   ins_pipe(pipe_cmplt);
8572 %}
8573 
8574 /* better to save a register than avoid a branch */
8575 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8576   match(Set y (AndI (CmpLTMask p q) y));
8577   effect(KILL cr);
8578 
8579   ins_cost(300);
8580 
8581   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8582             "JLT      done\n\t"
8583             "XORL     $y, $y\n"
8584             "done:  " %}
8585   ins_encode %{
8586     Register Rp = $p$$Register;
8587     Register Rq = $q$$Register;
8588     Register Ry = $y$$Register;
8589     Label done;
8590     __ cmpl(Rp, Rq);
8591     __ jccb(Assembler::less, done);
8592     __ xorl(Ry, Ry);
8593     __ bind(done);
8594   %}
8595 
8596   ins_pipe(pipe_cmplt);
8597 %}
8598 
8599 /* If I enable this, I encourage spilling in the inner loop of compress.
8600 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8601   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8602 */
8603 //----------Overflow Math Instructions-----------------------------------------
8604 
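     // The overflow instructs only produce the flags register: the ADD/NEG/IMUL
     // is performed (clobbering the USE_KILL input or a TEMP) solely so that a
     // following branch can test the OF flag, and OverflowSubI needs nothing
     // more than a CMP since neither input is modified.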
8605 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8606 %{
8607   match(Set cr (OverflowAddI op1 op2));
8608   effect(DEF cr, USE_KILL op1, USE op2);
8609 
8610   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8611 
8612   ins_encode %{
8613     __ addl($op1$$Register, $op2$$Register);
8614   %}
8615   ins_pipe(ialu_reg_reg);
8616 %}
8617 
8618 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8619 %{
8620   match(Set cr (OverflowAddI op1 op2));
8621   effect(DEF cr, USE_KILL op1, USE op2);
8622 
8623   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8624 
8625   ins_encode %{
8626     __ addl($op1$$Register, $op2$$constant);
8627   %}
8628   ins_pipe(ialu_reg_reg);
8629 %}
8630 
8631 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8632 %{
8633   match(Set cr (OverflowSubI op1 op2));
8634 
8635   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8636   ins_encode %{
8637     __ cmpl($op1$$Register, $op2$$Register);
8638   %}
8639   ins_pipe(ialu_reg_reg);
8640 %}
8641 
8642 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8643 %{
8644   match(Set cr (OverflowSubI op1 op2));
8645 
8646   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8647   ins_encode %{
8648     __ cmpl($op1$$Register, $op2$$constant);
8649   %}
8650   ins_pipe(ialu_reg_reg);
8651 %}
8652 
8653 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8654 %{
8655   match(Set cr (OverflowSubI zero op2));
8656   effect(DEF cr, USE_KILL op2);
8657 
8658   format %{ "NEG    $op2\t# overflow check int" %}
8659   ins_encode %{
8660     __ negl($op2$$Register);
8661   %}
8662   ins_pipe(ialu_reg_reg);
8663 %}
8664 
8665 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8666 %{
8667   match(Set cr (OverflowMulI op1 op2));
8668   effect(DEF cr, USE_KILL op1, USE op2);
8669 
8670   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8671   ins_encode %{
8672     __ imull($op1$$Register, $op2$$Register);
8673   %}
8674   ins_pipe(ialu_reg_reg_alu0);
8675 %}
8676 
8677 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8678 %{
8679   match(Set cr (OverflowMulI op1 op2));
8680   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8681 
8682   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8683   ins_encode %{
8684     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8685   %}
8686   ins_pipe(ialu_reg_reg_alu0);
8687 %}
8688 
8689 //----------Long Instructions------------------------------------------------
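     // On 32-bit x86 a long lives in a register pair: the named register holds
     // the low 32 bits and HIGH_FROM_LOW(reg) the high 32 bits.  Add/subtract
     // chain the carry with ADD/ADC and SUB/SBB, while the logical operations
     // are simply applied to each half independently.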
8690 // Add Long Register with Register
8691 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8692   match(Set dst (AddL dst src));
8693   effect(KILL cr);
8694   ins_cost(200);
8695   format %{ "ADD    $dst.lo,$src.lo\n\t"
8696             "ADC    $dst.hi,$src.hi" %}
8697   opcode(0x03, 0x13);
8698   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8699   ins_pipe( ialu_reg_reg_long );
8700 %}
8701 
8702 // Add Long Register with Immediate
8703 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8704   match(Set dst (AddL dst src));
8705   effect(KILL cr);
8706   format %{ "ADD    $dst.lo,$src.lo\n\t"
8707             "ADC    $dst.hi,$src.hi" %}
8708   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8709   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8710   ins_pipe( ialu_reg_long );
8711 %}
8712 
8713 // Add Long Register with Memory
8714 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8715   match(Set dst (AddL dst (LoadL mem)));
8716   effect(KILL cr);
8717   ins_cost(125);
8718   format %{ "ADD    $dst.lo,$mem\n\t"
8719             "ADC    $dst.hi,$mem+4" %}
8720   opcode(0x03, 0x13);
8721   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8722   ins_pipe( ialu_reg_long_mem );
8723 %}
8724 
8725 // Subtract Long Register with Register.
8726 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8727   match(Set dst (SubL dst src));
8728   effect(KILL cr);
8729   ins_cost(200);
8730   format %{ "SUB    $dst.lo,$src.lo\n\t"
8731             "SBB    $dst.hi,$src.hi" %}
8732   opcode(0x2B, 0x1B);
8733   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8734   ins_pipe( ialu_reg_reg_long );
8735 %}
8736 
8737 // Subtract Long Register with Immediate
8738 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8739   match(Set dst (SubL dst src));
8740   effect(KILL cr);
8741   format %{ "SUB    $dst.lo,$src.lo\n\t"
8742             "SBB    $dst.hi,$src.hi" %}
8743   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8744   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8745   ins_pipe( ialu_reg_long );
8746 %}
8747 
8748 // Subtract Long Register with Memory
8749 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8750   match(Set dst (SubL dst (LoadL mem)));
8751   effect(KILL cr);
8752   ins_cost(125);
8753   format %{ "SUB    $dst.lo,$mem\n\t"
8754             "SBB    $dst.hi,$mem+4" %}
8755   opcode(0x2B, 0x1B);
8756   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8757   ins_pipe( ialu_reg_long_mem );
8758 %}
8759 
8760 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8761   match(Set dst (SubL zero dst));
8762   effect(KILL cr);
8763   ins_cost(300);
8764   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8765   ins_encode( neg_long(dst) );
8766   ins_pipe( ialu_reg_reg_long );
8767 %}
8768 
8769 // And Long Register with Register
8770 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8771   match(Set dst (AndL dst src));
8772   effect(KILL cr);
8773   format %{ "AND    $dst.lo,$src.lo\n\t"
8774             "AND    $dst.hi,$src.hi" %}
8775   opcode(0x23,0x23);
8776   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8777   ins_pipe( ialu_reg_reg_long );
8778 %}
8779 
8780 // And Long Register with Immediate
8781 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8782   match(Set dst (AndL dst src));
8783   effect(KILL cr);
8784   format %{ "AND    $dst.lo,$src.lo\n\t"
8785             "AND    $dst.hi,$src.hi" %}
8786   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8787   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8788   ins_pipe( ialu_reg_long );
8789 %}
8790 
8791 // And Long Register with Memory
8792 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8793   match(Set dst (AndL dst (LoadL mem)));
8794   effect(KILL cr);
8795   ins_cost(125);
8796   format %{ "AND    $dst.lo,$mem\n\t"
8797             "AND    $dst.hi,$mem+4" %}
8798   opcode(0x23, 0x23);
8799   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8800   ins_pipe( ialu_reg_long_mem );
8801 %}
8802 
8803 // BMI1 instructions
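     // For the 64-bit forms ANDN is applied to both halves independently.  The
     // lowest-set-bit operations (BLSI/BLSMSK/BLSR) first process the low half
     // and then branch on the flags the BMI1 instruction produced (ZF for BLSI,
     // CF for BLSMSK/BLSR) to decide whether the high half must be processed
     // as well.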
8804 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8805   match(Set dst (AndL (XorL src1 minus_1) src2));
8806   predicate(UseBMI1Instructions);
8807   effect(KILL cr, TEMP dst);
8808 
8809   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8810             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8811          %}
8812 
8813   ins_encode %{
8814     Register Rdst = $dst$$Register;
8815     Register Rsrc1 = $src1$$Register;
8816     Register Rsrc2 = $src2$$Register;
8817     __ andnl(Rdst, Rsrc1, Rsrc2);
8818     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8819   %}
8820   ins_pipe(ialu_reg_reg_long);
8821 %}
8822 
8823 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8824   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8825   predicate(UseBMI1Instructions);
8826   effect(KILL cr, TEMP dst);
8827 
8828   ins_cost(125);
8829   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8830             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8831          %}
8832 
8833   ins_encode %{
8834     Register Rdst = $dst$$Register;
8835     Register Rsrc1 = $src1$$Register;
8836     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8837 
8838     __ andnl(Rdst, Rsrc1, $src2$$Address);
8839     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8840   %}
8841   ins_pipe(ialu_reg_mem);
8842 %}
8843 
8844 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8845   match(Set dst (AndL (SubL imm_zero src) src));
8846   predicate(UseBMI1Instructions);
8847   effect(KILL cr, TEMP dst);
8848 
8849   format %{ "MOVL   $dst.hi, 0\n\t"
8850             "BLSIL  $dst.lo, $src.lo\n\t"
8851             "JNZ    done\n\t"
8852             "BLSIL  $dst.hi, $src.hi\n"
8853             "done:"
8854          %}
8855 
8856   ins_encode %{
8857     Label done;
8858     Register Rdst = $dst$$Register;
8859     Register Rsrc = $src$$Register;
8860     __ movl(HIGH_FROM_LOW(Rdst), 0);
8861     __ blsil(Rdst, Rsrc);
8862     __ jccb(Assembler::notZero, done);
8863     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8864     __ bind(done);
8865   %}
8866   ins_pipe(ialu_reg);
8867 %}
8868 
8869 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8870   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8871   predicate(UseBMI1Instructions);
8872   effect(KILL cr, TEMP dst);
8873 
8874   ins_cost(125);
8875   format %{ "MOVL   $dst.hi, 0\n\t"
8876             "BLSIL  $dst.lo, $src\n\t"
8877             "JNZ    done\n\t"
8878             "BLSIL  $dst.hi, $src+4\n"
8879             "done:"
8880          %}
8881 
8882   ins_encode %{
8883     Label done;
8884     Register Rdst = $dst$$Register;
8885     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8886 
8887     __ movl(HIGH_FROM_LOW(Rdst), 0);
8888     __ blsil(Rdst, $src$$Address);
8889     __ jccb(Assembler::notZero, done);
8890     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8891     __ bind(done);
8892   %}
8893   ins_pipe(ialu_reg_mem);
8894 %}
8895 
8896 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8897 %{
8898   match(Set dst (XorL (AddL src minus_1) src));
8899   predicate(UseBMI1Instructions);
8900   effect(KILL cr, TEMP dst);
8901 
8902   format %{ "MOVL    $dst.hi, 0\n\t"
8903             "BLSMSKL $dst.lo, $src.lo\n\t"
8904             "JNC     done\n\t"
8905             "BLSMSKL $dst.hi, $src.hi\n"
8906             "done:"
8907          %}
8908 
8909   ins_encode %{
8910     Label done;
8911     Register Rdst = $dst$$Register;
8912     Register Rsrc = $src$$Register;
8913     __ movl(HIGH_FROM_LOW(Rdst), 0);
8914     __ blsmskl(Rdst, Rsrc);
8915     __ jccb(Assembler::carryClear, done);
8916     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8917     __ bind(done);
8918   %}
8919 
8920   ins_pipe(ialu_reg);
8921 %}
8922 
8923 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8924 %{
8925   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8926   predicate(UseBMI1Instructions);
8927   effect(KILL cr, TEMP dst);
8928 
8929   ins_cost(125);
8930   format %{ "MOVL    $dst.hi, 0\n\t"
8931             "BLSMSKL $dst.lo, $src\n\t"
8932             "JNC     done\n\t"
8933             "BLSMSKL $dst.hi, $src+4\n"
8934             "done:"
8935          %}
8936 
8937   ins_encode %{
8938     Label done;
8939     Register Rdst = $dst$$Register;
8940     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8941 
8942     __ movl(HIGH_FROM_LOW(Rdst), 0);
8943     __ blsmskl(Rdst, $src$$Address);
8944     __ jccb(Assembler::carryClear, done);
8945     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8946     __ bind(done);
8947   %}
8948 
8949   ins_pipe(ialu_reg_mem);
8950 %}
8951 
8952 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8953 %{
8954   match(Set dst (AndL (AddL src minus_1) src) );
8955   predicate(UseBMI1Instructions);
8956   effect(KILL cr, TEMP dst);
8957 
8958   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8959             "BLSRL  $dst.lo, $src.lo\n\t"
8960             "JNC    done\n\t"
8961             "BLSRL  $dst.hi, $src.hi\n"
8962             "done:"
8963   %}
8964 
8965   ins_encode %{
8966     Label done;
8967     Register Rdst = $dst$$Register;
8968     Register Rsrc = $src$$Register;
8969     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8970     __ blsrl(Rdst, Rsrc);
8971     __ jccb(Assembler::carryClear, done);
8972     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8973     __ bind(done);
8974   %}
8975 
8976   ins_pipe(ialu_reg);
8977 %}
8978 
8979 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8980 %{
8981   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8982   predicate(UseBMI1Instructions);
8983   effect(KILL cr, TEMP dst);
8984 
8985   ins_cost(125);
8986   format %{ "MOVL   $dst.hi, $src+4\n\t"
8987             "BLSRL  $dst.lo, $src\n\t"
8988             "JNC    done\n\t"
8989             "BLSRL  $dst.hi, $src+4\n"
8990             "done:"
8991   %}
8992 
8993   ins_encode %{
8994     Label done;
8995     Register Rdst = $dst$$Register;
8996     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8997     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8998     __ blsrl(Rdst, $src$$Address);
8999     __ jccb(Assembler::carryClear, done);
9000     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9001     __ bind(done);
9002   %}
9003 
9004   ins_pipe(ialu_reg_mem);
9005 %}
9006 
9007 // Or Long Register with Register
9008 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9009   match(Set dst (OrL dst src));
9010   effect(KILL cr);
9011   format %{ "OR     $dst.lo,$src.lo\n\t"
9012             "OR     $dst.hi,$src.hi" %}
9013   opcode(0x0B,0x0B);
9014   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9015   ins_pipe( ialu_reg_reg_long );
9016 %}
9017 
9018 // Or Long Register with Immediate
9019 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9020   match(Set dst (OrL dst src));
9021   effect(KILL cr);
9022   format %{ "OR     $dst.lo,$src.lo\n\t"
9023             "OR     $dst.hi,$src.hi" %}
9024   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9025   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9026   ins_pipe( ialu_reg_long );
9027 %}
9028 
9029 // Or Long Register with Memory
9030 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9031   match(Set dst (OrL dst (LoadL mem)));
9032   effect(KILL cr);
9033   ins_cost(125);
9034   format %{ "OR     $dst.lo,$mem\n\t"
9035             "OR     $dst.hi,$mem+4" %}
9036   opcode(0x0B,0x0B);
9037   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9038   ins_pipe( ialu_reg_long_mem );
9039 %}
9040 
9041 // Xor Long Register with Register
9042 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9043   match(Set dst (XorL dst src));
9044   effect(KILL cr);
9045   format %{ "XOR    $dst.lo,$src.lo\n\t"
9046             "XOR    $dst.hi,$src.hi" %}
9047   opcode(0x33,0x33);
9048   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9049   ins_pipe( ialu_reg_reg_long );
9050 %}
9051 
9052 // Xor Long Register with Immediate -1
9053 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9054   match(Set dst (XorL dst imm));
9055   format %{ "NOT    $dst.lo\n\t"
9056             "NOT    $dst.hi" %}
9057   ins_encode %{
9058      __ notl($dst$$Register);
9059      __ notl(HIGH_FROM_LOW($dst$$Register));
9060   %}
9061   ins_pipe( ialu_reg_long );
9062 %}
9063 
9064 // Xor Long Register with Immediate
9065 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9066   match(Set dst (XorL dst src));
9067   effect(KILL cr);
9068   format %{ "XOR    $dst.lo,$src.lo\n\t"
9069             "XOR    $dst.hi,$src.hi" %}
9070   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9071   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9072   ins_pipe( ialu_reg_long );
9073 %}
9074 
9075 // Xor Long Register with Memory
9076 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9077   match(Set dst (XorL dst (LoadL mem)));
9078   effect(KILL cr);
9079   ins_cost(125);
9080   format %{ "XOR    $dst.lo,$mem\n\t"
9081             "XOR    $dst.hi,$mem+4" %}
9082   opcode(0x33,0x33);
9083   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9084   ins_pipe( ialu_reg_long_mem );
9085 %}
9086 
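     // Long shifts: counts of 1-3 are expanded into ADD/ADC doubling steps
     // (when UseNewLongLShift), counts of 1-31 use a SHLD/SHRD double-width
     // shift plus a single shift of the other half, counts of 32-63 move one
     // half into the other and shift by the remaining amount, and variable
     // counts test bit 5 of ECX first because the hardware 32-bit shifts only
     // use the low five bits of the count.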
9087 // Shift Left Long by 1
9088 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9089   predicate(UseNewLongLShift);
9090   match(Set dst (LShiftL dst cnt));
9091   effect(KILL cr);
9092   ins_cost(100);
9093   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9094             "ADC    $dst.hi,$dst.hi" %}
9095   ins_encode %{
9096     __ addl($dst$$Register,$dst$$Register);
9097     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9098   %}
9099   ins_pipe( ialu_reg_long );
9100 %}
9101 
9102 // Shift Left Long by 2
9103 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9104   predicate(UseNewLongLShift);
9105   match(Set dst (LShiftL dst cnt));
9106   effect(KILL cr);
9107   ins_cost(100);
9108   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9109             "ADC    $dst.hi,$dst.hi\n\t"
9110             "ADD    $dst.lo,$dst.lo\n\t"
9111             "ADC    $dst.hi,$dst.hi" %}
9112   ins_encode %{
9113     __ addl($dst$$Register,$dst$$Register);
9114     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9115     __ addl($dst$$Register,$dst$$Register);
9116     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9117   %}
9118   ins_pipe( ialu_reg_long );
9119 %}
9120 
9121 // Shift Left Long by 3
9122 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9123   predicate(UseNewLongLShift);
9124   match(Set dst (LShiftL dst cnt));
9125   effect(KILL cr);
9126   ins_cost(100);
9127   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9128             "ADC    $dst.hi,$dst.hi\n\t"
9129             "ADD    $dst.lo,$dst.lo\n\t"
9130             "ADC    $dst.hi,$dst.hi\n\t"
9131             "ADD    $dst.lo,$dst.lo\n\t"
9132             "ADC    $dst.hi,$dst.hi" %}
9133   ins_encode %{
9134     __ addl($dst$$Register,$dst$$Register);
9135     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9136     __ addl($dst$$Register,$dst$$Register);
9137     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9138     __ addl($dst$$Register,$dst$$Register);
9139     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9140   %}
9141   ins_pipe( ialu_reg_long );
9142 %}
9143 
9144 // Shift Left Long by 1-31
9145 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9146   match(Set dst (LShiftL dst cnt));
9147   effect(KILL cr);
9148   ins_cost(200);
9149   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9150             "SHL    $dst.lo,$cnt" %}
9151   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9152   ins_encode( move_long_small_shift(dst,cnt) );
9153   ins_pipe( ialu_reg_long );
9154 %}
9155 
9156 // Shift Left Long by 32-63
9157 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9158   match(Set dst (LShiftL dst cnt));
9159   effect(KILL cr);
9160   ins_cost(300);
9161   format %{ "MOV    $dst.hi,$dst.lo\n"
9162           "\tSHL    $dst.hi,$cnt-32\n"
9163           "\tXOR    $dst.lo,$dst.lo" %}
9164   opcode(0xC1, 0x4);  /* C1 /4 ib */
9165   ins_encode( move_long_big_shift_clr(dst,cnt) );
9166   ins_pipe( ialu_reg_long );
9167 %}
9168 
9169 // Shift Left Long by variable
9170 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9171   match(Set dst (LShiftL dst shift));
9172   effect(KILL cr);
9173   ins_cost(500+200);
9174   size(17);
9175   format %{ "TEST   $shift,32\n\t"
9176             "JEQ,s  small\n\t"
9177             "MOV    $dst.hi,$dst.lo\n\t"
9178             "XOR    $dst.lo,$dst.lo\n"
9179     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9180             "SHL    $dst.lo,$shift" %}
9181   ins_encode( shift_left_long( dst, shift ) );
9182   ins_pipe( pipe_slow );
9183 %}
9184 
9185 // Shift Right Long by 1-31
9186 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9187   match(Set dst (URShiftL dst cnt));
9188   effect(KILL cr);
9189   ins_cost(200);
9190   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9191             "SHR    $dst.hi,$cnt" %}
9192   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9193   ins_encode( move_long_small_shift(dst,cnt) );
9194   ins_pipe( ialu_reg_long );
9195 %}
9196 
9197 // Shift Right Long by 32-63
9198 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9199   match(Set dst (URShiftL dst cnt));
9200   effect(KILL cr);
9201   ins_cost(300);
9202   format %{ "MOV    $dst.lo,$dst.hi\n"
9203           "\tSHR    $dst.lo,$cnt-32\n"
9204           "\tXOR    $dst.hi,$dst.hi" %}
9205   opcode(0xC1, 0x5);  /* C1 /5 ib */
9206   ins_encode( move_long_big_shift_clr(dst,cnt) );
9207   ins_pipe( ialu_reg_long );
9208 %}
9209 
9210 // Shift Right Long by variable
9211 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9212   match(Set dst (URShiftL dst shift));
9213   effect(KILL cr);
9214   ins_cost(600);
9215   size(17);
9216   format %{ "TEST   $shift,32\n\t"
9217             "JEQ,s  small\n\t"
9218             "MOV    $dst.lo,$dst.hi\n\t"
9219             "XOR    $dst.hi,$dst.hi\n"
9220     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9221             "SHR    $dst.hi,$shift" %}
9222   ins_encode( shift_right_long( dst, shift ) );
9223   ins_pipe( pipe_slow );
9224 %}
9225 
9226 // Shift Right Long by 1-31
9227 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9228   match(Set dst (RShiftL dst cnt));
9229   effect(KILL cr);
9230   ins_cost(200);
9231   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9232             "SAR    $dst.hi,$cnt" %}
9233   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9234   ins_encode( move_long_small_shift(dst,cnt) );
9235   ins_pipe( ialu_reg_long );
9236 %}
9237 
9238 // Shift Right Long by 32-63
9239 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9240   match(Set dst (RShiftL dst cnt));
9241   effect(KILL cr);
9242   ins_cost(300);
9243   format %{ "MOV    $dst.lo,$dst.hi\n"
9244           "\tSAR    $dst.lo,$cnt-32\n"
9245           "\tSAR    $dst.hi,31" %}
9246   opcode(0xC1, 0x7);  /* C1 /7 ib */
9247   ins_encode( move_long_big_shift_sign(dst,cnt) );
9248   ins_pipe( ialu_reg_long );
9249 %}
9250 
9251 // Shift Right arithmetic Long by variable
9252 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9253   match(Set dst (RShiftL dst shift));
9254   effect(KILL cr);
9255   ins_cost(600);
9256   size(18);
9257   format %{ "TEST   $shift,32\n\t"
9258             "JEQ,s  small\n\t"
9259             "MOV    $dst.lo,$dst.hi\n\t"
9260             "SAR    $dst.hi,31\n"
9261     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9262             "SAR    $dst.hi,$shift" %}
9263   ins_encode( shift_right_arith_long( dst, shift ) );
9264   ins_pipe( pipe_slow );
9265 %}
9266 
9267 
9268 //----------Double Instructions------------------------------------------------
9269 // Double Math
9270 
9271 // Compare & branch
9272 
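     // Three flavors of double compare, selected by predicate: plain x87
     // (UseSSE<=1) goes through FNSTSW/SAHF, the P6 variants use FUCOMIP when
     // cmov is supported, and the SSE2 variants use UCOMISD.  The eFlagsRegU
     // forms append a fixup so an unordered (NaN) result is treated as "less"
     // (CF set); the eFlagsRegUCF forms omit the fixup.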
9273 // P6 version of double compare, sets condition codes in EFLAGS
9274 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9275   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9276   match(Set cr (CmpD src1 src2));
9277   effect(KILL rax);
9278   ins_cost(150);
9279   format %{ "FLD    $src1\n\t"
9280             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9281             "JNP    exit\n\t"
9282             "MOV    ah,1       // saw a NaN, set CF\n\t"
9283             "SAHF\n"
9284      "exit:\tNOP               // avoid branch to branch" %}
9285   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9286   ins_encode( Push_Reg_DPR(src1),
9287               OpcP, RegOpc(src2),
9288               cmpF_P6_fixup );
9289   ins_pipe( pipe_slow );
9290 %}
9291 
9292 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9293   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9294   match(Set cr (CmpD src1 src2));
9295   ins_cost(150);
9296   format %{ "FLD    $src1\n\t"
9297             "FUCOMIP ST,$src2  // P6 instruction" %}
9298   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9299   ins_encode( Push_Reg_DPR(src1),
9300               OpcP, RegOpc(src2));
9301   ins_pipe( pipe_slow );
9302 %}
9303 
9304 // Compare & branch
9305 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9306   predicate(UseSSE<=1);
9307   match(Set cr (CmpD src1 src2));
9308   effect(KILL rax);
9309   ins_cost(200);
9310   format %{ "FLD    $src1\n\t"
9311             "FCOMp  $src2\n\t"
9312             "FNSTSW AX\n\t"
9313             "TEST   AX,0x400\n\t"
9314             "JZ,s   flags\n\t"
9315             "MOV    AH,1\t# unordered treat as LT\n"
9316     "flags:\tSAHF" %}
9317   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9318   ins_encode( Push_Reg_DPR(src1),
9319               OpcP, RegOpc(src2),
9320               fpu_flags);
9321   ins_pipe( pipe_slow );
9322 %}
9323 
9324 // Compare vs zero into -1,0,1
9325 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9326   predicate(UseSSE<=1);
9327   match(Set dst (CmpD3 src1 zero));
9328   effect(KILL cr, KILL rax);
9329   ins_cost(280);
9330   format %{ "FTSTD  $dst,$src1" %}
9331   opcode(0xE4, 0xD9);
9332   ins_encode( Push_Reg_DPR(src1),
9333               OpcS, OpcP, PopFPU,
9334               CmpF_Result(dst));
9335   ins_pipe( pipe_slow );
9336 %}
9337 
9338 // Compare into -1,0,1
9339 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9340   predicate(UseSSE<=1);
9341   match(Set dst (CmpD3 src1 src2));
9342   effect(KILL cr, KILL rax);
9343   ins_cost(300);
9344   format %{ "FCMPD  $dst,$src1,$src2" %}
9345   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9346   ins_encode( Push_Reg_DPR(src1),
9347               OpcP, RegOpc(src2),
9348               CmpF_Result(dst));
9349   ins_pipe( pipe_slow );
9350 %}
9351 
9352 // double compare and set condition codes in EFLAGS by XMM regs
9353 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9354   predicate(UseSSE>=2);
9355   match(Set cr (CmpD src1 src2));
9356   ins_cost(145);
9357   format %{ "UCOMISD $src1,$src2\n\t"
9358             "JNP,s   exit\n\t"
9359             "PUSHF\t# saw NaN, set CF\n\t"
9360             "AND     [rsp], #0xffffff2b\n\t"
9361             "POPF\n"
9362     "exit:" %}
9363   ins_encode %{
9364     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9365     emit_cmpfp_fixup(_masm);
9366   %}
9367   ins_pipe( pipe_slow );
9368 %}
9369 
9370 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9371   predicate(UseSSE>=2);
9372   match(Set cr (CmpD src1 src2));
9373   ins_cost(100);
9374   format %{ "UCOMISD $src1,$src2" %}
9375   ins_encode %{
9376     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9377   %}
9378   ins_pipe( pipe_slow );
9379 %}
9380 
9381 // double compare and set condition codes in EFLAGS by XMM regs
9382 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9383   predicate(UseSSE>=2);
9384   match(Set cr (CmpD src1 (LoadD src2)));
9385   ins_cost(145);
9386   format %{ "UCOMISD $src1,$src2\n\t"
9387             "JNP,s   exit\n\t"
9388             "PUSHF\t# saw NaN, set CF\n\t"
9389             "AND     [rsp], #0xffffff2b\n\t"
9390             "POPF\n"
9391     "exit:" %}
9392   ins_encode %{
9393     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9394     emit_cmpfp_fixup(_masm);
9395   %}
9396   ins_pipe( pipe_slow );
9397 %}
9398 
9399 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9400   predicate(UseSSE>=2);
9401   match(Set cr (CmpD src1 (LoadD src2)));
9402   ins_cost(100);
9403   format %{ "UCOMISD $src1,$src2" %}
9404   ins_encode %{
9405     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9406   %}
9407   ins_pipe( pipe_slow );
9408 %}
9409 
9410 // Compare into -1,0,1 in XMM
9411 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9412   predicate(UseSSE>=2);
9413   match(Set dst (CmpD3 src1 src2));
9414   effect(KILL cr);
9415   ins_cost(255);
9416   format %{ "UCOMISD $src1, $src2\n\t"
9417             "MOV     $dst, #-1\n\t"
9418             "JP,s    done\n\t"
9419             "JB,s    done\n\t"
9420             "SETNE   $dst\n\t"
9421             "MOVZB   $dst, $dst\n"
9422     "done:" %}
9423   ins_encode %{
9424     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9425     emit_cmpfp3(_masm, $dst$$Register);
9426   %}
9427   ins_pipe( pipe_slow );
9428 %}
9429 
9430 // Compare into -1,0,1 in XMM and memory
9431 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9432   predicate(UseSSE>=2);
9433   match(Set dst (CmpD3 src1 (LoadD src2)));
9434   effect(KILL cr);
9435   ins_cost(275);
9436   format %{ "UCOMISD $src1, $src2\n\t"
9437             "MOV     $dst, #-1\n\t"
9438             "JP,s    done\n\t"
9439             "JB,s    done\n\t"
9440             "SETNE   $dst\n\t"
9441             "MOVZB   $dst, $dst\n"
9442     "done:" %}
9443   ins_encode %{
9444     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9445     emit_cmpfp3(_masm, $dst$$Register);
9446   %}
9447   ins_pipe( pipe_slow );
9448 %}
9449 
9450 
9451 instruct subDPR_reg(regDPR dst, regDPR src) %{
9452   predicate (UseSSE <=1);
9453   match(Set dst (SubD dst src));
9454 
9455   format %{ "FLD    $src\n\t"
9456             "DSUBp  $dst,ST" %}
9457   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9458   ins_cost(150);
9459   ins_encode( Push_Reg_DPR(src),
9460               OpcP, RegOpc(dst) );
9461   ins_pipe( fpu_reg_reg );
9462 %}
9463 
9464 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9465   predicate (UseSSE <=1);
9466   match(Set dst (RoundDouble (SubD src1 src2)));
9467   ins_cost(250);
9468 
9469   format %{ "FLD    $src2\n\t"
9470             "DSUB   ST,$src1\n\t"
9471             "FSTP_D $dst\t# D-round" %}
9472   opcode(0xD8, 0x5);
9473   ins_encode( Push_Reg_DPR(src2),
9474               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9475   ins_pipe( fpu_mem_reg_reg );
9476 %}
9477 
9478 
9479 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9480   predicate (UseSSE <=1);
9481   match(Set dst (SubD dst (LoadD src)));
9482   ins_cost(150);
9483 
9484   format %{ "FLD    $src\n\t"
9485             "DSUBp  $dst,ST" %}
9486   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9487   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9488               OpcP, RegOpc(dst) );
9489   ins_pipe( fpu_reg_mem );
9490 %}
9491 
9492 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9493   predicate (UseSSE<=1);
9494   match(Set dst (AbsD src));
9495   ins_cost(100);
9496   format %{ "FABS" %}
9497   opcode(0xE1, 0xD9);
9498   ins_encode( OpcS, OpcP );
9499   ins_pipe( fpu_reg_reg );
9500 %}
9501 
9502 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9503   predicate(UseSSE<=1);
9504   match(Set dst (NegD src));
9505   ins_cost(100);
9506   format %{ "FCHS" %}
9507   opcode(0xE0, 0xD9);
9508   ins_encode( OpcS, OpcP );
9509   ins_pipe( fpu_reg_reg );
9510 %}
9511 
9512 instruct addDPR_reg(regDPR dst, regDPR src) %{
9513   predicate(UseSSE<=1);
9514   match(Set dst (AddD dst src));
9515   format %{ "FLD    $src\n\t"
9516             "DADD   $dst,ST" %}
9517   size(4);
9518   ins_cost(150);
9519   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9520   ins_encode( Push_Reg_DPR(src),
9521               OpcP, RegOpc(dst) );
9522   ins_pipe( fpu_reg_reg );
9523 %}
9524 
9525 
9526 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9527   predicate(UseSSE<=1);
9528   match(Set dst (RoundDouble (AddD src1 src2)));
9529   ins_cost(250);
9530 
9531   format %{ "FLD    $src2\n\t"
9532             "DADD   ST,$src1\n\t"
9533             "FSTP_D $dst\t# D-round" %}
9534   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9535   ins_encode( Push_Reg_DPR(src2),
9536               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9537   ins_pipe( fpu_mem_reg_reg );
9538 %}
9539 
9540 
9541 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9542   predicate(UseSSE<=1);
9543   match(Set dst (AddD dst (LoadD src)));
9544   ins_cost(150);
9545 
9546   format %{ "FLD    $src\n\t"
9547             "DADDp  $dst,ST" %}
9548   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9549   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9550               OpcP, RegOpc(dst) );
9551   ins_pipe( fpu_reg_mem );
9552 %}
9553 
9554 // add-to-memory
9555 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9556   predicate(UseSSE<=1);
9557   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9558   ins_cost(150);
9559 
9560   format %{ "FLD_D  $dst\n\t"
9561             "DADD   ST,$src\n\t"
9562             "FST_D  $dst" %}
9563   opcode(0xDD, 0x0);
9564   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9565               Opcode(0xD8), RegOpc(src),
9566               set_instruction_start,
9567               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9568   ins_pipe( fpu_reg_mem );
9569 %}
9570 
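// Add the constant 1.0 using FLD1 rather than a constant-table load.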
9571 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9572   predicate(UseSSE<=1);
9573   match(Set dst (AddD dst con));
9574   ins_cost(125);
9575   format %{ "FLD1\n\t"
9576             "DADDp  $dst,ST" %}
9577   ins_encode %{
9578     __ fld1();
9579     __ faddp($dst$$reg);
9580   %}
9581   ins_pipe(fpu_reg);
9582 %}
9583 
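// Add a double constant loaded from the constant table (the predicate
// excludes the 0.0 and 1.0 immediates).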
9584 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9585   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9586   match(Set dst (AddD dst con));
9587   ins_cost(200);
9588   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9589             "DADDp  $dst,ST" %}
9590   ins_encode %{
9591     __ fld_d($constantaddress($con));
9592     __ faddp($dst$$reg);
9593   %}
9594   ins_pipe(fpu_reg_mem);
9595 %}
9596 
9597 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9598   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9599   match(Set dst (RoundDouble (AddD src con)));
9600   ins_cost(200);
9601   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9602             "DADD   ST,$src\n\t"
9603             "FSTP_D $dst\t# D-round" %}
9604   ins_encode %{
9605     __ fld_d($constantaddress($con));
9606     __ fadd($src$$reg);
9607     __ fstp_d(Address(rsp, $dst$$disp));
9608   %}
9609   ins_pipe(fpu_mem_reg_con);
9610 %}
9611 
9612 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9613   predicate(UseSSE<=1);
9614   match(Set dst (MulD dst src));
9615   format %{ "FLD    $src\n\t"
9616             "DMULp  $dst,ST" %}
9617   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9618   ins_cost(150);
9619   ins_encode( Push_Reg_DPR(src),
9620               OpcP, RegOpc(dst) );
9621   ins_pipe( fpu_reg_reg );
9622 %}
9623 
9624 // Strict FP instruction biases argument before multiply then
9625 // biases result to avoid double rounding of subnormals.
9626 //
9627 // scale arg1 by multiplying arg1 by 2^(-15360)
9628 // load arg2
9629 // multiply scaled arg1 by arg2
9630 // rescale product by 2^(15360)
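// (15360 is the difference between the x87 extended exponent bias, 16383,
// and the IEEE double exponent bias, 1023, so both bias factors are exact
// powers of two and only the product itself is rounded.)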
9631 //
9632 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9633   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9634   match(Set dst (MulD dst src));
9635   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9636 
9637   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9638             "DMULp  $dst,ST\n\t"
9639             "FLD    $src\n\t"
9640             "DMULp  $dst,ST\n\t"
9641             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9642             "DMULp  $dst,ST\n\t" %}
9643   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9644   ins_encode( strictfp_bias1(dst),
9645               Push_Reg_DPR(src),
9646               OpcP, RegOpc(dst),
9647               strictfp_bias2(dst) );
9648   ins_pipe( fpu_reg_reg );
9649 %}
9650 
9651 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9652   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9653   match(Set dst (MulD dst con));
9654   ins_cost(200);
9655   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9656             "DMULp  $dst,ST" %}
9657   ins_encode %{
9658     __ fld_d($constantaddress($con));
9659     __ fmulp($dst$$reg);
9660   %}
9661   ins_pipe(fpu_reg_mem);
9662 %}
9663 
9664 
9665 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9666   predicate( UseSSE<=1 );
9667   match(Set dst (MulD dst (LoadD src)));
9668   ins_cost(200);
9669   format %{ "FLD_D  $src\n\t"
9670             "DMULp  $dst,ST" %}
9671   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9672   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9673               OpcP, RegOpc(dst) );
9674   ins_pipe( fpu_reg_mem );
9675 %}
9676 
9677 //
9678 // Cisc-alternate to reg-reg multiply
9679 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9680   predicate( UseSSE<=1 );
9681   match(Set dst (MulD src (LoadD mem)));
9682   ins_cost(250);
9683   format %{ "FLD_D  $mem\n\t"
9684             "DMUL   ST,$src\n\t"
9685             "FSTP_D $dst" %}
9686   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9687   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9688               OpcReg_FPR(src),
9689               Pop_Reg_DPR(dst) );
9690   ins_pipe( fpu_reg_reg_mem );
9691 %}
9692 
9693 
9694 // MACRO3 -- addDPR a mulDPR
9695 // This instruction is a '2-address' instruction in that the result goes
9696 // back to src2.  This eliminates a move from the macro; possibly the
9697 // register allocator will have to add it back (and maybe not).
9698 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9699   predicate( UseSSE<=1 );
9700   match(Set src2 (AddD (MulD src0 src1) src2));
9701   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9702             "DMUL   ST,$src1\n\t"
9703             "DADDp  $src2,ST" %}
9704   ins_cost(250);
9705   opcode(0xDD); /* LoadD DD /0 */
9706   ins_encode( Push_Reg_FPR(src0),
9707               FMul_ST_reg(src1),
9708               FAddP_reg_ST(src2) );
9709   ins_pipe( fpu_reg_reg_reg );
9710 %}
9711 
9712 
9713 // MACRO3 -- subDPR a mulDPR
9714 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9715   predicate( UseSSE<=1 );
9716   match(Set src2 (SubD (MulD src0 src1) src2));
9717   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9718             "DMUL   ST,$src1\n\t"
9719             "DSUBRp $src2,ST" %}
9720   ins_cost(250);
9721   ins_encode( Push_Reg_FPR(src0),
9722               FMul_ST_reg(src1),
9723               Opcode(0xDE), Opc_plus(0xE0,src2));
9724   ins_pipe( fpu_reg_reg_reg );
9725 %}
9726 
9727 
9728 instruct divDPR_reg(regDPR dst, regDPR src) %{
9729   predicate( UseSSE<=1 );
9730   match(Set dst (DivD dst src));
9731 
9732   format %{ "FLD    $src\n\t"
9733             "FDIVp  $dst,ST" %}
9734   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9735   ins_cost(150);
9736   ins_encode( Push_Reg_DPR(src),
9737               OpcP, RegOpc(dst) );
9738   ins_pipe( fpu_reg_reg );
9739 %}
9740 
9741 // Strict FP instruction biases argument before division then
9742 // biases result, to avoid double rounding of subnormals.
9743 //
9744 // scale dividend by multiplying dividend by 2^(-15360)
9745 // load divisor
9746 // divide scaled dividend by divisor
9747 // rescale quotient by 2^(15360)
9748 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9754 
9755   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9756             "DMULp  $dst,ST\n\t"
9757             "FLD    $src\n\t"
9758             "FDIVp  $dst,ST\n\t"
9759             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9760             "DMULp  $dst,ST\n\t" %}
9761   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9762   ins_encode( strictfp_bias1(dst),
9763               Push_Reg_DPR(src),
9764               OpcP, RegOpc(dst),
9765               strictfp_bias2(dst) );
9766   ins_pipe( fpu_reg_reg );
9767 %}
9768 
9769 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9770   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9771   match(Set dst (RoundDouble (DivD src1 src2)));
9772 
9773   format %{ "FLD    $src1\n\t"
9774             "FDIV   ST,$src2\n\t"
9775             "FSTP_D $dst\t# D-round" %}
9776   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9777   ins_encode( Push_Reg_DPR(src1),
9778               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9779   ins_pipe( fpu_mem_reg_reg );
9780 %}
9781 
9782 
9783 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9784   predicate(UseSSE<=1);
9785   match(Set dst (ModD dst src));
9786   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9787 
9788   format %{ "DMOD   $dst,$src" %}
9789   ins_cost(250);
9790   ins_encode(Push_Reg_Mod_DPR(dst, src),
9791               emitModDPR(),
9792               Push_Result_Mod_DPR(src),
9793               Pop_Reg_DPR(dst));
9794   ins_pipe( pipe_slow );
9795 %}
9796 
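// SSE2 has no remainder instruction, so DMOD spills both operands to the
// stack and runs the x87 FPREM loop: FPREM leaves C2 set while the
// reduction is only partial, and after FNSTSW/SAHF C2 shows up as PF,
// hence the JP back to the loop.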
9797 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9798   predicate(UseSSE>=2);
9799   match(Set dst (ModD src0 src1));
9800   effect(KILL rax, KILL cr);
9801 
9802   format %{ "SUB    ESP,8\t # DMOD\n"
9803           "\tMOVSD  [ESP+0],$src1\n"
9804           "\tFLD_D  [ESP+0]\n"
9805           "\tMOVSD  [ESP+0],$src0\n"
9806           "\tFLD_D  [ESP+0]\n"
9807      "loop:\tFPREM\n"
9808           "\tFWAIT\n"
9809           "\tFNSTSW AX\n"
9810           "\tSAHF\n"
9811           "\tJP     loop\n"
9812           "\tFSTP_D [ESP+0]\n"
9813           "\tMOVSD  $dst,[ESP+0]\n"
9814           "\tADD    ESP,8\n"
9815           "\tFSTP   ST0\t # Restore FPU Stack"
9816     %}
9817   ins_cost(250);
9818   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9819   ins_pipe( pipe_slow );
9820 %}
9821 
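// FPTAN computes tan(ST0) and then pushes 1.0, so the trailing FSTP ST(0)
// just pops the pushed 1.0, leaving the result on top of the stack.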
9822 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9823   predicate (UseSSE<=1);
9824   match(Set dst(TanD src));
9825   format %{ "DTAN   $dst" %}
9826   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9827               Opcode(0xDD), Opcode(0xD8));   // fstp st
9828   ins_pipe( pipe_slow );
9829 %}
9830 
9831 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9832   predicate (UseSSE>=2);
9833   match(Set dst(TanD dst));
9834   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9835   format %{ "DTAN   $dst" %}
9836   ins_encode( Push_SrcD(dst),
9837               Opcode(0xD9), Opcode(0xF2),    // fptan
9838               Opcode(0xDD), Opcode(0xD8),   // fstp st
9839               Push_ResultD(dst) );
9840   ins_pipe( pipe_slow );
9841 %}
9842 
9843 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9844   predicate (UseSSE<=1);
9845   match(Set dst(AtanD dst src));
9846   format %{ "DATA   $dst,$src" %}
9847   opcode(0xD9, 0xF3);
9848   ins_encode( Push_Reg_DPR(src),
9849               OpcP, OpcS, RegOpc(dst) );
9850   ins_pipe( pipe_slow );
9851 %}
9852 
9853 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9854   predicate (UseSSE>=2);
9855   match(Set dst(AtanD dst src));
9856   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9857   format %{ "DATA   $dst,$src" %}
9858   opcode(0xD9, 0xF3);
9859   ins_encode( Push_SrcD(src),
9860               OpcP, OpcS, Push_ResultD(dst) );
9861   ins_pipe( pipe_slow );
9862 %}
9863 
9864 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9865   predicate (UseSSE<=1);
9866   match(Set dst (SqrtD src));
9867   format %{ "DSQRT  $dst,$src" %}
9868   opcode(0xFA, 0xD9);
9869   ins_encode( Push_Reg_DPR(src),
9870               OpcS, OpcP, Pop_Reg_DPR(dst) );
9871   ins_pipe( pipe_slow );
9872 %}
9873 
9874 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9875   predicate (UseSSE<=1);
9876   // The source Double operand on FPU stack
9877   match(Set dst (Log10D src));
9878   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9879   // fxch         ; swap ST(0) with ST(1)
9880   // fyl2x        ; compute log_10(2) * log_2(x)
9881   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9882             "FXCH   \n\t"
9883             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9884          %}
9885   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9886               Opcode(0xD9), Opcode(0xC9),   // fxch
9887               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9888 
9889   ins_pipe( pipe_slow );
9890 %}
9891 
9892 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9893   predicate (UseSSE>=2);
9894   effect(KILL cr);
9895   match(Set dst (Log10D src));
9896   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9897   // fyl2x        ; compute log_10(2) * log_2(x)
9898   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9899             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9900          %}
9901   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9902               Push_SrcD(src),
9903               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9904               Push_ResultD(dst));
9905 
9906   ins_pipe( pipe_slow );
9907 %}
9908 
9909 //-------------Float Instructions-------------------------------
9910 // Float Math
9911 
9912 // Code for float compare:
9913 //     fcompp();
9914 //     fwait(); fnstsw_ax();
9915 //     sahf();
9916 //     movl(dst, unordered_result);
9917 //     jcc(Assembler::parity, exit);
9918 //     movl(dst, less_result);
9919 //     jcc(Assembler::below, exit);
9920 //     movl(dst, equal_result);
9921 //     jcc(Assembler::equal, exit);
9922 //     movl(dst, greater_result);
9923 //   exit:
9924 
9925 // P6 version of float compare, sets condition codes in EFLAGS
9926 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9927   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9928   match(Set cr (CmpF src1 src2));
9929   effect(KILL rax);
9930   ins_cost(150);
9931   format %{ "FLD    $src1\n\t"
9932             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9933             "JNP    exit\n\t"
9934             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9935             "SAHF\n"
9936      "exit:\tNOP               // avoid branch to branch" %}
9937   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9938   ins_encode( Push_Reg_DPR(src1),
9939               OpcP, RegOpc(src2),
9940               cmpF_P6_fixup );
9941   ins_pipe( pipe_slow );
9942 %}
9943 
9944 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9945   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9946   match(Set cr (CmpF src1 src2));
9947   ins_cost(100);
9948   format %{ "FLD    $src1\n\t"
9949             "FUCOMIP ST,$src2  // P6 instruction" %}
9950   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9951   ins_encode( Push_Reg_DPR(src1),
9952               OpcP, RegOpc(src2));
9953   ins_pipe( pipe_slow );
9954 %}
9955 
9956 
9957 // Compare & branch
9958 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9959   predicate(UseSSE == 0);
9960   match(Set cr (CmpF src1 src2));
9961   effect(KILL rax);
9962   ins_cost(200);
9963   format %{ "FLD    $src1\n\t"
9964             "FCOMp  $src2\n\t"
9965             "FNSTSW AX\n\t"
9966             "TEST   AX,0x400\n\t"
9967             "JZ,s   flags\n\t"
9968             "MOV    AH,1\t# unordered treat as LT\n"
9969     "flags:\tSAHF" %}
9970   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9971   ins_encode( Push_Reg_DPR(src1),
9972               OpcP, RegOpc(src2),
9973               fpu_flags);
9974   ins_pipe( pipe_slow );
9975 %}
9976 
9977 // Compare vs zero into -1,0,1
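// Uses FTST (compare ST(0) against +0.0); CmpF_Result then reads the FPU
// status word (clobbering EAX) and materializes -1/0/1 in $dst.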
9978 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9979   predicate(UseSSE == 0);
9980   match(Set dst (CmpF3 src1 zero));
9981   effect(KILL cr, KILL rax);
9982   ins_cost(280);
9983   format %{ "FTSTF  $dst,$src1" %}
9984   opcode(0xE4, 0xD9);
9985   ins_encode( Push_Reg_DPR(src1),
9986               OpcS, OpcP, PopFPU,
9987               CmpF_Result(dst));
9988   ins_pipe( pipe_slow );
9989 %}
9990 
9991 // Compare into -1,0,1
9992 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9993   predicate(UseSSE == 0);
9994   match(Set dst (CmpF3 src1 src2));
9995   effect(KILL cr, KILL rax);
9996   ins_cost(300);
9997   format %{ "FCMPF  $dst,$src1,$src2" %}
9998   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9999   ins_encode( Push_Reg_DPR(src1),
10000               OpcP, RegOpc(src2),
10001               CmpF_Result(dst));
10002   ins_pipe( pipe_slow );
10003 %}
10004 
10005 // float compare and set condition codes in EFLAGS by XMM regs
10006 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10007   predicate(UseSSE>=1);
10008   match(Set cr (CmpF src1 src2));
10009   ins_cost(145);
10010   format %{ "UCOMISS $src1,$src2\n\t"
10011             "JNP,s   exit\n\t"
10012             "PUSHF\t# saw NaN, set CF\n\t"
10013             "AND     [rsp], #0xffffff2b\n\t"
10014             "POPF\n"
10015     "exit:" %}
10016   ins_encode %{
10017     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10018     emit_cmpfp_fixup(_masm);
10019   %}
10020   ins_pipe( pipe_slow );
10021 %}
10022 
10023 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10024   predicate(UseSSE>=1);
10025   match(Set cr (CmpF src1 src2));
10026   ins_cost(100);
10027   format %{ "UCOMISS $src1,$src2" %}
10028   ins_encode %{
10029     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10030   %}
10031   ins_pipe( pipe_slow );
10032 %}
10033 
10034 // float compare and set condition codes in EFLAGS by XMM regs
10035 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10036   predicate(UseSSE>=1);
10037   match(Set cr (CmpF src1 (LoadF src2)));
10038   ins_cost(165);
10039   format %{ "UCOMISS $src1,$src2\n\t"
10040             "JNP,s   exit\n\t"
10041             "PUSHF\t# saw NaN, set CF\n\t"
10042             "AND     [rsp], #0xffffff2b\n\t"
10043             "POPF\n"
10044     "exit:" %}
10045   ins_encode %{
10046     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10047     emit_cmpfp_fixup(_masm);
10048   %}
10049   ins_pipe( pipe_slow );
10050 %}
10051 
10052 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10053   predicate(UseSSE>=1);
10054   match(Set cr (CmpF src1 (LoadF src2)));
10055   ins_cost(100);
10056   format %{ "UCOMISS $src1,$src2" %}
10057   ins_encode %{
10058     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10059   %}
10060   ins_pipe( pipe_slow );
10061 %}
10062 
10063 // Compare into -1,0,1 in XMM
10064 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10065   predicate(UseSSE>=1);
10066   match(Set dst (CmpF3 src1 src2));
10067   effect(KILL cr);
10068   ins_cost(255);
10069   format %{ "UCOMISS $src1, $src2\n\t"
10070             "MOV     $dst, #-1\n\t"
10071             "JP,s    done\n\t"
10072             "JB,s    done\n\t"
10073             "SETNE   $dst\n\t"
10074             "MOVZB   $dst, $dst\n"
10075     "done:" %}
10076   ins_encode %{
10077     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10078     emit_cmpfp3(_masm, $dst$$Register);
10079   %}
10080   ins_pipe( pipe_slow );
10081 %}
10082 
10083 // Compare into -1,0,1 in XMM and memory
10084 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10085   predicate(UseSSE>=1);
10086   match(Set dst (CmpF3 src1 (LoadF src2)));
10087   effect(KILL cr);
10088   ins_cost(275);
10089   format %{ "UCOMISS $src1, $src2\n\t"
10090             "MOV     $dst, #-1\n\t"
10091             "JP,s    done\n\t"
10092             "JB,s    done\n\t"
10093             "SETNE   $dst\n\t"
10094             "MOVZB   $dst, $dst\n"
10095     "done:" %}
10096   ins_encode %{
10097     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10098     emit_cmpfp3(_masm, $dst$$Register);
10099   %}
10100   ins_pipe( pipe_slow );
10101 %}
10102 
10103 // Spill to obtain 24-bit precision
10104 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10105   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10106   match(Set dst (SubF src1 src2));
10107 
10108   format %{ "FSUB   $dst,$src1 - $src2" %}
10109   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10110   ins_encode( Push_Reg_FPR(src1),
10111               OpcReg_FPR(src2),
10112               Pop_Mem_FPR(dst) );
10113   ins_pipe( fpu_mem_reg_reg );
10114 %}
10115 //
10116 // This instruction does not round to 24-bits
10117 instruct subFPR_reg(regFPR dst, regFPR src) %{
10118   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10119   match(Set dst (SubF dst src));
10120 
10121   format %{ "FSUB   $dst,$src" %}
10122   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10123   ins_encode( Push_Reg_FPR(src),
10124               OpcP, RegOpc(dst) );
10125   ins_pipe( fpu_reg_reg );
10126 %}
10127 
10128 // Spill to obtain 24-bit precision
10129 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10130   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10131   match(Set dst (AddF src1 src2));
10132 
10133   format %{ "FADD   $dst,$src1,$src2" %}
10134   opcode(0xD8, 0x0); /* D8 C0+i */
10135   ins_encode( Push_Reg_FPR(src2),
10136               OpcReg_FPR(src1),
10137               Pop_Mem_FPR(dst) );
10138   ins_pipe( fpu_mem_reg_reg );
10139 %}
10140 //
10141 // This instruction does not round to 24-bits
10142 instruct addFPR_reg(regFPR dst, regFPR src) %{
10143   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10144   match(Set dst (AddF dst src));
10145 
10146   format %{ "FLD    $src\n\t"
10147             "FADDp  $dst,ST" %}
10148   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10149   ins_encode( Push_Reg_FPR(src),
10150               OpcP, RegOpc(dst) );
10151   ins_pipe( fpu_reg_reg );
10152 %}
10153 
10154 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10155   predicate(UseSSE==0);
10156   match(Set dst (AbsF src));
10157   ins_cost(100);
10158   format %{ "FABS" %}
10159   opcode(0xE1, 0xD9);
10160   ins_encode( OpcS, OpcP );
10161   ins_pipe( fpu_reg_reg );
10162 %}
10163 
10164 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10165   predicate(UseSSE==0);
10166   match(Set dst (NegF src));
10167   ins_cost(100);
10168   format %{ "FCHS" %}
10169   opcode(0xE0, 0xD9);
10170   ins_encode( OpcS, OpcP );
10171   ins_pipe( fpu_reg_reg );
10172 %}
10173 
10174 // Cisc-alternate to addFPR_reg
10175 // Spill to obtain 24-bit precision
10176 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10177   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10178   match(Set dst (AddF src1 (LoadF src2)));
10179 
10180   format %{ "FLD    $src2\n\t"
10181             "FADD   ST,$src1\n\t"
10182             "FSTP_S $dst" %}
10183   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10184   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10185               OpcReg_FPR(src1),
10186               Pop_Mem_FPR(dst) );
10187   ins_pipe( fpu_mem_reg_mem );
10188 %}
10189 //
10190 // Cisc-alternate to addFPR_reg
10191 // This instruction does not round to 24-bits
10192 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10193   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10194   match(Set dst (AddF dst (LoadF src)));
10195 
10196   format %{ "FADD   $dst,$src" %}
10197   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10198   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10199               OpcP, RegOpc(dst) );
10200   ins_pipe( fpu_reg_mem );
10201 %}
10202 
// Following two instructions for _222_mpegaudio
10204 // Spill to obtain 24-bit precision
10205 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10206   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10207   match(Set dst (AddF src1 src2));
10208 
10209   format %{ "FADD   $dst,$src1,$src2" %}
10210   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10211   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10212               OpcReg_FPR(src2),
10213               Pop_Mem_FPR(dst) );
10214   ins_pipe( fpu_mem_reg_mem );
10215 %}
10216 
10217 // Cisc-spill variant
10218 // Spill to obtain 24-bit precision
10219 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10220   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10221   match(Set dst (AddF src1 (LoadF src2)));
10222 
10223   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10224   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10225   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10226               set_instruction_start,
10227               OpcP, RMopc_Mem(secondary,src1),
10228               Pop_Mem_FPR(dst) );
10229   ins_pipe( fpu_mem_mem_mem );
10230 %}
10231 
10232 // Spill to obtain 24-bit precision
10233 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10234   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10235   match(Set dst (AddF src1 src2));
10236 
10237   format %{ "FADD   $dst,$src1,$src2" %}
10238   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10239   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10240               set_instruction_start,
10241               OpcP, RMopc_Mem(secondary,src1),
10242               Pop_Mem_FPR(dst) );
10243   ins_pipe( fpu_mem_mem_mem );
10244 %}
10245 
10246 
10247 // Spill to obtain 24-bit precision
10248 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10249   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10250   match(Set dst (AddF src con));
10251   format %{ "FLD    $src\n\t"
10252             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10253             "FSTP_S $dst"  %}
10254   ins_encode %{
10255     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10256     __ fadd_s($constantaddress($con));
10257     __ fstp_s(Address(rsp, $dst$$disp));
10258   %}
10259   ins_pipe(fpu_mem_reg_con);
10260 %}
10261 //
10262 // This instruction does not round to 24-bits
10263 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10264   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10265   match(Set dst (AddF src con));
10266   format %{ "FLD    $src\n\t"
10267             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10268             "FSTP   $dst"  %}
10269   ins_encode %{
10270     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10271     __ fadd_s($constantaddress($con));
10272     __ fstp_d($dst$$reg);
10273   %}
10274   ins_pipe(fpu_reg_reg_con);
10275 %}
10276 
10277 // Spill to obtain 24-bit precision
10278 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10279   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10280   match(Set dst (MulF src1 src2));
10281 
10282   format %{ "FLD    $src1\n\t"
10283             "FMUL   $src2\n\t"
10284             "FSTP_S $dst"  %}
10285   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10286   ins_encode( Push_Reg_FPR(src1),
10287               OpcReg_FPR(src2),
10288               Pop_Mem_FPR(dst) );
10289   ins_pipe( fpu_mem_reg_reg );
10290 %}
10291 //
10292 // This instruction does not round to 24-bits
10293 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10294   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10295   match(Set dst (MulF src1 src2));
10296 
10297   format %{ "FLD    $src1\n\t"
10298             "FMUL   $src2\n\t"
10299             "FSTP_S $dst"  %}
10300   opcode(0xD8, 0x1); /* D8 C8+i */
10301   ins_encode( Push_Reg_FPR(src2),
10302               OpcReg_FPR(src1),
10303               Pop_Reg_FPR(dst) );
10304   ins_pipe( fpu_reg_reg_reg );
10305 %}
10306 
10307 
10308 // Spill to obtain 24-bit precision
10309 // Cisc-alternate to reg-reg multiply
10310 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10311   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10312   match(Set dst (MulF src1 (LoadF src2)));
10313 
10314   format %{ "FLD_S  $src2\n\t"
10315             "FMUL   $src1\n\t"
10316             "FSTP_S $dst"  %}
10317   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10319               OpcReg_FPR(src1),
10320               Pop_Mem_FPR(dst) );
10321   ins_pipe( fpu_mem_reg_mem );
10322 %}
10323 //
10324 // This instruction does not round to 24-bits
10325 // Cisc-alternate to reg-reg multiply
10326 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10327   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10328   match(Set dst (MulF src1 (LoadF src2)));
10329 
10330   format %{ "FMUL   $dst,$src1,$src2" %}
10331   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10332   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10333               OpcReg_FPR(src1),
10334               Pop_Reg_FPR(dst) );
10335   ins_pipe( fpu_reg_reg_mem );
10336 %}
10337 
10338 // Spill to obtain 24-bit precision
10339 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10340   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10341   match(Set dst (MulF src1 src2));
10342 
10343   format %{ "FMUL   $dst,$src1,$src2" %}
10344   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10345   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10346               set_instruction_start,
10347               OpcP, RMopc_Mem(secondary,src1),
10348               Pop_Mem_FPR(dst) );
10349   ins_pipe( fpu_mem_mem_mem );
10350 %}
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (MulF src con));
10356 
10357   format %{ "FLD    $src\n\t"
10358             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10359             "FSTP_S $dst"  %}
10360   ins_encode %{
10361     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10362     __ fmul_s($constantaddress($con));
10363     __ fstp_s(Address(rsp, $dst$$disp));
10364   %}
10365   ins_pipe(fpu_mem_reg_con);
10366 %}
10367 //
10368 // This instruction does not round to 24-bits
10369 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10370   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10371   match(Set dst (MulF src con));
10372 
10373   format %{ "FLD    $src\n\t"
10374             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10375             "FSTP   $dst"  %}
10376   ins_encode %{
10377     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10378     __ fmul_s($constantaddress($con));
10379     __ fstp_d($dst$$reg);
10380   %}
10381   ins_pipe(fpu_reg_reg_con);
10382 %}
10383 
10384 
10385 //
10386 // MACRO1 -- subsume unshared load into mulFPR
10387 // This instruction does not round to 24-bits
10388 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10389   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10390   match(Set dst (MulF (LoadF mem1) src));
10391 
10392   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10393             "FMUL   ST,$src\n\t"
10394             "FSTP   $dst" %}
10395   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10396   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10397               OpcReg_FPR(src),
10398               Pop_Reg_FPR(dst) );
10399   ins_pipe( fpu_reg_reg_mem );
10400 %}
10401 //
10402 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10403 // This instruction does not round to 24-bits
10404 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10405   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10406   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10407   ins_cost(95);
10408 
10409   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10410             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10411             "FADD   ST,$src2\n\t"
10412             "FSTP   $dst" %}
10413   opcode(0xD9); /* LoadF D9 /0 */
10414   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10415               FMul_ST_reg(src1),
10416               FAdd_ST_reg(src2),
10417               Pop_Reg_FPR(dst) );
10418   ins_pipe( fpu_reg_mem_reg_reg );
10419 %}
10420 
10421 // MACRO3 -- addFPR a mulFPR
10422 // This instruction does not round to 24-bits.  It is a '2-address'
10423 // instruction in that the result goes back to src2.  This eliminates
10424 // a move from the macro; possibly the register allocator will have
10425 // to add it back (and maybe not).
10426 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10427   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10428   match(Set src2 (AddF (MulF src0 src1) src2));
10429 
10430   format %{ "FLD    $src0     ===MACRO3===\n\t"
10431             "FMUL   ST,$src1\n\t"
10432             "FADDP  $src2,ST" %}
10433   opcode(0xD9); /* LoadF D9 /0 */
10434   ins_encode( Push_Reg_FPR(src0),
10435               FMul_ST_reg(src1),
10436               FAddP_reg_ST(src2) );
10437   ins_pipe( fpu_reg_reg_reg );
10438 %}
10439 
10440 // MACRO4 -- divFPR subFPR
10441 // This instruction does not round to 24-bits
10442 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10443   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10444   match(Set dst (DivF (SubF src2 src1) src3));
10445 
10446   format %{ "FLD    $src2   ===MACRO4===\n\t"
10447             "FSUB   ST,$src1\n\t"
10448             "FDIV   ST,$src3\n\t"
10449             "FSTP  $dst" %}
10450   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10451   ins_encode( Push_Reg_FPR(src2),
10452               subFPR_divFPR_encode(src1,src3),
10453               Pop_Reg_FPR(dst) );
10454   ins_pipe( fpu_reg_reg_reg_reg );
10455 %}
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10459   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (DivF src1 src2));
10461 
10462   format %{ "FDIV   $dst,$src1,$src2" %}
10463   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10464   ins_encode( Push_Reg_FPR(src1),
10465               OpcReg_FPR(src2),
10466               Pop_Mem_FPR(dst) );
10467   ins_pipe( fpu_mem_reg_reg );
10468 %}
10469 //
10470 // This instruction does not round to 24-bits
10471 instruct divFPR_reg(regFPR dst, regFPR src) %{
10472   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10473   match(Set dst (DivF dst src));
10474 
10475   format %{ "FDIV   $dst,$src" %}
10476   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10477   ins_encode( Push_Reg_FPR(src),
10478               OpcP, RegOpc(dst) );
10479   ins_pipe( fpu_reg_reg );
10480 %}
10481 
10482 
10483 // Spill to obtain 24-bit precision
10484 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10485   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10486   match(Set dst (ModF src1 src2));
10487   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10488 
10489   format %{ "FMOD   $dst,$src1,$src2" %}
10490   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10491               emitModDPR(),
10492               Push_Result_Mod_DPR(src2),
10493               Pop_Mem_FPR(dst));
10494   ins_pipe( pipe_slow );
10495 %}
10496 //
10497 // This instruction does not round to 24-bits
10498 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10499   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10500   match(Set dst (ModF dst src));
10501   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10502 
10503   format %{ "FMOD   $dst,$src" %}
10504   ins_encode(Push_Reg_Mod_DPR(dst, src),
10505               emitModDPR(),
10506               Push_Result_Mod_DPR(src),
10507               Pop_Reg_FPR(dst));
10508   ins_pipe( pipe_slow );
10509 %}
10510 
10511 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10512   predicate(UseSSE>=1);
10513   match(Set dst (ModF src0 src1));
10514   effect(KILL rax, KILL cr);
10515   format %{ "SUB    ESP,4\t # FMOD\n"
10516           "\tMOVSS  [ESP+0],$src1\n"
10517           "\tFLD_S  [ESP+0]\n"
10518           "\tMOVSS  [ESP+0],$src0\n"
10519           "\tFLD_S  [ESP+0]\n"
10520      "loop:\tFPREM\n"
10521           "\tFWAIT\n"
10522           "\tFNSTSW AX\n"
10523           "\tSAHF\n"
10524           "\tJP     loop\n"
10525           "\tFSTP_S [ESP+0]\n"
10526           "\tMOVSS  $dst,[ESP+0]\n"
10527           "\tADD    ESP,4\n"
10528           "\tFSTP   ST0\t # Restore FPU Stack"
10529     %}
10530   ins_cost(250);
10531   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10532   ins_pipe( pipe_slow );
10533 %}
10534 
10535 
10536 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10538 
10539 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10540   predicate(UseSSE==0);
10541   match(Set dst (RoundFloat src));
10542   ins_cost(125);
10543   format %{ "FST_S  $dst,$src\t# F-round" %}
10544   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10545   ins_pipe( fpu_mem_reg );
10546 %}
10547 
10548 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10549   predicate(UseSSE<=1);
10550   match(Set dst (RoundDouble src));
10551   ins_cost(125);
10552   format %{ "FST_D  $dst,$src\t# D-round" %}
10553   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10554   ins_pipe( fpu_mem_reg );
10555 %}
10556 
// Force rounding to 24-bit precision and 8-bit exponent
10558 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10559   predicate(UseSSE==0);
10560   match(Set dst (ConvD2F src));
10561   format %{ "FST_S  $dst,$src\t# F-round" %}
10562   expand %{
10563     roundFloat_mem_reg(dst,src);
10564   %}
10565 %}
10566 
// Force rounding to 24-bit precision and 8-bit exponent
10568 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10569   predicate(UseSSE==1);
10570   match(Set dst (ConvD2F src));
10571   effect( KILL cr );
10572   format %{ "SUB    ESP,4\n\t"
10573             "FST_S  [ESP],$src\t# F-round\n\t"
10574             "MOVSS  $dst,[ESP]\n\t"
10575             "ADD ESP,4" %}
10576   ins_encode %{
10577     __ subptr(rsp, 4);
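    // If $src is not already ST(0), load it to the top of the FPU stack and
    // pop after the store; otherwise just store ST(0) in place.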
10578     if ($src$$reg != FPR1L_enc) {
10579       __ fld_s($src$$reg-1);
10580       __ fstp_s(Address(rsp, 0));
10581     } else {
10582       __ fst_s(Address(rsp, 0));
10583     }
10584     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10585     __ addptr(rsp, 4);
10586   %}
10587   ins_pipe( pipe_slow );
10588 %}
10589 
10590 // Force rounding double precision to single precision
10591 instruct convD2F_reg(regF dst, regD src) %{
10592   predicate(UseSSE>=2);
10593   match(Set dst (ConvD2F src));
10594   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10595   ins_encode %{
10596     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10597   %}
10598   ins_pipe( pipe_slow );
10599 %}
10600 
10601 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10602   predicate(UseSSE==0);
10603   match(Set dst (ConvF2D src));
10604   format %{ "FST_S  $dst,$src\t# D-round" %}
10605   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10606   ins_pipe( fpu_reg_reg );
10607 %}
10608 
10609 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10610   predicate(UseSSE==1);
10611   match(Set dst (ConvF2D src));
10612   format %{ "FST_D  $dst,$src\t# D-round" %}
10613   expand %{
10614     roundDouble_mem_reg(dst,src);
10615   %}
10616 %}
10617 
10618 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10619   predicate(UseSSE==1);
10620   match(Set dst (ConvF2D src));
10621   effect( KILL cr );
10622   format %{ "SUB    ESP,4\n\t"
10623             "MOVSS  [ESP] $src\n\t"
10624             "FLD_S  [ESP]\n\t"
10625             "ADD    ESP,4\n\t"
10626             "FSTP   $dst\t# D-round" %}
10627   ins_encode %{
10628     __ subptr(rsp, 4);
10629     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10630     __ fld_s(Address(rsp, 0));
10631     __ addptr(rsp, 4);
10632     __ fstp_d($dst$$reg);
10633   %}
10634   ins_pipe( pipe_slow );
10635 %}
10636 
10637 instruct convF2D_reg(regD dst, regF src) %{
10638   predicate(UseSSE>=2);
10639   match(Set dst (ConvF2D src));
10640   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10641   ins_encode %{
10642     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10643   %}
10644   ins_pipe( pipe_slow );
10645 %}
10646 
10647 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
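// The conversion instructions return 0x80000000 (the 'integer indefinite'
// value) on overflow or NaN, so that value is re-checked and the
// d2i_wrapper stub is called to produce the exact Java result.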
10648 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10649   predicate(UseSSE<=1);
10650   match(Set dst (ConvD2I src));
10651   effect( KILL tmp, KILL cr );
10652   format %{ "FLD    $src\t# Convert double to int \n\t"
10653             "FLDCW  trunc mode\n\t"
10654             "SUB    ESP,4\n\t"
10655             "FISTp  [ESP + #0]\n\t"
10656             "FLDCW  std/24-bit mode\n\t"
10657             "POP    EAX\n\t"
10658             "CMP    EAX,0x80000000\n\t"
10659             "JNE,s  fast\n\t"
10660             "FLD_D  $src\n\t"
10661             "CALL   d2i_wrapper\n"
10662       "fast:" %}
10663   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10664   ins_pipe( pipe_slow );
10665 %}
10666 
10667 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10668 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10669   predicate(UseSSE>=2);
10670   match(Set dst (ConvD2I src));
10671   effect( KILL tmp, KILL cr );
10672   format %{ "CVTTSD2SI $dst, $src\n\t"
10673             "CMP    $dst,0x80000000\n\t"
10674             "JNE,s  fast\n\t"
10675             "SUB    ESP, 8\n\t"
10676             "MOVSD  [ESP], $src\n\t"
10677             "FLD_D  [ESP]\n\t"
10678             "ADD    ESP, 8\n\t"
10679             "CALL   d2i_wrapper\n"
10680       "fast:" %}
10681   ins_encode %{
10682     Label fast;
10683     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10684     __ cmpl($dst$$Register, 0x80000000);
10685     __ jccb(Assembler::notEqual, fast);
10686     __ subptr(rsp, 8);
10687     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10688     __ fld_d(Address(rsp, 0));
10689     __ addptr(rsp, 8);
10690     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10691     __ bind(fast);
10692   %}
10693   ins_pipe( pipe_slow );
10694 %}
10695 
10696 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10697   predicate(UseSSE<=1);
10698   match(Set dst (ConvD2L src));
10699   effect( KILL cr );
10700   format %{ "FLD    $src\t# Convert double to long\n\t"
10701             "FLDCW  trunc mode\n\t"
10702             "SUB    ESP,8\n\t"
10703             "FISTp  [ESP + #0]\n\t"
10704             "FLDCW  std/24-bit mode\n\t"
10705             "POP    EAX\n\t"
10706             "POP    EDX\n\t"
10707             "CMP    EDX,0x80000000\n\t"
10708             "JNE,s  fast\n\t"
10709             "TEST   EAX,EAX\n\t"
10710             "JNE,s  fast\n\t"
10711             "FLD    $src\n\t"
10712             "CALL   d2l_wrapper\n"
10713       "fast:" %}
10714   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10719 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10720   predicate (UseSSE>=2);
10721   match(Set dst (ConvD2L src));
10722   effect( KILL cr );
10723   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10724             "MOVSD  [ESP],$src\n\t"
10725             "FLD_D  [ESP]\n\t"
10726             "FLDCW  trunc mode\n\t"
10727             "FISTp  [ESP + #0]\n\t"
10728             "FLDCW  std/24-bit mode\n\t"
10729             "POP    EAX\n\t"
10730             "POP    EDX\n\t"
10731             "CMP    EDX,0x80000000\n\t"
10732             "JNE,s  fast\n\t"
10733             "TEST   EAX,EAX\n\t"
10734             "JNE,s  fast\n\t"
10735             "SUB    ESP,8\n\t"
10736             "MOVSD  [ESP],$src\n\t"
10737             "FLD_D  [ESP]\n\t"
10738             "ADD    ESP,8\n\t"
10739             "CALL   d2l_wrapper\n"
10740       "fast:" %}
10741   ins_encode %{
10742     Label fast;
10743     __ subptr(rsp, 8);
10744     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10745     __ fld_d(Address(rsp, 0));
10746     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10747     __ fistp_d(Address(rsp, 0));
10748     // Restore the rounding mode, mask the exception
10749     if (Compile::current()->in_24_bit_fp_mode()) {
10750       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10751     } else {
10752       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10753     }
10754     // Load the converted long, adjust CPU stack
10755     __ pop(rax);
10756     __ pop(rdx);
10757     __ cmpl(rdx, 0x80000000);
10758     __ jccb(Assembler::notEqual, fast);
10759     __ testl(rax, rax);
10760     __ jccb(Assembler::notEqual, fast);
10761     __ subptr(rsp, 8);
10762     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10763     __ fld_d(Address(rsp, 0));
10764     __ addptr(rsp, 8);
10765     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10766     __ bind(fast);
10767   %}
10768   ins_pipe( pipe_slow );
10769 %}
10770 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
10777 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10778   predicate(UseSSE==0);
10779   match(Set dst (ConvF2I src));
10780   effect( KILL tmp, KILL cr );
10781   format %{ "FLD    $src\t# Convert float to int \n\t"
10782             "FLDCW  trunc mode\n\t"
10783             "SUB    ESP,4\n\t"
10784             "FISTp  [ESP + #0]\n\t"
10785             "FLDCW  std/24-bit mode\n\t"
10786             "POP    EAX\n\t"
10787             "CMP    EAX,0x80000000\n\t"
10788             "JNE,s  fast\n\t"
10789             "FLD    $src\n\t"
10790             "CALL   d2i_wrapper\n"
10791       "fast:" %}
10792   // DPR2I_encoding works for FPR2I
10793   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10794   ins_pipe( pipe_slow );
10795 %}
10796 
10797 // Convert a float in xmm to an int reg.
10798 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10799   predicate(UseSSE>=1);
10800   match(Set dst (ConvF2I src));
10801   effect( KILL tmp, KILL cr );
10802   format %{ "CVTTSS2SI $dst, $src\n\t"
10803             "CMP    $dst,0x80000000\n\t"
10804             "JNE,s  fast\n\t"
10805             "SUB    ESP, 4\n\t"
10806             "MOVSS  [ESP], $src\n\t"
10807             "FLD    [ESP]\n\t"
10808             "ADD    ESP, 4\n\t"
10809             "CALL   d2i_wrapper\n"
10810       "fast:" %}
10811   ins_encode %{
10812     Label fast;
10813     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10814     __ cmpl($dst$$Register, 0x80000000);
10815     __ jccb(Assembler::notEqual, fast);
10816     __ subptr(rsp, 4);
10817     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10818     __ fld_s(Address(rsp, 0));
10819     __ addptr(rsp, 4);
10820     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10821     __ bind(fast);
10822   %}
10823   ins_pipe( pipe_slow );
10824 %}
10825 
10826 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10827   predicate(UseSSE==0);
10828   match(Set dst (ConvF2L src));
10829   effect( KILL cr );
10830   format %{ "FLD    $src\t# Convert float to long\n\t"
10831             "FLDCW  trunc mode\n\t"
10832             "SUB    ESP,8\n\t"
10833             "FISTp  [ESP + #0]\n\t"
10834             "FLDCW  std/24-bit mode\n\t"
10835             "POP    EAX\n\t"
10836             "POP    EDX\n\t"
10837             "CMP    EDX,0x80000000\n\t"
10838             "JNE,s  fast\n\t"
10839             "TEST   EAX,EAX\n\t"
10840             "JNE,s  fast\n\t"
10841             "FLD    $src\n\t"
10842             "CALL   d2l_wrapper\n"
10843       "fast:" %}
10844   // DPR2L_encoding works for FPR2L
10845   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10846   ins_pipe( pipe_slow );
10847 %}
10848 
10849 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10850 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10851   predicate (UseSSE>=1);
10852   match(Set dst (ConvF2L src));
10853   effect( KILL cr );
10854   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10855             "MOVSS  [ESP],$src\n\t"
10856             "FLD_S  [ESP]\n\t"
10857             "FLDCW  trunc mode\n\t"
10858             "FISTp  [ESP + #0]\n\t"
10859             "FLDCW  std/24-bit mode\n\t"
10860             "POP    EAX\n\t"
10861             "POP    EDX\n\t"
10862             "CMP    EDX,0x80000000\n\t"
10863             "JNE,s  fast\n\t"
10864             "TEST   EAX,EAX\n\t"
10865             "JNE,s  fast\n\t"
10866             "SUB    ESP,4\t# Convert float to long\n\t"
10867             "MOVSS  [ESP],$src\n\t"
10868             "FLD_S  [ESP]\n\t"
10869             "ADD    ESP,4\n\t"
10870             "CALL   d2l_wrapper\n"
10871       "fast:" %}
10872   ins_encode %{
10873     Label fast;
10874     __ subptr(rsp, 8);
10875     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10876     __ fld_s(Address(rsp, 0));
10877     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10878     __ fistp_d(Address(rsp, 0));
10879     // Restore the rounding mode, mask the exception
10880     if (Compile::current()->in_24_bit_fp_mode()) {
10881       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10882     } else {
10883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10884     }
10885     // Load the converted long, adjust CPU stack
10886     __ pop(rax);
10887     __ pop(rdx);
10888     __ cmpl(rdx, 0x80000000);
10889     __ jccb(Assembler::notEqual, fast);
10890     __ testl(rax, rax);
10891     __ jccb(Assembler::notEqual, fast);
10892     __ subptr(rsp, 4);
10893     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10894     __ fld_s(Address(rsp, 0));
10895     __ addptr(rsp, 4);
10896     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10897     __ bind(fast);
10898   %}
10899   ins_pipe( pipe_slow );
10900 %}
10901 
10902 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10903   predicate( UseSSE<=1 );
10904   match(Set dst (ConvI2D src));
10905   format %{ "FILD   $src\n\t"
10906             "FSTP   $dst" %}
10907   opcode(0xDB, 0x0);  /* DB /0 */
10908   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10909   ins_pipe( fpu_reg_mem );
10910 %}
10911 
10912 instruct convI2D_reg(regD dst, rRegI src) %{
10913   predicate( UseSSE>=2 && !UseXmmI2D );
10914   match(Set dst (ConvI2D src));
10915   format %{ "CVTSI2SD $dst,$src" %}
10916   ins_encode %{
10917     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10918   %}
10919   ins_pipe( pipe_slow );
10920 %}
10921 
10922 instruct convI2D_mem(regD dst, memory mem) %{
10923   predicate( UseSSE>=2 );
10924   match(Set dst (ConvI2D (LoadI mem)));
10925   format %{ "CVTSI2SD $dst,$mem" %}
10926   ins_encode %{
10927     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10928   %}
10929   ins_pipe( pipe_slow );
10930 %}
10931 
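// With UseXmmI2D, move the int straight into an XMM register and convert
// with CVTDQ2PD instead of going through memory.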
10932 instruct convXI2D_reg(regD dst, rRegI src)
10933 %{
10934   predicate( UseSSE>=2 && UseXmmI2D );
10935   match(Set dst (ConvI2D src));
10936 
10937   format %{ "MOVD  $dst,$src\n\t"
10938             "CVTDQ2PD $dst,$dst\t# i2d" %}
10939   ins_encode %{
10940     __ movdl($dst$$XMMRegister, $src$$Register);
10941     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10942   %}
10943   ins_pipe(pipe_slow); // XXX
10944 %}
10945 
10946 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10947   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10948   match(Set dst (ConvI2D (LoadI mem)));
10949   format %{ "FILD   $mem\n\t"
10950             "FSTP   $dst" %}
10951   opcode(0xDB);      /* DB /0 */
10952   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10953               Pop_Reg_DPR(dst));
10954   ins_pipe( fpu_reg_mem );
10955 %}
10956 
10957 // Convert a byte to a float; no rounding step needed.
10958 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10959   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10960   match(Set dst (ConvI2F src));
10961   format %{ "FILD   $src\n\t"
10962             "FSTP   $dst" %}
10963 
10964   opcode(0xDB, 0x0);  /* DB /0 */
10965   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10966   ins_pipe( fpu_reg_mem );
10967 %}
10968 
10969 // In 24-bit mode, force exponent rounding by storing back out
10970 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10971   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10972   match(Set dst (ConvI2F src));
10973   ins_cost(200);
10974   format %{ "FILD   $src\n\t"
10975             "FSTP_S $dst" %}
10976   opcode(0xDB, 0x0);  /* DB /0 */
10977   ins_encode( Push_Mem_I(src),
10978               Pop_Mem_FPR(dst));
10979   ins_pipe( fpu_mem_mem );
10980 %}
10981 
10982 // In 24-bit mode, force exponent rounding by storing back out
10983 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10984   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10985   match(Set dst (ConvI2F (LoadI mem)));
10986   ins_cost(200);
10987   format %{ "FILD   $mem\n\t"
10988             "FSTP_S $dst" %}
10989   opcode(0xDB);  /* DB /0 */
10990   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10991               Pop_Mem_FPR(dst));
10992   ins_pipe( fpu_mem_mem );
10993 %}
10994 
10995 // This instruction does not round to 24-bits
10996 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10997   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10998   match(Set dst (ConvI2F src));
10999   format %{ "FILD   $src\n\t"
11000             "FSTP   $dst" %}
11001   opcode(0xDB, 0x0);  /* DB /0 */
11002   ins_encode( Push_Mem_I(src),
11003               Pop_Reg_FPR(dst));
11004   ins_pipe( fpu_reg_mem );
11005 %}
11006 
11007 // This instruction does not round to 24-bits
11008 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11009   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11010   match(Set dst (ConvI2F (LoadI mem)));
11011   format %{ "FILD   $mem\n\t"
11012             "FSTP   $dst" %}
11013   opcode(0xDB);      /* DB /0 */
11014   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11015               Pop_Reg_FPR(dst));
11016   ins_pipe( fpu_reg_mem );
11017 %}
11018 
11019 // Convert an int to a float in xmm; no rounding step needed.
11020 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11022   match(Set dst (ConvI2F src));
11023   format %{ "CVTSI2SS $dst, $src" %}
11024   ins_encode %{
11025     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11026   %}
11027   ins_pipe( pipe_slow );
11028 %}
11029 
instruct convXI2F_reg(regF dst, rRegI src)
11031 %{
11032   predicate( UseSSE>=2 && UseXmmI2F );
11033   match(Set dst (ConvI2F src));
11034 
11035   format %{ "MOVD  $dst,$src\n\t"
11036             "CVTDQ2PS $dst,$dst\t# i2f" %}
11037   ins_encode %{
11038     __ movdl($dst$$XMMRegister, $src$$Register);
11039     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11040   %}
11041   ins_pipe(pipe_slow); // XXX
11042 %}
11043 
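// Sign-extend int to long: copy the value into both halves, then
// arithmetic-shift the high half right by 31.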
11044 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11045   match(Set dst (ConvI2L src));
11046   effect(KILL cr);
11047   ins_cost(375);
11048   format %{ "MOV    $dst.lo,$src\n\t"
11049             "MOV    $dst.hi,$src\n\t"
11050             "SAR    $dst.hi,31" %}
11051   ins_encode(convert_int_long(dst,src));
11052   ins_pipe( ialu_reg_reg_long );
11053 %}
11054 
11055 // Convert int to long with zero-extension
11056 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11057   match(Set dst (AndL (ConvI2L src) mask) );
11058   effect( KILL flags );
11059   ins_cost(250);
11060   format %{ "MOV    $dst.lo,$src\n\t"
11061             "XOR    $dst.hi,$dst.hi" %}
11062   opcode(0x33); // XOR
11063   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11064   ins_pipe( ialu_reg_reg_long );
11065 %}
11066 
11067 // Zero-extend long
11068 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11069   match(Set dst (AndL src mask) );
11070   effect( KILL flags );
11071   ins_cost(250);
11072   format %{ "MOV    $dst.lo,$src.lo\n\t"
11073             "XOR    $dst.hi,$dst.hi\n\t" %}
11074   opcode(0x33); // XOR
11075   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11076   ins_pipe( ialu_reg_reg_long );
11077 %}
11078 
11079 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11080   predicate (UseSSE<=1);
11081   match(Set dst (ConvL2D src));
11082   effect( KILL cr );
11083   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11084             "PUSH   $src.lo\n\t"
11085             "FILD   ST,[ESP + #0]\n\t"
11086             "ADD    ESP,8\n\t"
11087             "FSTP_D $dst\t# D-round" %}
11088   opcode(0xDF, 0x5);  /* DF /5 */
11089   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11090   ins_pipe( pipe_slow );
11091 %}
11092 
11093 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11094   predicate (UseSSE>=2);
11095   match(Set dst (ConvL2D src));
11096   effect( KILL cr );
11097   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11098             "PUSH   $src.lo\n\t"
11099             "FILD_D [ESP]\n\t"
11100             "FSTP_D [ESP]\n\t"
11101             "MOVSD  $dst,[ESP]\n\t"
11102             "ADD    ESP,8" %}
11103   opcode(0xDF, 0x5);  /* DF /5 */
11104   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11105   ins_pipe( pipe_slow );
11106 %}
11107 
11108 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11109   predicate (UseSSE>=1);
11110   match(Set dst (ConvL2F src));
11111   effect( KILL cr );
11112   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11113             "PUSH   $src.lo\n\t"
11114             "FILD_D [ESP]\n\t"
11115             "FSTP_S [ESP]\n\t"
11116             "MOVSS  $dst,[ESP]\n\t"
11117             "ADD    ESP,8" %}
11118   opcode(0xDF, 0x5);  /* DF /5 */
11119   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11120   ins_pipe( pipe_slow );
11121 %}
11122 
11123 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11124   match(Set dst (ConvL2F src));
11125   effect( KILL cr );
11126   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11127             "PUSH   $src.lo\n\t"
11128             "FILD   ST,[ESP + #0]\n\t"
11129             "ADD    ESP,8\n\t"
11130             "FSTP_S $dst\t# F-round" %}
11131   opcode(0xDF, 0x5);  /* DF /5 */
11132   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11133   ins_pipe( pipe_slow );
11134 %}
11135 
11136 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11137   match(Set dst (ConvL2I src));
11138   effect( DEF dst, USE src );
11139   format %{ "MOV    $dst,$src.lo" %}
11140   ins_encode(enc_CopyL_Lo(dst,src));
11141   ins_pipe( ialu_reg_reg );
11142 %}
11143 
11144 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11145   match(Set dst (MoveF2I src));
11146   effect( DEF dst, USE src );
11147   ins_cost(100);
11148   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11149   ins_encode %{
11150     __ movl($dst$$Register, Address(rsp, $src$$disp));
11151   %}
11152   ins_pipe( ialu_reg_mem );
11153 %}
11154 
11155 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11156   predicate(UseSSE==0);
11157   match(Set dst (MoveF2I src));
11158   effect( DEF dst, USE src );
11159 
11160   ins_cost(125);
11161   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11162   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11163   ins_pipe( fpu_mem_reg );
11164 %}
11165 
11166 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11167   predicate(UseSSE>=1);
11168   match(Set dst (MoveF2I src));
11169   effect( DEF dst, USE src );
11170 
11171   ins_cost(95);
11172   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11173   ins_encode %{
11174     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11175   %}
11176   ins_pipe( pipe_slow );
11177 %}
11178 
11179 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11180   predicate(UseSSE>=2);
11181   match(Set dst (MoveF2I src));
11182   effect( DEF dst, USE src );
11183   ins_cost(85);
11184   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11185   ins_encode %{
11186     __ movdl($dst$$Register, $src$$XMMRegister);
11187   %}
11188   ins_pipe( pipe_slow );
11189 %}
11190 
11191 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11192   match(Set dst (MoveI2F src));
11193   effect( DEF dst, USE src );
11194 
11195   ins_cost(100);
11196   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11197   ins_encode %{
11198     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11199   %}
11200   ins_pipe( ialu_mem_reg );
11201 %}
11202 
11203 
11204 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11205   predicate(UseSSE==0);
11206   match(Set dst (MoveI2F src));
11207   effect(DEF dst, USE src);
11208 
11209   ins_cost(125);
11210   format %{ "FLD_S  $src\n\t"
11211             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11212   opcode(0xD9);               /* D9 /0, FLD m32real */
11213   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11214               Pop_Reg_FPR(dst) );
11215   ins_pipe( fpu_reg_mem );
11216 %}
11217 
11218 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11219   predicate(UseSSE>=1);
11220   match(Set dst (MoveI2F src));
11221   effect( DEF dst, USE src );
11222 
11223   ins_cost(95);
11224   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11225   ins_encode %{
11226     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11227   %}
11228   ins_pipe( pipe_slow );
11229 %}
11230 
11231 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11232   predicate(UseSSE>=2);
11233   match(Set dst (MoveI2F src));
11234   effect( DEF dst, USE src );
11235 
11236   ins_cost(85);
11237   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11238   ins_encode %{
11239     __ movdl($dst$$XMMRegister, $src$$Register);
11240   %}
11241   ins_pipe( pipe_slow );
11242 %}
11243 
11244 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11245   match(Set dst (MoveD2L src));
11246   effect(DEF dst, USE src);
11247 
11248   ins_cost(250);
11249   format %{ "MOV    $dst.lo,$src\n\t"
11250             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11251   opcode(0x8B, 0x8B);
11252   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11253   ins_pipe( ialu_mem_long_reg );
11254 %}
11255 
11256 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11257   predicate(UseSSE<=1);
11258   match(Set dst (MoveD2L src));
11259   effect(DEF dst, USE src);
11260 
11261   ins_cost(125);
11262   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11263   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11264   ins_pipe( fpu_mem_reg );
11265 %}
11266 
11267 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11268   predicate(UseSSE>=2);
11269   match(Set dst (MoveD2L src));
11270   effect(DEF dst, USE src);
11271   ins_cost(95);
11272   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11273   ins_encode %{
11274     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11275   %}
11276   ins_pipe( pipe_slow );
11277 %}
11278 
11279 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11280   predicate(UseSSE>=2);
11281   match(Set dst (MoveD2L src));
11282   effect(DEF dst, USE src, TEMP tmp);
11283   ins_cost(85);
11284   format %{ "MOVD   $dst.lo,$src\n\t"
11285             "PSHUFLW $tmp,$src,0x4E\n\t"
11286             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11287   ins_encode %{
11288     __ movdl($dst$$Register, $src$$XMMRegister);
11289     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11290     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11291   %}
11292   ins_pipe( pipe_slow );
11293 %}
11294 
11295 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11296   match(Set dst (MoveL2D src));
11297   effect(DEF dst, USE src);
11298 
11299   ins_cost(200);
11300   format %{ "MOV    $dst,$src.lo\n\t"
11301             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11302   opcode(0x89, 0x89);
11303   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11304   ins_pipe( ialu_mem_long_reg );
11305 %}
11306 
11307 
11308 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11309   predicate(UseSSE<=1);
11310   match(Set dst (MoveL2D src));
11311   effect(DEF dst, USE src);
11312   ins_cost(125);
11313 
11314   format %{ "FLD_D  $src\n\t"
11315             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11316   opcode(0xDD);               /* DD /0, FLD m64real */
11317   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11318               Pop_Reg_DPR(dst) );
11319   ins_pipe( fpu_reg_mem );
11320 %}
11321 
11322 
11323 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11324   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11325   match(Set dst (MoveL2D src));
11326   effect(DEF dst, USE src);
11327 
11328   ins_cost(95);
11329   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11330   ins_encode %{
11331     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11332   %}
11333   ins_pipe( pipe_slow );
11334 %}
11335 
11336 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11337   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11338   match(Set dst (MoveL2D src));
11339   effect(DEF dst, USE src);
11340 
11341   ins_cost(95);
11342   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11343   ins_encode %{
11344     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11345   %}
11346   ins_pipe( pipe_slow );
11347 %}
11348 
11349 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11350   predicate(UseSSE>=2);
11351   match(Set dst (MoveL2D src));
11352   effect(TEMP dst, USE src, TEMP tmp);
11353   ins_cost(85);
11354   format %{ "MOVD   $dst,$src.lo\n\t"
11355             "MOVD   $tmp,$src.hi\n\t"
11356             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11357   ins_encode %{
11358     __ movdl($dst$$XMMRegister, $src$$Register);
11359     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11360     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11361   %}
11362   ins_pipe( pipe_slow );
11363 %}
11364 
11365 
11366 // =======================================================================
11367 // fast clearing of an array
11368 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11369   predicate(!UseFastStosb && !((ClearArrayNode*)n)->is_large());
11370   match(Set dummy (ClearArray cnt base));
11371   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11372   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11373             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11374             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11375   ins_encode %{
11376     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11377   %}
11378   ins_pipe( pipe_slow );
11379 %}
11380 
11381 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11382   predicate(!UseFastStosb && ((ClearArrayNode*)n)->is_large());
11383   match(Set dummy (ClearArray cnt base));
11384   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11385   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11386             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11387             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11388   ins_encode %{
11389     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11390   %}
11391   ins_pipe( pipe_slow );
11392 %}
11393 
11394 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11395   predicate(UseFastStosb && !((ClearArrayNode*)n)->is_large());
11396   match(Set dummy (ClearArray cnt base));
11397   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11398   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11399             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11400             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11401   ins_encode %{
11402     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11403   %}
11404   ins_pipe( pipe_slow );
11405 %}
11406 
11407 instruct rep_fast_stosb_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11408   predicate(UseFastStosb && ((ClearArrayNode*)n)->is_large());
11409   match(Set dummy (ClearArray cnt base));
11410   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11411   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11412             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11413             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11414   ins_encode %{
11415     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11416   %}
11417   ins_pipe( pipe_slow );
11418 %}
11419 
11420 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11421                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11422   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11423   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11424   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11425 
11426   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11427   ins_encode %{
11428     __ string_compare($str1$$Register, $str2$$Register,
11429                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11430                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11431   %}
11432   ins_pipe( pipe_slow );
11433 %}
11434 
11435 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11436                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11437   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11438   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11439   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11440 
11441   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11442   ins_encode %{
11443     __ string_compare($str1$$Register, $str2$$Register,
11444                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11445                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11446   %}
11447   ins_pipe( pipe_slow );
11448 %}
11449 
11450 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11451                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11452   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11453   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11454   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11455 
11456   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11457   ins_encode %{
11458     __ string_compare($str1$$Register, $str2$$Register,
11459                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11460                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11461   %}
11462   ins_pipe( pipe_slow );
11463 %}
11464 
11465 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11466                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11467   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11468   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11469   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11470 
11471   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11472   ins_encode %{
11473     __ string_compare($str2$$Register, $str1$$Register,
11474                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11475                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11476   %}
11477   ins_pipe( pipe_slow );
11478 %}
11479 
11480 // fast string equals
11481 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11482                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11483   match(Set result (StrEquals (Binary str1 str2) cnt));
11484   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11485 
11486   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11487   ins_encode %{
11488     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11489                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11490                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11491   %}
11492 
11493   ins_pipe( pipe_slow );
11494 %}
11495 
11496 // fast search of substring with known size.
11497 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11498                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11499   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11500   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11501   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11502 
11503   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11504   ins_encode %{
11505     int icnt2 = (int)$int_cnt2$$constant;
11506     if (icnt2 >= 16) {
11507       // IndexOf for constant substrings with size >= 16 elements
11508       // which don't need to be loaded through the stack.
11509       __ string_indexofC8($str1$$Register, $str2$$Register,
11510                           $cnt1$$Register, $cnt2$$Register,
11511                           icnt2, $result$$Register,
11512                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11513     } else {
11514       // Small strings are loaded through the stack if they cross a page boundary.
11515       __ string_indexof($str1$$Register, $str2$$Register,
11516                         $cnt1$$Register, $cnt2$$Register,
11517                         icnt2, $result$$Register,
11518                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11519     }
11520   %}
11521   ins_pipe( pipe_slow );
11522 %}
11523 
11524 // fast search of substring with known size.
11525 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11526                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11527   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11528   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11529   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11530 
11531   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11532   ins_encode %{
11533     int icnt2 = (int)$int_cnt2$$constant;
11534     if (icnt2 >= 8) {
11535       // IndexOf for constant substrings with size >= 8 elements
11536       // which don't need to be loaded through the stack.
11537       __ string_indexofC8($str1$$Register, $str2$$Register,
11538                           $cnt1$$Register, $cnt2$$Register,
11539                           icnt2, $result$$Register,
11540                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11541     } else {
11542       // Small strings are loaded through the stack if they cross a page boundary.
11543       __ string_indexof($str1$$Register, $str2$$Register,
11544                         $cnt1$$Register, $cnt2$$Register,
11545                         icnt2, $result$$Register,
11546                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11547     }
11548   %}
11549   ins_pipe( pipe_slow );
11550 %}
11551 
11552 // fast search of substring with known size.
11553 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11554                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11555   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11556   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11557   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11558 
11559   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11560   ins_encode %{
11561     int icnt2 = (int)$int_cnt2$$constant;
11562     if (icnt2 >= 8) {
11563       // IndexOf for constant substrings with size >= 8 elements
11564       // which don't need to be loaded through the stack.
11565       __ string_indexofC8($str1$$Register, $str2$$Register,
11566                           $cnt1$$Register, $cnt2$$Register,
11567                           icnt2, $result$$Register,
11568                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11569     } else {
11570       // Small strings are loaded through the stack if they cross a page boundary.
11571       __ string_indexof($str1$$Register, $str2$$Register,
11572                         $cnt1$$Register, $cnt2$$Register,
11573                         icnt2, $result$$Register,
11574                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11575     }
11576   %}
11577   ins_pipe( pipe_slow );
11578 %}
11579 
11580 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11581                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11582   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11583   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11584   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11585 
11586   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11587   ins_encode %{
11588     __ string_indexof($str1$$Register, $str2$$Register,
11589                       $cnt1$$Register, $cnt2$$Register,
11590                       (-1), $result$$Register,
11591                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11592   %}
11593   ins_pipe( pipe_slow );
11594 %}
11595 
11596 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11597                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11598   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11599   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11600   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11601 
11602   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11603   ins_encode %{
11604     __ string_indexof($str1$$Register, $str2$$Register,
11605                       $cnt1$$Register, $cnt2$$Register,
11606                       (-1), $result$$Register,
11607                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11608   %}
11609   ins_pipe( pipe_slow );
11610 %}
11611 
11612 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11613                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11614   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11615   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11616   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11617 
11618   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11619   ins_encode %{
11620     __ string_indexof($str1$$Register, $str2$$Register,
11621                       $cnt1$$Register, $cnt2$$Register,
11622                       (-1), $result$$Register,
11623                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11624   %}
11625   ins_pipe( pipe_slow );
11626 %}
11627 
11628 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11629                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11630   predicate(UseSSE42Intrinsics);
11631   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11632   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11633   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11634   ins_encode %{
11635     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11636                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 // fast array equals
11642 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11643                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11644 %{
11645   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11646   match(Set result (AryEq ary1 ary2));
11647   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11648   //ins_cost(300);
11649 
11650   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11651   ins_encode %{
11652     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11653                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11654                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11655   %}
11656   ins_pipe( pipe_slow );
11657 %}
11658 
11659 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11660                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11661 %{
11662   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11663   match(Set result (AryEq ary1 ary2));
11664   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11665   //ins_cost(300);
11666 
11667   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11668   ins_encode %{
11669     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11670                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11671                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11672   %}
11673   ins_pipe( pipe_slow );
11674 %}
11675 
11676 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11677                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11678 %{
11679   match(Set result (HasNegatives ary1 len));
11680   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11681 
11682   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11683   ins_encode %{
11684     __ has_negatives($ary1$$Register, $len$$Register,
11685                      $result$$Register, $tmp3$$Register,
11686                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11687   %}
11688   ins_pipe( pipe_slow );
11689 %}
11690 
11691 // fast char[] to byte[] compression
11692 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11693                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11694   match(Set result (StrCompressedCopy src (Binary dst len)));
11695   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11696 
11697   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11698   ins_encode %{
11699     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11700                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11701                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11702   %}
11703   ins_pipe( pipe_slow );
11704 %}
11705 
11706 // fast byte[] to char[] inflation
11707 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11708                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11709   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11710   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11711 
11712   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11713   ins_encode %{
11714     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11715                           $tmp1$$XMMRegister, $tmp2$$Register);
11716   %}
11717   ins_pipe( pipe_slow );
11718 %}
11719 
11720 // encode char[] to byte[] in ISO_8859_1
11721 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11722                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11723                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11724   match(Set result (EncodeISOArray src (Binary dst len)));
11725   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11726 
11727   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11728   ins_encode %{
11729     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11730                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11731                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11732   %}
11733   ins_pipe( pipe_slow );
11734 %}
11735 
11736 
11737 //----------Control Flow Instructions------------------------------------------
11738 // Signed compare Instructions
11739 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11740   match(Set cr (CmpI op1 op2));
11741   effect( DEF cr, USE op1, USE op2 );
11742   format %{ "CMP    $op1,$op2" %}
11743   opcode(0x3B);  /* Opcode 3B /r */
11744   ins_encode( OpcP, RegReg( op1, op2) );
11745   ins_pipe( ialu_cr_reg_reg );
11746 %}
11747 
11748 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11749   match(Set cr (CmpI op1 op2));
11750   effect( DEF cr, USE op1 );
11751   format %{ "CMP    $op1,$op2" %}
11752   opcode(0x81,0x07);  /* Opcode 81 /7 */
11753   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11754   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11755   ins_pipe( ialu_cr_reg_imm );
11756 %}
11757 
11758 // Cisc-spilled version of cmpI_eReg
11759 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11760   match(Set cr (CmpI op1 (LoadI op2)));
11761 
11762   format %{ "CMP    $op1,$op2" %}
11763   ins_cost(500);
11764   opcode(0x3B);  /* Opcode 3B /r */
11765   ins_encode( OpcP, RegMem( op1, op2) );
11766   ins_pipe( ialu_cr_reg_mem );
11767 %}
11768 
11769 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11770   match(Set cr (CmpI src zero));
11771   effect( DEF cr, USE src );
11772 
11773   format %{ "TEST   $src,$src" %}
11774   opcode(0x85);
11775   ins_encode( OpcP, RegReg( src, src ) );
11776   ins_pipe( ialu_cr_reg_imm );
11777 %}
11778 
11779 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11780   match(Set cr (CmpI (AndI src con) zero));
11781 
11782   format %{ "TEST   $src,$con" %}
11783   opcode(0xF7,0x00);
11784   ins_encode( OpcP, RegOpc(src), Con32(con) );
11785   ins_pipe( ialu_cr_reg_imm );
11786 %}
11787 
11788 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11789   match(Set cr (CmpI (AndI src mem) zero));
11790 
11791   format %{ "TEST   $src,$mem" %}
11792   opcode(0x85);
11793   ins_encode( OpcP, RegMem( src, mem ) );
11794   ins_pipe( ialu_cr_reg_mem );
11795 %}
11796 
11797 // Unsigned compare Instructions; really, same as signed except they
11798 // produce an eFlagsRegU instead of eFlagsReg.
11799 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11800   match(Set cr (CmpU op1 op2));
11801 
11802   format %{ "CMPu   $op1,$op2" %}
11803   opcode(0x3B);  /* Opcode 3B /r */
11804   ins_encode( OpcP, RegReg( op1, op2) );
11805   ins_pipe( ialu_cr_reg_reg );
11806 %}
11807 
11808 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11809   match(Set cr (CmpU op1 op2));
11810 
11811   format %{ "CMPu   $op1,$op2" %}
11812   opcode(0x81,0x07);  /* Opcode 81 /7 */
11813   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11814   ins_pipe( ialu_cr_reg_imm );
11815 %}
11816 
11817 // Cisc-spilled version of cmpU_eReg
11818 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11819   match(Set cr (CmpU op1 (LoadI op2)));
11820 
11821   format %{ "CMPu   $op1,$op2" %}
11822   ins_cost(500);
11823   opcode(0x3B);  /* Opcode 3B /r */
11824   ins_encode( OpcP, RegMem( op1, op2) );
11825   ins_pipe( ialu_cr_reg_mem );
11826 %}
11827 
11828 // // Cisc-spilled version of cmpU_eReg
11829 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11830 //  match(Set cr (CmpU (LoadI op1) op2));
11831 //
11832 //  format %{ "CMPu   $op1,$op2" %}
11833 //  ins_cost(500);
11834 //  opcode(0x39);  /* Opcode 39 /r */
11835 //  ins_encode( OpcP, RegMem( op1, op2) );
11836 //%}
11837 
11838 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11839   match(Set cr (CmpU src zero));
11840 
11841   format %{ "TESTu  $src,$src" %}
11842   opcode(0x85);
11843   ins_encode( OpcP, RegReg( src, src ) );
11844   ins_pipe( ialu_cr_reg_imm );
11845 %}
11846 
11847 // Unsigned pointer compare Instructions
11848 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11849   match(Set cr (CmpP op1 op2));
11850 
11851   format %{ "CMPu   $op1,$op2" %}
11852   opcode(0x3B);  /* Opcode 3B /r */
11853   ins_encode( OpcP, RegReg( op1, op2) );
11854   ins_pipe( ialu_cr_reg_reg );
11855 %}
11856 
11857 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11858   match(Set cr (CmpP op1 op2));
11859 
11860   format %{ "CMPu   $op1,$op2" %}
11861   opcode(0x81,0x07);  /* Opcode 81 /7 */
11862   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11863   ins_pipe( ialu_cr_reg_imm );
11864 %}
11865 
11866 // Cisc-spilled version of cmpP_eReg
11867 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11868   match(Set cr (CmpP op1 (LoadP op2)));
11869 
11870   format %{ "CMPu   $op1,$op2" %}
11871   ins_cost(500);
11872   opcode(0x3B);  /* Opcode 3B /r */
11873   ins_encode( OpcP, RegMem( op1, op2) );
11874   ins_pipe( ialu_cr_reg_mem );
11875 %}
11876 
11877 // // Cisc-spilled version of cmpP_eReg
11878 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11879 //  match(Set cr (CmpP (LoadP op1) op2));
11880 //
11881 //  format %{ "CMPu   $op1,$op2" %}
11882 //  ins_cost(500);
11883 //  opcode(0x39);  /* Opcode 39 /r */
11884 //  ins_encode( OpcP, RegMem( op1, op2) );
11885 //%}
11886 
11887 // Compare raw pointer (used in out-of-heap check).
11888 // Only works because non-oop pointers must be raw pointers
11889 // and raw pointers have no anti-dependencies.
11890 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11891   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11892   match(Set cr (CmpP op1 (LoadP op2)));
11893 
11894   format %{ "CMPu   $op1,$op2" %}
11895   opcode(0x3B);  /* Opcode 3B /r */
11896   ins_encode( OpcP, RegMem( op1, op2) );
11897   ins_pipe( ialu_cr_reg_mem );
11898 %}
11899 
11900 //
11901 // This will generate a signed flags result. This should be ok
11902 // since any compare to a zero should be eq/neq.
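// For example, a null check such as "if (p == NULL)" becomes TEST p,p followed
// by JE/JNE, so only the ZF outcome of the signed flags result is consumed.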
11903 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11904   match(Set cr (CmpP src zero));
11905 
11906   format %{ "TEST   $src,$src" %}
11907   opcode(0x85);
11908   ins_encode( OpcP, RegReg( src, src ) );
11909   ins_pipe( ialu_cr_reg_imm );
11910 %}
11911 
11912 // Cisc-spilled version of testP_reg
11913 // This will generate a signed flags result. This should be ok
11914 // since any compare to a zero should be eq/neq.
11915 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11916   match(Set cr (CmpP (LoadP op) zero));
11917 
11918   format %{ "TEST   $op,0xFFFFFFFF" %}
11919   ins_cost(500);
11920   opcode(0xF7);               /* Opcode F7 /0 */
11921   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11922   ins_pipe( ialu_cr_reg_imm );
11923 %}
11924 
11925 // Yanked all unsigned pointer compare operations.
11926 // Pointer compares are done with CmpP which is already unsigned.
11927 
11928 //----------Max and Min--------------------------------------------------------
11929 // Min Instructions
11930 ////
11931 //   *** Min and Max using the conditional move are slower than the
11932 //   *** branch version on a Pentium III.
11933 // // Conditional move for min
11934 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11935 //  effect( USE_DEF op2, USE op1, USE cr );
11936 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11937 //  opcode(0x4C,0x0F);
11938 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11939 //  ins_pipe( pipe_cmov_reg );
11940 //%}
11941 //
11942 //// Min Register with Register (P6 version)
11943 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11944 //  predicate(VM_Version::supports_cmov() );
11945 //  match(Set op2 (MinI op1 op2));
11946 //  ins_cost(200);
11947 //  expand %{
11948 //    eFlagsReg cr;
11949 //    compI_eReg(cr,op1,op2);
11950 //    cmovI_reg_lt(op2,op1,cr);
11951 //  %}
11952 //%}
11953 
11954 // Min Register with Register (generic version)
11955 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11956   match(Set dst (MinI dst src));
11957   effect(KILL flags);
11958   ins_cost(300);
11959 
11960   format %{ "MIN    $dst,$src" %}
11961   opcode(0xCC);
11962   ins_encode( min_enc(dst,src) );
11963   ins_pipe( pipe_slow );
11964 %}
11965 
11966 // Max Register with Register
11967 //   *** Min and Max using the conditional move are slower than the
11968 //   *** branch version on a Pentium III.
11969 // // Conditional move for max
11970 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11971 //  effect( USE_DEF op2, USE op1, USE cr );
11972 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11973 //  opcode(0x4F,0x0F);
11974 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11975 //  ins_pipe( pipe_cmov_reg );
11976 //%}
11977 //
11978 // // Max Register with Register (P6 version)
11979 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11980 //  predicate(VM_Version::supports_cmov() );
11981 //  match(Set op2 (MaxI op1 op2));
11982 //  ins_cost(200);
11983 //  expand %{
11984 //    eFlagsReg cr;
11985 //    compI_eReg(cr,op1,op2);
11986 //    cmovI_reg_gt(op2,op1,cr);
11987 //  %}
11988 //%}
11989 
11990 // Max Register with Register (generic version)
11991 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11992   match(Set dst (MaxI dst src));
11993   effect(KILL flags);
11994   ins_cost(300);
11995 
11996   format %{ "MAX    $dst,$src" %}
11997   opcode(0xCC);
11998   ins_encode( max_enc(dst,src) );
11999   ins_pipe( pipe_slow );
12000 %}
12001 
12002 // ============================================================================
12003 // Counted Loop limit node which represents the exact final iterator value.
12004 // Note: the resulting value should fit into the integer range since
12005 // counted loops have a limit check on overflow.
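// Worked example of the exact-trip arithmetic: with init = 0, limit = 10 and
// stride = 3 the loop visits 0, 3, 6, 9, so the exact final iterator value is
//   init + stride * ((limit - init + stride - 1) / stride)
//     = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 0 + 3 * 4 = 12.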
12006 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12007   match(Set limit (LoopLimit (Binary init limit) stride));
12008   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12009   ins_cost(300);
12010 
12011   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12012   ins_encode %{
12013     int strd = (int)$stride$$constant;
12014     assert(strd != 1 && strd != -1, "sanity");
12015     int m1 = (strd > 0) ? 1 : -1;
12016     // Convert limit to long (EAX:EDX)
12017     __ cdql();
12018     // Convert init to long (init:tmp)
12019     __ movl($tmp$$Register, $init$$Register);
12020     __ sarl($tmp$$Register, 31);
12021     // $limit - $init
12022     __ subl($limit$$Register, $init$$Register);
12023     __ sbbl($limit_hi$$Register, $tmp$$Register);
12024     // + ($stride - 1)
12025     if (strd > 0) {
12026       __ addl($limit$$Register, (strd - 1));
12027       __ adcl($limit_hi$$Register, 0);
12028       __ movl($tmp$$Register, strd);
12029     } else {
12030       __ addl($limit$$Register, (strd + 1));
12031       __ adcl($limit_hi$$Register, -1);
12032       __ lneg($limit_hi$$Register, $limit$$Register);
12033       __ movl($tmp$$Register, -strd);
12034     }
12035     // signed division: (EAX:EDX) / pos_stride
12036     __ idivl($tmp$$Register);
12037     if (strd < 0) {
12038       // restore sign
12039       __ negl($tmp$$Register);
12040     }
12041     // (EAX) * stride
12042     __ mull($tmp$$Register);
12043     // + init (ignore upper bits)
12044     __ addl($limit$$Register, $init$$Register);
12045   %}
12046   ins_pipe( pipe_slow );
12047 %}
12048 
12049 // ============================================================================
12050 // Branch Instructions
12051 // Jump Table
12052 instruct jumpXtnd(rRegI switch_val) %{
12053   match(Jump switch_val);
12054   ins_cost(350);
12055   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12056   ins_encode %{
12057     // Jump to Address(table_base + switch_reg)
12058     Address index(noreg, $switch_val$$Register, Address::times_1);
12059     __ jump(ArrayAddress($constantaddress, index));
12060   %}
12061   ins_pipe(pipe_jmp);
12062 %}
12063 
12064 // Jump Direct - Label defines a relative address from JMP+1
12065 instruct jmpDir(label labl) %{
12066   match(Goto);
12067   effect(USE labl);
12068 
12069   ins_cost(300);
12070   format %{ "JMP    $labl" %}
12071   size(5);
12072   ins_encode %{
12073     Label* L = $labl$$label;
12074     __ jmp(*L, false); // Always long jump
12075   %}
12076   ins_pipe( pipe_jmp );
12077 %}
12078 
12079 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12080 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12081   match(If cop cr);
12082   effect(USE labl);
12083 
12084   ins_cost(300);
12085   format %{ "J$cop    $labl" %}
12086   size(6);
12087   ins_encode %{
12088     Label* L = $labl$$label;
12089     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12090   %}
12091   ins_pipe( pipe_jcc );
12092 %}
12093 
12094 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12095 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12096   match(CountedLoopEnd cop cr);
12097   effect(USE labl);
12098 
12099   ins_cost(300);
12100   format %{ "J$cop    $labl\t# Loop end" %}
12101   size(6);
12102   ins_encode %{
12103     Label* L = $labl$$label;
12104     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12105   %}
12106   ins_pipe( pipe_jcc );
12107 %}
12108 
12109 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12110 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12111   match(CountedLoopEnd cop cmp);
12112   effect(USE labl);
12113 
12114   ins_cost(300);
12115   format %{ "J$cop,u  $labl\t# Loop end" %}
12116   size(6);
12117   ins_encode %{
12118     Label* L = $labl$$label;
12119     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12120   %}
12121   ins_pipe( pipe_jcc );
12122 %}
12123 
12124 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12125   match(CountedLoopEnd cop cmp);
12126   effect(USE labl);
12127 
12128   ins_cost(200);
12129   format %{ "J$cop,u  $labl\t# Loop end" %}
12130   size(6);
12131   ins_encode %{
12132     Label* L = $labl$$label;
12133     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12134   %}
12135   ins_pipe( pipe_jcc );
12136 %}
12137 
12138 // Jump Direct Conditional - using unsigned comparison
12139 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12140   match(If cop cmp);
12141   effect(USE labl);
12142 
12143   ins_cost(300);
12144   format %{ "J$cop,u  $labl" %}
12145   size(6);
12146   ins_encode %{
12147     Label* L = $labl$$label;
12148     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12149   %}
12150   ins_pipe(pipe_jcc);
12151 %}
12152 
12153 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12154   match(If cop cmp);
12155   effect(USE labl);
12156 
12157   ins_cost(200);
12158   format %{ "J$cop,u  $labl" %}
12159   size(6);
12160   ins_encode %{
12161     Label* L = $labl$$label;
12162     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12163   %}
12164   ins_pipe(pipe_jcc);
12165 %}
12166 
12167 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12168   match(If cop cmp);
12169   effect(USE labl);
12170 
12171   ins_cost(200);
12172   format %{ $$template
12173     if ($cop$$cmpcode == Assembler::notEqual) {
12174       $$emit$$"JP,u   $labl\n\t"
12175       $$emit$$"J$cop,u   $labl"
12176     } else {
12177       $$emit$$"JP,u   done\n\t"
12178       $$emit$$"J$cop,u   $labl\n\t"
12179       $$emit$$"done:"
12180     }
12181   %}
12182   ins_encode %{
12183     Label* l = $labl$$label;
12184     if ($cop$$cmpcode == Assembler::notEqual) {
12185       __ jcc(Assembler::parity, *l, false);
12186       __ jcc(Assembler::notEqual, *l, false);
12187     } else if ($cop$$cmpcode == Assembler::equal) {
12188       Label done;
12189       __ jccb(Assembler::parity, done);
12190       __ jcc(Assembler::equal, *l, false);
12191       __ bind(done);
12192     } else {
12193        ShouldNotReachHere();
12194     }
12195   %}
12196   ins_pipe(pipe_jcc);
12197 %}
12198 
12199 // ============================================================================
12200 // The second, slow half of a subtype check.  Scan the subklass's secondary
12201 // superklass array for an instance of the superklass.  Set a hidden internal
12202 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12203 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
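// Roughly, in C-like pseudocode, the scan below is:
//   for (i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers[i] == super) {    // hit
//       sub->secondary_super_cache = super;
//       return 0;
//     }
//   }
//   return non-zero;                              // miss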
12204 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12205   match(Set result (PartialSubtypeCheck sub super));
12206   effect( KILL rcx, KILL cr );
12207 
12208   ins_cost(1100);  // slightly larger than the next version
12209   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12210             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12211             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12212             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12213             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12214             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12215             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12216      "miss:\t" %}
12217 
12218   opcode(0x1); // Force a XOR of EDI
12219   ins_encode( enc_PartialSubtypeCheck() );
12220   ins_pipe( pipe_slow );
12221 %}
12222 
12223 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12224   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12225   effect( KILL rcx, KILL result );
12226 
12227   ins_cost(1000);
12228   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12229             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12230             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12231             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12232             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12233             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12234      "miss:\t" %}
12235 
12236   opcode(0x0);  // No need to XOR EDI
12237   ins_encode( enc_PartialSubtypeCheck() );
12238   ins_pipe( pipe_slow );
12239 %}
12240 
12241 // ============================================================================
12242 // Branch Instructions -- short offset versions
12243 //
12244 // These instructions are used to replace jumps of a long offset (the default
12245 // match) with jumps of a shorter offset.  These instructions are all tagged
12246 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12247 // match rules in general matching.  Instead, the ADLC generates a conversion
12248 // method in the MachNode which can be used to do in-place replacement of the
12249 // long variant with the shorter variant.  The compiler determines whether the
12250 // short variant can be used via the is_short_branch_offset() predicate in the
12251 // machine-specific code section of the file.
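// The savings come from the encodings: a short JMP/Jcc uses a signed 8-bit
// displacement (2 bytes total), whereas the long forms above are 5 bytes (JMP
// rel32) and 6 bytes (Jcc rel32), matching the size() attributes on each rule.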
12252 
12253 // Jump Direct - Label defines a relative address from JMP+1
12254 instruct jmpDir_short(label labl) %{
12255   match(Goto);
12256   effect(USE labl);
12257 
12258   ins_cost(300);
12259   format %{ "JMP,s  $labl" %}
12260   size(2);
12261   ins_encode %{
12262     Label* L = $labl$$label;
12263     __ jmpb(*L);
12264   %}
12265   ins_pipe( pipe_jmp );
12266   ins_short_branch(1);
12267 %}
12268 
12269 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12270 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12271   match(If cop cr);
12272   effect(USE labl);
12273 
12274   ins_cost(300);
12275   format %{ "J$cop,s  $labl" %}
12276   size(2);
12277   ins_encode %{
12278     Label* L = $labl$$label;
12279     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12280   %}
12281   ins_pipe( pipe_jcc );
12282   ins_short_branch(1);
12283 %}
12284 
12285 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12286 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12287   match(CountedLoopEnd cop cr);
12288   effect(USE labl);
12289 
12290   ins_cost(300);
12291   format %{ "J$cop,s  $labl\t# Loop end" %}
12292   size(2);
12293   ins_encode %{
12294     Label* L = $labl$$label;
12295     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12296   %}
12297   ins_pipe( pipe_jcc );
12298   ins_short_branch(1);
12299 %}
12300 
12301 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12302 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12303   match(CountedLoopEnd cop cmp);
12304   effect(USE labl);
12305 
12306   ins_cost(300);
12307   format %{ "J$cop,us $labl\t# Loop end" %}
12308   size(2);
12309   ins_encode %{
12310     Label* L = $labl$$label;
12311     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12312   %}
12313   ins_pipe( pipe_jcc );
12314   ins_short_branch(1);
12315 %}
12316 
12317 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12318   match(CountedLoopEnd cop cmp);
12319   effect(USE labl);
12320 
12321   ins_cost(300);
12322   format %{ "J$cop,us $labl\t# Loop end" %}
12323   size(2);
12324   ins_encode %{
12325     Label* L = $labl$$label;
12326     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12327   %}
12328   ins_pipe( pipe_jcc );
12329   ins_short_branch(1);
12330 %}
12331 
12332 // Jump Direct Conditional - using unsigned comparison
12333 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12334   match(If cop cmp);
12335   effect(USE labl);
12336 
12337   ins_cost(300);
12338   format %{ "J$cop,us $labl" %}
12339   size(2);
12340   ins_encode %{
12341     Label* L = $labl$$label;
12342     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12343   %}
12344   ins_pipe( pipe_jcc );
12345   ins_short_branch(1);
12346 %}
12347 
12348 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12349   match(If cop cmp);
12350   effect(USE labl);
12351 
12352   ins_cost(300);
12353   format %{ "J$cop,us $labl" %}
12354   size(2);
12355   ins_encode %{
12356     Label* L = $labl$$label;
12357     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12358   %}
12359   ins_pipe( pipe_jcc );
12360   ins_short_branch(1);
12361 %}
12362 
12363 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12364   match(If cop cmp);
12365   effect(USE labl);
12366 
12367   ins_cost(300);
12368   format %{ $$template
12369     if ($cop$$cmpcode == Assembler::notEqual) {
12370       $$emit$$"JP,u,s   $labl\n\t"
12371       $$emit$$"J$cop,u,s   $labl"
12372     } else {
12373       $$emit$$"JP,u,s   done\n\t"
12374       $$emit$$"J$cop,u,s  $labl\n\t"
12375       $$emit$$"done:"
12376     }
12377   %}
12378   size(4);
12379   ins_encode %{
12380     Label* l = $labl$$label;
12381     if ($cop$$cmpcode == Assembler::notEqual) {
12382       __ jccb(Assembler::parity, *l);
12383       __ jccb(Assembler::notEqual, *l);
12384     } else if ($cop$$cmpcode == Assembler::equal) {
12385       Label done;
12386       __ jccb(Assembler::parity, done);
12387       __ jccb(Assembler::equal, *l);
12388       __ bind(done);
12389     } else {
12390        ShouldNotReachHere();
12391     }
12392   %}
12393   ins_pipe(pipe_jcc);
12394   ins_short_branch(1);
12395 %}
12396 
12397 // ============================================================================
12398 // Long Compare
12399 //
12400 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12401 // is tricky.  The flavor of compare used depends on whether we are testing
12402 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12403 // The GE test is the negated LT test.  The LE test is obtained by commuting
12404 // the operands and applying the GE (negated LT) test; negating that again gives
12405 // the GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12406 // NE test is negated from that.
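// For reference, with the value split into 32-bit halves, a signed X < Y is
//   (X.hi < Y.hi) || (X.hi == Y.hi && X.lo <u Y.lo)      (<u = unsigned)
// which the CMP/SBB flag sequences below compute without keeping the difference
// itself.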
12407 
12408 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12409 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12410 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12411 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12412 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12413 // foo match ends up with the wrong leaf.  One fix is to not match both
12414 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12415 // both forms beat the ternary form of long-compare and both are very useful
12416 // on Intel, which has so few registers.
12417 
12418 // Manifest a CmpL result in an integer register.  Very painful.
12419 // This is the test to avoid.
12420 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12421   match(Set dst (CmpL3 src1 src2));
12422   effect( KILL flags );
12423   ins_cost(1000);
12424   format %{ "XOR    $dst,$dst\n\t"
12425             "CMP    $src1.hi,$src2.hi\n\t"
12426             "JLT,s  m_one\n\t"
12427             "JGT,s  p_one\n\t"
12428             "CMP    $src1.lo,$src2.lo\n\t"
12429             "JB,s   m_one\n\t"
12430             "JEQ,s  done\n"
12431     "p_one:\tINC    $dst\n\t"
12432             "JMP,s  done\n"
12433     "m_one:\tDEC    $dst\n"
12434      "done:" %}
12435   ins_encode %{
12436     Label p_one, m_one, done;
12437     __ xorptr($dst$$Register, $dst$$Register);
12438     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12439     __ jccb(Assembler::less,    m_one);
12440     __ jccb(Assembler::greater, p_one);
12441     __ cmpl($src1$$Register, $src2$$Register);
12442     __ jccb(Assembler::below,   m_one);
12443     __ jccb(Assembler::equal,   done);
12444     __ bind(p_one);
12445     __ incrementl($dst$$Register);
12446     __ jmpb(done);
12447     __ bind(m_one);
12448     __ decrementl($dst$$Register);
12449     __ bind(done);
12450   %}
12451   ins_pipe( pipe_slow );
12452 %}
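
      // For reference, the -1/0/+1 value the sequence above materializes is
      // the ordinary three-way long compare (lcmp semantics).  A hedged C++
      // sketch, with the helper name invented for this comment:
      //
      //   static int cmpl3(int64_t a, int64_t b) {
      //     return (a < b) ? -1 : ((a > b) ? 1 : 0);   // value left in $dst
      //   }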
12453 
12454 //======
12455 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12456 // compares.  Can be used for LE or GT compares by reversing arguments.
12457 // NOT GOOD FOR EQ/NE tests.
12458 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12459   match( Set flags (CmpL src zero ));
12460   ins_cost(100);
12461   format %{ "TEST   $src.hi,$src.hi" %}
12462   opcode(0x85);
12463   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12464   ins_pipe( ialu_cr_reg_reg );
12465 %}
12466 
12467 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12468 // compares.  Can be used for LE or GT compares by reversing arguments.
12469 // NOT GOOD FOR EQ/NE tests.
12470 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12471   match( Set flags (CmpL src1 src2 ));
12472   effect( TEMP tmp );
12473   ins_cost(300);
12474   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12475             "MOV    $tmp,$src1.hi\n\t"
12476             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12477   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12478   ins_pipe( ialu_cr_reg_reg );
12479 %}
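
      // The CMP-low / SBB-high pair above is the classic wide-subtract trick:
      // CMP produces the borrow out of the low halves and SBB folds it into
      // the high-half subtraction, so SF and OF end up exactly as a full
      // 64-bit SUB would leave them.  Only the signed LT/GE conditions are
      // meaningful afterwards (ZF reflects just the high half).  A hedged C++
      // sketch of the arithmetic, helper name invented for this comment
      // (fixed-width types as in the sketch further up):
      //
      //   static bool long_lt_via_sbb(int32_t a_hi, uint32_t a_lo,
      //                               int32_t b_hi, uint32_t b_lo) {
      //     uint32_t borrow = (a_lo < b_lo) ? 1 : 0;       // CMP lo: borrow out
      //     int64_t  hi = (int64_t)a_hi - b_hi - borrow;   // SBB hi into tmp
      //     return hi < 0;                                 // JL / CMOVL holds
      //   }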
12480 
12481 // Long compares reg < zero/reg OR reg >= zero/reg.
12482 // Just a wrapper for a normal branch, plus the predicate test.
12483 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12484   match(If cmp flags);
12485   effect(USE labl);
12486   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12487   expand %{
12488     jmpCon(cmp,flags,labl);    // JLT or JGE...
12489   %}
12490 %}
12491 
12492 // Compare 2 longs and CMOVE longs.
12493 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12494   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12495   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12496   ins_cost(400);
12497   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12498             "CMOV$cmp $dst.hi,$src.hi" %}
12499   opcode(0x0F,0x40);
12500   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12501   ins_pipe( pipe_cmov_reg_long );
12502 %}
12503 
12504 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12505   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12506   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12507   ins_cost(500);
12508   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12509             "CMOV$cmp $dst.hi,$src.hi" %}
12510   opcode(0x0F,0x40);
12511   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12512   ins_pipe( pipe_cmov_reg_long );
12513 %}
12514 
12515 // Compare 2 longs and CMOVE ints.
12516 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12517   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12518   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12519   ins_cost(200);
12520   format %{ "CMOV$cmp $dst,$src" %}
12521   opcode(0x0F,0x40);
12522   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12523   ins_pipe( pipe_cmov_reg );
12524 %}
12525 
12526 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12527   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12528   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12529   ins_cost(250);
12530   format %{ "CMOV$cmp $dst,$src" %}
12531   opcode(0x0F,0x40);
12532   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12533   ins_pipe( pipe_cmov_mem );
12534 %}
12535 
12536 // Compare 2 longs and CMOVE ptrs.
12537 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12538   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12539   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12540   ins_cost(200);
12541   format %{ "CMOV$cmp $dst,$src" %}
12542   opcode(0x0F,0x40);
12543   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12544   ins_pipe( pipe_cmov_reg );
12545 %}
12546 
12547 // Compare 2 longs and CMOVE doubles
12548 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12549   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12550   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12551   ins_cost(200);
12552   expand %{
12553     fcmovDPR_regS(cmp,flags,dst,src);
12554   %}
12555 %}
12556 
12557 // Compare 2 longs and CMOVE doubles
12558 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12559   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12560   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12561   ins_cost(200);
12562   expand %{
12563     fcmovD_regS(cmp,flags,dst,src);
12564   %}
12565 %}
12566 
12567 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12568   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12569   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12570   ins_cost(200);
12571   expand %{
12572     fcmovFPR_regS(cmp,flags,dst,src);
12573   %}
12574 %}
12575 
12576 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12577   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12578   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12579   ins_cost(200);
12580   expand %{
12581     fcmovF_regS(cmp,flags,dst,src);
12582   %}
12583 %}
12584 
12585 //======
12586 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12587 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12588   match( Set flags (CmpL src zero ));
12589   effect(TEMP tmp);
12590   ins_cost(200);
12591   format %{ "MOV    $tmp,$src.lo\n\t"
12592             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12593   ins_encode( long_cmp_flags0( src, tmp ) );
12594   ins_pipe( ialu_reg_reg_long );
12595 %}
12596 
12597 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12598 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12599   match( Set flags (CmpL src1 src2 ));
12600   ins_cost(200+300);
12601   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12602             "JNE,s  skip\n\t"
12603             "CMP    $src1.hi,$src2.hi\n\t"
12604      "skip:\t" %}
12605   ins_encode( long_cmp_flags1( src1, src2 ) );
12606   ins_pipe( ialu_cr_reg_reg );
12607 %}
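
      // The skip trick above only has to get ZF right: if the low halves
      // already differ, the JNE,s leaves ZF clear and the high-half CMP is
      // bypassed; otherwise the second CMP decides.  Equivalent logic,
      // sketched in C++ with an invented helper name:
      //
      //   static bool long_ne_halves(uint32_t a_hi, uint32_t a_lo,
      //                              uint32_t b_hi, uint32_t b_lo) {
      //     if (a_lo != b_lo) return true;  // "JNE,s skip" taken, ZF stays clear
      //     return a_hi != b_hi;            // second CMP sets ZF for the EQ case
      //   }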
12608 
12609 // Long compare reg == zero/reg OR reg != zero/reg
12610 // Just a wrapper for a normal branch, plus the predicate test.
12611 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12612   match(If cmp flags);
12613   effect(USE labl);
12614   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12615   expand %{
12616     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12617   %}
12618 %}
12619 
12620 // Compare 2 longs and CMOVE longs.
12621 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12622   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12623   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12624   ins_cost(400);
12625   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12626             "CMOV$cmp $dst.hi,$src.hi" %}
12627   opcode(0x0F,0x40);
12628   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12629   ins_pipe( pipe_cmov_reg_long );
12630 %}
12631 
12632 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12633   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12634   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12635   ins_cost(500);
12636   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12637             "CMOV$cmp $dst.hi,$src.hi" %}
12638   opcode(0x0F,0x40);
12639   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12640   ins_pipe( pipe_cmov_reg_long );
12641 %}
12642 
12643 // Compare 2 longs and CMOVE ints.
12644 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12645   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12646   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12647   ins_cost(200);
12648   format %{ "CMOV$cmp $dst,$src" %}
12649   opcode(0x0F,0x40);
12650   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12651   ins_pipe( pipe_cmov_reg );
12652 %}
12653 
12654 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12655   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12656   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12657   ins_cost(250);
12658   format %{ "CMOV$cmp $dst,$src" %}
12659   opcode(0x0F,0x40);
12660   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12661   ins_pipe( pipe_cmov_mem );
12662 %}
12663 
12664 // Compare 2 longs and CMOVE ptrs.
12665 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12666   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12667   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12668   ins_cost(200);
12669   format %{ "CMOV$cmp $dst,$src" %}
12670   opcode(0x0F,0x40);
12671   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12672   ins_pipe( pipe_cmov_reg );
12673 %}
12674 
12675 // Compare 2 longs and CMOVE doubles
12676 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12677   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12678   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12679   ins_cost(200);
12680   expand %{
12681     fcmovDPR_regS(cmp,flags,dst,src);
12682   %}
12683 %}
12684 
12685 // Compare 2 longs and CMOVE doubles
12686 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12687   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12688   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12689   ins_cost(200);
12690   expand %{
12691     fcmovD_regS(cmp,flags,dst,src);
12692   %}
12693 %}
12694 
12695 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12696   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12697   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12698   ins_cost(200);
12699   expand %{
12700     fcmovFPR_regS(cmp,flags,dst,src);
12701   %}
12702 %}
12703 
12704 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12705   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12706   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12707   ins_cost(200);
12708   expand %{
12709     fcmovF_regS(cmp,flags,dst,src);
12710   %}
12711 %}
12712 
12713 //======
12714 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12715 // Same as cmpL_reg_flags_LEGT except must negate src
12716 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12717   match( Set flags (CmpL src zero ));
12718   effect( TEMP tmp );
12719   ins_cost(300);
12720   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12721             "CMP    $tmp,$src.lo\n\t"
12722             "SBB    $tmp,$src.hi\n\t" %}
12723   ins_encode( long_cmp_flags3(src, tmp) );
12724   ins_pipe( ialu_reg_reg_long );
12725 %}
12726 
12727 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12728 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12729 // requires a commuted test to get the same result.
12730 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12731   match( Set flags (CmpL src1 src2 ));
12732   effect( TEMP tmp );
12733   ins_cost(300);
12734   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12735             "MOV    $tmp,$src2.hi\n\t"
12736             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12737   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12738   ins_pipe( ialu_cr_reg_reg );
12739 %}
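
      // LE and GT cannot be read directly off a single CMP/SBB of src1-src2,
      // so the operands are exchanged and the condition is commuted instead.
      // Roughly, reusing the hypothetical long_lt_via_sbb sketch from the
      // LTGE section above:
      //
      //   static bool long_le(int32_t a_hi, uint32_t a_lo,
      //                       int32_t b_hi, uint32_t b_lo) {
      //     // a <= b  iff  !(b < a); flags are computed for (b - a) and the
      //     // commuted condition code (cmpOp_commute) turns LE into GE and
      //     // GT into LT.
      //     return !long_lt_via_sbb(b_hi, b_lo, a_hi, a_lo);
      //   }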
12740 
12741 // Long compares reg <= zero/reg OR reg > zero/reg.
12742 // Just a wrapper for a normal branch, plus the predicate test
12743 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12744   match(If cmp flags);
12745   effect(USE labl);
12746   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12747   ins_cost(300);
12748   expand %{
12749     jmpCon(cmp,flags,labl);    // JGT or JLE...
12750   %}
12751 %}
12752 
12753 // Compare 2 longs and CMOVE longs.
12754 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12755   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12756   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12757   ins_cost(400);
12758   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12759             "CMOV$cmp $dst.hi,$src.hi" %}
12760   opcode(0x0F,0x40);
12761   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12762   ins_pipe( pipe_cmov_reg_long );
12763 %}
12764 
12765 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12766   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12767   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12768   ins_cost(500);
12769   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12770             "CMOV$cmp $dst.hi,$src.hi+4" %}
12771   opcode(0x0F,0x40);
12772   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12773   ins_pipe( pipe_cmov_reg_long );
12774 %}
12775 
12776 // Compare 2 longs and CMOVE ints.
12777 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12778   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12779   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12780   ins_cost(200);
12781   format %{ "CMOV$cmp $dst,$src" %}
12782   opcode(0x0F,0x40);
12783   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12784   ins_pipe( pipe_cmov_reg );
12785 %}
12786 
12787 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12788   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12789   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12790   ins_cost(250);
12791   format %{ "CMOV$cmp $dst,$src" %}
12792   opcode(0x0F,0x40);
12793   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12794   ins_pipe( pipe_cmov_mem );
12795 %}
12796 
12797 // Compare 2 longs and CMOVE ptrs.
12798 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12799   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12800   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12801   ins_cost(200);
12802   format %{ "CMOV$cmp $dst,$src" %}
12803   opcode(0x0F,0x40);
12804   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12805   ins_pipe( pipe_cmov_reg );
12806 %}
12807 
12808 // Compare 2 longs and CMOVE doubles
12809 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12810   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12811   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12812   ins_cost(200);
12813   expand %{
12814     fcmovDPR_regS(cmp,flags,dst,src);
12815   %}
12816 %}
12817 
12818 // Compare 2 longs and CMOVE doubles
12819 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12820   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12821   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12822   ins_cost(200);
12823   expand %{
12824     fcmovD_regS(cmp,flags,dst,src);
12825   %}
12826 %}
12827 
12828 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12829   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12830   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12831   ins_cost(200);
12832   expand %{
12833     fcmovFPR_regS(cmp,flags,dst,src);
12834   %}
12835 %}
12836 
12837 
12838 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12839   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12840   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12841   ins_cost(200);
12842   expand %{
12843     fcmovF_regS(cmp,flags,dst,src);
12844   %}
12845 %}
12846 
12847 
12848 // ============================================================================
12849 // Procedure Call/Return Instructions
12850 // Call Java Static Instruction
12851 // Note: If this code changes, the corresponding ret_addr_offset() and
12852 //       compute_padding() functions will have to be adjusted.
12853 instruct CallStaticJavaDirect(method meth) %{
12854   match(CallStaticJava);
12855   effect(USE meth);
12856 
12857   ins_cost(300);
12858   format %{ "CALL,static " %}
12859   opcode(0xE8); /* E8 cd */
12860   ins_encode( pre_call_resets,
12861               Java_Static_Call( meth ),
12862               call_epilog,
12863               post_call_FPU );
12864   ins_pipe( pipe_slow );
12865   ins_alignment(4);
12866 %}
12867 
12868 // Call Java Dynamic Instruction
12869 // Note: If this code changes, the corresponding ret_addr_offset() and
12870 //       compute_padding() functions will have to be adjusted.
12871 instruct CallDynamicJavaDirect(method meth) %{
12872   match(CallDynamicJava);
12873   effect(USE meth);
12874 
12875   ins_cost(300);
12876   format %{ "MOV    EAX,(oop)-1\n\t"
12877             "CALL,dynamic" %}
12878   opcode(0xE8); /* E8 cd */
12879   ins_encode( pre_call_resets,
12880               Java_Dynamic_Call( meth ),
12881               call_epilog,
12882               post_call_FPU );
12883   ins_pipe( pipe_slow );
12884   ins_alignment(4);
12885 %}
12886 
12887 // Call Runtime Instruction
12888 instruct CallRuntimeDirect(method meth) %{
12889   match(CallRuntime );
12890   effect(USE meth);
12891 
12892   ins_cost(300);
12893   format %{ "CALL,runtime " %}
12894   opcode(0xE8); /* E8 cd */
12895   // Use FFREEs to clear entries in float stack
12896   ins_encode( pre_call_resets,
12897               FFree_Float_Stack_All,
12898               Java_To_Runtime( meth ),
12899               post_call_FPU );
12900   ins_pipe( pipe_slow );
12901 %}
12902 
12903 // Call runtime without safepoint
12904 instruct CallLeafDirect(method meth) %{
12905   match(CallLeaf);
12906   effect(USE meth);
12907 
12908   ins_cost(300);
12909   format %{ "CALL_LEAF,runtime " %}
12910   opcode(0xE8); /* E8 cd */
12911   ins_encode( pre_call_resets,
12912               FFree_Float_Stack_All,
12913               Java_To_Runtime( meth ),
12914               Verify_FPU_For_Leaf, post_call_FPU );
12915   ins_pipe( pipe_slow );
12916 %}
12917 
12918 instruct CallLeafNoFPDirect(method meth) %{
12919   match(CallLeafNoFP);
12920   effect(USE meth);
12921 
12922   ins_cost(300);
12923   format %{ "CALL_LEAF_NOFP,runtime " %}
12924   opcode(0xE8); /* E8 cd */
12925   ins_encode(Java_To_Runtime(meth));
12926   ins_pipe( pipe_slow );
12927 %}
12928 
12929 
12930 // Return Instruction
12931 // Remove the return address & jump to it.
12932 instruct Ret() %{
12933   match(Return);
12934   format %{ "RET" %}
12935   opcode(0xC3);
12936   ins_encode(OpcP);
12937   ins_pipe( pipe_jmp );
12938 %}
12939 
12940 // Tail Call; Jump from runtime stub to Java code.
12941 // Also known as an 'interprocedural jump'.
12942 // Target of jump will eventually return to caller.
12943 // TailJump below removes the return address.
12944 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12945   match(TailCall jump_target method_oop );
12946   ins_cost(300);
12947   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12948   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12949   ins_encode( OpcP, RegOpc(jump_target) );
12950   ins_pipe( pipe_jmp );
12951 %}
12952 
12953 
12954 // Tail Jump; remove the return address; jump to target.
12955 // TailCall above leaves the return address around.
12956 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12957   match( TailJump jump_target ex_oop );
12958   ins_cost(300);
12959   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12960             "JMP    $jump_target " %}
12961   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12962   ins_encode( enc_pop_rdx,
12963               OpcP, RegOpc(jump_target) );
12964   ins_pipe( pipe_jmp );
12965 %}
12966 
12967 // Create exception oop: created by stack-crawling runtime code.
12968 // Created exception is now available to this handler, and is set up
12969 // just prior to jumping to this handler.  No code emitted.
12970 instruct CreateException( eAXRegP ex_oop )
12971 %{
12972   match(Set ex_oop (CreateEx));
12973 
12974   size(0);
12975   // use the following format syntax
12976   format %{ "# exception oop is in EAX; no code emitted" %}
12977   ins_encode();
12978   ins_pipe( empty );
12979 %}
12980 
12981 
12982 // Rethrow exception:
12983 // The exception oop will come in the first argument position.
12984 // Then JUMP (not call) to the rethrow stub code.
12985 instruct RethrowException()
12986 %{
12987   match(Rethrow);
12988 
12989   // use the following format syntax
12990   format %{ "JMP    rethrow_stub" %}
12991   ins_encode(enc_rethrow);
12992   ins_pipe( pipe_jmp );
12993 %}
12994 
12995 // inlined locking and unlocking
12996 
12997 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12998   predicate(Compile::current()->use_rtm());
12999   match(Set cr (FastLock object box));
13000   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13001   ins_cost(300);
13002   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13003   ins_encode %{
13004     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13005                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13006                  _counters, _rtm_counters, _stack_rtm_counters,
13007                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13008                  true, ra_->C->profile_rtm());
13009   %}
13010   ins_pipe(pipe_slow);
13011 %}
13012 
13013 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13014   predicate(!Compile::current()->use_rtm());
13015   match(Set cr (FastLock object box));
13016   effect(TEMP tmp, TEMP scr, USE_KILL box);
13017   ins_cost(300);
13018   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13019   ins_encode %{
13020     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13021                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13022   %}
13023   ins_pipe(pipe_slow);
13024 %}
13025 
13026 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13027   match(Set cr (FastUnlock object box));
13028   effect(TEMP tmp, USE_KILL box);
13029   ins_cost(300);
13030   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13031   ins_encode %{
13032     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13033   %}
13034   ins_pipe(pipe_slow);
13035 %}
13036 
13037 
13038 
13039 // ============================================================================
13040 // Safepoint Instruction
13041 instruct safePoint_poll(eFlagsReg cr) %{
13042   match(SafePoint);
13043   effect(KILL cr);
13044 
13045   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13046   // On SPARC that might be acceptable as we can generate the address with
13047   // just a sethi, saving an or.  By polling at offset 0 we can end up
13048   // putting additional pressure on the index-0 in the D$.  Because of
13049   // alignment (just like the situation at hand) the lower indices tend
13050   // to see more traffic.  It'd be better to change the polling address
13051   // to offset 0 of the last $line in the polling page.
13052 
13053   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13054   ins_cost(125);
13055   size(6);
13056   ins_encode( Safepoint_Poll() );
13057   ins_pipe( ialu_reg_mem );
13058 %}
13059 
13060 
13061 // ============================================================================
13062 // This name is KNOWN by the ADLC and cannot be changed.
13063 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13064 // for this node.
13065 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13066   match(Set dst (ThreadLocal));
13067   effect(DEF dst, KILL cr);
13068 
13069   format %{ "MOV    $dst, Thread::current()" %}
13070   ins_encode %{
13071     Register dstReg = as_Register($dst$$reg);
13072     __ get_thread(dstReg);
13073   %}
13074   ins_pipe( ialu_reg_fat );
13075 %}
13076 
13077 
13078 
13079 //----------PEEPHOLE RULES-----------------------------------------------------
13080 // These must follow all instruction definitions as they use the names
13081 // defined in the instruction definitions.
13082 //
13083 // peepmatch ( root_instr_name [preceding_instruction]* );
13084 //
13085 // peepconstraint %{
13086 // (instruction_number.operand_name relational_op instruction_number.operand_name
13087 //  [, ...] );
13088 // // instruction numbers are zero-based using left to right order in peepmatch
13089 //
13090 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13091 // // provide an instruction_number.operand_name for each operand that appears
13092 // // in the replacement instruction's match rule
13093 //
13094 // ---------VM FLAGS---------------------------------------------------------
13095 //
13096 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13097 //
13098 // Each peephole rule is given an identifying number starting with zero and
13099 // increasing by one in the order seen by the parser.  An individual peephole
13100 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13101 // on the command-line.
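      //
      // A hedged usage sketch (assuming a JVM build in which these develop
      // flags are accepted on the command line; the application name below is
      // only a placeholder):
      //
      //   java -XX:-OptoPeephole    MyApp     // disable every peephole rule
      //   java -XX:OptoPeepholeAt=3 MyApp     // enable only rule #3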
13102 //
13103 // ---------CURRENT LIMITATIONS----------------------------------------------
13104 //
13105 // Only match adjacent instructions in same basic block
13106 // Only equality constraints
13107 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13108 // Only one replacement instruction
13109 //
13110 // ---------EXAMPLE----------------------------------------------------------
13111 //
13112 // // pertinent parts of existing instructions in architecture description
13113 // instruct movI(rRegI dst, rRegI src) %{
13114 //   match(Set dst (CopyI src));
13115 // %}
13116 //
13117 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13118 //   match(Set dst (AddI dst src));
13119 //   effect(KILL cr);
13120 // %}
13121 //
13122 // // Change (inc mov) to lea
13123 // peephole %{
13124 //   // increment preceded by register-register move
13125 //   peepmatch ( incI_eReg movI );
13126 //   // require that the destination register of the increment
13127 //   // match the destination register of the move
13128 //   peepconstraint ( 0.dst == 1.dst );
13129 //   // construct a replacement instruction that sets
13130 //   // the destination to ( move's source register + one )
13131 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13132 // %}
13133 //
13134 // Implementation no longer uses movX instructions since
13135 // machine-independent system no longer uses CopyX nodes.
13136 //
13137 // peephole %{
13138 //   peepmatch ( incI_eReg movI );
13139 //   peepconstraint ( 0.dst == 1.dst );
13140 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13141 // %}
13142 //
13143 // peephole %{
13144 //   peepmatch ( decI_eReg movI );
13145 //   peepconstraint ( 0.dst == 1.dst );
13146 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13147 // %}
13148 //
13149 // peephole %{
13150 //   peepmatch ( addI_eReg_imm movI );
13151 //   peepconstraint ( 0.dst == 1.dst );
13152 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13153 // %}
13154 //
13155 // peephole %{
13156 //   peepmatch ( addP_eReg_imm movP );
13157 //   peepconstraint ( 0.dst == 1.dst );
13158 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13159 // %}
13160 
13161 // // Change load of spilled value to only a spill
13162 // instruct storeI(memory mem, rRegI src) %{
13163 //   match(Set mem (StoreI mem src));
13164 // %}
13165 //
13166 // instruct loadI(rRegI dst, memory mem) %{
13167 //   match(Set dst (LoadI mem));
13168 // %}
13169 //
13170 peephole %{
13171   peepmatch ( loadI storeI );
13172   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13173   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13174 %}
13175 
13176 //----------SMARTSPILL RULES---------------------------------------------------
13177 // These must follow all instruction definitions as they use the names
13178 // defined in the instruction definitions.