1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
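//
// For example, in the definitions below ECX(SOC, SOC, ...) is a pure scratch
// register in both the compiled-Java and C conventions, EBX(SOC, SOE, ...) is
// scratch for compiled Java but callee-saved for C, and ESP(NS, NS, ...) is
// never allocated or saved at all.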
  61 
  62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to the frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emission of assembly for a machnode. During the emission the FPU stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering forces the
// instruction encoding to play games with the register encode to
// correct for the 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between the register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
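// For illustration, using the x86 register numbering defined in the register
// block above (EAX = 0, ECX = 1, EDX = 2, EBX = 3, EBP = 5, EDI = 7):
//   HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/   -> the EDX:EAX pair
//   HIGH_FROM_LOW(1 /*ECX*/) == 3 /*EBX*/   -> the EBX:ECX pair
//   HIGH_FROM_LOW(5 /*EBP*/) == 7 /*EDI*/   -> the EDI:EBP pair
// matching the Long pairs listed in the register block above.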
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit-aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
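
// A worked example of the alignment trick above (assuming, for illustration,
// that fp_signmask_pool starts at an address whose low hex digit is 8):
//   &fp_signmask_pool[1*2] is start+16, so its low hex digit is also 8;
//   masking with ~0xF rounds it down by 8 bytes to a 16-byte boundary that is
//   still inside the pool, thanks to the extra 128 bits reserved below.
//   Each subsequent pool entry is 16 bytes further on, so after rounding the
//   four masks occupy disjoint 16-byte slots.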
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
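
// For illustration of how these masks are used (standard IEEE-754 sign-bit tricks):
//   AbsF: ANDPS  xmm, [float_signmask_pool]   -- clears the sign bit
//   NegF: XORPS  xmm, [float_signflip_pool]   -- flips the sign bit
// The double variants use the corresponding 64-bit masks with ANDPD/XORPD.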
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
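
// A sketch of where the constants above come from (x86 call encodings):
//   static call:  E8 cd -> 1 opcode byte + 4-byte rel32 = 5 bytes
//   dynamic call: the 5-byte MOV that sets up the inline cache + the 5-byte call = 10 bytes
// plus whatever pre_call_resets_size() emits (FLDCW and/or VZEROUPPER) in front.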
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 supports absolute addressing it normally doesn't, but with
// thread-local safepoint polling the poll address is loaded from the thread.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw/vzeroupper, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw/vzeroupper, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
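
// A worked padding example, assuming alignment_required() == 4 and no
// FLDCW/VZEROUPPER resets to skip: if a static call node would start at offset
// 0x12, its 4-byte displacement would begin at 0x12 + 1 = 0x13;
// align_up(0x13, 4) == 0x14, so 1 byte of padding is returned.  Once that
// padding is inserted the displacement starts at the 4-byte boundary 0x14 and
// cannot span a cache line, so it can be patched safely.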
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
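
// The ModRM byte built above packs three fields: mod (bits 7..6), reg (bits 5..3)
// and r/m (bits 2..0).  For example, the register-to-register copy in
// encode_Copy() below calls emit_rm(cbuf, 0x3, dst, src); with dst = EAX (0) and
// src = ECX (1) that emits 0xC1, which together with the 0x8B opcode encodes
// MOV EAX, ECX.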
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
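
// For illustration, the two encodings above differ only in displacement width:
//   short form:  opcode + ModRM + SIB + disp8  = 4 bytes  (disp in [-128, 127])
//   long form:   opcode + ModRM + SIB + disp32 = 7 bytes
// Both use ESP as the base, which is why the SIB byte is always required.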
 414 
// Encode a register/memory operand (rRegI ereg, memory mem) -- emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
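
// Two small worked examples of the encoding cases above:
//   [EAX] with no displacement:  index == 0x4 (none), scale == 0, base == EAX,
//     displace == 0  ->  a single ModRM byte with mod == 0x0.
//   [EBP] with no displacement:  mod == 0x0 with base EBP would mean an absolute
//     disp32 address, so the code falls into the 8-bit branch and emits
//     mod == 0x1 plus a zero disp8 instead.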
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
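
// The net effect of emit_cmpfp3 is the usual three-way compare result in dst:
//   unordered (PF set) or below (CF set)  ->  -1
//   equal     (ZF set)                    ->   0
//   greater   (CF and ZF clear)           ->  +1
// (setb(notEqual, dst) writes 1 when ZF is clear, and movzbl zero-extends it.)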
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before the MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
  // If the method set the FPU control word, restore it to the standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
    //                          since it maps more cases to a single-byte displacement.
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popq    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popq    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
1393 // This is UltraSparc-specific; true just means we have a fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to address of the branch.
1404   // On x86 a branch displacement is calculated relative to the address
1405   // of the next instruction.
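       // For example, a forward branch whose target is 100 bytes past the
       // branch instruction, emitted as a 2-byte short Jcc (br_size == 2),
       // encodes a displacement of 100 - 2 == 98, which fits in the signed
       // 8-bit range [-128, 127], so the short form can be used.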
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
1416   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Needs 2 CMOV's for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
1510     // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
1523 // Are floats converted to doubles when stored to the stack during deoptimization?
1524 // On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
1533 // arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546   // Use the hardware integer DIV instruction when
1547   // it is faster than code which uses a multiply.
1548   // Only when the constant divisor fits into 32 bits
1549   // (min_jint is excluded so that a negative divisor
1550   // can always be negated to a correct positive 32-bit value).
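       // For example, divisor == 10 passes both checks (hardware IDIV is used
       // when the CPU reports fast idiv), while 0x100000000L does not fit in
       // 32 bits and min_jint is excluded, so both of those fall back to the
       // multiply-based code.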
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value are known to be zero.
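     // For example, (AndL x (ConL 0xFF)) and (ConL 5) both have a known-zero
     // high half, while (ConL 0x100000000) does not, so it returns false.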
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
1603 // interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
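     //
     // As a purely illustrative sketch (not a rule defined at this point in
     // the file), an instruction adding two integer registers would combine
     // these pieces roughly as follows; the names are for illustration only
     // and actual rules may differ in detail:
     //
     //   instruct addI_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
     //     match(Set dst (AddI dst src));
     //     format %{ "ADD    $dst,$src" %}
     //     opcode(0x03);                       // primary: ADD r32,r/m32
     //     ins_encode(OpcP, RegReg(dst, src)); // opcode byte, then mod/rm
     //     // ins_pipe and other attributes omitted for brevity
     //   %}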
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : eax: dividend                          min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: eax: quotient  (= eax idiv reg)        min_int
1670     //         edx: remainder (= eax irem reg)        0
1671     //
1672     //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         eax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         edx,edx
1677     //  83 F9 FF             cmp         ecx,0FFFFFFFFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
1707   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
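         // For example, with the x86 group-1 ALU opcodes a $primary of 0x81
         // (op r/m32,imm32) becomes 0x83, the form whose 8-bit immediate is
         // sign-extended by the CPU.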
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
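         // A 64-bit byte reversal is done by byte-swapping each 32-bit half
         // and then exchanging the halves, e.g. 0x1122334455667788 becomes
         // 0x8877665544332211.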
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
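         // Jcc opcodes come in complementary pairs (e.g. 0x74 JE / 0x75 JNE),
         // so flipping the low bit of the condition code inverts the test.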
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap EBX and ECX before and after the
2095   //       CMPXCHG8B instruction because the instruction uses
2096   //       ECX as the high order word of the new value to store but
2097   //       our register encoding uses EBX.
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  EBX,ECX
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     emit_opcode(cbuf,0xF0);
2105     // CMPXCHG8 [Eptr]
2106     emit_opcode(cbuf,0x0F);
2107     emit_opcode(cbuf,0xC7);
2108     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109     // XCHG  EBX,ECX
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit operand-size prefix
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148     int res_encoding = $res$$reg;
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
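         // (5 == length of the MOV r32,imm32 below: one opcode byte plus a
         //  4-byte immediate, so the JNE skips exactly that instruction)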
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     int reg_encoding = $ereg$$reg;
2168     int base  = $mem$$base;
2169     int index = $mem$$index;
2170     int scale = $mem$$scale;
2171     int displace = $mem$$disp;
2172     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178     int base  = $mem$$base;
2179     int index = $mem$$index;
2180     int scale = $mem$$scale;
2181     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
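         // $tertiary selects the double-shift opcode: 0x0F 0xA4 is SHLD
         // (shift left), otherwise SHRD (shift right).  The double shift
         // moves bits across the register halves, and the $primary/$secondary
         // shift emitted below then shifts the remaining half.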
2187     int r1, r2;
2188     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190     emit_opcode(cbuf,0x0F);
2191     emit_opcode(cbuf,$tertiary);
2192     emit_rm(cbuf, 0x3, r1, r2);
2193     emit_d8(cbuf,$cnt$$constant);
2194     emit_d8(cbuf,$primary);
2195     emit_rm(cbuf, 0x3, $secondary, r1);
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200     emit_opcode( cbuf, 0x8B ); // Move
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_d8(cbuf,$primary);
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     int r1, r2;
2214     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216 
2217     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218     emit_rm(cbuf, 0x3, r1, r2);
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_opcode(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, r1);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_opcode(cbuf,0x33);  // XOR r2,r2
2225     emit_rm(cbuf, 0x3, r2, r2);
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     emit_opcode(cbuf,$opcode$$constant);
2232     int reg_encoding = $rm_reg$$reg;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp + $disp_for_half$$constant;
2237     relocInfo::relocType disp_reloc = relocInfo::none;
2238     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     int rm_byte_opcode = $rm_opcode$$constant;
2248     int base     = $mem$$base;
2249     int index    = $mem$$index;
2250     int scale    = $mem$$scale;
2251     int displace = $mem$$disp;
2252     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     int rm_byte_opcode = $rm_opcode$$constant;
2258     int base     = $mem$$base;
2259     int index    = $mem$$index;
2260     int scale    = $mem$$scale;
2261     int displace = $mem$$disp;
2262     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jump around the move if dst < src (dst is already the minimum)
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
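         // (2 == length of the MOV below: opcode byte plus mod/rm byte)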
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // jump around the move if dst > src (dst is already the maximum)
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
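         // x86 shifts only use the low 5 bits of CL, so for counts of 32 or
         // more the code below first moves $dst.lo into $dst.hi and clears
         // $dst.lo; the SHLD/SHL pair then shifts by (count mod 32).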
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2368     // SHL    $dst.lo,$shift
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392     // SHR    $dst.hi,$shift
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2417     // SAR    $dst.hi,$shift
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
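         // 0xDD 0xD8 encodes FSTP ST(0): it "stores" the top of the FPU stack
         // into itself and pops, which simply discards the value.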
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
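         // In the 0xD9 opcode group, reg/opcode field 0x02 is FST m32fp
         // (store) and 0x03 is FSTP m32fp (store and pop).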
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
2652     // jp  ::loop
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
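         // 0x0400 tests C2 (bit 10) of the FPU status word, which is set when
         // the compare was unordered (a NaN operand).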
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
2709 // less_result     =  1;
2710 // greater_result  = -1;
2711 // equal_result    = 0;
2712 // nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
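       // (Presumably marked BROKEN because one CMP/JNE/CMP sequence cannot
       //  leave a single flags result that is correct for signed less/greater
       //  tests and for equality on a two-word value at the same time; the
       //  long-compare encodings further below produce condition-specific
       //  flags instead.)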
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
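       // Sketch of the identity used above: for a 32-bit int x, (long)x is
       // the pair { lo = x, hi = x >> 31 } with an arithmetic shift, since
       // shifting right by 31 replicates the sign bit into all 32 high-order
       // bits (e.g. x = -5 gives hi = 0xFFFFFFFF, x = 5 gives hi = 0).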
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
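       // Sketch of the arithmetic: the one-operand IMUL leaves the full
       // 64-bit signed product in EDX:EAX, so shifting that product right by
       // cnt (32 <= cnt <= 63) reduces to SAR EDX,(cnt-32); for cnt == 32 the
       // result is simply EDX and the SAR is omitted.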
2800 
2801   // This version doesn't have the trailing ADD ESP,8 (the stack is not popped)
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2849     // ADD    EDX,$tmp
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
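       // Sketch of the decomposition the encoding above relies on, writing
       // the operands as 32-bit halves (x_hi:x_lo) and (y_hi:y_lo):
       //   (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)  mod 2^64
       //     = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32)
       // so one widening MUL (x_lo*y_lo) plus two truncating IMULs for the
       // cross terms suffice; the x_hi*y_hi term falls entirely outside the
       // low 64 bits.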
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2867     // ADD    EDX,$tmp
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
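       // Sketch of the CMP/SBB trick above: the low-word CMP leaves the
       // borrow of src1.lo - src2.lo in CF, and SBB then computes
       // tmp = src1.hi - src2.hi - CF, the high word of the full 64-bit
       // subtraction.  Its sign/overflow flags are what a signed 64-bit
       // less/greater test needs, without materializing the difference;
       // equality, by contrast, goes through the separate lo/hi CMP
       // encoding above, since ZF here only reflects the high word.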
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
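       // Sketch of the two-word negation above: -(hi:lo) = (-hi - borrow):(-lo),
       // where the borrow is 1 exactly when lo != 0 (negating a non-zero low
       // word wraps past zero).  NEG lo leaves that borrow in CF, and
       // SBB hi,0 subtracts it from the already-negated high word.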
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
2978   // Convert a double to an int.  Java semantics require we do complex
2979   // manglelations in the corner cases.  So we set the rounding mode to
2980   // 'zero', store the darned double down as an int, and reset the
2981   // rounding mode to 'nearest'.  The hardware throws an exception which
2982   // patches up the correct value directly to the stack.
2983   enc_class DPR2I_encoding( regDPR src ) %{
2984     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985     // exceptions here, so that a NaN or other corner-case value would
2986     // throw an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
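       // Informal note: Java's (int) of a double must yield 0 for NaN and
       // clamp out-of-range values to Integer.MIN_VALUE/MAX_VALUE, while a
       // masked FISTP stores the x87 "integer indefinite" 0x80000000 in all
       // of those cases.  Hence the fast path above only distrusts the single
       // value 0x80000000 (which is also the legitimate MIN_VALUE result) and
       // redoes just that case through the d2i wrapper stub.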
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3030     // Allocate two words (8 bytes) for the 64-bit result
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
3046     // Load the converted long; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050     emit_d8    (cbuf,0xFA);       // rdx
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
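       // Informal note: same idea as DPR2I above, but the 64-bit integer
       // indefinite is 0x8000000000000000, so the sentinel test is the
       // CMP EDX,0x80000000 / TEST EAX,EAX pair before falling into the
       // d2l wrapper slow path.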
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3076     // FADD   ST,src2  /* D8 C0+i */
3077     emit_opcode(cbuf, 0xD8);
3078     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3079     // could use FADDP  src2,fpST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top)
3090     // FSUB   ST,$src1
3091     emit_opcode(cbuf, 0xD8);
3092     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094     // FDIV   ST,$src2
3095     emit_opcode(cbuf, 0xD8);
3096     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
3105     // FMUL   ST,src2  /* D8 C8+i */
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top)
3113     // FADD   ST,$src  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x05;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);
3143     int rm_byte_opcode = 0x07;
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150   %}
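       // Informal note: both volatile-long encodings above go through the x87
       // unit because FILD/FISTP move all 64 bits in one memory access, which
       // the 32-bit integer registers cannot do; that single access is what
       // makes the load/store atomic on IA-32.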
3151 
3152   // Safepoint Poll.  This polls the safepoint page, and causes an
3153   // exception if it is not readable.  Unfortunately, it kills the condition
3154   // code in the process.
3155   // We currently use TESTL [spp],EDI
3156   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
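       // (Informal note: the polling page is made unreadable when the VM
       //  needs a safepoint, so this read faults and the signal handler
       //  brings the thread to a stop; otherwise the TEST is cheap apart
       //  from clobbering the condition codes, as noted above.)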
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160     emit_opcode(cbuf,0x85);
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163   %}
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
3210 //         are owned by the CALLEE.  Holes should not be necessary in the
3211 //         incoming area, as the Java calling convention is completely under
3212 //         the control of the AD file.  Doubles can be sorted and packed to
3213 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
3225   // These three registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted Java to compiled Java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for
3250   // return address and one for rbp, (must save rbp)
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262   // Otherwise, it is above the locks and verification slot and alignment word
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
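       // (Worked example, purely illustrative: with the 2 preserve slots
       //  above, no fixed slots, and 16-byte stack alignment (4 slots),
       //  align_up(2, 4) = 4, so the return address sits in stack slot
       //  4 - 1 = 3.)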
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
3275     // No difference between ingoing/outgoing, so just pass false
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating
3281   // arguments either in registers or in stack slots.  Passed an array
3282   // of ideal registers called "sig" and a "length" count.  Stack-slot
3283   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284   // arguments for a CALLEE.  Incoming stack arguments are
3285   // automatically biased by the preserve_stack_slots field above.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307   // Location of return values
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                 // non-matching short branch variant of some
3330                                 // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
3513 // Long Immediate zero
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127.
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Long Immediate: value that fits in a signed 32-bit int
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 // Double Immediate zero
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero
3596 operand immD0() %{
3597   // Do additional (and counter-intuitive) test against NaN to work around VC++
3598   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599   // compare equal to -0.0.
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
3659 // Constants for sign-extending shift counts
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));
3773   match(RegI);
3774   match(eCXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));
3785   match(RegI);
3786   match(eBXRegI);
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));
3797   match(RegI);
3798   match(eAXRegI);
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808 // //
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // On Windows 95, EBP is not safe to use for implicit null tests.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));
3873   match(RegP);
3874   match(eCXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));
3885   match(RegP);
3886   match(eBXRegP);
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));
3937   match(RegL);
3938   match(eADXRegL);
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990   predicate(false);
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Float register operands
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operands
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operands
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // Float register operands
4100 operand vlRegF() %{
4101    constraint(ALLOC_IN_RC(float_reg_vl));
4102    match(RegF);
4103 
4104    format %{ %}
4105    interface(REG_INTER);
4106 %}
4107 
4108 // XMM Double register operands
4109 operand regD() %{
4110   predicate( UseSSE>=2 );
4111   constraint(ALLOC_IN_RC(double_reg_legacy));
4112   match(RegD);
4113   format %{ %}
4114   interface(REG_INTER);
4115 %}
4116 
4117 // Double register operands
4118 operand vlRegD() %{
4119    constraint(ALLOC_IN_RC(double_reg_vl));
4120    match(RegD);
4121 
4122    format %{ %}
4123    interface(REG_INTER);
4124 %}
4125 
4126 // Vectors: note that we use legacy registers to avoid extra (unneeded in the
4127 // 32-bit VM) runtime code generation via reg_class_dynamic.
4128 operand vecS() %{
4129   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4130   match(VecS);
4131 
4132   format %{ %}
4133   interface(REG_INTER);
4134 %}
4135 
4136 operand legVecS() %{
4137   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4138   match(VecS);
4139 
4140   format %{ %}
4141   interface(REG_INTER);
4142 %}
4143 
4144 operand vecD() %{
4145   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4146   match(VecD);
4147 
4148   format %{ %}
4149   interface(REG_INTER);
4150 %}
4151 
4152 operand legVecD() %{
4153   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4154   match(VecD);
4155 
4156   format %{ %}
4157   interface(REG_INTER);
4158 %}
4159 
4160 operand vecX() %{
4161   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4162   match(VecX);
4163 
4164   format %{ %}
4165   interface(REG_INTER);
4166 %}
4167 
4168 operand legVecX() %{
4169   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4170   match(VecX);
4171 
4172   format %{ %}
4173   interface(REG_INTER);
4174 %}
4175 
4176 operand vecY() %{
4177   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4178   match(VecY);
4179 
4180   format %{ %}
4181   interface(REG_INTER);
4182 %}
4183 
4184 operand legVecY() %{
4185   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186   match(VecY);
4187 
4188   format %{ %}
4189   interface(REG_INTER);
4190 %}
4191 
4192 //----------Memory Operands----------------------------------------------------
4193 // Direct Memory Operand
4194 operand direct(immP addr) %{
4195   match(addr);
4196 
4197   format %{ "[$addr]" %}
4198   interface(MEMORY_INTER) %{
4199     base(0xFFFFFFFF);
4200     index(0x4);
4201     scale(0x0);
4202     disp($addr);
4203   %}
4204 %}
4205 
4206 // Indirect Memory Operand
4207 operand indirect(eRegP reg) %{
4208   constraint(ALLOC_IN_RC(int_reg));
4209   match(reg);
4210 
4211   format %{ "[$reg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index(0x4);
4215     scale(0x0);
4216     disp(0x0);
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Short Offset Operand
4221 operand indOffset8(eRegP reg, immI8 off) %{
4222   match(AddP reg off);
4223 
4224   format %{ "[$reg + $off]" %}
4225   interface(MEMORY_INTER) %{
4226     base($reg);
4227     index(0x4);
4228     scale(0x0);
4229     disp($off);
4230   %}
4231 %}
4232 
4233 // Indirect Memory Plus Long Offset Operand
4234 operand indOffset32(eRegP reg, immI off) %{
4235   match(AddP reg off);
4236 
4237   format %{ "[$reg + $off]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index(0x4);
4241     scale(0x0);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 // Indirect Memory Plus Long Offset Operand
4247 operand indOffset32X(rRegI reg, immP off) %{
4248   match(AddP off reg);
4249 
4250   format %{ "[$reg + $off]" %}
4251   interface(MEMORY_INTER) %{
4252     base($reg);
4253     index(0x4);
4254     scale(0x0);
4255     disp($off);
4256   %}
4257 %}
4258 
4259 // Indirect Memory Plus Index Register Plus Offset Operand
4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261   match(AddP (AddP reg ireg) off);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $off + $ireg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($ireg);
4268     scale(0x0);
4269     disp($off);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Index Register Plus Offset Operand
4274 operand indIndex(eRegP reg, rRegI ireg) %{
4275   match(AddP reg ireg);
4276 
4277   op_cost(10);
4278   format %{"[$reg + $ireg]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index($ireg);
4282     scale(0x0);
4283     disp(0x0);
4284   %}
4285 %}
4286 
4287 // // -------------------------------------------------------------------------
4288 // // 486 architecture doesn't support "scale * index + offset" without a base
4289 // // -------------------------------------------------------------------------
4290 // // Scaled Memory Operands
4291 // // Indirect Memory Times Scale Plus Offset Operand
4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293 //   match(AddP off (LShiftI ireg scale));
4294 //
4295 //   op_cost(10);
4296 //   format %{"[$off + $ireg << $scale]" %}
4297 //   interface(MEMORY_INTER) %{
4298 //     base(0x4);
4299 //     index($ireg);
4300 //     scale($scale);
4301 //     disp($off);
4302 //   %}
4303 // %}
4304 
4305 // Indirect Memory Times Scale Plus Index Register
4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307   match(AddP reg (LShiftI ireg scale));
4308 
4309   op_cost(10);
4310   format %{"[$reg + $ireg << $scale]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index($ireg);
4314     scale($scale);
4315     disp(0x0);
4316   %}
4317 %}
4318 
4319 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321   match(AddP (AddP reg (LShiftI ireg scale)) off);
4322 
4323   op_cost(10);
4324   format %{"[$reg + $off + $ireg << $scale]" %}
4325   interface(MEMORY_INTER) %{
4326     base($reg);
4327     index($ireg);
4328     scale($scale);
4329     disp($off);
4330   %}
4331 %}
4332 
4333 //----------Load Long Memory Operands------------------------------------------
4334 // The load-long idiom will use its address expression again after loading
4335 // the first word of the long.  If the load-long destination overlaps with
4336 // registers used in the addressing expression, the 2nd half will be loaded
4337 // from a clobbered address.  Fix this by requiring that load-long use
4338 // address registers that do not overlap with the load-long target.
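     // (Illustrative hazard, not generated code: loading a long at [EAX+8]
     //  into EDX:EAX as "MOV EAX,[EAX+8]; MOV EDX,[EAX+12]" reads the second
     //  word through the already-overwritten EAX.  Restricting the base to
     //  ESI, which the load-long destination never uses, rules this out.)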
4339 
4340 // load-long support
4341 operand load_long_RegP() %{
4342   constraint(ALLOC_IN_RC(esi_reg));
4343   match(RegP);
4344   match(eSIRegP);
4345   op_cost(100);
4346   format %{  %}
4347   interface(REG_INTER);
4348 %}
4349 
4350 // Indirect Memory Operand Long
4351 operand load_long_indirect(load_long_RegP reg) %{
4352   constraint(ALLOC_IN_RC(esi_reg));
4353   match(reg);
4354 
4355   format %{ "[$reg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);
4358     index(0x4);
4359     scale(0x0);
4360     disp(0x0);
4361   %}
4362 %}
4363 
4364 // Indirect Memory Plus Long Offset Operand
4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366   match(AddP reg off);
4367 
4368   format %{ "[$reg + $off]" %}
4369   interface(MEMORY_INTER) %{
4370     base($reg);
4371     index(0x4);
4372     scale(0x0);
4373     disp($off);
4374   %}
4375 %}
4376 
4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4378 
4379 
4380 //----------Special Memory Operands--------------------------------------------
4381 // Stack Slot Operand - This operand is used for loading and storing temporary
4382 //                      values on the stack where a match requires a value to
4383 //                      flow through memory.
4384 operand stackSlotP(sRegP reg) %{
4385   constraint(ALLOC_IN_RC(stack_slots));
4386   // No match rule because this operand is only generated in matching
4387   format %{ "[$reg]" %}
4388   interface(MEMORY_INTER) %{
4389     base(0x4);   // ESP
4390     index(0x4);  // No Index
4391     scale(0x0);  // No Scale
4392     disp($reg);  // Stack Offset
4393   %}
4394 %}
4395 
4396 operand stackSlotI(sRegI reg) %{
4397   constraint(ALLOC_IN_RC(stack_slots));
4398   // No match rule because this operand is only generated in matching
4399   format %{ "[$reg]" %}
4400   interface(MEMORY_INTER) %{
4401     base(0x4);   // ESP
4402     index(0x4);  // No Index
4403     scale(0x0);  // No Scale
4404     disp($reg);  // Stack Offset
4405   %}
4406 %}
4407 
4408 operand stackSlotF(sRegF reg) %{
4409   constraint(ALLOC_IN_RC(stack_slots));
4410   // No match rule because this operand is only generated in matching
4411   format %{ "[$reg]" %}
4412   interface(MEMORY_INTER) %{
4413     base(0x4);   // ESP
4414     index(0x4);  // No Index
4415     scale(0x0);  // No Scale
4416     disp($reg);  // Stack Offset
4417   %}
4418 %}
4419 
4420 operand stackSlotD(sRegD reg) %{
4421   constraint(ALLOC_IN_RC(stack_slots));
4422   // No match rule because this operand is only generated in matching
4423   format %{ "[$reg]" %}
4424   interface(MEMORY_INTER) %{
4425     base(0x4);   // ESP
4426     index(0x4);  // No Index
4427     scale(0x0);  // No Scale
4428     disp($reg);  // Stack Offset
4429   %}
4430 %}
4431 
4432 operand stackSlotL(sRegL reg) %{
4433   constraint(ALLOC_IN_RC(stack_slots));
4434   // No match rule because this operand is only generated in matching
4435   format %{ "[$reg]" %}
4436   interface(MEMORY_INTER) %{
4437     base(0x4);   // ESP
4438     index(0x4);  // No Index
4439     scale(0x0);  // No Scale
4440     disp($reg);  // Stack Offset
4441   %}
4442 %}
4443 
4444 //----------Memory Operands - Win95 Implicit Null Variants----------------
4445 // Indirect Memory Operand
4446 operand indirect_win95_safe(eRegP_no_EBP reg)
4447 %{
4448   constraint(ALLOC_IN_RC(int_reg));
4449   match(reg);
4450 
4451   op_cost(100);
4452   format %{ "[$reg]" %}
4453   interface(MEMORY_INTER) %{
4454     base($reg);
4455     index(0x4);
4456     scale(0x0);
4457     disp(0x0);
4458   %}
4459 %}
4460 
4461 // Indirect Memory Plus Short Offset Operand
4462 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4463 %{
4464   match(AddP reg off);
4465 
4466   op_cost(100);
4467   format %{ "[$reg + $off]" %}
4468   interface(MEMORY_INTER) %{
4469     base($reg);
4470     index(0x4);
4471     scale(0x0);
4472     disp($off);
4473   %}
4474 %}
4475 
4476 // Indirect Memory Plus Long Offset Operand
4477 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4478 %{
4479   match(AddP reg off);
4480 
4481   op_cost(100);
4482   format %{ "[$reg + $off]" %}
4483   interface(MEMORY_INTER) %{
4484     base($reg);
4485     index(0x4);
4486     scale(0x0);
4487     disp($off);
4488   %}
4489 %}
4490 
4491 // Indirect Memory Plus Index Register Plus Offset Operand
4492 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4493 %{
4494   match(AddP (AddP reg ireg) off);
4495 
4496   op_cost(100);
4497   format %{"[$reg + $off + $ireg]" %}
4498   interface(MEMORY_INTER) %{
4499     base($reg);
4500     index($ireg);
4501     scale(0x0);
4502     disp($off);
4503   %}
4504 %}
4505 
4506 // Indirect Memory Times Scale Plus Index Register
4507 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4508 %{
4509   match(AddP reg (LShiftI ireg scale));
4510 
4511   op_cost(100);
4512   format %{"[$reg + $ireg << $scale]" %}
4513   interface(MEMORY_INTER) %{
4514     base($reg);
4515     index($ireg);
4516     scale($scale);
4517     disp(0x0);
4518   %}
4519 %}
4520 
4521 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4522 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4523 %{
4524   match(AddP (AddP reg (LShiftI ireg scale)) off);
4525 
4526   op_cost(100);
4527   format %{"[$reg + $off + $ireg << $scale]" %}
4528   interface(MEMORY_INTER) %{
4529     base($reg);
4530     index($ireg);
4531     scale($scale);
4532     disp($off);
4533   %}
4534 %}
4535 
4536 //----------Conditional Branch Operands----------------------------------------
4537 // Comparison Op  - This is the operation of the comparison, and is limited to
4538 //                  the following set of codes:
4539 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4540 //
4541 // Other attributes of the comparison, such as unsignedness, are specified
4542 // by the comparison instruction that sets a condition code flags register.
4543 // That result is represented by a flags operand whose subtype is appropriate
4544 // to the unsignedness (etc.) of the comparison.
4545 //
4546 // Later, the instruction which matches both the Comparison Op (a Bool) and
4547 // the flags (produced by the Cmp) specifies the coding of the comparison op
4548 // by matching a specific subtype of Bool operand below, such as cmpOpU.
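//
// For example (an illustrative sketch, not an additional operand): a signed
// test such as "x < y" becomes a CmpI that sets the flags plus a Bool with
// BoolTest::lt, which matches cmpOp below with $cmpcode == 0xC.  An encode
// class can then fold that code into the x86 Jcc opcodes: 0x70|$cmpcode for
// the short form (0x7C = JL) or 0x0F, 0x80|$cmpcode for the near form.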
4549 
// Comparison Code
4551 operand cmpOp() %{
4552   match(Bool);
4553 
4554   format %{ "" %}
4555   interface(COND_INTER) %{
4556     equal(0x4, "e");
4557     not_equal(0x5, "ne");
4558     less(0xC, "l");
4559     greater_equal(0xD, "ge");
4560     less_equal(0xE, "le");
4561     greater(0xF, "g");
4562     overflow(0x0, "o");
4563     no_overflow(0x1, "no");
4564   %}
4565 %}
4566 
4567 // Comparison Code, unsigned compare.  Used by FP also, with
4568 // C2 (unordered) turned into GT or LT already.  The other bits
4569 // C0 and C3 are turned into Carry & Zero flags.
4570 operand cmpOpU() %{
4571   match(Bool);
4572 
4573   format %{ "" %}
4574   interface(COND_INTER) %{
4575     equal(0x4, "e");
4576     not_equal(0x5, "ne");
4577     less(0x2, "b");
4578     greater_equal(0x3, "nb");
4579     less_equal(0x6, "be");
4580     greater(0x7, "nbe");
4581     overflow(0x0, "o");
4582     no_overflow(0x1, "no");
4583   %}
4584 %}
4585 
4586 // Floating comparisons that don't require any fixup for the unordered case
4587 operand cmpOpUCF() %{
4588   match(Bool);
4589   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4590             n->as_Bool()->_test._test == BoolTest::ge ||
4591             n->as_Bool()->_test._test == BoolTest::le ||
4592             n->as_Bool()->_test._test == BoolTest::gt);
4593   format %{ "" %}
4594   interface(COND_INTER) %{
4595     equal(0x4, "e");
4596     not_equal(0x5, "ne");
4597     less(0x2, "b");
4598     greater_equal(0x3, "nb");
4599     less_equal(0x6, "be");
4600     greater(0x7, "nbe");
4601     overflow(0x0, "o");
4602     no_overflow(0x1, "no");
4603   %}
4604 %}
4605 
4606 
4607 // Floating comparisons that can be fixed up with extra conditional jumps
4608 operand cmpOpUCF2() %{
4609   match(Bool);
4610   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4611             n->as_Bool()->_test._test == BoolTest::eq);
4612   format %{ "" %}
4613   interface(COND_INTER) %{
4614     equal(0x4, "e");
4615     not_equal(0x5, "ne");
4616     less(0x2, "b");
4617     greater_equal(0x3, "nb");
4618     less_equal(0x6, "be");
4619     greater(0x7, "nbe");
4620     overflow(0x0, "o");
4621     no_overflow(0x1, "no");
4622   %}
4623 %}
4624 
4625 // Comparison Code for FP conditional move
4626 operand cmpOp_fcmov() %{
4627   match(Bool);
4628 
4629   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4630             n->as_Bool()->_test._test != BoolTest::no_overflow);
4631   format %{ "" %}
4632   interface(COND_INTER) %{
4633     equal        (0x0C8);
4634     not_equal    (0x1C8);
4635     less         (0x0C0);
4636     greater_equal(0x1C0);
4637     less_equal   (0x0D0);
4638     greater      (0x1D0);
4639     overflow(0x0, "o"); // not really supported by the instruction
4640     no_overflow(0x1, "no"); // not really supported by the instruction
4641   %}
4642 %}
4643 
4644 // Comparison Code used in long compares
4645 operand cmpOp_commute() %{
4646   match(Bool);
4647 
4648   format %{ "" %}
4649   interface(COND_INTER) %{
4650     equal(0x4, "e");
4651     not_equal(0x5, "ne");
4652     less(0xF, "g");
4653     greater_equal(0xE, "le");
4654     less_equal(0xD, "ge");
4655     greater(0xC, "l");
4656     overflow(0x0, "o");
4657     no_overflow(0x1, "no");
4658   %}
4659 %}
4660 
4661 // Comparison Code used in unsigned long compares
4662 operand cmpOpU_commute() %{
4663   match(Bool);
4664 
4665   format %{ "" %}
4666   interface(COND_INTER) %{
4667     equal(0x4, "e");
4668     not_equal(0x5, "ne");
4669     less(0x7, "nbe");
4670     greater_equal(0x6, "be");
4671     less_equal(0x3, "nb");
4672     greater(0x2, "b");
4673     overflow(0x0, "o");
4674     no_overflow(0x1, "no");
4675   %}
4676 %}
4677 
4678 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4680 // instruction definitions by not requiring the AD writer to specify separate
4681 // instructions for every form of operand when the instruction accepts
4682 // multiple operand types with the same basic encoding and format.  The classic
4683 // case of this is memory operands.
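//
// For example, an instruct whose operand is declared as "memory mem" (such
// as loadI further below) matches any of the addressing forms grouped into
// the "memory" opclass -- [reg], [reg+disp8], [reg+disp32], [reg+index+disp],
// [reg+index<<scale], and so on -- with a single definition instead of one
// per addressing mode.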
4684 
4685 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4686                indIndex, indIndexScale, indIndexScaleOffset);
4687 
// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (as is done when working on static globals).
4691 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4692                     indIndex, indIndexScale, indIndexScaleOffset);
4693 
4694 
4695 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4697 pipeline %{
4698 
4699 //----------ATTRIBUTES---------------------------------------------------------
4700 attributes %{
  variable_size_instructions;        // Variable-size instructions
4702   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
4704   instruction_fetch_unit_size = 16;  // The processor fetches one line
4705   instruction_fetch_units = 1;       // of 16 bytes
4706 
4707   // List of nop instructions
4708   nops( MachNop );
4709 %}
4710 
4711 //----------RESOURCES----------------------------------------------------------
4712 // Resources are the functional units available to the machine
4713 
4714 // Generic P2/P3 pipeline
4715 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4716 // 3 instructions decoded per cycle.
4717 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4718 // 2 ALU op, only ALU0 handles mul/div instructions.
4719 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4720            MS0, MS1, MEM = MS0 | MS1,
4721            BR, FPU,
4722            ALU0, ALU1, ALU = ALU0 | ALU1 );
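
// For example, a pipe class that requests DECODE can issue on any of D0-D2,
// while one that names D0 directly (the "_fat" and memory classes below)
// must get the big decoder.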
4723 
4724 //----------PIPELINE DESCRIPTION-----------------------------------------------
4725 // Pipeline Description specifies the stages in the machine's pipeline
4726 
4727 // Generic P2/P3 pipeline
4728 pipe_desc(S0, S1, S2, S3, S4, S5);
4729 
4730 //----------PIPELINE CLASSES---------------------------------------------------
4731 // Pipeline Classes describe the stages in which input and output are
4732 // referenced by the hardware pipeline.
4733 
4734 // Naming convention: ialu or fpu
4735 // Then: _reg
4736 // Then: _reg if there is a 2nd register
4737 // Then: _long if it's a pair of instructions implementing a long
4738 // Then: _fat if it requires the big decoder
4739 //   Or: _mem if it requires the big decoder and a memory unit.
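//
// Example: "ialu_reg_long_fat" below is an integer ALU class whose operand
// is a register pair (long) and which needs the big decoder (D0), while
// "fpu_reg_mem" is an FPU class with a register destination and a memory
// source.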
4740 
4741 // Integer ALU reg operation
4742 pipe_class ialu_reg(rRegI dst) %{
4743     single_instruction;
4744     dst    : S4(write);
4745     dst    : S3(read);
4746     DECODE : S0;        // any decoder
4747     ALU    : S3;        // any alu
4748 %}
4749 
4750 // Long ALU reg operation
4751 pipe_class ialu_reg_long(eRegL dst) %{
4752     instruction_count(2);
4753     dst    : S4(write);
4754     dst    : S3(read);
4755     DECODE : S0(2);     // any 2 decoders
4756     ALU    : S3(2);     // both alus
4757 %}
4758 
4759 // Integer ALU reg operation using big decoder
4760 pipe_class ialu_reg_fat(rRegI dst) %{
4761     single_instruction;
4762     dst    : S4(write);
4763     dst    : S3(read);
4764     D0     : S0;        // big decoder only
4765     ALU    : S3;        // any alu
4766 %}
4767 
4768 // Long ALU reg operation using big decoder
4769 pipe_class ialu_reg_long_fat(eRegL dst) %{
4770     instruction_count(2);
4771     dst    : S4(write);
4772     dst    : S3(read);
4773     D0     : S0(2);     // big decoder only; twice
4774     ALU    : S3(2);     // any 2 alus
4775 %}
4776 
4777 // Integer ALU reg-reg operation
4778 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4779     single_instruction;
4780     dst    : S4(write);
4781     src    : S3(read);
4782     DECODE : S0;        // any decoder
4783     ALU    : S3;        // any alu
4784 %}
4785 
4786 // Long ALU reg-reg operation
4787 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4788     instruction_count(2);
4789     dst    : S4(write);
4790     src    : S3(read);
4791     DECODE : S0(2);     // any 2 decoders
4792     ALU    : S3(2);     // both alus
4793 %}
4794 
4795 // Integer ALU reg-reg operation
4796 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4797     single_instruction;
4798     dst    : S4(write);
4799     src    : S3(read);
4800     D0     : S0;        // big decoder only
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Long ALU reg-reg operation
4805 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4806     instruction_count(2);
4807     dst    : S4(write);
4808     src    : S3(read);
4809     D0     : S0(2);     // big decoder only; twice
4810     ALU    : S3(2);     // both alus
4811 %}
4812 
4813 // Integer ALU reg-mem operation
4814 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4815     single_instruction;
4816     dst    : S5(write);
4817     mem    : S3(read);
4818     D0     : S0;        // big decoder only
4819     ALU    : S4;        // any alu
4820     MEM    : S3;        // any mem
4821 %}
4822 
4823 // Long ALU reg-mem operation
4824 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4825     instruction_count(2);
4826     dst    : S5(write);
4827     mem    : S3(read);
4828     D0     : S0(2);     // big decoder only; twice
4829     ALU    : S4(2);     // any 2 alus
4830     MEM    : S3(2);     // both mems
4831 %}
4832 
4833 // Integer mem operation (prefetch)
4834 pipe_class ialu_mem(memory mem)
4835 %{
4836     single_instruction;
4837     mem    : S3(read);
4838     D0     : S0;        // big decoder only
4839     MEM    : S3;        // any mem
4840 %}
4841 
4842 // Integer Store to Memory
4843 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4844     single_instruction;
4845     mem    : S3(read);
4846     src    : S5(read);
4847     D0     : S0;        // big decoder only
4848     ALU    : S4;        // any alu
4849     MEM    : S3;
4850 %}
4851 
4852 // Long Store to Memory
4853 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4854     instruction_count(2);
4855     mem    : S3(read);
4856     src    : S5(read);
4857     D0     : S0(2);     // big decoder only; twice
4858     ALU    : S4(2);     // any 2 alus
4859     MEM    : S3(2);     // Both mems
4860 %}
4861 
4862 // Integer Store to Memory
4863 pipe_class ialu_mem_imm(memory mem) %{
4864     single_instruction;
4865     mem    : S3(read);
4866     D0     : S0;        // big decoder only
4867     ALU    : S4;        // any alu
4868     MEM    : S3;
4869 %}
4870 
4871 // Integer ALU0 reg-reg operation
4872 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4873     single_instruction;
4874     dst    : S4(write);
4875     src    : S3(read);
4876     D0     : S0;        // Big decoder only
4877     ALU0   : S3;        // only alu0
4878 %}
4879 
4880 // Integer ALU0 reg-mem operation
4881 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4882     single_instruction;
4883     dst    : S5(write);
4884     mem    : S3(read);
4885     D0     : S0;        // big decoder only
4886     ALU0   : S4;        // ALU0 only
4887     MEM    : S3;        // any mem
4888 %}
4889 
4890 // Integer ALU reg-reg operation
4891 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4892     single_instruction;
4893     cr     : S4(write);
4894     src1   : S3(read);
4895     src2   : S3(read);
4896     DECODE : S0;        // any decoder
4897     ALU    : S3;        // any alu
4898 %}
4899 
4900 // Integer ALU reg-imm operation
4901 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4902     single_instruction;
4903     cr     : S4(write);
4904     src1   : S3(read);
4905     DECODE : S0;        // any decoder
4906     ALU    : S3;        // any alu
4907 %}
4908 
4909 // Integer ALU reg-mem operation
4910 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4911     single_instruction;
4912     cr     : S4(write);
4913     src1   : S3(read);
4914     src2   : S3(read);
4915     D0     : S0;        // big decoder only
4916     ALU    : S4;        // any alu
4917     MEM    : S3;
4918 %}
4919 
4920 // Conditional move reg-reg
4921 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4922     instruction_count(4);
4923     y      : S4(read);
4924     q      : S3(read);
4925     p      : S3(read);
4926     DECODE : S0(4);     // any decoder
4927 %}
4928 
4929 // Conditional move reg-reg
4930 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4931     single_instruction;
4932     dst    : S4(write);
4933     src    : S3(read);
4934     cr     : S3(read);
4935     DECODE : S0;        // any decoder
4936 %}
4937 
4938 // Conditional move reg-mem
4939 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4940     single_instruction;
4941     dst    : S4(write);
4942     src    : S3(read);
4943     cr     : S3(read);
4944     DECODE : S0;        // any decoder
4945     MEM    : S3;
4946 %}
4947 
4948 // Conditional move reg-reg long
4949 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4950     single_instruction;
4951     dst    : S4(write);
4952     src    : S3(read);
4953     cr     : S3(read);
4954     DECODE : S0(2);     // any 2 decoders
4955 %}
4956 
4957 // Conditional move double reg-reg
4958 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4959     single_instruction;
4960     dst    : S4(write);
4961     src    : S3(read);
4962     cr     : S3(read);
4963     DECODE : S0;        // any decoder
4964 %}
4965 
4966 // Float reg-reg operation
4967 pipe_class fpu_reg(regDPR dst) %{
4968     instruction_count(2);
4969     dst    : S3(read);
4970     DECODE : S0(2);     // any 2 decoders
4971     FPU    : S3;
4972 %}
4973 
4974 // Float reg-reg operation
4975 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4976     instruction_count(2);
4977     dst    : S4(write);
4978     src    : S3(read);
4979     DECODE : S0(2);     // any 2 decoders
4980     FPU    : S3;
4981 %}
4982 
4983 // Float reg-reg operation
4984 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4985     instruction_count(3);
4986     dst    : S4(write);
4987     src1   : S3(read);
4988     src2   : S3(read);
4989     DECODE : S0(3);     // any 3 decoders
4990     FPU    : S3(2);
4991 %}
4992 
4993 // Float reg-reg operation
4994 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4995     instruction_count(4);
4996     dst    : S4(write);
4997     src1   : S3(read);
4998     src2   : S3(read);
4999     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
5001     FPU    : S3(2);
5002 %}
5003 
5004 // Float reg-reg operation
5005 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5006     instruction_count(4);
5007     dst    : S4(write);
5008     src1   : S3(read);
5009     src2   : S3(read);
5010     src3   : S3(read);
5011     DECODE : S1(3);     // any 3 decoders
5012     D0     : S0;        // Big decoder only
5013     FPU    : S3(2);
5014     MEM    : S3;
5015 %}
5016 
5017 // Float reg-mem operation
5018 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5019     instruction_count(2);
5020     dst    : S5(write);
5021     mem    : S3(read);
5022     D0     : S0;        // big decoder only
5023     DECODE : S1;        // any decoder for FPU POP
5024     FPU    : S4;
5025     MEM    : S3;        // any mem
5026 %}
5027 
5028 // Float reg-mem operation
5029 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5030     instruction_count(3);
5031     dst    : S5(write);
5032     src1   : S3(read);
5033     mem    : S3(read);
5034     D0     : S0;        // big decoder only
5035     DECODE : S1(2);     // any decoder for FPU POP
5036     FPU    : S4;
5037     MEM    : S3;        // any mem
5038 %}
5039 
5040 // Float mem-reg operation
5041 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5042     instruction_count(2);
5043     src    : S5(read);
5044     mem    : S3(read);
5045     DECODE : S0;        // any decoder for FPU PUSH
5046     D0     : S1;        // big decoder only
5047     FPU    : S4;
5048     MEM    : S3;        // any mem
5049 %}
5050 
5051 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5052     instruction_count(3);
5053     src1   : S3(read);
5054     src2   : S3(read);
5055     mem    : S3(read);
5056     DECODE : S0(2);     // any decoder for FPU PUSH
5057     D0     : S1;        // big decoder only
5058     FPU    : S4;
5059     MEM    : S3;        // any mem
5060 %}
5061 
5062 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5063     instruction_count(3);
5064     src1   : S3(read);
5065     src2   : S3(read);
5066     mem    : S4(read);
5067     DECODE : S0;        // any decoder for FPU PUSH
5068     D0     : S0(2);     // big decoder only
5069     FPU    : S4;
5070     MEM    : S3(2);     // any mem
5071 %}
5072 
5073 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5074     instruction_count(2);
5075     src1   : S3(read);
5076     dst    : S4(read);
5077     D0     : S0(2);     // big decoder only
5078     MEM    : S3(2);     // any mem
5079 %}
5080 
5081 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5082     instruction_count(3);
5083     src1   : S3(read);
5084     src2   : S3(read);
5085     dst    : S4(read);
5086     D0     : S0(3);     // big decoder only
5087     FPU    : S4;
5088     MEM    : S3(3);     // any mem
5089 %}
5090 
5091 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5092     instruction_count(3);
5093     src1   : S4(read);
5094     mem    : S4(read);
5095     DECODE : S0;        // any decoder for FPU PUSH
5096     D0     : S0(2);     // big decoder only
5097     FPU    : S4;
5098     MEM    : S3(2);     // any mem
5099 %}
5100 
5101 // Float load constant
5102 pipe_class fpu_reg_con(regDPR dst) %{
5103     instruction_count(2);
5104     dst    : S5(write);
5105     D0     : S0;        // big decoder only for the load
5106     DECODE : S1;        // any decoder for FPU POP
5107     FPU    : S4;
5108     MEM    : S3;        // any mem
5109 %}
5110 
5111 // Float load constant
5112 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5113     instruction_count(3);
5114     dst    : S5(write);
5115     src    : S3(read);
5116     D0     : S0;        // big decoder only for the load
5117     DECODE : S1(2);     // any decoder for FPU POP
5118     FPU    : S4;
5119     MEM    : S3;        // any mem
5120 %}
5121 
// Unconditional branch
5123 pipe_class pipe_jmp( label labl ) %{
5124     single_instruction;
5125     BR   : S3;
5126 %}
5127 
5128 // Conditional branch
5129 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5130     single_instruction;
5131     cr    : S1(read);
5132     BR    : S3;
5133 %}
5134 
5135 // Allocation idiom
5136 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5137     instruction_count(1); force_serialization;
5138     fixed_latency(6);
5139     heap_ptr : S3(read);
5140     DECODE   : S0(3);
5141     D0       : S2;
5142     MEM      : S3;
5143     ALU      : S3(2);
5144     dst      : S5(write);
5145     BR       : S5;
5146 %}
5147 
5148 // Generic big/slow expanded idiom
5149 pipe_class pipe_slow(  ) %{
5150     instruction_count(10); multiple_bundles; force_serialization;
5151     fixed_latency(100);
5152     D0  : S0(2);
5153     MEM : S3(2);
5154 %}
5155 
5156 // The real do-nothing guy
5157 pipe_class empty( ) %{
5158     instruction_count(0);
5159 %}
5160 
5161 // Define the class for the Nop node
5162 define %{
5163    MachNop = empty;
5164 %}
5165 
5166 %}
5167 
5168 //----------INSTRUCTIONS-------------------------------------------------------
5169 //
5170 // match      -- States which machine-independent subtree may be replaced
5171 //               by this instruction.
5172 // ins_cost   -- The estimated cost of this instruction is used by instruction
5173 //               selection to identify a minimum cost tree of machine
5174 //               instructions that matches a tree of machine-independent
5175 //               instructions.
5176 // format     -- A string providing the disassembly for this instruction.
5177 //               The value of an instruction's operand may be inserted
5178 //               by referring to it with a '$' prefix.
5179 // opcode     -- Three instruction opcodes may be provided.  These are referred
5180 //               to within an encode class as $primary, $secondary, and $tertiary
5181 //               respectively.  The primary opcode is commonly used to
5182 //               indicate the type of machine instruction, while secondary
5183 //               and tertiary are often used for prefix options or addressing
5184 //               modes.
5185 // ins_encode -- A list of encode classes with parameters. The encode class
5186 //               name must have been defined in an 'enc_class' specification
5187 //               in the encode section of the architecture description.
5188 
5189 //----------BSWAP-Instruction--------------------------------------------------
5190 instruct bytes_reverse_int(rRegI dst) %{
5191   match(Set dst (ReverseBytesI dst));
5192 
5193   format %{ "BSWAP  $dst" %}
5194   opcode(0x0F, 0xC8);
5195   ins_encode( OpcP, OpcSReg(dst) );
5196   ins_pipe( ialu_reg );
5197 %}
5198 
5199 instruct bytes_reverse_long(eRegL dst) %{
5200   match(Set dst (ReverseBytesL dst));
5201 
5202   format %{ "BSWAP  $dst.lo\n\t"
5203             "BSWAP  $dst.hi\n\t"
5204             "XCHG   $dst.lo $dst.hi" %}
5205 
5206   ins_cost(125);
5207   ins_encode( bswap_long_bytes(dst) );
5208   ins_pipe( ialu_reg_reg);
5209 %}
5210 
5211 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5212   match(Set dst (ReverseBytesUS dst));
5213   effect(KILL cr);
5214 
5215   format %{ "BSWAP  $dst\n\t"
5216             "SHR    $dst,16\n\t" %}
5217   ins_encode %{
5218     __ bswapl($dst$$Register);
5219     __ shrl($dst$$Register, 16);
5220   %}
5221   ins_pipe( ialu_reg );
5222 %}
5223 
5224 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5225   match(Set dst (ReverseBytesS dst));
5226   effect(KILL cr);
5227 
5228   format %{ "BSWAP  $dst\n\t"
5229             "SAR    $dst,16\n\t" %}
5230   ins_encode %{
5231     __ bswapl($dst$$Register);
5232     __ sarl($dst$$Register, 16);
5233   %}
5234   ins_pipe( ialu_reg );
5235 %}
5236 
5237 
5238 //---------- Zeros Count Instructions ------------------------------------------
5239 
5240 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5241   predicate(UseCountLeadingZerosInstruction);
5242   match(Set dst (CountLeadingZerosI src));
5243   effect(KILL cr);
5244 
5245   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5246   ins_encode %{
5247     __ lzcntl($dst$$Register, $src$$Register);
5248   %}
5249   ins_pipe(ialu_reg);
5250 %}
5251 
5252 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5253   predicate(!UseCountLeadingZerosInstruction);
5254   match(Set dst (CountLeadingZerosI src));
5255   effect(KILL cr);
5256 
5257   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5258             "JNZ    skip\n\t"
5259             "MOV    $dst, -1\n"
5260       "skip:\n\t"
5261             "NEG    $dst\n\t"
5262             "ADD    $dst, 31" %}
5263   ins_encode %{
5264     Register Rdst = $dst$$Register;
5265     Register Rsrc = $src$$Register;
5266     Label skip;
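    // BSR yields the index of the highest set bit, so CLZ(x) = 31 - BSR(x),
    // computed below as NEG followed by ADD 31.  BSR of zero sets ZF and
    // leaves the destination undefined, so force it to -1 to make the final
    // result 32.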
5267     __ bsrl(Rdst, Rsrc);
5268     __ jccb(Assembler::notZero, skip);
5269     __ movl(Rdst, -1);
5270     __ bind(skip);
5271     __ negl(Rdst);
5272     __ addl(Rdst, BitsPerInt - 1);
5273   %}
5274   ins_pipe(ialu_reg);
5275 %}
5276 
5277 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5278   predicate(UseCountLeadingZerosInstruction);
5279   match(Set dst (CountLeadingZerosL src));
5280   effect(TEMP dst, KILL cr);
5281 
5282   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5283             "JNC    done\n\t"
5284             "LZCNT  $dst, $src.lo\n\t"
5285             "ADD    $dst, 32\n"
5286       "done:" %}
5287   ins_encode %{
5288     Register Rdst = $dst$$Register;
5289     Register Rsrc = $src$$Register;
5290     Label done;
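    // LZCNT sets CF when its source is zero: if the high word is non-zero
    // its count is the answer, otherwise count the low word and add 32.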
5291     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5292     __ jccb(Assembler::carryClear, done);
5293     __ lzcntl(Rdst, Rsrc);
5294     __ addl(Rdst, BitsPerInt);
5295     __ bind(done);
5296   %}
5297   ins_pipe(ialu_reg);
5298 %}
5299 
5300 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5301   predicate(!UseCountLeadingZerosInstruction);
5302   match(Set dst (CountLeadingZerosL src));
5303   effect(TEMP dst, KILL cr);
5304 
5305   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5306             "JZ     msw_is_zero\n\t"
5307             "ADD    $dst, 32\n\t"
5308             "JMP    not_zero\n"
5309       "msw_is_zero:\n\t"
5310             "BSR    $dst, $src.lo\n\t"
5311             "JNZ    not_zero\n\t"
5312             "MOV    $dst, -1\n"
5313       "not_zero:\n\t"
5314             "NEG    $dst\n\t"
5315             "ADD    $dst, 63\n" %}
  ins_encode %{
5317     Register Rdst = $dst$$Register;
5318     Register Rsrc = $src$$Register;
5319     Label msw_is_zero;
5320     Label not_zero;
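    // Locate the highest set bit of the 64-bit value: BSR the high word and
    // bias by 32 if it is non-zero, otherwise BSR the low word (or use -1
    // when both words are zero).  NEG + ADD 63 then converts that bit index
    // into a leading-zero count, yielding 64 for a zero input.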
5321     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5322     __ jccb(Assembler::zero, msw_is_zero);
5323     __ addl(Rdst, BitsPerInt);
5324     __ jmpb(not_zero);
5325     __ bind(msw_is_zero);
5326     __ bsrl(Rdst, Rsrc);
5327     __ jccb(Assembler::notZero, not_zero);
5328     __ movl(Rdst, -1);
5329     __ bind(not_zero);
5330     __ negl(Rdst);
5331     __ addl(Rdst, BitsPerLong - 1);
5332   %}
5333   ins_pipe(ialu_reg);
5334 %}
5335 
5336 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5337   predicate(UseCountTrailingZerosInstruction);
5338   match(Set dst (CountTrailingZerosI src));
5339   effect(KILL cr);
5340 
5341   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5342   ins_encode %{
5343     __ tzcntl($dst$$Register, $src$$Register);
5344   %}
5345   ins_pipe(ialu_reg);
5346 %}
5347 
5348 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5349   predicate(!UseCountTrailingZerosInstruction);
5350   match(Set dst (CountTrailingZerosI src));
5351   effect(KILL cr);
5352 
5353   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5354             "JNZ    done\n\t"
5355             "MOV    $dst, 32\n"
5356       "done:" %}
5357   ins_encode %{
5358     Register Rdst = $dst$$Register;
5359     Label done;
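    // BSF returns the index of the lowest set bit, which is the trailing-zero
    // count for a non-zero input; BSF of zero sets ZF and leaves the
    // destination undefined, so substitute 32 in that case.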
5360     __ bsfl(Rdst, $src$$Register);
5361     __ jccb(Assembler::notZero, done);
5362     __ movl(Rdst, BitsPerInt);
5363     __ bind(done);
5364   %}
5365   ins_pipe(ialu_reg);
5366 %}
5367 
5368 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5369   predicate(UseCountTrailingZerosInstruction);
5370   match(Set dst (CountTrailingZerosL src));
5371   effect(TEMP dst, KILL cr);
5372 
5373   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5374             "JNC    done\n\t"
5375             "TZCNT  $dst, $src.hi\n\t"
5376             "ADD    $dst, 32\n"
5377             "done:" %}
5378   ins_encode %{
5379     Register Rdst = $dst$$Register;
5380     Register Rsrc = $src$$Register;
5381     Label done;
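    // TZCNT sets CF when its source is zero: count the low word first and,
    // if it was zero, count the high word instead and add 32.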
5382     __ tzcntl(Rdst, Rsrc);
5383     __ jccb(Assembler::carryClear, done);
5384     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5385     __ addl(Rdst, BitsPerInt);
5386     __ bind(done);
5387   %}
5388   ins_pipe(ialu_reg);
5389 %}
5390 
5391 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5392   predicate(!UseCountTrailingZerosInstruction);
5393   match(Set dst (CountTrailingZerosL src));
5394   effect(TEMP dst, KILL cr);
5395 
5396   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5397             "JNZ    done\n\t"
5398             "BSF    $dst, $src.hi\n\t"
5399             "JNZ    msw_not_zero\n\t"
5400             "MOV    $dst, 32\n"
5401       "msw_not_zero:\n\t"
5402             "ADD    $dst, 32\n"
5403       "done:" %}
5404   ins_encode %{
5405     Register Rdst = $dst$$Register;
5406     Register Rsrc = $src$$Register;
5407     Label msw_not_zero;
5408     Label done;
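    // BSF the low word; if it is zero, BSF the high word and add 32.  When
    // both words are zero the destination is first set to 32, so the bias
    // makes the final result 64.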
5409     __ bsfl(Rdst, Rsrc);
5410     __ jccb(Assembler::notZero, done);
5411     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5412     __ jccb(Assembler::notZero, msw_not_zero);
5413     __ movl(Rdst, BitsPerInt);
5414     __ bind(msw_not_zero);
5415     __ addl(Rdst, BitsPerInt);
5416     __ bind(done);
5417   %}
5418   ins_pipe(ialu_reg);
5419 %}
5420 
5421 
5422 //---------- Population Count Instructions -------------------------------------
5423 
5424 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5425   predicate(UsePopCountInstruction);
5426   match(Set dst (PopCountI src));
5427   effect(KILL cr);
5428 
5429   format %{ "POPCNT $dst, $src" %}
5430   ins_encode %{
5431     __ popcntl($dst$$Register, $src$$Register);
5432   %}
5433   ins_pipe(ialu_reg);
5434 %}
5435 
5436 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5437   predicate(UsePopCountInstruction);
5438   match(Set dst (PopCountI (LoadI mem)));
5439   effect(KILL cr);
5440 
5441   format %{ "POPCNT $dst, $mem" %}
5442   ins_encode %{
5443     __ popcntl($dst$$Register, $mem$$Address);
5444   %}
5445   ins_pipe(ialu_reg);
5446 %}
5447 
5448 // Note: Long.bitCount(long) returns an int.
5449 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5450   predicate(UsePopCountInstruction);
5451   match(Set dst (PopCountL src));
5452   effect(KILL cr, TEMP tmp, TEMP dst);
5453 
5454   format %{ "POPCNT $dst, $src.lo\n\t"
5455             "POPCNT $tmp, $src.hi\n\t"
5456             "ADD    $dst, $tmp" %}
5457   ins_encode %{
5458     __ popcntl($dst$$Register, $src$$Register);
5459     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5460     __ addl($dst$$Register, $tmp$$Register);
5461   %}
5462   ins_pipe(ialu_reg);
5463 %}
5464 
5465 // Note: Long.bitCount(long) returns an int.
5466 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5467   predicate(UsePopCountInstruction);
5468   match(Set dst (PopCountL (LoadL mem)));
5469   effect(KILL cr, TEMP tmp, TEMP dst);
5470 
5471   format %{ "POPCNT $dst, $mem\n\t"
5472             "POPCNT $tmp, $mem+4\n\t"
5473             "ADD    $dst, $tmp" %}
5474   ins_encode %{
5475     //__ popcntl($dst$$Register, $mem$$Address$$first);
5476     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5477     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5478     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5479     __ addl($dst$$Register, $tmp$$Register);
5480   %}
5481   ins_pipe(ialu_reg);
5482 %}
5483 
5484 
5485 //----------Load/Store/Move Instructions---------------------------------------
5486 //----------Load Instructions--------------------------------------------------
5487 // Load Byte (8bit signed)
5488 instruct loadB(xRegI dst, memory mem) %{
5489   match(Set dst (LoadB mem));
5490 
5491   ins_cost(125);
5492   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5493 
5494   ins_encode %{
5495     __ movsbl($dst$$Register, $mem$$Address);
5496   %}
5497 
5498   ins_pipe(ialu_reg_mem);
5499 %}
5500 
5501 // Load Byte (8bit signed) into Long Register
5502 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5503   match(Set dst (ConvI2L (LoadB mem)));
5504   effect(KILL cr);
5505 
5506   ins_cost(375);
5507   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5508             "MOV    $dst.hi,$dst.lo\n\t"
5509             "SAR    $dst.hi,7" %}
5510 
5511   ins_encode %{
5512     __ movsbl($dst$$Register, $mem$$Address);
5513     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5515   %}
5516 
5517   ins_pipe(ialu_reg_mem);
5518 %}
5519 
5520 // Load Unsigned Byte (8bit UNsigned)
5521 instruct loadUB(xRegI dst, memory mem) %{
5522   match(Set dst (LoadUB mem));
5523 
5524   ins_cost(125);
5525   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5526 
5527   ins_encode %{
5528     __ movzbl($dst$$Register, $mem$$Address);
5529   %}
5530 
5531   ins_pipe(ialu_reg_mem);
5532 %}
5533 
5534 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5535 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5536   match(Set dst (ConvI2L (LoadUB mem)));
5537   effect(KILL cr);
5538 
5539   ins_cost(250);
5540   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5541             "XOR    $dst.hi,$dst.hi" %}
5542 
5543   ins_encode %{
5544     Register Rdst = $dst$$Register;
5545     __ movzbl(Rdst, $mem$$Address);
5546     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5547   %}
5548 
5549   ins_pipe(ialu_reg_mem);
5550 %}
5551 
5552 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5553 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5554   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5555   effect(KILL cr);
5556 
5557   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5558             "XOR    $dst.hi,$dst.hi\n\t"
5559             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5560   ins_encode %{
5561     Register Rdst = $dst$$Register;
5562     __ movzbl(Rdst, $mem$$Address);
5563     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5564     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5565   %}
5566   ins_pipe(ialu_reg_mem);
5567 %}
5568 
5569 // Load Short (16bit signed)
5570 instruct loadS(rRegI dst, memory mem) %{
5571   match(Set dst (LoadS mem));
5572 
5573   ins_cost(125);
5574   format %{ "MOVSX  $dst,$mem\t# short" %}
5575 
5576   ins_encode %{
5577     __ movswl($dst$$Register, $mem$$Address);
5578   %}
5579 
5580   ins_pipe(ialu_reg_mem);
5581 %}
5582 
5583 // Load Short (16 bit signed) to Byte (8 bit signed)
5584 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5585   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5586 
5587   ins_cost(125);
5588   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5589   ins_encode %{
5590     __ movsbl($dst$$Register, $mem$$Address);
5591   %}
5592   ins_pipe(ialu_reg_mem);
5593 %}
5594 
5595 // Load Short (16bit signed) into Long Register
5596 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5597   match(Set dst (ConvI2L (LoadS mem)));
5598   effect(KILL cr);
5599 
5600   ins_cost(375);
5601   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5602             "MOV    $dst.hi,$dst.lo\n\t"
5603             "SAR    $dst.hi,15" %}
5604 
5605   ins_encode %{
5606     __ movswl($dst$$Register, $mem$$Address);
5607     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5609   %}
5610 
5611   ins_pipe(ialu_reg_mem);
5612 %}
5613 
5614 // Load Unsigned Short/Char (16bit unsigned)
5615 instruct loadUS(rRegI dst, memory mem) %{
5616   match(Set dst (LoadUS mem));
5617 
5618   ins_cost(125);
5619   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5620 
5621   ins_encode %{
5622     __ movzwl($dst$$Register, $mem$$Address);
5623   %}
5624 
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5629 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5630   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5631 
5632   ins_cost(125);
5633   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5634   ins_encode %{
5635     __ movsbl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5641 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5642   match(Set dst (ConvI2L (LoadUS mem)));
5643   effect(KILL cr);
5644 
5645   ins_cost(250);
5646   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5647             "XOR    $dst.hi,$dst.hi" %}
5648 
5649   ins_encode %{
5650     __ movzwl($dst$$Register, $mem$$Address);
5651     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5652   %}
5653 
5654   ins_pipe(ialu_reg_mem);
5655 %}
5656 
5657 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5658 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5659   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5660   effect(KILL cr);
5661 
5662   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5663             "XOR    $dst.hi,$dst.hi" %}
5664   ins_encode %{
5665     Register Rdst = $dst$$Register;
5666     __ movzbl(Rdst, $mem$$Address);
5667     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5668   %}
5669   ins_pipe(ialu_reg_mem);
5670 %}
5671 
5672 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5673 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5674   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5675   effect(KILL cr);
5676 
5677   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5678             "XOR    $dst.hi,$dst.hi\n\t"
5679             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5680   ins_encode %{
5681     Register Rdst = $dst$$Register;
5682     __ movzwl(Rdst, $mem$$Address);
5683     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5684     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5685   %}
5686   ins_pipe(ialu_reg_mem);
5687 %}
5688 
5689 // Load Integer
5690 instruct loadI(rRegI dst, memory mem) %{
5691   match(Set dst (LoadI mem));
5692 
5693   ins_cost(125);
5694   format %{ "MOV    $dst,$mem\t# int" %}
5695 
5696   ins_encode %{
5697     __ movl($dst$$Register, $mem$$Address);
5698   %}
5699 
5700   ins_pipe(ialu_reg_mem);
5701 %}
5702 
5703 // Load Integer (32 bit signed) to Byte (8 bit signed)
5704 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5705   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5706 
5707   ins_cost(125);
5708   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5709   ins_encode %{
5710     __ movsbl($dst$$Register, $mem$$Address);
5711   %}
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5716 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5717   match(Set dst (AndI (LoadI mem) mask));
5718 
5719   ins_cost(125);
5720   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5721   ins_encode %{
5722     __ movzbl($dst$$Register, $mem$$Address);
5723   %}
5724   ins_pipe(ialu_reg_mem);
5725 %}
5726 
5727 // Load Integer (32 bit signed) to Short (16 bit signed)
5728 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5729   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5730 
5731   ins_cost(125);
5732   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5733   ins_encode %{
5734     __ movswl($dst$$Register, $mem$$Address);
5735   %}
5736   ins_pipe(ialu_reg_mem);
5737 %}
5738 
5739 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5740 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5741   match(Set dst (AndI (LoadI mem) mask));
5742 
5743   ins_cost(125);
5744   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5745   ins_encode %{
5746     __ movzwl($dst$$Register, $mem$$Address);
5747   %}
5748   ins_pipe(ialu_reg_mem);
5749 %}
5750 
5751 // Load Integer into Long Register
5752 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5753   match(Set dst (ConvI2L (LoadI mem)));
5754   effect(KILL cr);
5755 
5756   ins_cost(375);
5757   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5758             "MOV    $dst.hi,$dst.lo\n\t"
5759             "SAR    $dst.hi,31" %}
5760 
5761   ins_encode %{
5762     __ movl($dst$$Register, $mem$$Address);
5763     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5764     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5765   %}
5766 
5767   ins_pipe(ialu_reg_mem);
5768 %}
5769 
5770 // Load Integer with mask 0xFF into Long Register
5771 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5772   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5773   effect(KILL cr);
5774 
5775   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5776             "XOR    $dst.hi,$dst.hi" %}
5777   ins_encode %{
5778     Register Rdst = $dst$$Register;
5779     __ movzbl(Rdst, $mem$$Address);
5780     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5781   %}
5782   ins_pipe(ialu_reg_mem);
5783 %}
5784 
5785 // Load Integer with mask 0xFFFF into Long Register
5786 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5787   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5788   effect(KILL cr);
5789 
5790   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5791             "XOR    $dst.hi,$dst.hi" %}
5792   ins_encode %{
5793     Register Rdst = $dst$$Register;
5794     __ movzwl(Rdst, $mem$$Address);
5795     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5796   %}
5797   ins_pipe(ialu_reg_mem);
5798 %}
5799 
5800 // Load Integer with 31-bit mask into Long Register
5801 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5802   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5803   effect(KILL cr);
5804 
5805   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5806             "XOR    $dst.hi,$dst.hi\n\t"
5807             "AND    $dst.lo,$mask" %}
5808   ins_encode %{
5809     Register Rdst = $dst$$Register;
5810     __ movl(Rdst, $mem$$Address);
5811     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5812     __ andl(Rdst, $mask$$constant);
5813   %}
5814   ins_pipe(ialu_reg_mem);
5815 %}
5816 
5817 // Load Unsigned Integer into Long Register
5818 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5819   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5820   effect(KILL cr);
5821 
5822   ins_cost(250);
5823   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5824             "XOR    $dst.hi,$dst.hi" %}
5825 
5826   ins_encode %{
5827     __ movl($dst$$Register, $mem$$Address);
5828     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5829   %}
5830 
5831   ins_pipe(ialu_reg_mem);
5832 %}
5833 
5834 // Load Long.  Cannot clobber address while loading, so restrict address
5835 // register to ESI
5836 instruct loadL(eRegL dst, load_long_memory mem) %{
5837   predicate(!((LoadLNode*)n)->require_atomic_access());
5838   match(Set dst (LoadL mem));
5839 
5840   ins_cost(250);
5841   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5842             "MOV    $dst.hi,$mem+4" %}
5843 
5844   ins_encode %{
5845     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5846     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5847     __ movl($dst$$Register, Amemlo);
5848     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5849   %}
5850 
5851   ins_pipe(ialu_reg_long_mem);
5852 %}
5853 
5854 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5855 // then store it down to the stack and reload on the int
5856 // side.
5857 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5858   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5859   match(Set dst (LoadL mem));
5860 
5861   ins_cost(200);
5862   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5863             "FISTp  $dst" %}
5864   ins_encode(enc_loadL_volatile(mem,dst));
5865   ins_pipe( fpu_reg_mem );
5866 %}
5867 
5868 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5869   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5870   match(Set dst (LoadL mem));
5871   effect(TEMP tmp);
5872   ins_cost(180);
5873   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5874             "MOVSD  $dst,$tmp" %}
5875   ins_encode %{
5876     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5877     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5878   %}
5879   ins_pipe( pipe_slow );
5880 %}
5881 
5882 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5883   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5884   match(Set dst (LoadL mem));
5885   effect(TEMP tmp);
5886   ins_cost(160);
5887   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5888             "MOVD   $dst.lo,$tmp\n\t"
5889             "PSRLQ  $tmp,32\n\t"
5890             "MOVD   $dst.hi,$tmp" %}
5891   ins_encode %{
5892     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5893     __ movdl($dst$$Register, $tmp$$XMMRegister);
5894     __ psrlq($tmp$$XMMRegister, 32);
5895     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5896   %}
5897   ins_pipe( pipe_slow );
5898 %}
5899 
5900 // Load Range
5901 instruct loadRange(rRegI dst, memory mem) %{
5902   match(Set dst (LoadRange mem));
5903 
5904   ins_cost(125);
5905   format %{ "MOV    $dst,$mem" %}
5906   opcode(0x8B);
5907   ins_encode( OpcP, RegMem(dst,mem));
5908   ins_pipe( ialu_reg_mem );
5909 %}
5910 
5911 
5912 // Load Pointer
5913 instruct loadP(eRegP dst, memory mem) %{
5914   match(Set dst (LoadP mem));
5915 
5916   ins_cost(125);
5917   format %{ "MOV    $dst,$mem" %}
5918   opcode(0x8B);
5919   ins_encode( OpcP, RegMem(dst,mem));
5920   ins_pipe( ialu_reg_mem );
5921 %}
5922 
5923 // Load Klass Pointer
5924 instruct loadKlass(eRegP dst, memory mem) %{
5925   match(Set dst (LoadKlass mem));
5926 
5927   ins_cost(125);
5928   format %{ "MOV    $dst,$mem" %}
5929   opcode(0x8B);
5930   ins_encode( OpcP, RegMem(dst,mem));
5931   ins_pipe( ialu_reg_mem );
5932 %}
5933 
5934 // Load Double
5935 instruct loadDPR(regDPR dst, memory mem) %{
5936   predicate(UseSSE<=1);
5937   match(Set dst (LoadD mem));
5938 
5939   ins_cost(150);
5940   format %{ "FLD_D  ST,$mem\n\t"
5941             "FSTP   $dst" %}
5942   opcode(0xDD);               /* DD /0 */
5943   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5944               Pop_Reg_DPR(dst) );
5945   ins_pipe( fpu_reg_mem );
5946 %}
5947 
5948 // Load Double to XMM
5949 instruct loadD(regD dst, memory mem) %{
5950   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5951   match(Set dst (LoadD mem));
5952   ins_cost(145);
5953   format %{ "MOVSD  $dst,$mem" %}
5954   ins_encode %{
5955     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5956   %}
5957   ins_pipe( pipe_slow );
5958 %}
5959 
5960 instruct loadD_partial(regD dst, memory mem) %{
5961   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5962   match(Set dst (LoadD mem));
5963   ins_cost(145);
5964   format %{ "MOVLPD $dst,$mem" %}
5965   ins_encode %{
5966     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5967   %}
5968   ins_pipe( pipe_slow );
5969 %}
5970 
5971 // Load to XMM register (single-precision floating point)
5972 // MOVSS instruction
5973 instruct loadF(regF dst, memory mem) %{
5974   predicate(UseSSE>=1);
5975   match(Set dst (LoadF mem));
5976   ins_cost(145);
5977   format %{ "MOVSS  $dst,$mem" %}
5978   ins_encode %{
5979     __ movflt ($dst$$XMMRegister, $mem$$Address);
5980   %}
5981   ins_pipe( pipe_slow );
5982 %}
5983 
5984 // Load Float
5985 instruct loadFPR(regFPR dst, memory mem) %{
5986   predicate(UseSSE==0);
5987   match(Set dst (LoadF mem));
5988 
5989   ins_cost(150);
5990   format %{ "FLD_S  ST,$mem\n\t"
5991             "FSTP   $dst" %}
5992   opcode(0xD9);               /* D9 /0 */
5993   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5994               Pop_Reg_FPR(dst) );
5995   ins_pipe( fpu_reg_mem );
5996 %}
5997 
5998 // Load Effective Address
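// LEA performs the address arithmetic of its memory operand without touching
// memory, so these instructs match the address expression itself
// (Set dst mem) rather than a load from it.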
5999 instruct leaP8(eRegP dst, indOffset8 mem) %{
6000   match(Set dst mem);
6001 
6002   ins_cost(110);
6003   format %{ "LEA    $dst,$mem" %}
6004   opcode(0x8D);
6005   ins_encode( OpcP, RegMem(dst,mem));
6006   ins_pipe( ialu_reg_reg_fat );
6007 %}
6008 
6009 instruct leaP32(eRegP dst, indOffset32 mem) %{
6010   match(Set dst mem);
6011 
6012   ins_cost(110);
6013   format %{ "LEA    $dst,$mem" %}
6014   opcode(0x8D);
6015   ins_encode( OpcP, RegMem(dst,mem));
6016   ins_pipe( ialu_reg_reg_fat );
6017 %}
6018 
6019 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6020   match(Set dst mem);
6021 
6022   ins_cost(110);
6023   format %{ "LEA    $dst,$mem" %}
6024   opcode(0x8D);
6025   ins_encode( OpcP, RegMem(dst,mem));
6026   ins_pipe( ialu_reg_reg_fat );
6027 %}
6028 
6029 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6030   match(Set dst mem);
6031 
6032   ins_cost(110);
6033   format %{ "LEA    $dst,$mem" %}
6034   opcode(0x8D);
6035   ins_encode( OpcP, RegMem(dst,mem));
6036   ins_pipe( ialu_reg_reg_fat );
6037 %}
6038 
6039 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6040   match(Set dst mem);
6041 
6042   ins_cost(110);
6043   format %{ "LEA    $dst,$mem" %}
6044   opcode(0x8D);
6045   ins_encode( OpcP, RegMem(dst,mem));
6046   ins_pipe( ialu_reg_reg_fat );
6047 %}
6048 
6049 // Load Constant
6050 instruct loadConI(rRegI dst, immI src) %{
6051   match(Set dst src);
6052 
6053   format %{ "MOV    $dst,$src" %}
6054   ins_encode( LdImmI(dst, src) );
6055   ins_pipe( ialu_reg_fat );
6056 %}
6057 
6058 // Load Constant zero
6059 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6060   match(Set dst src);
6061   effect(KILL cr);
6062 
6063   ins_cost(50);
6064   format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* XOR reg,reg */
6066   ins_encode( OpcP, RegReg( dst, dst ) );
6067   ins_pipe( ialu_reg );
6068 %}
6069 
6070 instruct loadConP(eRegP dst, immP src) %{
6071   match(Set dst src);
6072 
6073   format %{ "MOV    $dst,$src" %}
6074   opcode(0xB8);  /* + rd */
6075   ins_encode( LdImmP(dst, src) );
6076   ins_pipe( ialu_reg_fat );
6077 %}
6078 
6079 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6080   match(Set dst src);
6081   effect(KILL cr);
6082   ins_cost(200);
6083   format %{ "MOV    $dst.lo,$src.lo\n\t"
6084             "MOV    $dst.hi,$src.hi" %}
6085   opcode(0xB8);
6086   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6087   ins_pipe( ialu_reg_long_fat );
6088 %}
6089 
6090 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6091   match(Set dst src);
6092   effect(KILL cr);
6093   ins_cost(150);
6094   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6095             "XOR    $dst.hi,$dst.hi" %}
6096   opcode(0x33,0x33);
6097   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6098   ins_pipe( ialu_reg_long );
6099 %}
6100 
6101 // The instruction usage is guarded by predicate in operand immFPR().
6102 instruct loadConFPR(regFPR dst, immFPR con) %{
6103   match(Set dst con);
6104   ins_cost(125);
6105   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6106             "FSTP   $dst" %}
6107   ins_encode %{
6108     __ fld_s($constantaddress($con));
6109     __ fstp_d($dst$$reg);
6110   %}
6111   ins_pipe(fpu_reg_con);
6112 %}
6113 
6114 // The instruction usage is guarded by predicate in operand immFPR0().
6115 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6116   match(Set dst con);
6117   ins_cost(125);
6118   format %{ "FLDZ   ST\n\t"
6119             "FSTP   $dst" %}
6120   ins_encode %{
6121     __ fldz();
6122     __ fstp_d($dst$$reg);
6123   %}
6124   ins_pipe(fpu_reg_con);
6125 %}
6126 
6127 // The instruction usage is guarded by predicate in operand immFPR1().
6128 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6129   match(Set dst con);
6130   ins_cost(125);
6131   format %{ "FLD1   ST\n\t"
6132             "FSTP   $dst" %}
6133   ins_encode %{
6134     __ fld1();
6135     __ fstp_d($dst$$reg);
6136   %}
6137   ins_pipe(fpu_reg_con);
6138 %}
6139 
6140 // The instruction usage is guarded by predicate in operand immF().
6141 instruct loadConF(regF dst, immF con) %{
6142   match(Set dst con);
6143   ins_cost(125);
6144   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6145   ins_encode %{
6146     __ movflt($dst$$XMMRegister, $constantaddress($con));
6147   %}
6148   ins_pipe(pipe_slow);
6149 %}
6150 
6151 // The instruction usage is guarded by predicate in operand immF0().
6152 instruct loadConF0(regF dst, immF0 src) %{
6153   match(Set dst src);
6154   ins_cost(100);
6155   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6156   ins_encode %{
6157     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6158   %}
6159   ins_pipe(pipe_slow);
6160 %}
6161 
6162 // The instruction usage is guarded by predicate in operand immDPR().
6163 instruct loadConDPR(regDPR dst, immDPR con) %{
6164   match(Set dst con);
6165   ins_cost(125);
6166 
6167   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6168             "FSTP   $dst" %}
6169   ins_encode %{
6170     __ fld_d($constantaddress($con));
6171     __ fstp_d($dst$$reg);
6172   %}
6173   ins_pipe(fpu_reg_con);
6174 %}
6175 
6176 // The instruction usage is guarded by predicate in operand immDPR0().
6177 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6178   match(Set dst con);
6179   ins_cost(125);
6180 
6181   format %{ "FLDZ   ST\n\t"
6182             "FSTP   $dst" %}
6183   ins_encode %{
6184     __ fldz();
6185     __ fstp_d($dst$$reg);
6186   %}
6187   ins_pipe(fpu_reg_con);
6188 %}
6189 
6190 // The instruction usage is guarded by predicate in operand immDPR1().
6191 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6192   match(Set dst con);
6193   ins_cost(125);
6194 
6195   format %{ "FLD1   ST\n\t"
6196             "FSTP   $dst" %}
6197   ins_encode %{
6198     __ fld1();
6199     __ fstp_d($dst$$reg);
6200   %}
6201   ins_pipe(fpu_reg_con);
6202 %}
6203 
6204 // The instruction usage is guarded by predicate in operand immD().
6205 instruct loadConD(regD dst, immD con) %{
6206   match(Set dst con);
6207   ins_cost(125);
6208   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6209   ins_encode %{
6210     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6211   %}
6212   ins_pipe(pipe_slow);
6213 %}
6214 
6215 // The instruction usage is guarded by predicate in operand immD0().
6216 instruct loadConD0(regD dst, immD0 src) %{
6217   match(Set dst src);
6218   ins_cost(100);
6219   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6220   ins_encode %{
6221     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6222   %}
6223   ins_pipe( pipe_slow );
6224 %}
6225 
6226 // Load Stack Slot
6227 instruct loadSSI(rRegI dst, stackSlotI src) %{
6228   match(Set dst src);
6229   ins_cost(125);
6230 
6231   format %{ "MOV    $dst,$src" %}
6232   opcode(0x8B);
6233   ins_encode( OpcP, RegMem(dst,src));
6234   ins_pipe( ialu_reg_mem );
6235 %}
6236 
6237 instruct loadSSL(eRegL dst, stackSlotL src) %{
6238   match(Set dst src);
6239 
6240   ins_cost(200);
6241   format %{ "MOV    $dst,$src.lo\n\t"
6242             "MOV    $dst+4,$src.hi" %}
6243   opcode(0x8B, 0x8B);
6244   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6245   ins_pipe( ialu_mem_long_reg );
6246 %}
6247 
6248 // Load Stack Slot
6249 instruct loadSSP(eRegP dst, stackSlotP src) %{
6250   match(Set dst src);
6251   ins_cost(125);
6252 
6253   format %{ "MOV    $dst,$src" %}
6254   opcode(0x8B);
6255   ins_encode( OpcP, RegMem(dst,src));
6256   ins_pipe( ialu_reg_mem );
6257 %}
6258 
6259 // Load Stack Slot
6260 instruct loadSSF(regFPR dst, stackSlotF src) %{
6261   match(Set dst src);
6262   ins_cost(125);
6263 
6264   format %{ "FLD_S  $src\n\t"
6265             "FSTP   $dst" %}
6266   opcode(0xD9);               /* D9 /0, FLD m32real */
6267   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6268               Pop_Reg_FPR(dst) );
6269   ins_pipe( fpu_reg_mem );
6270 %}
6271 
6272 // Load Stack Slot
6273 instruct loadSSD(regDPR dst, stackSlotD src) %{
6274   match(Set dst src);
6275   ins_cost(125);
6276 
6277   format %{ "FLD_D  $src\n\t"
6278             "FSTP   $dst" %}
6279   opcode(0xDD);               /* DD /0, FLD m64real */
6280   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6281               Pop_Reg_DPR(dst) );
6282   ins_pipe( fpu_reg_mem );
6283 %}
6284 
6285 // Prefetch instructions for allocation.
6286 // Must be safe to execute with invalid address (cannot fault).
6287 
6288 instruct prefetchAlloc0( memory mem ) %{
6289   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6290   match(PrefetchAllocation mem);
6291   ins_cost(0);
6292   size(0);
6293   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6294   ins_encode();
6295   ins_pipe(empty);
6296 %}
6297 
6298 instruct prefetchAlloc( memory mem ) %{
6299   predicate(AllocatePrefetchInstr==3);
6300   match( PrefetchAllocation mem );
6301   ins_cost(100);
6302 
6303   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6304   ins_encode %{
6305     __ prefetchw($mem$$Address);
6306   %}
6307   ins_pipe(ialu_mem);
6308 %}
6309 
6310 instruct prefetchAllocNTA( memory mem ) %{
6311   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6312   match(PrefetchAllocation mem);
6313   ins_cost(100);
6314 
6315   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6316   ins_encode %{
6317     __ prefetchnta($mem$$Address);
6318   %}
6319   ins_pipe(ialu_mem);
6320 %}
6321 
6322 instruct prefetchAllocT0( memory mem ) %{
6323   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6324   match(PrefetchAllocation mem);
6325   ins_cost(100);
6326 
6327   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6328   ins_encode %{
6329     __ prefetcht0($mem$$Address);
6330   %}
6331   ins_pipe(ialu_mem);
6332 %}
6333 
6334 instruct prefetchAllocT2( memory mem ) %{
6335   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6336   match(PrefetchAllocation mem);
6337   ins_cost(100);
6338 
6339   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6340   ins_encode %{
6341     __ prefetcht2($mem$$Address);
6342   %}
6343   ins_pipe(ialu_mem);
6344 %}
6345 
6346 //----------Store Instructions-------------------------------------------------
6347 
6348 // Store Byte
6349 instruct storeB(memory mem, xRegI src) %{
6350   match(Set mem (StoreB mem src));
6351 
6352   ins_cost(125);
6353   format %{ "MOV8   $mem,$src" %}
6354   opcode(0x88);
6355   ins_encode( OpcP, RegMem( src, mem ) );
6356   ins_pipe( ialu_mem_reg );
6357 %}
6358 
6359 // Store Char/Short
6360 instruct storeC(memory mem, rRegI src) %{
6361   match(Set mem (StoreC mem src));
6362 
6363   ins_cost(125);
6364   format %{ "MOV16  $mem,$src" %}
6365   opcode(0x89, 0x66);
6366   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6367   ins_pipe( ialu_mem_reg );
6368 %}
6369 
6370 // Store Integer
6371 instruct storeI(memory mem, rRegI src) %{
6372   match(Set mem (StoreI mem src));
6373 
6374   ins_cost(125);
6375   format %{ "MOV    $mem,$src" %}
6376   opcode(0x89);
6377   ins_encode( OpcP, RegMem( src, mem ) );
6378   ins_pipe( ialu_mem_reg );
6379 %}
6380 
6381 // Store Long
6382 instruct storeL(long_memory mem, eRegL src) %{
6383   predicate(!((StoreLNode*)n)->require_atomic_access());
6384   match(Set mem (StoreL mem src));
6385 
6386   ins_cost(200);
6387   format %{ "MOV    $mem,$src.lo\n\t"
6388             "MOV    $mem+4,$src.hi" %}
6389   opcode(0x89, 0x89);
6390   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6391   ins_pipe( ialu_mem_long_reg );
6392 %}
6393 
6394 // Store Long to Integer
6395 instruct storeL2I(memory mem, eRegL src) %{
6396   match(Set mem (StoreI mem (ConvL2I src)));
6397 
6398   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6399   ins_encode %{
6400     __ movl($mem$$Address, $src$$Register);
6401   %}
6402   ins_pipe(ialu_mem_reg);
6403 %}
6404 
6405 // Volatile Store Long.  Must be atomic, so move it into
6406 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6407 // target address before the store (for null-ptr checks)
6408 // so the memory operand is used twice in the encoding.
6409 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6410   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6411   match(Set mem (StoreL mem src));
6412   effect( KILL cr );
6413   ins_cost(400);
6414   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6415             "FILD   $src\n\t"
6416             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6417   opcode(0x3B);
6418   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6419   ins_pipe( fpu_reg_mem );
6420 %}
6421 
6422 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6423   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6424   match(Set mem (StoreL mem src));
6425   effect( TEMP tmp, KILL cr );
6426   ins_cost(380);
6427   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6428             "MOVSD  $tmp,$src\n\t"
6429             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6430   ins_encode %{
6431     __ cmpl(rax, $mem$$Address);
6432     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6433     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6434   %}
6435   ins_pipe( pipe_slow );
6436 %}
6437 
6438 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6439   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6440   match(Set mem (StoreL mem src));
6441   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6442   ins_cost(360);
6443   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6444             "MOVD   $tmp,$src.lo\n\t"
6445             "MOVD   $tmp2,$src.hi\n\t"
6446             "PUNPCKLDQ $tmp,$tmp2\n\t"
6447             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6448   ins_encode %{
6449     __ cmpl(rax, $mem$$Address);
6450     __ movdl($tmp$$XMMRegister, $src$$Register);
6451     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6452     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6453     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6454   %}
6455   ins_pipe( pipe_slow );
6456 %}
6457 
6458 // Store Pointer; for storing unknown oops and raw pointers
6459 instruct storeP(memory mem, anyRegP src) %{
6460   match(Set mem (StoreP mem src));
6461 
6462   ins_cost(125);
6463   format %{ "MOV    $mem,$src" %}
6464   opcode(0x89);
6465   ins_encode( OpcP, RegMem( src, mem ) );
6466   ins_pipe( ialu_mem_reg );
6467 %}
6468 
6469 // Store Integer Immediate
6470 instruct storeImmI(memory mem, immI src) %{
6471   match(Set mem (StoreI mem src));
6472 
6473   ins_cost(150);
6474   format %{ "MOV    $mem,$src" %}
6475   opcode(0xC7);               /* C7 /0 */
6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6477   ins_pipe( ialu_mem_imm );
6478 %}
6479 
6480 // Store Short/Char Immediate
6481 instruct storeImmI16(memory mem, immI16 src) %{
6482   predicate(UseStoreImmI16);
6483   match(Set mem (StoreC mem src));
6484 
6485   ins_cost(150);
6486   format %{ "MOV16  $mem,$src" %}
6487   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6488   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6489   ins_pipe( ialu_mem_imm );
6490 %}
6491 
6492 // Store Pointer Immediate; null pointers or constant oops that do not
6493 // need card-mark barriers.
6494 instruct storeImmP(memory mem, immP src) %{
6495   match(Set mem (StoreP mem src));
6496 
6497   ins_cost(150);
6498   format %{ "MOV    $mem,$src" %}
6499   opcode(0xC7);               /* C7 /0 */
6500   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6501   ins_pipe( ialu_mem_imm );
6502 %}
6503 
6504 // Store Byte Immediate
6505 instruct storeImmB(memory mem, immI8 src) %{
6506   match(Set mem (StoreB mem src));
6507 
6508   ins_cost(150);
6509   format %{ "MOV8   $mem,$src" %}
6510   opcode(0xC6);               /* C6 /0 */
6511   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6512   ins_pipe( ialu_mem_imm );
6513 %}
6514 
6515 // Store CMS card-mark Immediate
6516 instruct storeImmCM(memory mem, immI8 src) %{
6517   match(Set mem (StoreCM mem src));
6518 
6519   ins_cost(150);
6520   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6521   opcode(0xC6);               /* C6 /0 */
6522   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6523   ins_pipe( ialu_mem_imm );
6524 %}
6525 
6526 // Store Double
6527 instruct storeDPR( memory mem, regDPR1 src) %{
6528   predicate(UseSSE<=1);
6529   match(Set mem (StoreD mem src));
6530 
6531   ins_cost(100);
6532   format %{ "FST_D  $mem,$src" %}
6533   opcode(0xDD);       /* DD /2 */
6534   ins_encode( enc_FPR_store(mem,src) );
6535   ins_pipe( fpu_mem_reg );
6536 %}
6537 
6538 // Store double does rounding on x86
6539 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6540   predicate(UseSSE<=1);
6541   match(Set mem (StoreD mem (RoundDouble src)));
6542 
6543   ins_cost(100);
6544   format %{ "FST_D  $mem,$src\t# round" %}
6545   opcode(0xDD);       /* DD /2 */
6546   ins_encode( enc_FPR_store(mem,src) );
6547   ins_pipe( fpu_mem_reg );
6548 %}
6549 
// Store XMM register to memory (double-precision floating point)
6551 // MOVSD instruction
6552 instruct storeD(memory mem, regD src) %{
6553   predicate(UseSSE>=2);
6554   match(Set mem (StoreD mem src));
6555   ins_cost(95);
6556   format %{ "MOVSD  $mem,$src" %}
6557   ins_encode %{
6558     __ movdbl($mem$$Address, $src$$XMMRegister);
6559   %}
6560   ins_pipe( pipe_slow );
6561 %}
6562 
// Move Double from regD to vlRegD (register-to-register)
6564 instruct MoveD2VL(vlRegD dst, regD src) %{
6565   match(Set dst src);
6566   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6567   ins_encode %{
6568     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6569   %}
6570   ins_pipe( fpu_reg_reg );
6571 %}
6572 
// Move Double from vlRegD to regD (register-to-register)
6574 instruct MoveVL2D(regD dst, vlRegD src) %{
6575   match(Set dst src);
6576   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6577   ins_encode %{
6578     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6579   %}
6580   ins_pipe( fpu_reg_reg );
6581 %}
6582 
6583 // Store XMM register to memory (single-precision floating point)
6584 // MOVSS instruction
6585 instruct storeF(memory mem, regF src) %{
6586   predicate(UseSSE>=1);
6587   match(Set mem (StoreF mem src));
6588   ins_cost(95);
6589   format %{ "MOVSS  $mem,$src" %}
6590   ins_encode %{
6591     __ movflt($mem$$Address, $src$$XMMRegister);
6592   %}
6593   ins_pipe( pipe_slow );
6594 %}
6595 
// Move Float from regF to vlRegF (register-to-register)
6597 instruct MoveF2VL(vlRegF dst, regF src) %{
6598   match(Set dst src);
6599   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6600   ins_encode %{
6601     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6602   %}
6603   ins_pipe( fpu_reg_reg );
6604 %}
6605 
// Move Float from vlRegF to regF (register-to-register)
6607 instruct MoveVL2F(regF dst, vlRegF src) %{
6608   match(Set dst src);
6609   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6610   ins_encode %{
6611     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6612   %}
6613   ins_pipe( fpu_reg_reg );
6614 %}
6615 
6616 // Store Float
6617 instruct storeFPR( memory mem, regFPR1 src) %{
6618   predicate(UseSSE==0);
6619   match(Set mem (StoreF mem src));
6620 
6621   ins_cost(100);
6622   format %{ "FST_S  $mem,$src" %}
6623   opcode(0xD9);       /* D9 /2 */
6624   ins_encode( enc_FPR_store(mem,src) );
6625   ins_pipe( fpu_mem_reg );
6626 %}
6627 
6628 // Store Float does rounding on x86
6629 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6630   predicate(UseSSE==0);
6631   match(Set mem (StoreF mem (RoundFloat src)));
6632 
6633   ins_cost(100);
6634   format %{ "FST_S  $mem,$src\t# round" %}
6635   opcode(0xD9);       /* D9 /2 */
6636   ins_encode( enc_FPR_store(mem,src) );
6637   ins_pipe( fpu_mem_reg );
6638 %}
6639 
// Store Float from a double (ConvD2F); rounding happens on x86
6641 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6642   predicate(UseSSE<=1);
6643   match(Set mem (StoreF mem (ConvD2F src)));
6644 
6645   ins_cost(100);
6646   format %{ "FST_S  $mem,$src\t# D-round" %}
6647   opcode(0xD9);       /* D9 /2 */
6648   ins_encode( enc_FPR_store(mem,src) );
6649   ins_pipe( fpu_mem_reg );
6650 %}
6651 
6652 // Store immediate Float value (it is faster than store from FPU register)
6653 // The instruction usage is guarded by predicate in operand immFPR().
6654 instruct storeFPR_imm( memory mem, immFPR src) %{
6655   match(Set mem (StoreF mem src));
6656 
6657   ins_cost(50);
6658   format %{ "MOV    $mem,$src\t# store float" %}
6659   opcode(0xC7);               /* C7 /0 */
6660   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6661   ins_pipe( ialu_mem_imm );
6662 %}
6663 
6664 // Store immediate Float value (it is faster than store from XMM register)
6665 // The instruction usage is guarded by predicate in operand immF().
6666 instruct storeF_imm( memory mem, immF src) %{
6667   match(Set mem (StoreF mem src));
6668 
6669   ins_cost(50);
6670   format %{ "MOV    $mem,$src\t# store float" %}
6671   opcode(0xC7);               /* C7 /0 */
6672   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6673   ins_pipe( ialu_mem_imm );
6674 %}
6675 
6676 // Store Integer to stack slot
6677 instruct storeSSI(stackSlotI dst, rRegI src) %{
6678   match(Set dst src);
6679 
6680   ins_cost(100);
6681   format %{ "MOV    $dst,$src" %}
6682   opcode(0x89);
6683   ins_encode( OpcPRegSS( dst, src ) );
6684   ins_pipe( ialu_mem_reg );
6685 %}
6686 
// Store Pointer to stack slot
6688 instruct storeSSP(stackSlotP dst, eRegP src) %{
6689   match(Set dst src);
6690 
6691   ins_cost(100);
6692   format %{ "MOV    $dst,$src" %}
6693   opcode(0x89);
6694   ins_encode( OpcPRegSS( dst, src ) );
6695   ins_pipe( ialu_mem_reg );
6696 %}
6697 
6698 // Store Long to stack slot
6699 instruct storeSSL(stackSlotL dst, eRegL src) %{
6700   match(Set dst src);
6701 
6702   ins_cost(200);
6703   format %{ "MOV    $dst,$src.lo\n\t"
6704             "MOV    $dst+4,$src.hi" %}
6705   opcode(0x89, 0x89);
6706   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6707   ins_pipe( ialu_mem_long_reg );
6708 %}
6709 
6710 //----------MemBar Instructions-----------------------------------------------
6711 // Memory barrier flavors
6712 
6713 instruct membar_acquire() %{
6714   match(MemBarAcquire);
6715   match(LoadFence);
6716   ins_cost(400);
6717 
6718   size(0);
6719   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6720   ins_encode();
6721   ins_pipe(empty);
6722 %}
6723 
6724 instruct membar_acquire_lock() %{
6725   match(MemBarAcquireLock);
6726   ins_cost(0);
6727 
6728   size(0);
6729   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6730   ins_encode( );
6731   ins_pipe(empty);
6732 %}
6733 
6734 instruct membar_release() %{
6735   match(MemBarRelease);
6736   match(StoreFence);
6737   ins_cost(400);
6738 
6739   size(0);
6740   format %{ "MEMBAR-release ! (empty encoding)" %}
6741   ins_encode( );
6742   ins_pipe(empty);
6743 %}
6744 
6745 instruct membar_release_lock() %{
6746   match(MemBarReleaseLock);
6747   ins_cost(0);
6748 
6749   size(0);
6750   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6751   ins_encode( );
6752   ins_pipe(empty);
6753 %}
6754 
6755 instruct membar_volatile(eFlagsReg cr) %{
6756   match(MemBarVolatile);
6757   effect(KILL cr);
6758   ins_cost(400);
6759 
6760   format %{
6761     $$template
6762     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6763   %}
6764   ins_encode %{
6765     __ membar(Assembler::StoreLoad);
6766   %}
6767   ins_pipe(pipe_slow);
6768 %}
6769 
6770 instruct unnecessary_membar_volatile() %{
6771   match(MemBarVolatile);
6772   predicate(Matcher::post_store_load_barrier(n));
6773   ins_cost(0);
6774 
6775   size(0);
6776   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6777   ins_encode( );
6778   ins_pipe(empty);
6779 %}
6780 
6781 instruct membar_storestore() %{
6782   match(MemBarStoreStore);
6783   ins_cost(0);
6784 
6785   size(0);
6786   format %{ "MEMBAR-storestore (empty encoding)" %}
6787   ins_encode( );
6788   ins_pipe(empty);
6789 %}
6790 
6791 //----------Move Instructions--------------------------------------------------
6792 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6793   match(Set dst (CastX2P src));
6794   format %{ "# X2P  $dst, $src" %}
6795   ins_encode( /*empty encoding*/ );
6796   ins_cost(0);
6797   ins_pipe(empty);
6798 %}
6799 
6800 instruct castP2X(rRegI dst, eRegP src ) %{
6801   match(Set dst (CastP2X src));
6802   ins_cost(50);
6803   format %{ "MOV    $dst, $src\t# CastP2X" %}
6804   ins_encode( enc_Copy( dst, src) );
6805   ins_pipe( ialu_reg_reg );
6806 %}
6807 
6808 //----------Conditional Move---------------------------------------------------
6809 // Conditional move
6810 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6811   predicate(!VM_Version::supports_cmov() );
6812   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6813   ins_cost(200);
6814   format %{ "J$cop,us skip\t# signed cmove\n\t"
6815             "MOV    $dst,$src\n"
6816       "skip:" %}
6817   ins_encode %{
6818     Label Lskip;
6819     // Invert sense of branch from sense of CMOV
6820     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6821     __ movl($dst$$Register, $src$$Register);
6822     __ bind(Lskip);
6823   %}
6824   ins_pipe( pipe_cmov_reg );
6825 %}
6826 
6827 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6828   predicate(!VM_Version::supports_cmov() );
6829   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6830   ins_cost(200);
6831   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6832             "MOV    $dst,$src\n"
6833       "skip:" %}
6834   ins_encode %{
6835     Label Lskip;
6836     // Invert sense of branch from sense of CMOV
6837     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6838     __ movl($dst$$Register, $src$$Register);
6839     __ bind(Lskip);
6840   %}
6841   ins_pipe( pipe_cmov_reg );
6842 %}
6843 
6844 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6845   predicate(VM_Version::supports_cmov() );
6846   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6847   ins_cost(200);
6848   format %{ "CMOV$cop $dst,$src" %}
6849   opcode(0x0F,0x40);
6850   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6851   ins_pipe( pipe_cmov_reg );
6852 %}
6853 
6854 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6855   predicate(VM_Version::supports_cmov() );
6856   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   format %{ "CMOV$cop $dst,$src" %}
6859   opcode(0x0F,0x40);
6860   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6861   ins_pipe( pipe_cmov_reg );
6862 %}
6863 
6864 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6865   predicate(VM_Version::supports_cmov() );
6866   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6867   ins_cost(200);
6868   expand %{
6869     cmovI_regU(cop, cr, dst, src);
6870   %}
6871 %}
6872 
6873 // Conditional move
6874 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6875   predicate(VM_Version::supports_cmov() );
6876   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6877   ins_cost(250);
6878   format %{ "CMOV$cop $dst,$src" %}
6879   opcode(0x0F,0x40);
6880   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6881   ins_pipe( pipe_cmov_mem );
6882 %}
6883 
6884 // Conditional move
6885 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6886   predicate(VM_Version::supports_cmov() );
6887   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6888   ins_cost(250);
6889   format %{ "CMOV$cop $dst,$src" %}
6890   opcode(0x0F,0x40);
6891   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6892   ins_pipe( pipe_cmov_mem );
6893 %}
6894 
6895 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6896   predicate(VM_Version::supports_cmov() );
6897   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6898   ins_cost(250);
6899   expand %{
6900     cmovI_memU(cop, cr, dst, src);
6901   %}
6902 %}
6903 
6904 // Conditional move
6905 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6906   predicate(VM_Version::supports_cmov() );
6907   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6908   ins_cost(200);
6909   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6910   opcode(0x0F,0x40);
6911   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6912   ins_pipe( pipe_cmov_reg );
6913 %}
6914 
6915 // Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
6917 //        regardless of whether we are on a P6, so we
6918 //        emulate a cmov here
6919 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6920   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6921   ins_cost(300);
6922   format %{ "Jn$cop   skip\n\t"
6923           "MOV    $dst,$src\t# pointer\n"
6924       "skip:" %}
6925   opcode(0x8b);
6926   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6927   ins_pipe( pipe_cmov_reg );
6928 %}
6929 
6930 // Conditional move
6931 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6932   predicate(VM_Version::supports_cmov() );
6933   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6934   ins_cost(200);
6935   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6936   opcode(0x0F,0x40);
6937   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6938   ins_pipe( pipe_cmov_reg );
6939 %}
6940 
6941 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6942   predicate(VM_Version::supports_cmov() );
6943   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   expand %{
6946     cmovP_regU(cop, cr, dst, src);
6947   %}
6948 %}
6949 
6950 // DISABLED: Requires the ADLC to emit a bottom_type call that
6951 // correctly meets the two pointer arguments; one is an incoming
6952 // register but the other is a memory operand.  ALSO appears to
6953 // be buggy with implicit null checks.
6954 //
6955 //// Conditional move
6956 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6957 //  predicate(VM_Version::supports_cmov() );
6958 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6959 //  ins_cost(250);
6960 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6961 //  opcode(0x0F,0x40);
6962 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6963 //  ins_pipe( pipe_cmov_mem );
6964 //%}
6965 //
6966 //// Conditional move
6967 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6968 //  predicate(VM_Version::supports_cmov() );
6969 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6970 //  ins_cost(250);
6971 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6972 //  opcode(0x0F,0x40);
6973 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6974 //  ins_pipe( pipe_cmov_mem );
6975 //%}
6976 
6977 // Conditional move
6978 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6979   predicate(UseSSE<=1);
6980   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6981   ins_cost(200);
6982   format %{ "FCMOV$cop $dst,$src\t# double" %}
6983   opcode(0xDA);
6984   ins_encode( enc_cmov_dpr(cop,src) );
6985   ins_pipe( pipe_cmovDPR_reg );
6986 %}
6987 
6988 // Conditional move
6989 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6990   predicate(UseSSE==0);
6991   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   format %{ "FCMOV$cop $dst,$src\t# float" %}
6994   opcode(0xDA);
6995   ins_encode( enc_cmov_dpr(cop,src) );
6996   ins_pipe( pipe_cmovDPR_reg );
6997 %}
6998 
6999 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7000 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7001   predicate(UseSSE<=1);
7002   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "Jn$cop   skip\n\t"
7005             "MOV    $dst,$src\t# double\n"
7006       "skip:" %}
7007   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7008   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7009   ins_pipe( pipe_cmovDPR_reg );
7010 %}
7011 
7012 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7013 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7014   predicate(UseSSE==0);
7015   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7016   ins_cost(200);
7017   format %{ "Jn$cop    skip\n\t"
7018             "MOV    $dst,$src\t# float\n"
7019       "skip:" %}
7020   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7021   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7022   ins_pipe( pipe_cmovDPR_reg );
7023 %}
7024 
7025 // No CMOVE with SSE/SSE2
7026 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7027   predicate (UseSSE>=1);
7028   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7029   ins_cost(200);
7030   format %{ "Jn$cop   skip\n\t"
7031             "MOVSS  $dst,$src\t# float\n"
7032       "skip:" %}
7033   ins_encode %{
7034     Label skip;
7035     // Invert sense of branch from sense of CMOV
7036     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7037     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7038     __ bind(skip);
7039   %}
7040   ins_pipe( pipe_slow );
7041 %}
7042 
7043 // No CMOVE with SSE/SSE2
7044 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7045   predicate (UseSSE>=2);
7046   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7047   ins_cost(200);
7048   format %{ "Jn$cop   skip\n\t"
7049             "MOVSD  $dst,$src\t# float\n"
7050       "skip:" %}
7051   ins_encode %{
7052     Label skip;
7053     // Invert sense of branch from sense of CMOV
7054     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7055     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7056     __ bind(skip);
7057   %}
7058   ins_pipe( pipe_slow );
7059 %}
7060 
7061 // unsigned version
7062 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7063   predicate (UseSSE>=1);
7064   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7065   ins_cost(200);
7066   format %{ "Jn$cop   skip\n\t"
7067             "MOVSS  $dst,$src\t# float\n"
7068       "skip:" %}
7069   ins_encode %{
7070     Label skip;
7071     // Invert sense of branch from sense of CMOV
7072     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7073     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7074     __ bind(skip);
7075   %}
7076   ins_pipe( pipe_slow );
7077 %}
7078 
7079 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7080   predicate (UseSSE>=1);
7081   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7082   ins_cost(200);
7083   expand %{
7084     fcmovF_regU(cop, cr, dst, src);
7085   %}
7086 %}
7087 
7088 // unsigned version
7089 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7090   predicate (UseSSE>=2);
7091   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7092   ins_cost(200);
7093   format %{ "Jn$cop   skip\n\t"
7094             "MOVSD  $dst,$src\t# float\n"
7095       "skip:" %}
7096   ins_encode %{
7097     Label skip;
7098     // Invert sense of branch from sense of CMOV
7099     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7100     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7101     __ bind(skip);
7102   %}
7103   ins_pipe( pipe_slow );
7104 %}
7105 
7106 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7107   predicate (UseSSE>=2);
7108   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7109   ins_cost(200);
7110   expand %{
7111     fcmovD_regU(cop, cr, dst, src);
7112   %}
7113 %}
7114 
7115 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7116   predicate(VM_Version::supports_cmov() );
7117   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7118   ins_cost(200);
7119   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7120             "CMOV$cop $dst.hi,$src.hi" %}
7121   opcode(0x0F,0x40);
7122   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7123   ins_pipe( pipe_cmov_reg_long );
7124 %}
7125 
7126 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7127   predicate(VM_Version::supports_cmov() );
7128   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7129   ins_cost(200);
7130   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7131             "CMOV$cop $dst.hi,$src.hi" %}
7132   opcode(0x0F,0x40);
7133   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7134   ins_pipe( pipe_cmov_reg_long );
7135 %}
7136 
7137 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7138   predicate(VM_Version::supports_cmov() );
7139   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7140   ins_cost(200);
7141   expand %{
7142     cmovL_regU(cop, cr, dst, src);
7143   %}
7144 %}
7145 
7146 //----------Arithmetic Instructions--------------------------------------------
7147 //----------Addition Instructions----------------------------------------------
7148 
7149 // Integer Addition Instructions
7150 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7151   match(Set dst (AddI dst src));
7152   effect(KILL cr);
7153 
7154   size(2);
7155   format %{ "ADD    $dst,$src" %}
7156   opcode(0x03);
7157   ins_encode( OpcP, RegReg( dst, src) );
7158   ins_pipe( ialu_reg_reg );
7159 %}
7160 
7161 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7162   match(Set dst (AddI dst src));
7163   effect(KILL cr);
7164 
7165   format %{ "ADD    $dst,$src" %}
7166   opcode(0x81, 0x00); /* /0 id */
7167   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7168   ins_pipe( ialu_reg );
7169 %}
7170 
7171 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7172   predicate(UseIncDec);
7173   match(Set dst (AddI dst src));
7174   effect(KILL cr);
7175 
7176   size(1);
7177   format %{ "INC    $dst" %}
  opcode(0x40); /* INC r32 is encoded as 0x40 + register */
7179   ins_encode( Opc_plus( primary, dst ) );
7180   ins_pipe( ialu_reg );
7181 %}
7182 
7183 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7184   match(Set dst (AddI src0 src1));
7185   ins_cost(110);
7186 
7187   format %{ "LEA    $dst,[$src0 + $src1]" %}
7188   opcode(0x8D); /* 0x8D /r */
7189   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7190   ins_pipe( ialu_reg_reg );
7191 %}
7192 
7193 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7194   match(Set dst (AddP src0 src1));
7195   ins_cost(110);
7196 
7197   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7198   opcode(0x8D); /* 0x8D /r */
7199   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7200   ins_pipe( ialu_reg_reg );
7201 %}
7202 
7203 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7204   predicate(UseIncDec);
7205   match(Set dst (AddI dst src));
7206   effect(KILL cr);
7207 
7208   size(1);
7209   format %{ "DEC    $dst" %}
  opcode(0x48); /* DEC r32 is encoded as 0x48 + register */
7211   ins_encode( Opc_plus( primary, dst ) );
7212   ins_pipe( ialu_reg );
7213 %}
7214 
7215 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7216   match(Set dst (AddP dst src));
7217   effect(KILL cr);
7218 
7219   size(2);
7220   format %{ "ADD    $dst,$src" %}
7221   opcode(0x03);
7222   ins_encode( OpcP, RegReg( dst, src) );
7223   ins_pipe( ialu_reg_reg );
7224 %}
7225 
7226 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7227   match(Set dst (AddP dst src));
7228   effect(KILL cr);
7229 
7230   format %{ "ADD    $dst,$src" %}
7231   opcode(0x81,0x00); /* Opcode 81 /0 id */
7232   // ins_encode( RegImm( dst, src) );
7233   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7234   ins_pipe( ialu_reg );
7235 %}
7236 
7237 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7238   match(Set dst (AddI dst (LoadI src)));
7239   effect(KILL cr);
7240 
7241   ins_cost(125);
7242   format %{ "ADD    $dst,$src" %}
7243   opcode(0x03);
7244   ins_encode( OpcP, RegMem( dst, src) );
7245   ins_pipe( ialu_reg_mem );
7246 %}
7247 
7248 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7249   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7250   effect(KILL cr);
7251 
7252   ins_cost(150);
7253   format %{ "ADD    $dst,$src" %}
7254   opcode(0x01);  /* Opcode 01 /r */
7255   ins_encode( OpcP, RegMem( src, dst ) );
7256   ins_pipe( ialu_mem_reg );
7257 %}
7258 
7259 // Add Memory with Immediate
7260 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7261   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7262   effect(KILL cr);
7263 
7264   ins_cost(125);
7265   format %{ "ADD    $dst,$src" %}
7266   opcode(0x81);               /* Opcode 81 /0 id */
7267   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7268   ins_pipe( ialu_mem_imm );
7269 %}
7270 
7271 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7272   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7273   effect(KILL cr);
7274 
7275   ins_cost(125);
7276   format %{ "INC    $dst" %}
7277   opcode(0xFF);               /* Opcode FF /0 */
7278   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7279   ins_pipe( ialu_mem_imm );
7280 %}
7281 
7282 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7283   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7284   effect(KILL cr);
7285 
7286   ins_cost(125);
7287   format %{ "DEC    $dst" %}
7288   opcode(0xFF);               /* Opcode FF /1 */
7289   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7290   ins_pipe( ialu_mem_imm );
7291 %}
7292 
7293 
7294 instruct checkCastPP( eRegP dst ) %{
7295   match(Set dst (CheckCastPP dst));
7296 
7297   size(0);
7298   format %{ "#checkcastPP of $dst" %}
7299   ins_encode( /*empty encoding*/ );
7300   ins_pipe( empty );
7301 %}
7302 
7303 instruct castPP( eRegP dst ) %{
7304   match(Set dst (CastPP dst));
7305   format %{ "#castPP of $dst" %}
7306   ins_encode( /*empty encoding*/ );
7307   ins_pipe( empty );
7308 %}
7309 
7310 instruct castII( rRegI dst ) %{
7311   match(Set dst (CastII dst));
7312   format %{ "#castII of $dst" %}
7313   ins_encode( /*empty encoding*/ );
7314   ins_cost(0);
7315   ins_pipe( empty );
7316 %}
7317 
7318 
7319 // Load-locked - same as a regular pointer load when used with compare-swap
7320 instruct loadPLocked(eRegP dst, memory mem) %{
7321   match(Set dst (LoadPLocked mem));
7322 
7323   ins_cost(125);
7324   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7325   opcode(0x8B);
7326   ins_encode( OpcP, RegMem(dst,mem));
7327   ins_pipe( ialu_reg_mem );
7328 %}
7329 
7330 // Conditional-store of the updated heap-top.
7331 // Used during allocation of the shared heap.
7332 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7333 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7334   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7335   // EAX is killed if there is contention, but then it's also unused.
7336   // In the common case of no contention, EAX holds the new oop address.
7337   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7338   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7339   ins_pipe( pipe_cmpxchg );
7340 %}
7341 
7342 // Conditional-store of an int value.
7343 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7344 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7345   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7346   effect(KILL oldval);
7347   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7348   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7349   ins_pipe( pipe_cmpxchg );
7350 %}
7351 
7352 // Conditional-store of a long value.
7353 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7354 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7355   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7356   effect(KILL oldval);
7357   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7358             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7359             "XCHG   EBX,ECX"
7360   %}
7361   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7363     //       cmpxchg8 instruction because the instruction uses
7364     //       rcx as the high order word of the new value to store but
7365     //       our register encoding uses rbx.
7366     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7367     __ lock();
7368     __ cmpxchg8($mem$$Address);
7369     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7370   %}
7371   ins_pipe( pipe_cmpxchg );
7372 %}
7373 
7374 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7375 
7376 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7377   predicate(VM_Version::supports_cx8());
7378   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7379   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7380   effect(KILL cr, KILL oldval);
7381   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7382             "MOV    $res,0\n\t"
7383             "JNE,s  fail\n\t"
7384             "MOV    $res,1\n"
7385           "fail:" %}
7386   ins_encode( enc_cmpxchg8(mem_ptr),
7387               enc_flags_ne_to_boolean(res) );
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7392   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7393   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7394   effect(KILL cr, KILL oldval);
7395   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7396             "MOV    $res,0\n\t"
7397             "JNE,s  fail\n\t"
7398             "MOV    $res,1\n"
7399           "fail:" %}
7400   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7401   ins_pipe( pipe_cmpxchg );
7402 %}
7403 
7404 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7405   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7406   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7407   effect(KILL cr, KILL oldval);
7408   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7409             "MOV    $res,0\n\t"
7410             "JNE,s  fail\n\t"
7411             "MOV    $res,1\n"
7412           "fail:" %}
7413   ins_encode( enc_cmpxchgb(mem_ptr),
7414               enc_flags_ne_to_boolean(res) );
7415   ins_pipe( pipe_cmpxchg );
7416 %}
7417 
7418 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7419   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7420   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7421   effect(KILL cr, KILL oldval);
7422   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7423             "MOV    $res,0\n\t"
7424             "JNE,s  fail\n\t"
7425             "MOV    $res,1\n"
7426           "fail:" %}
7427   ins_encode( enc_cmpxchgw(mem_ptr),
7428               enc_flags_ne_to_boolean(res) );
7429   ins_pipe( pipe_cmpxchg );
7430 %}
7431 
7432 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7433   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7434   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7435   effect(KILL cr, KILL oldval);
7436   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7437             "MOV    $res,0\n\t"
7438             "JNE,s  fail\n\t"
7439             "MOV    $res,1\n"
7440           "fail:" %}
7441   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7442   ins_pipe( pipe_cmpxchg );
7443 %}
7444 
7445 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7446   predicate(VM_Version::supports_cx8());
7447   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7448   effect(KILL cr);
7449   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7450   ins_encode( enc_cmpxchg8(mem_ptr) );
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7455   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7456   effect(KILL cr);
7457   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7458   ins_encode( enc_cmpxchg(mem_ptr) );
7459   ins_pipe( pipe_cmpxchg );
7460 %}
7461 
7462 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7463   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7464   effect(KILL cr);
7465   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7466   ins_encode( enc_cmpxchgb(mem_ptr) );
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
7470 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7471   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7472   effect(KILL cr);
7473   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7474   ins_encode( enc_cmpxchgw(mem_ptr) );
7475   ins_pipe( pipe_cmpxchg );
7476 %}
7477 
7478 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7479   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7480   effect(KILL cr);
7481   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7482   ins_encode( enc_cmpxchg(mem_ptr) );
7483   ins_pipe( pipe_cmpxchg );
7484 %}
7485 
7486 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7487   predicate(n->as_LoadStore()->result_not_used());
7488   match(Set dummy (GetAndAddB mem add));
7489   effect(KILL cr);
7490   format %{ "ADDB  [$mem],$add" %}
7491   ins_encode %{
7492     __ lock();
7493     __ addb($mem$$Address, $add$$constant);
7494   %}
7495   ins_pipe( pipe_cmpxchg );
7496 %}
7497 
7498 // Important to match to xRegI: only 8-bit regs.
7499 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7500   match(Set newval (GetAndAddB mem newval));
7501   effect(KILL cr);
7502   format %{ "XADDB  [$mem],$newval" %}
7503   ins_encode %{
7504     __ lock();
7505     __ xaddb($mem$$Address, $newval$$Register);
7506   %}
7507   ins_pipe( pipe_cmpxchg );
7508 %}
7509 
7510 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7511   predicate(n->as_LoadStore()->result_not_used());
7512   match(Set dummy (GetAndAddS mem add));
7513   effect(KILL cr);
7514   format %{ "ADDS  [$mem],$add" %}
7515   ins_encode %{
7516     __ lock();
7517     __ addw($mem$$Address, $add$$constant);
7518   %}
7519   ins_pipe( pipe_cmpxchg );
7520 %}
7521 
7522 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7523   match(Set newval (GetAndAddS mem newval));
7524   effect(KILL cr);
7525   format %{ "XADDS  [$mem],$newval" %}
7526   ins_encode %{
7527     __ lock();
7528     __ xaddw($mem$$Address, $newval$$Register);
7529   %}
7530   ins_pipe( pipe_cmpxchg );
7531 %}
7532 
7533 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7534   predicate(n->as_LoadStore()->result_not_used());
7535   match(Set dummy (GetAndAddI mem add));
7536   effect(KILL cr);
7537   format %{ "ADDL  [$mem],$add" %}
7538   ins_encode %{
7539     __ lock();
7540     __ addl($mem$$Address, $add$$constant);
7541   %}
7542   ins_pipe( pipe_cmpxchg );
7543 %}
7544 
7545 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7546   match(Set newval (GetAndAddI mem newval));
7547   effect(KILL cr);
7548   format %{ "XADDL  [$mem],$newval" %}
7549   ins_encode %{
7550     __ lock();
7551     __ xaddl($mem$$Address, $newval$$Register);
7552   %}
7553   ins_pipe( pipe_cmpxchg );
7554 %}
7555 
7556 // Important to match to xRegI: only 8-bit regs.
7557 instruct xchgB( memory mem, xRegI newval) %{
7558   match(Set newval (GetAndSetB mem newval));
7559   format %{ "XCHGB  $newval,[$mem]" %}
7560   ins_encode %{
7561     __ xchgb($newval$$Register, $mem$$Address);
7562   %}
7563   ins_pipe( pipe_cmpxchg );
7564 %}
7565 
7566 instruct xchgS( memory mem, rRegI newval) %{
7567   match(Set newval (GetAndSetS mem newval));
7568   format %{ "XCHGW  $newval,[$mem]" %}
7569   ins_encode %{
7570     __ xchgw($newval$$Register, $mem$$Address);
7571   %}
7572   ins_pipe( pipe_cmpxchg );
7573 %}
7574 
7575 instruct xchgI( memory mem, rRegI newval) %{
7576   match(Set newval (GetAndSetI mem newval));
7577   format %{ "XCHGL  $newval,[$mem]" %}
7578   ins_encode %{
7579     __ xchgl($newval$$Register, $mem$$Address);
7580   %}
7581   ins_pipe( pipe_cmpxchg );
7582 %}
7583 
7584 instruct xchgP( memory mem, pRegP newval) %{
7585   match(Set newval (GetAndSetP mem newval));
7586   format %{ "XCHGL  $newval,[$mem]" %}
7587   ins_encode %{
7588     __ xchgl($newval$$Register, $mem$$Address);
7589   %}
7590   ins_pipe( pipe_cmpxchg );
7591 %}
7592 
7593 //----------Subtraction Instructions-------------------------------------------
7594 
7595 // Integer Subtraction Instructions
7596 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7597   match(Set dst (SubI dst src));
7598   effect(KILL cr);
7599 
7600   size(2);
7601   format %{ "SUB    $dst,$src" %}
7602   opcode(0x2B);
7603   ins_encode( OpcP, RegReg( dst, src) );
7604   ins_pipe( ialu_reg_reg );
7605 %}
7606 
7607 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7608   match(Set dst (SubI dst src));
7609   effect(KILL cr);
7610 
7611   format %{ "SUB    $dst,$src" %}
7612   opcode(0x81,0x05);  /* Opcode 81 /5 */
7613   // ins_encode( RegImm( dst, src) );
7614   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7615   ins_pipe( ialu_reg );
7616 %}
7617 
7618 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7619   match(Set dst (SubI dst (LoadI src)));
7620   effect(KILL cr);
7621 
7622   ins_cost(125);
7623   format %{ "SUB    $dst,$src" %}
7624   opcode(0x2B);
7625   ins_encode( OpcP, RegMem( dst, src) );
7626   ins_pipe( ialu_reg_mem );
7627 %}
7628 
7629 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7630   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7631   effect(KILL cr);
7632 
7633   ins_cost(150);
7634   format %{ "SUB    $dst,$src" %}
7635   opcode(0x29);  /* Opcode 29 /r */
7636   ins_encode( OpcP, RegMem( src, dst ) );
7637   ins_pipe( ialu_mem_reg );
7638 %}
7639 
7640 // Subtract from a pointer
7641 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7642   match(Set dst (AddP dst (SubI zero src)));
7643   effect(KILL cr);
7644 
7645   size(2);
7646   format %{ "SUB    $dst,$src" %}
7647   opcode(0x2B);
7648   ins_encode( OpcP, RegReg( dst, src) );
7649   ins_pipe( ialu_reg_reg );
7650 %}
7651 
7652 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7653   match(Set dst (SubI zero dst));
7654   effect(KILL cr);
7655 
7656   size(2);
7657   format %{ "NEG    $dst" %}
7658   opcode(0xF7,0x03);  // Opcode F7 /3
7659   ins_encode( OpcP, RegOpc( dst ) );
7660   ins_pipe( ialu_reg );
7661 %}
7662 
7663 //----------Multiplication/Division Instructions-------------------------------
7664 // Integer Multiplication Instructions
7665 // Multiply Register
7666 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7667   match(Set dst (MulI dst src));
7668   effect(KILL cr);
7669 
7670   size(3);
7671   ins_cost(300);
7672   format %{ "IMUL   $dst,$src" %}
7673   opcode(0xAF, 0x0F);
7674   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7675   ins_pipe( ialu_reg_reg_alu0 );
7676 %}
7677 
7678 // Multiply 32-bit Immediate
7679 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7680   match(Set dst (MulI src imm));
7681   effect(KILL cr);
7682 
7683   ins_cost(300);
7684   format %{ "IMUL   $dst,$src,$imm" %}
7685   opcode(0x69);  /* 69 /r id */
7686   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7687   ins_pipe( ialu_reg_reg_alu0 );
7688 %}
7689 
7690 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7691   match(Set dst src);
7692   effect(KILL cr);
7693 
7694   // Note that this is artificially increased to make it more expensive than loadConL
7695   ins_cost(250);
7696   format %{ "MOV    EAX,$src\t// low word only" %}
7697   opcode(0xB8);
7698   ins_encode( LdImmL_Lo(dst, src) );
7699   ins_pipe( ialu_reg_fat );
7700 %}
7701 
7702 // Multiply by 32-bit Immediate, taking the shifted high order results
7703 //  (special case for shift by 32)
7704 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7705   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7706   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7707              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7708              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7709   effect(USE src1, KILL cr);
7710 
7711   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7712   ins_cost(0*100 + 1*400 - 150);
7713   format %{ "IMUL   EDX:EAX,$src1" %}
7714   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7715   ins_pipe( pipe_slow );
7716 %}
7717 
7718 // Multiply by 32-bit Immediate, taking the shifted high order results
7719 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7720   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7721   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7722              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7723              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7724   effect(USE src1, KILL cr);
7725 
7726   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7727   ins_cost(1*100 + 1*400 - 150);
7728   format %{ "IMUL   EDX:EAX,$src1\n\t"
7729             "SAR    EDX,$cnt-32" %}
7730   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7731   ins_pipe( pipe_slow );
7732 %}
7733 
7734 // Multiply Memory 32-bit Immediate
7735 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7736   match(Set dst (MulI (LoadI src) imm));
7737   effect(KILL cr);
7738 
7739   ins_cost(300);
7740   format %{ "IMUL   $dst,$src,$imm" %}
7741   opcode(0x69);  /* 69 /r id */
7742   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7743   ins_pipe( ialu_reg_mem_alu0 );
7744 %}
7745 
7746 // Multiply Memory
7747 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7748   match(Set dst (MulI dst (LoadI src)));
7749   effect(KILL cr);
7750 
7751   ins_cost(350);
7752   format %{ "IMUL   $dst,$src" %}
7753   opcode(0xAF, 0x0F);
7754   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7755   ins_pipe( ialu_reg_mem_alu0 );
7756 %}
7757 
7758 // Multiply Register Int to Long
7759 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7760   // Basic Idea: long = (long)int * (long)int
7761   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7762   effect(DEF dst, USE src, USE src1, KILL flags);
7763 
7764   ins_cost(300);
7765   format %{ "IMUL   $dst,$src1" %}
7766 
7767   ins_encode( long_int_multiply( dst, src1 ) );
7768   ins_pipe( ialu_reg_reg_alu0 );
7769 %}
7770 
7771 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7772   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7773   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7774   effect(KILL flags);
7775 
7776   ins_cost(300);
7777   format %{ "MUL    $dst,$src1" %}
7778 
7779   ins_encode( long_uint_multiply(dst, src1) );
7780   ins_pipe( ialu_reg_reg_alu0 );
7781 %}
7782 
7783 // Multiply Register Long
7784 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7785   match(Set dst (MulL dst src));
7786   effect(KILL cr, TEMP tmp);
7787   ins_cost(4*100+3*400);
7788 // Basic idea: lo(result) = lo(x_lo * y_lo)
7789 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7790   format %{ "MOV    $tmp,$src.lo\n\t"
7791             "IMUL   $tmp,EDX\n\t"
7792             "MOV    EDX,$src.hi\n\t"
7793             "IMUL   EDX,EAX\n\t"
7794             "ADD    $tmp,EDX\n\t"
7795             "MUL    EDX:EAX,$src.lo\n\t"
7796             "ADD    EDX,$tmp" %}
7797   ins_encode( long_multiply( dst, src, tmp ) );
7798   ins_pipe( pipe_slow );
7799 %}
7800 
7801 // Multiply Register Long where the left operand's high 32 bits are zero
7802 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7803   predicate(is_operand_hi32_zero(n->in(1)));
7804   match(Set dst (MulL dst src));
7805   effect(KILL cr, TEMP tmp);
7806   ins_cost(2*100+2*400);
7807 // Basic idea: lo(result) = lo(x_lo * y_lo)
7808 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7809   format %{ "MOV    $tmp,$src.hi\n\t"
7810             "IMUL   $tmp,EAX\n\t"
7811             "MUL    EDX:EAX,$src.lo\n\t"
7812             "ADD    EDX,$tmp" %}
7813   ins_encode %{
7814     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7815     __ imull($tmp$$Register, rax);
7816     __ mull($src$$Register);
7817     __ addl(rdx, $tmp$$Register);
7818   %}
7819   ins_pipe( pipe_slow );
7820 %}
7821 
7822 // Multiply Register Long where the right operand's high 32 bits are zero
7823 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7824   predicate(is_operand_hi32_zero(n->in(2)));
7825   match(Set dst (MulL dst src));
7826   effect(KILL cr, TEMP tmp);
7827   ins_cost(2*100+2*400);
7828 // Basic idea: lo(result) = lo(x_lo * y_lo)
7829 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7830   format %{ "MOV    $tmp,$src.lo\n\t"
7831             "IMUL   $tmp,EDX\n\t"
7832             "MUL    EDX:EAX,$src.lo\n\t"
7833             "ADD    EDX,$tmp" %}
7834   ins_encode %{
7835     __ movl($tmp$$Register, $src$$Register);
7836     __ imull($tmp$$Register, rdx);
7837     __ mull($src$$Register);
7838     __ addl(rdx, $tmp$$Register);
7839   %}
7840   ins_pipe( pipe_slow );
7841 %}
7842 
7843 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7844 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7845   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7846   match(Set dst (MulL dst src));
7847   effect(KILL cr);
7848   ins_cost(1*400);
7849 // Basic idea: lo(result) = lo(x_lo * y_lo)
7850 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7851   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7852   ins_encode %{
7853     __ mull($src$$Register);
7854   %}
7855   ins_pipe( pipe_slow );
7856 %}
7857 
7858 // Multiply Register Long by small constant
7859 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7860   match(Set dst (MulL dst src));
7861   effect(KILL cr, TEMP tmp);
7862   ins_cost(2*100+2*400);
7863   size(12);
7864 // Basic idea: lo(result) = lo(src * EAX)
7865 //             hi(result) = hi(src * EAX) + lo(src * EDX)
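// Sketch: with dst held in EDX:EAX, dst*src = EAX*src + (EDX*src)*2^32 (mod 2^64);
// the IMUL forms the EDX*src contribution and MUL produces the full 64-bit EAX*src.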
7866   format %{ "IMUL   $tmp,EDX,$src\n\t"
7867             "MOV    EDX,$src\n\t"
7868             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7869             "ADD    EDX,$tmp" %}
7870   ins_encode( long_multiply_con( dst, src, tmp ) );
7871   ins_pipe( pipe_slow );
7872 %}
7873 
7874 // Integer DIV with Register
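// The CMP/JNE guard below covers the one overflowing case: min_jint / -1 would raise a
// divide-error on IDIV, while Java defines that quotient as min_jint (remainder 0),
// which the early exit delivers with EDX cleared.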
7875 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7876   match(Set rax (DivI rax div));
7877   effect(KILL rdx, KILL cr);
7878   size(26);
7879   ins_cost(30*100+10*100);
7880   format %{ "CMP    EAX,0x80000000\n\t"
7881             "JNE,s  normal\n\t"
7882             "XOR    EDX,EDX\n\t"
7883             "CMP    ECX,-1\n\t"
7884             "JE,s   done\n"
7885     "normal: CDQ\n\t"
7886             "IDIV   $div\n\t"
7887     "done:"        %}
7888   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7889   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7890   ins_pipe( ialu_reg_reg_alu0 );
7891 %}
7892 
7893 // Divide Register Long
7894 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7895   match(Set dst (DivL src1 src2));
7896   effect( KILL cr, KILL cx, KILL bx );
7897   ins_cost(10000);
7898   format %{ "PUSH   $src1.hi\n\t"
7899             "PUSH   $src1.lo\n\t"
7900             "PUSH   $src2.hi\n\t"
7901             "PUSH   $src2.lo\n\t"
7902             "CALL   SharedRuntime::ldiv\n\t"
7903             "ADD    ESP,16" %}
7904   ins_encode( long_div(src1,src2) );
7905   ins_pipe( pipe_slow );
7906 %}
7907 
7908 // Integer DIVMOD with Register, both quotient and mod results
7909 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7910   match(DivModI rax div);
7911   effect(KILL cr);
7912   size(26);
7913   ins_cost(30*100+10*100);
7914   format %{ "CMP    EAX,0x80000000\n\t"
7915             "JNE,s  normal\n\t"
7916             "XOR    EDX,EDX\n\t"
7917             "CMP    ECX,-1\n\t"
7918             "JE,s   done\n"
7919     "normal: CDQ\n\t"
7920             "IDIV   $div\n\t"
7921     "done:"        %}
7922   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7923   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7924   ins_pipe( pipe_slow );
7925 %}
7926 
7927 // Integer MOD with Register
7928 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7929   match(Set rdx (ModI rax div));
7930   effect(KILL rax, KILL cr);
7931 
7932   size(26);
7933   ins_cost(300);
7934   format %{ "CDQ\n\t"
7935             "IDIV   $div" %}
7936   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7937   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7938   ins_pipe( ialu_reg_reg_alu0 );
7939 %}
7940 
7941 // Remainder Register Long
7942 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7943   match(Set dst (ModL src1 src2));
7944   effect( KILL cr, KILL cx, KILL bx );
7945   ins_cost(10000);
7946   format %{ "PUSH   $src1.hi\n\t"
7947             "PUSH   $src1.lo\n\t"
7948             "PUSH   $src2.hi\n\t"
7949             "PUSH   $src2.lo\n\t"
7950             "CALL   SharedRuntime::lrem\n\t"
7951             "ADD    ESP,16" %}
7952   ins_encode( long_mod(src1,src2) );
7953   ins_pipe( pipe_slow );
7954 %}
7955 
7956 // Divide Register Long (no special case since divisor != -1)
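// Sketch of the constant-divisor scheme used below: base-2^32 long division by |imm|.
// When |imm| is (unsigned) greater than the high word the quotient fits in 32 bits and
// a single unsigned DIV suffices (the "fast" path); otherwise the high word is divided
// first and its remainder is carried into a second DIV of the low word.  A negative
// dividend is negated up front and the result negated back; a negative immediate only
// flips the sign of the final quotient.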
7957 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7958   match(Set dst (DivL dst imm));
7959   effect( TEMP tmp, TEMP tmp2, KILL cr );
7960   ins_cost(1000);
7961   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7962             "XOR    $tmp2,$tmp2\n\t"
7963             "CMP    $tmp,EDX\n\t"
7964             "JA,s   fast\n\t"
7965             "MOV    $tmp2,EAX\n\t"
7966             "MOV    EAX,EDX\n\t"
7967             "MOV    EDX,0\n\t"
7968             "JLE,s  pos\n\t"
7969             "LNEG   EAX : $tmp2\n\t"
7970             "DIV    $tmp # unsigned division\n\t"
7971             "XCHG   EAX,$tmp2\n\t"
7972             "DIV    $tmp\n\t"
7973             "LNEG   $tmp2 : EAX\n\t"
7974             "JMP,s  done\n"
7975     "pos:\n\t"
7976             "DIV    $tmp\n\t"
7977             "XCHG   EAX,$tmp2\n"
7978     "fast:\n\t"
7979             "DIV    $tmp\n"
7980     "done:\n\t"
7981             "MOV    EDX,$tmp2\n\t"
7982             "NEG    EDX:EAX # if $imm < 0" %}
7983   ins_encode %{
7984     int con = (int)$imm$$constant;
7985     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7986     int pcon = (con > 0) ? con : -con;
7987     Label Lfast, Lpos, Ldone;
7988 
7989     __ movl($tmp$$Register, pcon);
7990     __ xorl($tmp2$$Register,$tmp2$$Register);
7991     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7992     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7993 
7994     __ movl($tmp2$$Register, $dst$$Register); // save
7995     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7996     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7997     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7998 
7999     // Negative dividend.
8000     // convert value to positive to use unsigned division
8001     __ lneg($dst$$Register, $tmp2$$Register);
8002     __ divl($tmp$$Register);
8003     __ xchgl($dst$$Register, $tmp2$$Register);
8004     __ divl($tmp$$Register);
8005     // revert result back to negative
8006     __ lneg($tmp2$$Register, $dst$$Register);
8007     __ jmpb(Ldone);
8008 
8009     __ bind(Lpos);
8010     __ divl($tmp$$Register); // Use unsigned division
8011     __ xchgl($dst$$Register, $tmp2$$Register);
8012     // Fall through for the final divide; tmp2 has the 32-bit hi result
8013 
8014     __ bind(Lfast);
8015     // fast path: src is positive
8016     __ divl($tmp$$Register); // Use unsigned division
8017 
8018     __ bind(Ldone);
8019     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8020     if (con < 0) {
8021       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8022     }
8023   %}
8024   ins_pipe( pipe_slow );
8025 %}
8026 
8027 // Remainder Register Long (remainder fits into 32 bits)
8028 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8029   match(Set dst (ModL dst imm));
8030   effect( TEMP tmp, TEMP tmp2, KILL cr );
8031   ins_cost(1000);
8032   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8033             "CMP    $tmp,EDX\n\t"
8034             "JA,s   fast\n\t"
8035             "MOV    $tmp2,EAX\n\t"
8036             "MOV    EAX,EDX\n\t"
8037             "MOV    EDX,0\n\t"
8038             "JLE,s  pos\n\t"
8039             "LNEG   EAX : $tmp2\n\t"
8040             "DIV    $tmp # unsigned division\n\t"
8041             "MOV    EAX,$tmp2\n\t"
8042             "DIV    $tmp\n\t"
8043             "NEG    EDX\n\t"
8044             "JMP,s  done\n"
8045     "pos:\n\t"
8046             "DIV    $tmp\n\t"
8047             "MOV    EAX,$tmp2\n"
8048     "fast:\n\t"
8049             "DIV    $tmp\n"
8050     "done:\n\t"
8051             "MOV    EAX,EDX\n\t"
8052             "SAR    EDX,31\n\t" %}
8053   ins_encode %{
8054     int con = (int)$imm$$constant;
8055     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8056     int pcon = (con > 0) ? con : -con;
8057     Label  Lfast, Lpos, Ldone;
8058 
8059     __ movl($tmp$$Register, pcon);
8060     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8061     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8062 
8063     __ movl($tmp2$$Register, $dst$$Register); // save
8064     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8065     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8066     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8067 
8068     // Negative dividend.
8069     // convert value to positive to use unsigned division
8070     __ lneg($dst$$Register, $tmp2$$Register);
8071     __ divl($tmp$$Register);
8072     __ movl($dst$$Register, $tmp2$$Register);
8073     __ divl($tmp$$Register);
8074     // revert remainder back to negative
8075     __ negl(HIGH_FROM_LOW($dst$$Register));
8076     __ jmpb(Ldone);
8077 
8078     __ bind(Lpos);
8079     __ divl($tmp$$Register);
8080     __ movl($dst$$Register, $tmp2$$Register);
8081 
8082     __ bind(Lfast);
8083     // fast path: src is positive
8084     __ divl($tmp$$Register);
8085 
8086     __ bind(Ldone);
8087     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8088     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8089 
8090   %}
8091   ins_pipe( pipe_slow );
8092 %}
8093 
8094 // Integer Shift Instructions
8095 // Shift Left by one
8096 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8097   match(Set dst (LShiftI dst shift));
8098   effect(KILL cr);
8099 
8100   size(2);
8101   format %{ "SHL    $dst,$shift" %}
8102   opcode(0xD1, 0x4);  /* D1 /4 */
8103   ins_encode( OpcP, RegOpc( dst ) );
8104   ins_pipe( ialu_reg );
8105 %}
8106 
8107 // Shift Left by 8-bit immediate
8108 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8109   match(Set dst (LShiftI dst shift));
8110   effect(KILL cr);
8111 
8112   size(3);
8113   format %{ "SHL    $dst,$shift" %}
8114   opcode(0xC1, 0x4);  /* C1 /4 ib */
8115   ins_encode( RegOpcImm( dst, shift) );
8116   ins_pipe( ialu_reg );
8117 %}
8118 
8119 // Shift Left by variable
8120 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8121   match(Set dst (LShiftI dst shift));
8122   effect(KILL cr);
8123 
8124   size(2);
8125   format %{ "SHL    $dst,$shift" %}
8126   opcode(0xD3, 0x4);  /* D3 /4 */
8127   ins_encode( OpcP, RegOpc( dst ) );
8128   ins_pipe( ialu_reg_reg );
8129 %}
8130 
8131 // Arithmetic shift right by one
8132 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8133   match(Set dst (RShiftI dst shift));
8134   effect(KILL cr);
8135 
8136   size(2);
8137   format %{ "SAR    $dst,$shift" %}
8138   opcode(0xD1, 0x7);  /* D1 /7 */
8139   ins_encode( OpcP, RegOpc( dst ) );
8140   ins_pipe( ialu_reg );
8141 %}
8142 
8143 // Arithmetic shift right of a memory operand by one
8144 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8145   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8146   effect(KILL cr);
8147   format %{ "SAR    $dst,$shift" %}
8148   opcode(0xD1, 0x7);  /* D1 /7 */
8149   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8150   ins_pipe( ialu_mem_imm );
8151 %}
8152 
8153 // Arithmetic Shift Right by 8-bit immediate
8154 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8155   match(Set dst (RShiftI dst shift));
8156   effect(KILL cr);
8157 
8158   size(3);
8159   format %{ "SAR    $dst,$shift" %}
8160   opcode(0xC1, 0x7);  /* C1 /7 ib */
8161   ins_encode( RegOpcImm( dst, shift ) );
8162   ins_pipe( ialu_mem_imm );
8163 %}
8164 
8165 // Arithmetic Shift Right of a memory operand by 8-bit immediate
8166 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8167   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8168   effect(KILL cr);
8169 
8170   format %{ "SAR    $dst,$shift" %}
8171   opcode(0xC1, 0x7);  /* C1 /7 ib */
8172   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8173   ins_pipe( ialu_mem_imm );
8174 %}
8175 
8176 // Arithmetic Shift Right by variable
8177 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8178   match(Set dst (RShiftI dst shift));
8179   effect(KILL cr);
8180 
8181   size(2);
8182   format %{ "SAR    $dst,$shift" %}
8183   opcode(0xD3, 0x7);  /* D3 /7 */
8184   ins_encode( OpcP, RegOpc( dst ) );
8185   ins_pipe( ialu_reg_reg );
8186 %}
8187 
8188 // Logical shift right by one
8189 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8190   match(Set dst (URShiftI dst shift));
8191   effect(KILL cr);
8192 
8193   size(2);
8194   format %{ "SHR    $dst,$shift" %}
8195   opcode(0xD1, 0x5);  /* D1 /5 */
8196   ins_encode( OpcP, RegOpc( dst ) );
8197   ins_pipe( ialu_reg );
8198 %}
8199 
8200 // Logical Shift Right by 8-bit immediate
8201 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8202   match(Set dst (URShiftI dst shift));
8203   effect(KILL cr);
8204 
8205   size(3);
8206   format %{ "SHR    $dst,$shift" %}
8207   opcode(0xC1, 0x5);  /* C1 /5 ib */
8208   ins_encode( RegOpcImm( dst, shift) );
8209   ins_pipe( ialu_reg );
8210 %}
8211 
8212 
8213 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8214 // This idiom is used by the compiler for the i2b bytecode.
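// Since (x << 24) >> 24 is exactly sign-extension of the low byte, a single MOVSX of
// the source's low byte implements the whole pattern.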
8215 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8216   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8217 
8218   size(3);
8219   format %{ "MOVSX  $dst,$src :8" %}
8220   ins_encode %{
8221     __ movsbl($dst$$Register, $src$$Register);
8222   %}
8223   ins_pipe(ialu_reg_reg);
8224 %}
8225 
8226 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8227 // This idiom is used by the compiler for the i2s bytecode.
8228 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8229   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8230 
8231   size(3);
8232   format %{ "MOVSX  $dst,$src :16" %}
8233   ins_encode %{
8234     __ movswl($dst$$Register, $src$$Register);
8235   %}
8236   ins_pipe(ialu_reg_reg);
8237 %}
8238 
8239 
8240 // Logical Shift Right by variable
8241 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8242   match(Set dst (URShiftI dst shift));
8243   effect(KILL cr);
8244 
8245   size(2);
8246   format %{ "SHR    $dst,$shift" %}
8247   opcode(0xD3, 0x5);  /* D3 /5 */
8248   ins_encode( OpcP, RegOpc( dst ) );
8249   ins_pipe( ialu_reg_reg );
8250 %}
8251 
8252 
8253 //----------Logical Instructions-----------------------------------------------
8254 //----------Integer Logical Instructions---------------------------------------
8255 // And Instructions
8256 // And Register with Register
8257 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8258   match(Set dst (AndI dst src));
8259   effect(KILL cr);
8260 
8261   size(2);
8262   format %{ "AND    $dst,$src" %}
8263   opcode(0x23);
8264   ins_encode( OpcP, RegReg( dst, src) );
8265   ins_pipe( ialu_reg_reg );
8266 %}
8267 
8268 // And Register with Immediate
8269 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8270   match(Set dst (AndI dst src));
8271   effect(KILL cr);
8272 
8273   format %{ "AND    $dst,$src" %}
8274   opcode(0x81,0x04);  /* Opcode 81 /4 */
8275   // ins_encode( RegImm( dst, src) );
8276   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8277   ins_pipe( ialu_reg );
8278 %}
8279 
8280 // And Register with Memory
8281 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8282   match(Set dst (AndI dst (LoadI src)));
8283   effect(KILL cr);
8284 
8285   ins_cost(125);
8286   format %{ "AND    $dst,$src" %}
8287   opcode(0x23);
8288   ins_encode( OpcP, RegMem( dst, src) );
8289   ins_pipe( ialu_reg_mem );
8290 %}
8291 
8292 // And Memory with Register
8293 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8294   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8295   effect(KILL cr);
8296 
8297   ins_cost(150);
8298   format %{ "AND    $dst,$src" %}
8299   opcode(0x21);  /* Opcode 21 /r */
8300   ins_encode( OpcP, RegMem( src, dst ) );
8301   ins_pipe( ialu_mem_reg );
8302 %}
8303 
8304 // And Memory with Immediate
8305 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8306   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8307   effect(KILL cr);
8308 
8309   ins_cost(125);
8310   format %{ "AND    $dst,$src" %}
8311   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8312   // ins_encode( MemImm( dst, src) );
8313   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8314   ins_pipe( ialu_mem_imm );
8315 %}
8316 
8317 // BMI1 instructions
8318 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8319   match(Set dst (AndI (XorI src1 minus_1) src2));
8320   predicate(UseBMI1Instructions);
8321   effect(KILL cr);
8322 
8323   format %{ "ANDNL  $dst, $src1, $src2" %}
8324 
8325   ins_encode %{
8326     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8327   %}
8328   ins_pipe(ialu_reg);
8329 %}
8330 
8331 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8332   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8333   predicate(UseBMI1Instructions);
8334   effect(KILL cr);
8335 
8336   ins_cost(125);
8337   format %{ "ANDNL  $dst, $src1, $src2" %}
8338 
8339   ins_encode %{
8340     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8341   %}
8342   ins_pipe(ialu_reg_mem);
8343 %}
8344 
8345 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8346   match(Set dst (AndI (SubI imm_zero src) src));
8347   predicate(UseBMI1Instructions);
8348   effect(KILL cr);
8349 
8350   format %{ "BLSIL  $dst, $src" %}
8351 
8352   ins_encode %{
8353     __ blsil($dst$$Register, $src$$Register);
8354   %}
8355   ins_pipe(ialu_reg);
8356 %}
8357 
8358 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8359   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8360   predicate(UseBMI1Instructions);
8361   effect(KILL cr);
8362 
8363   ins_cost(125);
8364   format %{ "BLSIL  $dst, $src" %}
8365 
8366   ins_encode %{
8367     __ blsil($dst$$Register, $src$$Address);
8368   %}
8369   ins_pipe(ialu_reg_mem);
8370 %}
8371 
8372 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8373 %{
8374   match(Set dst (XorI (AddI src minus_1) src));
8375   predicate(UseBMI1Instructions);
8376   effect(KILL cr);
8377 
8378   format %{ "BLSMSKL $dst, $src" %}
8379 
8380   ins_encode %{
8381     __ blsmskl($dst$$Register, $src$$Register);
8382   %}
8383 
8384   ins_pipe(ialu_reg);
8385 %}
8386 
8387 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8388 %{
8389   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8390   predicate(UseBMI1Instructions);
8391   effect(KILL cr);
8392 
8393   ins_cost(125);
8394   format %{ "BLSMSKL $dst, $src" %}
8395 
8396   ins_encode %{
8397     __ blsmskl($dst$$Register, $src$$Address);
8398   %}
8399 
8400   ins_pipe(ialu_reg_mem);
8401 %}
8402 
8403 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8404 %{
8405   match(Set dst (AndI (AddI src minus_1) src) );
8406   predicate(UseBMI1Instructions);
8407   effect(KILL cr);
8408 
8409   format %{ "BLSRL  $dst, $src" %}
8410 
8411   ins_encode %{
8412     __ blsrl($dst$$Register, $src$$Register);
8413   %}
8414 
8415   ins_pipe(ialu_reg);
8416 %}
8417 
8418 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8419 %{
8420   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8421   predicate(UseBMI1Instructions);
8422   effect(KILL cr);
8423 
8424   ins_cost(125);
8425   format %{ "BLSRL  $dst, $src" %}
8426 
8427   ins_encode %{
8428     __ blsrl($dst$$Register, $src$$Address);
8429   %}
8430 
8431   ins_pipe(ialu_reg_mem);
8432 %}
8433 
8434 // Or Instructions
8435 // Or Register with Register
8436 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8437   match(Set dst (OrI dst src));
8438   effect(KILL cr);
8439 
8440   size(2);
8441   format %{ "OR     $dst,$src" %}
8442   opcode(0x0B);
8443   ins_encode( OpcP, RegReg( dst, src) );
8444   ins_pipe( ialu_reg_reg );
8445 %}
8446 
8447 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8448   match(Set dst (OrI dst (CastP2X src)));
8449   effect(KILL cr);
8450 
8451   size(2);
8452   format %{ "OR     $dst,$src" %}
8453   opcode(0x0B);
8454   ins_encode( OpcP, RegReg( dst, src) );
8455   ins_pipe( ialu_reg_reg );
8456 %}
8457 
8458 
8459 // Or Register with Immediate
8460 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8461   match(Set dst (OrI dst src));
8462   effect(KILL cr);
8463 
8464   format %{ "OR     $dst,$src" %}
8465   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8466   // ins_encode( RegImm( dst, src) );
8467   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8468   ins_pipe( ialu_reg );
8469 %}
8470 
8471 // Or Register with Memory
8472 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8473   match(Set dst (OrI dst (LoadI src)));
8474   effect(KILL cr);
8475 
8476   ins_cost(125);
8477   format %{ "OR     $dst,$src" %}
8478   opcode(0x0B);
8479   ins_encode( OpcP, RegMem( dst, src) );
8480   ins_pipe( ialu_reg_mem );
8481 %}
8482 
8483 // Or Memory with Register
8484 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8485   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8486   effect(KILL cr);
8487 
8488   ins_cost(150);
8489   format %{ "OR     $dst,$src" %}
8490   opcode(0x09);  /* Opcode 09 /r */
8491   ins_encode( OpcP, RegMem( src, dst ) );
8492   ins_pipe( ialu_mem_reg );
8493 %}
8494 
8495 // Or Memory with Immediate
8496 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8497   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8498   effect(KILL cr);
8499 
8500   ins_cost(125);
8501   format %{ "OR     $dst,$src" %}
8502   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8503   // ins_encode( MemImm( dst, src) );
8504   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8505   ins_pipe( ialu_mem_imm );
8506 %}
8507 
8508 // ROL/ROR
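// These rules match the shift-and-or rotate idiom, e.g. (x << s) | (x >>> (32 - s)) for
// a rotate-left; the expand templates below emit a single ROL/ROR, and the imm8 forms
// additionally require the two shift counts to sum to 0 mod 32 (see their predicates).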
8509 // ROL expand
8510 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8511   effect(USE_DEF dst, USE shift, KILL cr);
8512 
8513   format %{ "ROL    $dst, $shift" %}
8514   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8515   ins_encode( OpcP, RegOpc( dst ));
8516   ins_pipe( ialu_reg );
8517 %}
8518 
8519 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8520   effect(USE_DEF dst, USE shift, KILL cr);
8521 
8522   format %{ "ROL    $dst, $shift" %}
8523   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8524   ins_encode( RegOpcImm(dst, shift) );
8525   ins_pipe(ialu_reg);
8526 %}
8527 
8528 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8529   effect(USE_DEF dst, USE shift, KILL cr);
8530 
8531   format %{ "ROL    $dst, $shift" %}
8532   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8533   ins_encode(OpcP, RegOpc(dst));
8534   ins_pipe( ialu_reg_reg );
8535 %}
8536 // end of ROL expand
8537 
8538 // ROL 32bit by one once
8539 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8540   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8541 
8542   expand %{
8543     rolI_eReg_imm1(dst, lshift, cr);
8544   %}
8545 %}
8546 
8547 // ROL 32bit var by imm8 once
8548 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8549   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8550   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8551 
8552   expand %{
8553     rolI_eReg_imm8(dst, lshift, cr);
8554   %}
8555 %}
8556 
8557 // ROL 32bit var by var once
8558 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8559   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8560 
8561   expand %{
8562     rolI_eReg_CL(dst, shift, cr);
8563   %}
8564 %}
8565 
8566 // ROL 32bit var by var once
8567 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8568   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8569 
8570   expand %{
8571     rolI_eReg_CL(dst, shift, cr);
8572   %}
8573 %}
8574 
8575 // ROR expand
8576 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8577   effect(USE_DEF dst, USE shift, KILL cr);
8578 
8579   format %{ "ROR    $dst, $shift" %}
8580   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8581   ins_encode( OpcP, RegOpc( dst ) );
8582   ins_pipe( ialu_reg );
8583 %}
8584 
8585 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8586   effect (USE_DEF dst, USE shift, KILL cr);
8587 
8588   format %{ "ROR    $dst, $shift" %}
8589   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8590   ins_encode( RegOpcImm(dst, shift) );
8591   ins_pipe( ialu_reg );
8592 %}
8593 
8594 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8595   effect(USE_DEF dst, USE shift, KILL cr);
8596 
8597   format %{ "ROR    $dst, $shift" %}
8598   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8599   ins_encode(OpcP, RegOpc(dst));
8600   ins_pipe( ialu_reg_reg );
8601 %}
8602 // end of ROR expand
8603 
8604 // ROR 32bit by one once
8605 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8606   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8607 
8608   expand %{
8609     rorI_eReg_imm1(dst, rshift, cr);
8610   %}
8611 %}
8612 
8613 // ROR 32bit by immI8 once
8614 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8615   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8616   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8617 
8618   expand %{
8619     rorI_eReg_imm8(dst, rshift, cr);
8620   %}
8621 %}
8622 
8623 // ROR 32bit var by var once
8624 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8625   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8626 
8627   expand %{
8628     rorI_eReg_CL(dst, shift, cr);
8629   %}
8630 %}
8631 
8632 // ROR 32bit var by var once
8633 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8634   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8635 
8636   expand %{
8637     rorI_eReg_CL(dst, shift, cr);
8638   %}
8639 %}
8640 
8641 // Xor Instructions
8642 // Xor Register with Register
8643 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8644   match(Set dst (XorI dst src));
8645   effect(KILL cr);
8646 
8647   size(2);
8648   format %{ "XOR    $dst,$src" %}
8649   opcode(0x33);
8650   ins_encode( OpcP, RegReg( dst, src) );
8651   ins_pipe( ialu_reg_reg );
8652 %}
8653 
8654 // Xor Register with Immediate -1
8655 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8656   match(Set dst (XorI dst imm));
8657 
8658   size(2);
8659   format %{ "NOT    $dst" %}
8660   ins_encode %{
8661      __ notl($dst$$Register);
8662   %}
8663   ins_pipe( ialu_reg );
8664 %}
8665 
8666 // Xor Register with Immediate
8667 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8668   match(Set dst (XorI dst src));
8669   effect(KILL cr);
8670 
8671   format %{ "XOR    $dst,$src" %}
8672   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8673   // ins_encode( RegImm( dst, src) );
8674   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8675   ins_pipe( ialu_reg );
8676 %}
8677 
8678 // Xor Register with Memory
8679 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8680   match(Set dst (XorI dst (LoadI src)));
8681   effect(KILL cr);
8682 
8683   ins_cost(125);
8684   format %{ "XOR    $dst,$src" %}
8685   opcode(0x33);
8686   ins_encode( OpcP, RegMem(dst, src) );
8687   ins_pipe( ialu_reg_mem );
8688 %}
8689 
8690 // Xor Memory with Register
8691 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8692   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8693   effect(KILL cr);
8694 
8695   ins_cost(150);
8696   format %{ "XOR    $dst,$src" %}
8697   opcode(0x31);  /* Opcode 31 /r */
8698   ins_encode( OpcP, RegMem( src, dst ) );
8699   ins_pipe( ialu_mem_reg );
8700 %}
8701 
8702 // Xor Memory with Immediate
8703 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8704   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8705   effect(KILL cr);
8706 
8707   ins_cost(125);
8708   format %{ "XOR    $dst,$src" %}
8709   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8710   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8711   ins_pipe( ialu_mem_imm );
8712 %}
8713 
8714 //----------Convert Int to Boolean---------------------------------------------
8715 
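// Conv2B yields 0 for a zero source and 1 otherwise.  The expansion copies the source
// and then uses NEG/ADC: NEG sets CF exactly when the value is non-zero, so
// ADC dst,src computes (-src) + src + CF, i.e. 0 for a zero input and 1 for anything else.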
8716 instruct movI_nocopy(rRegI dst, rRegI src) %{
8717   effect( DEF dst, USE src );
8718   format %{ "MOV    $dst,$src" %}
8719   ins_encode( enc_Copy( dst, src) );
8720   ins_pipe( ialu_reg_reg );
8721 %}
8722 
8723 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8724   effect( USE_DEF dst, USE src, KILL cr );
8725 
8726   size(4);
8727   format %{ "NEG    $dst\n\t"
8728             "ADC    $dst,$src" %}
8729   ins_encode( neg_reg(dst),
8730               OpcRegReg(0x13,dst,src) );
8731   ins_pipe( ialu_reg_reg_long );
8732 %}
8733 
8734 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8735   match(Set dst (Conv2B src));
8736 
8737   expand %{
8738     movI_nocopy(dst,src);
8739     ci2b(dst,src,cr);
8740   %}
8741 %}
8742 
8743 instruct movP_nocopy(rRegI dst, eRegP src) %{
8744   effect( DEF dst, USE src );
8745   format %{ "MOV    $dst,$src" %}
8746   ins_encode( enc_Copy( dst, src) );
8747   ins_pipe( ialu_reg_reg );
8748 %}
8749 
8750 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8751   effect( USE_DEF dst, USE src, KILL cr );
8752   format %{ "NEG    $dst\n\t"
8753             "ADC    $dst,$src" %}
8754   ins_encode( neg_reg(dst),
8755               OpcRegReg(0x13,dst,src) );
8756   ins_pipe( ialu_reg_reg_long );
8757 %}
8758 
8759 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8760   match(Set dst (Conv2B src));
8761 
8762   expand %{
8763     movP_nocopy(dst,src);
8764     cp2b(dst,src,cr);
8765   %}
8766 %}
8767 
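// CmpLTMask produces an all-ones mask (-1) when p < q (signed) and 0 otherwise; the
// XOR/CMP/SETlt/NEG sequence below materializes that mask without a branch.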
8768 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8769   match(Set dst (CmpLTMask p q));
8770   effect(KILL cr);
8771   ins_cost(400);
8772 
8773   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8774   format %{ "XOR    $dst,$dst\n\t"
8775             "CMP    $p,$q\n\t"
8776             "SETlt  $dst\n\t"
8777             "NEG    $dst" %}
8778   ins_encode %{
8779     Register Rp = $p$$Register;
8780     Register Rq = $q$$Register;
8781     Register Rd = $dst$$Register;
8782     Label done;
8783     __ xorl(Rd, Rd);
8784     __ cmpl(Rp, Rq);
8785     __ setb(Assembler::less, Rd);
8786     __ negl(Rd);
8787   %}
8788 
8789   ins_pipe(pipe_slow);
8790 %}
8791 
8792 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8793   match(Set dst (CmpLTMask dst zero));
8794   effect(DEF dst, KILL cr);
8795   ins_cost(100);
8796 
8797   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8798   ins_encode %{
8799   __ sarl($dst$$Register, 31);
8800   %}
8801   ins_pipe(ialu_reg);
8802 %}
8803 
8804 /* better to save a register than avoid a branch */
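// Matches p = ((p < q) ? y : 0) + (p - q): the SUB sets the flags and the conditional
// ADD adds y only in the p < q case, so no explicit mask register is needed.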
8805 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8806   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8807   effect(KILL cr);
8808   ins_cost(400);
8809   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8810             "JGE    done\n\t"
8811             "ADD    $p,$y\n"
8812             "done:  " %}
8813   ins_encode %{
8814     Register Rp = $p$$Register;
8815     Register Rq = $q$$Register;
8816     Register Ry = $y$$Register;
8817     Label done;
8818     __ subl(Rp, Rq);
8819     __ jccb(Assembler::greaterEqual, done);
8820     __ addl(Rp, Ry);
8821     __ bind(done);
8822   %}
8823 
8824   ins_pipe(pipe_cmplt);
8825 %}
8826 
8827 /* better to save a register than avoid a branch */
8828 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8829   match(Set y (AndI (CmpLTMask p q) y));
8830   effect(KILL cr);
8831 
8832   ins_cost(300);
8833 
8834   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8835             "JLT      done\n\t"
8836             "XORL     $y, $y\n"
8837             "done:  " %}
8838   ins_encode %{
8839     Register Rp = $p$$Register;
8840     Register Rq = $q$$Register;
8841     Register Ry = $y$$Register;
8842     Label done;
8843     __ cmpl(Rp, Rq);
8844     __ jccb(Assembler::less, done);
8845     __ xorl(Ry, Ry);
8846     __ bind(done);
8847   %}
8848 
8849   ins_pipe(pipe_cmplt);
8850 %}
8851 
8852 /* If I enable this, I encourage spilling in the inner loop of compress.
8853 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8854   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8855 */
8856 //----------Overflow Math Instructions-----------------------------------------
8857 
8858 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8859 %{
8860   match(Set cr (OverflowAddI op1 op2));
8861   effect(DEF cr, USE_KILL op1, USE op2);
8862 
8863   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8864 
8865   ins_encode %{
8866     __ addl($op1$$Register, $op2$$Register);
8867   %}
8868   ins_pipe(ialu_reg_reg);
8869 %}
8870 
8871 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8872 %{
8873   match(Set cr (OverflowAddI op1 op2));
8874   effect(DEF cr, USE_KILL op1, USE op2);
8875 
8876   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8877 
8878   ins_encode %{
8879     __ addl($op1$$Register, $op2$$constant);
8880   %}
8881   ins_pipe(ialu_reg_reg);
8882 %}
8883 
8884 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8885 %{
8886   match(Set cr (OverflowSubI op1 op2));
8887 
8888   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8889   ins_encode %{
8890     __ cmpl($op1$$Register, $op2$$Register);
8891   %}
8892   ins_pipe(ialu_reg_reg);
8893 %}
8894 
8895 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8896 %{
8897   match(Set cr (OverflowSubI op1 op2));
8898 
8899   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8900   ins_encode %{
8901     __ cmpl($op1$$Register, $op2$$constant);
8902   %}
8903   ins_pipe(ialu_reg_reg);
8904 %}
8905 
8906 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8907 %{
8908   match(Set cr (OverflowSubI zero op2));
8909   effect(DEF cr, USE_KILL op2);
8910 
8911   format %{ "NEG    $op2\t# overflow check int" %}
8912   ins_encode %{
8913     __ negl($op2$$Register);
8914   %}
8915   ins_pipe(ialu_reg_reg);
8916 %}
8917 
8918 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8919 %{
8920   match(Set cr (OverflowMulI op1 op2));
8921   effect(DEF cr, USE_KILL op1, USE op2);
8922 
8923   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8924   ins_encode %{
8925     __ imull($op1$$Register, $op2$$Register);
8926   %}
8927   ins_pipe(ialu_reg_reg_alu0);
8928 %}
8929 
8930 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8931 %{
8932   match(Set cr (OverflowMulI op1 op2));
8933   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8934 
8935   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8936   ins_encode %{
8937     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8938   %}
8939   ins_pipe(ialu_reg_reg_alu0);
8940 %}
8941 
8942 //----------Long Instructions------------------------------------------------
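// On this 32-bit port a long value lives in a register pair (lo, hi).  Add and subtract
// are done as ADD/ADC (or SUB/SBB) on the two halves so the carry or borrow propagates
// from the low word into the high word.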
8943 // Add Long Register with Register
8944 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8945   match(Set dst (AddL dst src));
8946   effect(KILL cr);
8947   ins_cost(200);
8948   format %{ "ADD    $dst.lo,$src.lo\n\t"
8949             "ADC    $dst.hi,$src.hi" %}
8950   opcode(0x03, 0x13);
8951   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8952   ins_pipe( ialu_reg_reg_long );
8953 %}
8954 
8955 // Add Long Register with Immediate
8956 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8957   match(Set dst (AddL dst src));
8958   effect(KILL cr);
8959   format %{ "ADD    $dst.lo,$src.lo\n\t"
8960             "ADC    $dst.hi,$src.hi" %}
8961   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8962   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8963   ins_pipe( ialu_reg_long );
8964 %}
8965 
8966 // Add Long Register with Memory
8967 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8968   match(Set dst (AddL dst (LoadL mem)));
8969   effect(KILL cr);
8970   ins_cost(125);
8971   format %{ "ADD    $dst.lo,$mem\n\t"
8972             "ADC    $dst.hi,$mem+4" %}
8973   opcode(0x03, 0x13);
8974   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8975   ins_pipe( ialu_reg_long_mem );
8976 %}
8977 
8978 // Subtract Long Register with Register.
8979 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8980   match(Set dst (SubL dst src));
8981   effect(KILL cr);
8982   ins_cost(200);
8983   format %{ "SUB    $dst.lo,$src.lo\n\t"
8984             "SBB    $dst.hi,$src.hi" %}
8985   opcode(0x2B, 0x1B);
8986   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8987   ins_pipe( ialu_reg_reg_long );
8988 %}
8989 
8990 // Subtract Long Register with Immediate
8991 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8992   match(Set dst (SubL dst src));
8993   effect(KILL cr);
8994   format %{ "SUB    $dst.lo,$src.lo\n\t"
8995             "SBB    $dst.hi,$src.hi" %}
8996   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8997   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8998   ins_pipe( ialu_reg_long );
8999 %}
9000 
9001 // Subtract Long Register with Memory
9002 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9003   match(Set dst (SubL dst (LoadL mem)));
9004   effect(KILL cr);
9005   ins_cost(125);
9006   format %{ "SUB    $dst.lo,$mem\n\t"
9007             "SBB    $dst.hi,$mem+4" %}
9008   opcode(0x2B, 0x1B);
9009   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9010   ins_pipe( ialu_reg_long_mem );
9011 %}
9012 
9013 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9014   match(Set dst (SubL zero dst));
9015   effect(KILL cr);
9016   ins_cost(300);
9017   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9018   ins_encode( neg_long(dst) );
9019   ins_pipe( ialu_reg_reg_long );
9020 %}
9021 
9022 // And Long Register with Register
9023 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9024   match(Set dst (AndL dst src));
9025   effect(KILL cr);
9026   format %{ "AND    $dst.lo,$src.lo\n\t"
9027             "AND    $dst.hi,$src.hi" %}
9028   opcode(0x23,0x23);
9029   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9030   ins_pipe( ialu_reg_reg_long );
9031 %}
9032 
9033 // And Long Register with Immediate
9034 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9035   match(Set dst (AndL dst src));
9036   effect(KILL cr);
9037   format %{ "AND    $dst.lo,$src.lo\n\t"
9038             "AND    $dst.hi,$src.hi" %}
9039   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9040   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9041   ins_pipe( ialu_reg_long );
9042 %}
9043 
9044 // And Long Register with Memory
9045 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9046   match(Set dst (AndL dst (LoadL mem)));
9047   effect(KILL cr);
9048   ins_cost(125);
9049   format %{ "AND    $dst.lo,$mem\n\t"
9050             "AND    $dst.hi,$mem+4" %}
9051   opcode(0x23, 0x23);
9052   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9053   ins_pipe( ialu_reg_long_mem );
9054 %}
9055 
9056 // BMI1 instructions
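// The 64-bit BLSI/BLSMSK/BLSR forms below work half-at-a-time: the 32-bit BMI1
// instruction is applied to the low word first, and the flag it sets (ZF for BLSI,
// CF for BLSMSK/BLSR) reveals whether that word was zero, i.e. whether the high word
// still needs to be processed.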
9057 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9058   match(Set dst (AndL (XorL src1 minus_1) src2));
9059   predicate(UseBMI1Instructions);
9060   effect(KILL cr, TEMP dst);
9061 
9062   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9063             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9064          %}
9065 
9066   ins_encode %{
9067     Register Rdst = $dst$$Register;
9068     Register Rsrc1 = $src1$$Register;
9069     Register Rsrc2 = $src2$$Register;
9070     __ andnl(Rdst, Rsrc1, Rsrc2);
9071     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9072   %}
9073   ins_pipe(ialu_reg_reg_long);
9074 %}
9075 
9076 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9077   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9078   predicate(UseBMI1Instructions);
9079   effect(KILL cr, TEMP dst);
9080 
9081   ins_cost(125);
9082   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9083             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9084          %}
9085 
9086   ins_encode %{
9087     Register Rdst = $dst$$Register;
9088     Register Rsrc1 = $src1$$Register;
9089     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9090 
9091     __ andnl(Rdst, Rsrc1, $src2$$Address);
9092     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9093   %}
9094   ins_pipe(ialu_reg_mem);
9095 %}
9096 
9097 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9098   match(Set dst (AndL (SubL imm_zero src) src));
9099   predicate(UseBMI1Instructions);
9100   effect(KILL cr, TEMP dst);
9101 
9102   format %{ "MOVL   $dst.hi, 0\n\t"
9103             "BLSIL  $dst.lo, $src.lo\n\t"
9104             "JNZ    done\n\t"
9105             "BLSIL  $dst.hi, $src.hi\n"
9106             "done:"
9107          %}
9108 
9109   ins_encode %{
9110     Label done;
9111     Register Rdst = $dst$$Register;
9112     Register Rsrc = $src$$Register;
9113     __ movl(HIGH_FROM_LOW(Rdst), 0);
9114     __ blsil(Rdst, Rsrc);
9115     __ jccb(Assembler::notZero, done);
9116     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9117     __ bind(done);
9118   %}
9119   ins_pipe(ialu_reg);
9120 %}
9121 
9122 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9123   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9124   predicate(UseBMI1Instructions);
9125   effect(KILL cr, TEMP dst);
9126 
9127   ins_cost(125);
9128   format %{ "MOVL   $dst.hi, 0\n\t"
9129             "BLSIL  $dst.lo, $src\n\t"
9130             "JNZ    done\n\t"
9131             "BLSIL  $dst.hi, $src+4\n"
9132             "done:"
9133          %}
9134 
9135   ins_encode %{
9136     Label done;
9137     Register Rdst = $dst$$Register;
9138     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9139 
9140     __ movl(HIGH_FROM_LOW(Rdst), 0);
9141     __ blsil(Rdst, $src$$Address);
9142     __ jccb(Assembler::notZero, done);
9143     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9144     __ bind(done);
9145   %}
9146   ins_pipe(ialu_reg_mem);
9147 %}
9148 
9149 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9150 %{
9151   match(Set dst (XorL (AddL src minus_1) src));
9152   predicate(UseBMI1Instructions);
9153   effect(KILL cr, TEMP dst);
9154 
9155   format %{ "MOVL    $dst.hi, 0\n\t"
9156             "BLSMSKL $dst.lo, $src.lo\n\t"
9157             "JNC     done\n\t"
9158             "BLSMSKL $dst.hi, $src.hi\n"
9159             "done:"
9160          %}
9161 
9162   ins_encode %{
9163     Label done;
9164     Register Rdst = $dst$$Register;
9165     Register Rsrc = $src$$Register;
9166     __ movl(HIGH_FROM_LOW(Rdst), 0);
9167     __ blsmskl(Rdst, Rsrc);
9168     __ jccb(Assembler::carryClear, done);
9169     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9170     __ bind(done);
9171   %}
9172 
9173   ins_pipe(ialu_reg);
9174 %}
9175 
9176 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9177 %{
9178   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9179   predicate(UseBMI1Instructions);
9180   effect(KILL cr, TEMP dst);
9181 
9182   ins_cost(125);
9183   format %{ "MOVL    $dst.hi, 0\n\t"
9184             "BLSMSKL $dst.lo, $src\n\t"
9185             "JNC     done\n\t"
9186             "BLSMSKL $dst.hi, $src+4\n"
9187             "done:"
9188          %}
9189 
9190   ins_encode %{
9191     Label done;
9192     Register Rdst = $dst$$Register;
9193     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9194 
9195     __ movl(HIGH_FROM_LOW(Rdst), 0);
9196     __ blsmskl(Rdst, $src$$Address);
9197     __ jccb(Assembler::carryClear, done);
9198     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9199     __ bind(done);
9200   %}
9201 
9202   ins_pipe(ialu_reg_mem);
9203 %}
9204 
9205 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9206 %{
9207   match(Set dst (AndL (AddL src minus_1) src) );
9208   predicate(UseBMI1Instructions);
9209   effect(KILL cr, TEMP dst);
9210 
9211   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9212             "BLSRL  $dst.lo, $src.lo\n\t"
9213             "JNC    done\n\t"
9214             "BLSRL  $dst.hi, $src.hi\n"
9215             "done:"
9216   %}
9217 
9218   ins_encode %{
9219     Label done;
9220     Register Rdst = $dst$$Register;
9221     Register Rsrc = $src$$Register;
9222     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9223     __ blsrl(Rdst, Rsrc);
9224     __ jccb(Assembler::carryClear, done);
9225     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9226     __ bind(done);
9227   %}
9228 
9229   ins_pipe(ialu_reg);
9230 %}
9231 
9232 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9233 %{
9234   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9235   predicate(UseBMI1Instructions);
9236   effect(KILL cr, TEMP dst);
9237 
9238   ins_cost(125);
9239   format %{ "MOVL   $dst.hi, $src+4\n\t"
9240             "BLSRL  $dst.lo, $src\n\t"
9241             "JNC    done\n\t"
9242             "BLSRL  $dst.hi, $src+4\n"
9243             "done:"
9244   %}
9245 
9246   ins_encode %{
9247     Label done;
9248     Register Rdst = $dst$$Register;
9249     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9250     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9251     __ blsrl(Rdst, $src$$Address);
9252     __ jccb(Assembler::carryClear, done);
9253     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9254     __ bind(done);
9255   %}
9256 
9257   ins_pipe(ialu_reg_mem);
9258 %}
9259 
9260 // Or Long Register with Register
9261 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9262   match(Set dst (OrL dst src));
9263   effect(KILL cr);
9264   format %{ "OR     $dst.lo,$src.lo\n\t"
9265             "OR     $dst.hi,$src.hi" %}
9266   opcode(0x0B,0x0B);
9267   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9268   ins_pipe( ialu_reg_reg_long );
9269 %}
9270 
9271 // Or Long Register with Immediate
9272 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9273   match(Set dst (OrL dst src));
9274   effect(KILL cr);
9275   format %{ "OR     $dst.lo,$src.lo\n\t"
9276             "OR     $dst.hi,$src.hi" %}
9277   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9278   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9279   ins_pipe( ialu_reg_long );
9280 %}
9281 
9282 // Or Long Register with Memory
9283 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9284   match(Set dst (OrL dst (LoadL mem)));
9285   effect(KILL cr);
9286   ins_cost(125);
9287   format %{ "OR     $dst.lo,$mem\n\t"
9288             "OR     $dst.hi,$mem+4" %}
9289   opcode(0x0B,0x0B);
9290   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9291   ins_pipe( ialu_reg_long_mem );
9292 %}
9293 
9294 // Xor Long Register with Register
9295 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9296   match(Set dst (XorL dst src));
9297   effect(KILL cr);
9298   format %{ "XOR    $dst.lo,$src.lo\n\t"
9299             "XOR    $dst.hi,$src.hi" %}
9300   opcode(0x33,0x33);
9301   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9302   ins_pipe( ialu_reg_reg_long );
9303 %}
9304 
9305 // Xor Long Register with Immediate -1
9306 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9307   match(Set dst (XorL dst imm));
9308   format %{ "NOT    $dst.lo\n\t"
9309             "NOT    $dst.hi" %}
9310   ins_encode %{
9311      __ notl($dst$$Register);
9312      __ notl(HIGH_FROM_LOW($dst$$Register));
9313   %}
9314   ins_pipe( ialu_reg_long );
9315 %}
9316 
9317 // Xor Long Register with Immediate
9318 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9319   match(Set dst (XorL dst src));
9320   effect(KILL cr);
9321   format %{ "XOR    $dst.lo,$src.lo\n\t"
9322             "XOR    $dst.hi,$src.hi" %}
9323   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9324   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9325   ins_pipe( ialu_reg_long );
9326 %}
9327 
9328 // Xor Long Register with Memory
9329 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9330   match(Set dst (XorL dst (LoadL mem)));
9331   effect(KILL cr);
9332   ins_cost(125);
9333   format %{ "XOR    $dst.lo,$mem\n\t"
9334             "XOR    $dst.hi,$mem+4" %}
9335   opcode(0x33,0x33);
9336   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9337   ins_pipe( ialu_reg_long_mem );
9338 %}
9339 
9340 // Shift Left Long by 1
9341 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9342   predicate(UseNewLongLShift);
9343   match(Set dst (LShiftL dst cnt));
9344   effect(KILL cr);
9345   ins_cost(100);
9346   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9347             "ADC    $dst.hi,$dst.hi" %}
9348   ins_encode %{
9349     __ addl($dst$$Register,$dst$$Register);
9350     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9351   %}
9352   ins_pipe( ialu_reg_long );
9353 %}
9354 
9355 // Shift Left Long by 2
9356 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9357   predicate(UseNewLongLShift);
9358   match(Set dst (LShiftL dst cnt));
9359   effect(KILL cr);
9360   ins_cost(100);
9361   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9362             "ADC    $dst.hi,$dst.hi\n\t"
9363             "ADD    $dst.lo,$dst.lo\n\t"
9364             "ADC    $dst.hi,$dst.hi" %}
9365   ins_encode %{
9366     __ addl($dst$$Register,$dst$$Register);
9367     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9368     __ addl($dst$$Register,$dst$$Register);
9369     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9370   %}
9371   ins_pipe( ialu_reg_long );
9372 %}
9373 
9374 // Shift Left Long by 3
9375 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9376   predicate(UseNewLongLShift);
9377   match(Set dst (LShiftL dst cnt));
9378   effect(KILL cr);
9379   ins_cost(100);
9380   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9381             "ADC    $dst.hi,$dst.hi\n\t"
9382             "ADD    $dst.lo,$dst.lo\n\t"
9383             "ADC    $dst.hi,$dst.hi\n\t"
9384             "ADD    $dst.lo,$dst.lo\n\t"
9385             "ADC    $dst.hi,$dst.hi" %}
9386   ins_encode %{
9387     __ addl($dst$$Register,$dst$$Register);
9388     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9389     __ addl($dst$$Register,$dst$$Register);
9390     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9391     __ addl($dst$$Register,$dst$$Register);
9392     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9393   %}
9394   ins_pipe( ialu_reg_long );
9395 %}
9396 
9397 // Shift Left Long by 1-31
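// For counts of 1..31 the double-precision SHLD shifts the high word left while pulling
// the vacated bits out of the top of the low word; an ordinary SHL then shifts the low word.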
9398 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9399   match(Set dst (LShiftL dst cnt));
9400   effect(KILL cr);
9401   ins_cost(200);
9402   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9403             "SHL    $dst.lo,$cnt" %}
9404   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9405   ins_encode( move_long_small_shift(dst,cnt) );
9406   ins_pipe( ialu_reg_long );
9407 %}
9408 
9409 // Shift Left Long by 32-63
9410 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9411   match(Set dst (LShiftL dst cnt));
9412   effect(KILL cr);
9413   ins_cost(300);
9414   format %{ "MOV    $dst.hi,$dst.lo\n"
9415           "\tSHL    $dst.hi,$cnt-32\n"
9416           "\tXOR    $dst.lo,$dst.lo" %}
9417   opcode(0xC1, 0x4);  /* C1 /4 ib */
9418   ins_encode( move_long_big_shift_clr(dst,cnt) );
9419   ins_pipe( ialu_reg_long );
9420 %}
9421 
9422 // Shift Left Long by variable
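// A variable count may be 0..63.  The TEST of bit 5 handles counts of 32 and above by
// first moving the low word into the high word and clearing the low word; the SHLD/SHL
// pair then shifts by the remaining amount, since the hardware masks the count in CL
// to five bits.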
9423 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9424   match(Set dst (LShiftL dst shift));
9425   effect(KILL cr);
9426   ins_cost(500+200);
9427   size(17);
9428   format %{ "TEST   $shift,32\n\t"
9429             "JEQ,s  small\n\t"
9430             "MOV    $dst.hi,$dst.lo\n\t"
9431             "XOR    $dst.lo,$dst.lo\n"
9432     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9433             "SHL    $dst.lo,$shift" %}
9434   ins_encode( shift_left_long( dst, shift ) );
9435   ins_pipe( pipe_slow );
9436 %}
9437 
9438 // Shift Right Long by 1-31
9439 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9440   match(Set dst (URShiftL dst cnt));
9441   effect(KILL cr);
9442   ins_cost(200);
9443   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9444             "SHR    $dst.hi,$cnt" %}
9445   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9446   ins_encode( move_long_small_shift(dst,cnt) );
9447   ins_pipe( ialu_reg_long );
9448 %}
9449 
9450 // Shift Right Long by 32-63
9451 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9452   match(Set dst (URShiftL dst cnt));
9453   effect(KILL cr);
9454   ins_cost(300);
9455   format %{ "MOV    $dst.lo,$dst.hi\n"
9456           "\tSHR    $dst.lo,$cnt-32\n"
9457           "\tXOR    $dst.hi,$dst.hi" %}
9458   opcode(0xC1, 0x5);  /* C1 /5 ib */
9459   ins_encode( move_long_big_shift_clr(dst,cnt) );
9460   ins_pipe( ialu_reg_long );
9461 %}
9462 
9463 // Shift Right Long by variable
9464 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9465   match(Set dst (URShiftL dst shift));
9466   effect(KILL cr);
9467   ins_cost(600);
9468   size(17);
9469   format %{ "TEST   $shift,32\n\t"
9470             "JEQ,s  small\n\t"
9471             "MOV    $dst.lo,$dst.hi\n\t"
9472             "XOR    $dst.hi,$dst.hi\n"
9473     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9474             "SHR    $dst.hi,$shift" %}
9475   ins_encode( shift_right_long( dst, shift ) );
9476   ins_pipe( pipe_slow );
9477 %}
9478 
9479 // Shift Right Long by 1-31
9480 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9481   match(Set dst (RShiftL dst cnt));
9482   effect(KILL cr);
9483   ins_cost(200);
9484   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9485             "SAR    $dst.hi,$cnt" %}
9486   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9487   ins_encode( move_long_small_shift(dst,cnt) );
9488   ins_pipe( ialu_reg_long );
9489 %}
9490 
9491 // Shift Right Long by 32-63
9492 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9493   match(Set dst (RShiftL dst cnt));
9494   effect(KILL cr);
9495   ins_cost(300);
9496   format %{ "MOV    $dst.lo,$dst.hi\n"
9497           "\tSAR    $dst.lo,$cnt-32\n"
9498           "\tSAR    $dst.hi,31" %}
9499   opcode(0xC1, 0x7);  /* C1 /7 ib */
9500   ins_encode( move_long_big_shift_sign(dst,cnt) );
9501   ins_pipe( ialu_reg_long );
9502 %}
9503 
9504 // Shift Right arithmetic Long by variable
9505 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9506   match(Set dst (RShiftL dst shift));
9507   effect(KILL cr);
9508   ins_cost(600);
9509   size(18);
9510   format %{ "TEST   $shift,32\n\t"
9511             "JEQ,s  small\n\t"
9512             "MOV    $dst.lo,$dst.hi\n\t"
9513             "SAR    $dst.hi,31\n"
9514     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9515             "SAR    $dst.hi,$shift" %}
9516   ins_encode( shift_right_arith_long( dst, shift ) );
9517   ins_pipe( pipe_slow );
9518 %}
9519 
9520 
9521 //----------Double Instructions------------------------------------------------
9522 // Double Math
9523 
9524 // Compare & branch
9525 
9526 // P6 version of double compare, sets condition codes in EFLAGS
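// FUCOMIP leaves PF set for an unordered (NaN) compare.  The fixup below (JNP over a
// MOV AH,1 / SAHF) turns that case into CF=1, so an unordered result is treated as
// "less than".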
9527 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9528   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9529   match(Set cr (CmpD src1 src2));
9530   effect(KILL rax);
9531   ins_cost(150);
9532   format %{ "FLD    $src1\n\t"
9533             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9534             "JNP    exit\n\t"
9535             "MOV    ah,1       // saw a NaN, set CF\n\t"
9536             "SAHF\n"
9537      "exit:\tNOP               // avoid branch to branch" %}
9538   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9539   ins_encode( Push_Reg_DPR(src1),
9540               OpcP, RegOpc(src2),
9541               cmpF_P6_fixup );
9542   ins_pipe( pipe_slow );
9543 %}
9544 
9545 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9546   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9547   match(Set cr (CmpD src1 src2));
9548   ins_cost(150);
9549   format %{ "FLD    $src1\n\t"
9550             "FUCOMIP ST,$src2  // P6 instruction" %}
9551   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9552   ins_encode( Push_Reg_DPR(src1),
9553               OpcP, RegOpc(src2));
9554   ins_pipe( pipe_slow );
9555 %}
9556 
9557 // Compare & branch
9558 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9559   predicate(UseSSE<=1);
9560   match(Set cr (CmpD src1 src2));
9561   effect(KILL rax);
9562   ins_cost(200);
9563   format %{ "FLD    $src1\n\t"
9564             "FCOMp  $src2\n\t"
9565             "FNSTSW AX\n\t"
9566             "TEST   AX,0x400\n\t"
9567             "JZ,s   flags\n\t"
9568             "MOV    AH,1\t# unordered treat as LT\n"
9569     "flags:\tSAHF" %}
9570   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9571   ins_encode( Push_Reg_DPR(src1),
9572               OpcP, RegOpc(src2),
9573               fpu_flags);
9574   ins_pipe( pipe_slow );
9575 %}
9576 
9577 // Compare vs zero into -1,0,1
9578 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9579   predicate(UseSSE<=1);
9580   match(Set dst (CmpD3 src1 zero));
9581   effect(KILL cr, KILL rax);
9582   ins_cost(280);
9583   format %{ "FTSTD  $dst,$src1" %}
9584   opcode(0xE4, 0xD9);
9585   ins_encode( Push_Reg_DPR(src1),
9586               OpcS, OpcP, PopFPU,
9587               CmpF_Result(dst));
9588   ins_pipe( pipe_slow );
9589 %}
9590 
9591 // Compare into -1,0,1
9592 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9593   predicate(UseSSE<=1);
9594   match(Set dst (CmpD3 src1 src2));
9595   effect(KILL cr, KILL rax);
9596   ins_cost(300);
9597   format %{ "FCMPD  $dst,$src1,$src2" %}
9598   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9599   ins_encode( Push_Reg_DPR(src1),
9600               OpcP, RegOpc(src2),
9601               CmpF_Result(dst));
9602   ins_pipe( pipe_slow );
9603 %}
9604 
9605 // float compare and set condition codes in EFLAGS by XMM regs
9606 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9607   predicate(UseSSE>=2);
9608   match(Set cr (CmpD src1 src2));
9609   ins_cost(145);
9610   format %{ "UCOMISD $src1,$src2\n\t"
9611             "JNP,s   exit\n\t"
9612             "PUSHF\t# saw NaN, set CF\n\t"
9613             "AND     [rsp], #0xffffff2b\n\t"
9614             "POPF\n"
9615     "exit:" %}
9616   ins_encode %{
9617     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9618     emit_cmpfp_fixup(_masm);
9619   %}
9620   ins_pipe( pipe_slow );
9621 %}
9622 
9623 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9624   predicate(UseSSE>=2);
9625   match(Set cr (CmpD src1 src2));
9626   ins_cost(100);
9627   format %{ "UCOMISD $src1,$src2" %}
9628   ins_encode %{
9629     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9630   %}
9631   ins_pipe( pipe_slow );
9632 %}
9633 
9634 // float compare and set condition codes in EFLAGS by XMM regs
9635 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9636   predicate(UseSSE>=2);
9637   match(Set cr (CmpD src1 (LoadD src2)));
9638   ins_cost(145);
9639   format %{ "UCOMISD $src1,$src2\n\t"
9640             "JNP,s   exit\n\t"
9641             "PUSHF\t# saw NaN, set CF\n\t"
9642             "AND     [rsp], #0xffffff2b\n\t"
9643             "POPF\n"
9644     "exit:" %}
9645   ins_encode %{
9646     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9647     emit_cmpfp_fixup(_masm);
9648   %}
9649   ins_pipe( pipe_slow );
9650 %}
9651 
9652 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9653   predicate(UseSSE>=2);
9654   match(Set cr (CmpD src1 (LoadD src2)));
9655   ins_cost(100);
9656   format %{ "UCOMISD $src1,$src2" %}
9657   ins_encode %{
9658     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9659   %}
9660   ins_pipe( pipe_slow );
9661 %}
9662 
9663 // Compare into -1,0,1 in XMM
9664 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9665   predicate(UseSSE>=2);
9666   match(Set dst (CmpD3 src1 src2));
9667   effect(KILL cr);
9668   ins_cost(255);
9669   format %{ "UCOMISD $src1, $src2\n\t"
9670             "MOV     $dst, #-1\n\t"
9671             "JP,s    done\n\t"
9672             "JB,s    done\n\t"
9673             "SETNE   $dst\n\t"
9674             "MOVZB   $dst, $dst\n"
9675     "done:" %}
9676   ins_encode %{
9677     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9678     emit_cmpfp3(_masm, $dst$$Register);
9679   %}
9680   ins_pipe( pipe_slow );
9681 %}
9682 
9683 // Compare into -1,0,1 in XMM and memory
9684 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9685   predicate(UseSSE>=2);
9686   match(Set dst (CmpD3 src1 (LoadD src2)));
9687   effect(KILL cr);
9688   ins_cost(275);
9689   format %{ "UCOMISD $src1, $src2\n\t"
9690             "MOV     $dst, #-1\n\t"
9691             "JP,s    done\n\t"
9692             "JB,s    done\n\t"
9693             "SETNE   $dst\n\t"
9694             "MOVZB   $dst, $dst\n"
9695     "done:" %}
9696   ins_encode %{
9697     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9698     emit_cmpfp3(_masm, $dst$$Register);
9699   %}
9700   ins_pipe( pipe_slow );
9701 %}
9702 
9703 
9704 instruct subDPR_reg(regDPR dst, regDPR src) %{
9705   predicate (UseSSE <=1);
9706   match(Set dst (SubD dst src));
9707 
9708   format %{ "FLD    $src\n\t"
9709             "DSUBp  $dst,ST" %}
9710   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9711   ins_cost(150);
9712   ins_encode( Push_Reg_DPR(src),
9713               OpcP, RegOpc(dst) );
9714   ins_pipe( fpu_reg_reg );
9715 %}
9716 
9717 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9718   predicate (UseSSE <=1);
9719   match(Set dst (RoundDouble (SubD src1 src2)));
9720   ins_cost(250);
9721 
9722   format %{ "FLD    $src2\n\t"
9723             "DSUB   ST,$src1\n\t"
9724             "FSTP_D $dst\t# D-round" %}
9725   opcode(0xD8, 0x5);
9726   ins_encode( Push_Reg_DPR(src2),
9727               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9728   ins_pipe( fpu_mem_reg_reg );
9729 %}
9730 
9731 
9732 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9733   predicate (UseSSE <=1);
9734   match(Set dst (SubD dst (LoadD src)));
9735   ins_cost(150);
9736 
9737   format %{ "FLD    $src\n\t"
9738             "DSUBp  $dst,ST" %}
9739   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9740   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9741               OpcP, RegOpc(dst) );
9742   ins_pipe( fpu_reg_mem );
9743 %}
9744 
9745 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9746   predicate (UseSSE<=1);
9747   match(Set dst (AbsD src));
9748   ins_cost(100);
9749   format %{ "FABS" %}
9750   opcode(0xE1, 0xD9);
9751   ins_encode( OpcS, OpcP );
9752   ins_pipe( fpu_reg_reg );
9753 %}
9754 
9755 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9756   predicate(UseSSE<=1);
9757   match(Set dst (NegD src));
9758   ins_cost(100);
9759   format %{ "FCHS" %}
9760   opcode(0xE0, 0xD9);
9761   ins_encode( OpcS, OpcP );
9762   ins_pipe( fpu_reg_reg );
9763 %}
9764 
9765 instruct addDPR_reg(regDPR dst, regDPR src) %{
9766   predicate(UseSSE<=1);
9767   match(Set dst (AddD dst src));
9768   format %{ "FLD    $src\n\t"
9769             "DADD   $dst,ST" %}
9770   size(4);
9771   ins_cost(150);
9772   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9773   ins_encode( Push_Reg_DPR(src),
9774               OpcP, RegOpc(dst) );
9775   ins_pipe( fpu_reg_reg );
9776 %}
9777 
9778 
9779 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9780   predicate(UseSSE<=1);
9781   match(Set dst (RoundDouble (AddD src1 src2)));
9782   ins_cost(250);
9783 
9784   format %{ "FLD    $src2\n\t"
9785             "DADD   ST,$src1\n\t"
9786             "FSTP_D $dst\t# D-round" %}
9787   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9788   ins_encode( Push_Reg_DPR(src2),
9789               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9790   ins_pipe( fpu_mem_reg_reg );
9791 %}
9792 
9793 
9794 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9795   predicate(UseSSE<=1);
9796   match(Set dst (AddD dst (LoadD src)));
9797   ins_cost(150);
9798 
9799   format %{ "FLD    $src\n\t"
9800             "DADDp  $dst,ST" %}
9801   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9802   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9803               OpcP, RegOpc(dst) );
9804   ins_pipe( fpu_reg_mem );
9805 %}
9806 
9807 // add-to-memory
9808 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9809   predicate(UseSSE<=1);
9810   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9811   ins_cost(150);
9812 
9813   format %{ "FLD_D  $dst\n\t"
9814             "DADD   ST,$src\n\t"
9815             "FST_D  $dst" %}
9816   opcode(0xDD, 0x0);
9817   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9818               Opcode(0xD8), RegOpc(src),
9819               set_instruction_start,
9820               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9821   ins_pipe( fpu_reg_mem );
9822 %}
9823 
9824 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9825   predicate(UseSSE<=1);
9826   match(Set dst (AddD dst con));
9827   ins_cost(125);
9828   format %{ "FLD1\n\t"
9829             "DADDp  $dst,ST" %}
9830   ins_encode %{
9831     __ fld1();
9832     __ faddp($dst$$reg);
9833   %}
9834   ins_pipe(fpu_reg);
9835 %}
9836 
9837 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9838   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9839   match(Set dst (AddD dst con));
9840   ins_cost(200);
9841   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9842             "DADDp  $dst,ST" %}
9843   ins_encode %{
9844     __ fld_d($constantaddress($con));
9845     __ faddp($dst$$reg);
9846   %}
9847   ins_pipe(fpu_reg_mem);
9848 %}
9849 
9850 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9851   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9852   match(Set dst (RoundDouble (AddD src con)));
9853   ins_cost(200);
9854   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9855             "DADD   ST,$src\n\t"
9856             "FSTP_D $dst\t# D-round" %}
9857   ins_encode %{
9858     __ fld_d($constantaddress($con));
9859     __ fadd($src$$reg);
9860     __ fstp_d(Address(rsp, $dst$$disp));
9861   %}
9862   ins_pipe(fpu_mem_reg_con);
9863 %}
9864 
9865 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9866   predicate(UseSSE<=1);
9867   match(Set dst (MulD dst src));
9868   format %{ "FLD    $src\n\t"
9869             "DMULp  $dst,ST" %}
9870   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9871   ins_cost(150);
9872   ins_encode( Push_Reg_DPR(src),
9873               OpcP, RegOpc(dst) );
9874   ins_pipe( fpu_reg_reg );
9875 %}
9876 
// Strict FP instruction biases the argument before the multiply, then
// biases the result, to avoid double rounding of subnormals.
9879 //
9880 // scale arg1 by multiplying arg1 by 2^(-15360)
9881 // load arg2
9882 // multiply scaled arg1 by arg2
9883 // rescale product by 2^(15360)
9884 //
9885 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9886   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9887   match(Set dst (MulD dst src));
9888   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9889 
9890   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9891             "DMULp  $dst,ST\n\t"
9892             "FLD    $src\n\t"
9893             "DMULp  $dst,ST\n\t"
9894             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9895             "DMULp  $dst,ST\n\t" %}
9896   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9897   ins_encode( strictfp_bias1(dst),
9898               Push_Reg_DPR(src),
9899               OpcP, RegOpc(dst),
9900               strictfp_bias2(dst) );
9901   ins_pipe( fpu_reg_reg );
9902 %}
9903 
9904 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9905   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9906   match(Set dst (MulD dst con));
9907   ins_cost(200);
9908   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9909             "DMULp  $dst,ST" %}
9910   ins_encode %{
9911     __ fld_d($constantaddress($con));
9912     __ fmulp($dst$$reg);
9913   %}
9914   ins_pipe(fpu_reg_mem);
9915 %}
9916 
9917 
9918 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9919   predicate( UseSSE<=1 );
9920   match(Set dst (MulD dst (LoadD src)));
9921   ins_cost(200);
9922   format %{ "FLD_D  $src\n\t"
9923             "DMULp  $dst,ST" %}
9924   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9925   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9926               OpcP, RegOpc(dst) );
9927   ins_pipe( fpu_reg_mem );
9928 %}
9929 
9930 //
9931 // Cisc-alternate to reg-reg multiply
9932 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9933   predicate( UseSSE<=1 );
9934   match(Set dst (MulD src (LoadD mem)));
9935   ins_cost(250);
9936   format %{ "FLD_D  $mem\n\t"
9937             "DMUL   ST,$src\n\t"
9938             "FSTP_D $dst" %}
9939   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9940   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9941               OpcReg_FPR(src),
9942               Pop_Reg_DPR(dst) );
9943   ins_pipe( fpu_reg_reg_mem );
9944 %}
9945 
9946 
9947 // MACRO3 -- addDPR a mulDPR
9948 // This instruction is a '2-address' instruction in that the result goes
9949 // back to src2.  This eliminates a move from the macro; possibly the
9950 // register allocator will have to add it back (and maybe not).
9951 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9952   predicate( UseSSE<=1 );
9953   match(Set src2 (AddD (MulD src0 src1) src2));
9954   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9955             "DMUL   ST,$src1\n\t"
9956             "DADDp  $src2,ST" %}
9957   ins_cost(250);
9958   opcode(0xDD); /* LoadD DD /0 */
9959   ins_encode( Push_Reg_FPR(src0),
9960               FMul_ST_reg(src1),
9961               FAddP_reg_ST(src2) );
9962   ins_pipe( fpu_reg_reg_reg );
9963 %}
9964 
9965 
9966 // MACRO3 -- subDPR a mulDPR
9967 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9968   predicate( UseSSE<=1 );
9969   match(Set src2 (SubD (MulD src0 src1) src2));
9970   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9971             "DMUL   ST,$src1\n\t"
9972             "DSUBRp $src2,ST" %}
9973   ins_cost(250);
9974   ins_encode( Push_Reg_FPR(src0),
9975               FMul_ST_reg(src1),
9976               Opcode(0xDE), Opc_plus(0xE0,src2));
9977   ins_pipe( fpu_reg_reg_reg );
9978 %}
9979 
9980 
9981 instruct divDPR_reg(regDPR dst, regDPR src) %{
9982   predicate( UseSSE<=1 );
9983   match(Set dst (DivD dst src));
9984 
9985   format %{ "FLD    $src\n\t"
9986             "FDIVp  $dst,ST" %}
9987   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9988   ins_cost(150);
9989   ins_encode( Push_Reg_DPR(src),
9990               OpcP, RegOpc(dst) );
9991   ins_pipe( fpu_reg_reg );
9992 %}
9993 
// Strict FP instruction biases the argument before the division, then
// biases the result, to avoid double rounding of subnormals.
9996 //
9997 // scale dividend by multiplying dividend by 2^(-15360)
9998 // load divisor
9999 // divide scaled dividend by divisor
10000 // rescale quotient by 2^(15360)
10001 //
10002 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
10007 
10008   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10009             "DMULp  $dst,ST\n\t"
10010             "FLD    $src\n\t"
10011             "FDIVp  $dst,ST\n\t"
10012             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10013             "DMULp  $dst,ST\n\t" %}
10014   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10015   ins_encode( strictfp_bias1(dst),
10016               Push_Reg_DPR(src),
10017               OpcP, RegOpc(dst),
10018               strictfp_bias2(dst) );
10019   ins_pipe( fpu_reg_reg );
10020 %}
10021 
10022 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10023   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10024   match(Set dst (RoundDouble (DivD src1 src2)));
10025 
10026   format %{ "FLD    $src1\n\t"
10027             "FDIV   ST,$src2\n\t"
10028             "FSTP_D $dst\t# D-round" %}
10029   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10030   ins_encode( Push_Reg_DPR(src1),
10031               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10032   ins_pipe( fpu_mem_reg_reg );
10033 %}
10034 
10035 
10036 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10037   predicate(UseSSE<=1);
10038   match(Set dst (ModD dst src));
10039   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10040 
10041   format %{ "DMOD   $dst,$src" %}
10042   ins_cost(250);
10043   ins_encode(Push_Reg_Mod_DPR(dst, src),
10044               emitModDPR(),
10045               Push_Result_Mod_DPR(src),
10046               Pop_Reg_DPR(dst));
10047   ins_pipe( pipe_slow );
10048 %}
10049 
10050 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10051   predicate(UseSSE>=2);
10052   match(Set dst (ModD src0 src1));
10053   effect(KILL rax, KILL cr);
10054 
10055   format %{ "SUB    ESP,8\t # DMOD\n"
10056           "\tMOVSD  [ESP+0],$src1\n"
10057           "\tFLD_D  [ESP+0]\n"
10058           "\tMOVSD  [ESP+0],$src0\n"
10059           "\tFLD_D  [ESP+0]\n"
10060      "loop:\tFPREM\n"
10061           "\tFWAIT\n"
10062           "\tFNSTSW AX\n"
10063           "\tSAHF\n"
10064           "\tJP     loop\n"
10065           "\tFSTP_D [ESP+0]\n"
10066           "\tMOVSD  $dst,[ESP+0]\n"
10067           "\tADD    ESP,8\n"
10068           "\tFSTP   ST0\t # Restore FPU Stack"
10069     %}
10070   ins_cost(250);
10071   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10072   ins_pipe( pipe_slow );
10073 %}
10074 
10075 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10076   predicate (UseSSE<=1);
10077   match(Set dst(AtanD dst src));
10078   format %{ "DATA   $dst,$src" %}
10079   opcode(0xD9, 0xF3);
10080   ins_encode( Push_Reg_DPR(src),
10081               OpcP, OpcS, RegOpc(dst) );
10082   ins_pipe( pipe_slow );
10083 %}
10084 
10085 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10086   predicate (UseSSE>=2);
10087   match(Set dst(AtanD dst src));
10088   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10089   format %{ "DATA   $dst,$src" %}
10090   opcode(0xD9, 0xF3);
10091   ins_encode( Push_SrcD(src),
10092               OpcP, OpcS, Push_ResultD(dst) );
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10097   predicate (UseSSE<=1);
10098   match(Set dst (SqrtD src));
10099   format %{ "DSQRT  $dst,$src" %}
10100   opcode(0xFA, 0xD9);
10101   ins_encode( Push_Reg_DPR(src),
10102               OpcS, OpcP, Pop_Reg_DPR(dst) );
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 //-------------Float Instructions-------------------------------
10107 // Float Math
10108 
10109 // Code for float compare:
10110 //     fcompp();
10111 //     fwait(); fnstsw_ax();
10112 //     sahf();
10113 //     movl(dst, unordered_result);
10114 //     jcc(Assembler::parity, exit);
10115 //     movl(dst, less_result);
10116 //     jcc(Assembler::below, exit);
10117 //     movl(dst, equal_result);
10118 //     jcc(Assembler::equal, exit);
10119 //     movl(dst, greater_result);
10120 //   exit:
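//
// The XMM forms further below get the same -1,0,1 result from
// UCOMISS/UCOMISD followed by emit_cmpfp3(), without touching the x87 stack.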
10121 
10122 // P6 version of float compare, sets condition codes in EFLAGS
10123 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10124   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10125   match(Set cr (CmpF src1 src2));
10126   effect(KILL rax);
10127   ins_cost(150);
10128   format %{ "FLD    $src1\n\t"
10129             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10130             "JNP    exit\n\t"
10131             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10132             "SAHF\n"
10133      "exit:\tNOP               // avoid branch to branch" %}
10134   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10135   ins_encode( Push_Reg_DPR(src1),
10136               OpcP, RegOpc(src2),
10137               cmpF_P6_fixup );
10138   ins_pipe( pipe_slow );
10139 %}
10140 
10141 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10142   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10143   match(Set cr (CmpF src1 src2));
10144   ins_cost(100);
10145   format %{ "FLD    $src1\n\t"
10146             "FUCOMIP ST,$src2  // P6 instruction" %}
10147   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10148   ins_encode( Push_Reg_DPR(src1),
10149               OpcP, RegOpc(src2));
10150   ins_pipe( pipe_slow );
10151 %}
10152 
10153 
10154 // Compare & branch
10155 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10156   predicate(UseSSE == 0);
10157   match(Set cr (CmpF src1 src2));
10158   effect(KILL rax);
10159   ins_cost(200);
10160   format %{ "FLD    $src1\n\t"
10161             "FCOMp  $src2\n\t"
10162             "FNSTSW AX\n\t"
10163             "TEST   AX,0x400\n\t"
10164             "JZ,s   flags\n\t"
10165             "MOV    AH,1\t# unordered treat as LT\n"
10166     "flags:\tSAHF" %}
10167   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10168   ins_encode( Push_Reg_DPR(src1),
10169               OpcP, RegOpc(src2),
10170               fpu_flags);
10171   ins_pipe( pipe_slow );
10172 %}
10173 
10174 // Compare vs zero into -1,0,1
10175 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10176   predicate(UseSSE == 0);
10177   match(Set dst (CmpF3 src1 zero));
10178   effect(KILL cr, KILL rax);
10179   ins_cost(280);
10180   format %{ "FTSTF  $dst,$src1" %}
10181   opcode(0xE4, 0xD9);
10182   ins_encode( Push_Reg_DPR(src1),
10183               OpcS, OpcP, PopFPU,
10184               CmpF_Result(dst));
10185   ins_pipe( pipe_slow );
10186 %}
10187 
10188 // Compare into -1,0,1
10189 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10190   predicate(UseSSE == 0);
10191   match(Set dst (CmpF3 src1 src2));
10192   effect(KILL cr, KILL rax);
10193   ins_cost(300);
10194   format %{ "FCMPF  $dst,$src1,$src2" %}
10195   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10196   ins_encode( Push_Reg_DPR(src1),
10197               OpcP, RegOpc(src2),
10198               CmpF_Result(dst));
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 // float compare and set condition codes in EFLAGS by XMM regs
10203 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10204   predicate(UseSSE>=1);
10205   match(Set cr (CmpF src1 src2));
10206   ins_cost(145);
10207   format %{ "UCOMISS $src1,$src2\n\t"
10208             "JNP,s   exit\n\t"
10209             "PUSHF\t# saw NaN, set CF\n\t"
10210             "AND     [rsp], #0xffffff2b\n\t"
10211             "POPF\n"
10212     "exit:" %}
10213   ins_encode %{
10214     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10215     emit_cmpfp_fixup(_masm);
10216   %}
10217   ins_pipe( pipe_slow );
10218 %}
10219 
10220 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10221   predicate(UseSSE>=1);
10222   match(Set cr (CmpF src1 src2));
10223   ins_cost(100);
10224   format %{ "UCOMISS $src1,$src2" %}
10225   ins_encode %{
10226     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10227   %}
10228   ins_pipe( pipe_slow );
10229 %}
10230 
10231 // float compare and set condition codes in EFLAGS by XMM regs
10232 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10233   predicate(UseSSE>=1);
10234   match(Set cr (CmpF src1 (LoadF src2)));
10235   ins_cost(165);
10236   format %{ "UCOMISS $src1,$src2\n\t"
10237             "JNP,s   exit\n\t"
10238             "PUSHF\t# saw NaN, set CF\n\t"
10239             "AND     [rsp], #0xffffff2b\n\t"
10240             "POPF\n"
10241     "exit:" %}
10242   ins_encode %{
10243     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10244     emit_cmpfp_fixup(_masm);
10245   %}
10246   ins_pipe( pipe_slow );
10247 %}
10248 
10249 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10250   predicate(UseSSE>=1);
10251   match(Set cr (CmpF src1 (LoadF src2)));
10252   ins_cost(100);
10253   format %{ "UCOMISS $src1,$src2" %}
10254   ins_encode %{
10255     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10256   %}
10257   ins_pipe( pipe_slow );
10258 %}
10259 
10260 // Compare into -1,0,1 in XMM
10261 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10262   predicate(UseSSE>=1);
10263   match(Set dst (CmpF3 src1 src2));
10264   effect(KILL cr);
10265   ins_cost(255);
10266   format %{ "UCOMISS $src1, $src2\n\t"
10267             "MOV     $dst, #-1\n\t"
10268             "JP,s    done\n\t"
10269             "JB,s    done\n\t"
10270             "SETNE   $dst\n\t"
10271             "MOVZB   $dst, $dst\n"
10272     "done:" %}
10273   ins_encode %{
10274     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10275     emit_cmpfp3(_masm, $dst$$Register);
10276   %}
10277   ins_pipe( pipe_slow );
10278 %}
10279 
10280 // Compare into -1,0,1 in XMM and memory
10281 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10282   predicate(UseSSE>=1);
10283   match(Set dst (CmpF3 src1 (LoadF src2)));
10284   effect(KILL cr);
10285   ins_cost(275);
10286   format %{ "UCOMISS $src1, $src2\n\t"
10287             "MOV     $dst, #-1\n\t"
10288             "JP,s    done\n\t"
10289             "JB,s    done\n\t"
10290             "SETNE   $dst\n\t"
10291             "MOVZB   $dst, $dst\n"
10292     "done:" %}
10293   ins_encode %{
10294     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10295     emit_cmpfp3(_masm, $dst$$Register);
10296   %}
10297   ins_pipe( pipe_slow );
10298 %}
10299 
10300 // Spill to obtain 24-bit precision
10301 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10302   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10303   match(Set dst (SubF src1 src2));
10304 
10305   format %{ "FSUB   $dst,$src1 - $src2" %}
10306   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10307   ins_encode( Push_Reg_FPR(src1),
10308               OpcReg_FPR(src2),
10309               Pop_Mem_FPR(dst) );
10310   ins_pipe( fpu_mem_reg_reg );
10311 %}
10312 //
10313 // This instruction does not round to 24-bits
10314 instruct subFPR_reg(regFPR dst, regFPR src) %{
10315   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10316   match(Set dst (SubF dst src));
10317 
10318   format %{ "FSUB   $dst,$src" %}
10319   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10320   ins_encode( Push_Reg_FPR(src),
10321               OpcP, RegOpc(dst) );
10322   ins_pipe( fpu_reg_reg );
10323 %}
10324 
10325 // Spill to obtain 24-bit precision
10326 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10327   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10328   match(Set dst (AddF src1 src2));
10329 
10330   format %{ "FADD   $dst,$src1,$src2" %}
10331   opcode(0xD8, 0x0); /* D8 C0+i */
10332   ins_encode( Push_Reg_FPR(src2),
10333               OpcReg_FPR(src1),
10334               Pop_Mem_FPR(dst) );
10335   ins_pipe( fpu_mem_reg_reg );
10336 %}
10337 //
10338 // This instruction does not round to 24-bits
10339 instruct addFPR_reg(regFPR dst, regFPR src) %{
10340   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10341   match(Set dst (AddF dst src));
10342 
10343   format %{ "FLD    $src\n\t"
10344             "FADDp  $dst,ST" %}
10345   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10346   ins_encode( Push_Reg_FPR(src),
10347               OpcP, RegOpc(dst) );
10348   ins_pipe( fpu_reg_reg );
10349 %}
10350 
10351 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10352   predicate(UseSSE==0);
10353   match(Set dst (AbsF src));
10354   ins_cost(100);
10355   format %{ "FABS" %}
10356   opcode(0xE1, 0xD9);
10357   ins_encode( OpcS, OpcP );
10358   ins_pipe( fpu_reg_reg );
10359 %}
10360 
10361 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10362   predicate(UseSSE==0);
10363   match(Set dst (NegF src));
10364   ins_cost(100);
10365   format %{ "FCHS" %}
10366   opcode(0xE0, 0xD9);
10367   ins_encode( OpcS, OpcP );
10368   ins_pipe( fpu_reg_reg );
10369 %}
10370 
10371 // Cisc-alternate to addFPR_reg
10372 // Spill to obtain 24-bit precision
10373 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10374   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10375   match(Set dst (AddF src1 (LoadF src2)));
10376 
10377   format %{ "FLD    $src2\n\t"
10378             "FADD   ST,$src1\n\t"
10379             "FSTP_S $dst" %}
10380   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10381   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10382               OpcReg_FPR(src1),
10383               Pop_Mem_FPR(dst) );
10384   ins_pipe( fpu_mem_reg_mem );
10385 %}
10386 //
10387 // Cisc-alternate to addFPR_reg
10388 // This instruction does not round to 24-bits
10389 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10390   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10391   match(Set dst (AddF dst (LoadF src)));
10392 
10393   format %{ "FADD   $dst,$src" %}
10394   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10395   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10396               OpcP, RegOpc(dst) );
10397   ins_pipe( fpu_reg_mem );
10398 %}
10399 
// Following two instructions for _222_mpegaudio
10401 // Spill to obtain 24-bit precision
10402 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10403   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10404   match(Set dst (AddF src1 src2));
10405 
10406   format %{ "FADD   $dst,$src1,$src2" %}
10407   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10408   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10409               OpcReg_FPR(src2),
10410               Pop_Mem_FPR(dst) );
10411   ins_pipe( fpu_mem_reg_mem );
10412 %}
10413 
10414 // Cisc-spill variant
10415 // Spill to obtain 24-bit precision
10416 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10417   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10418   match(Set dst (AddF src1 (LoadF src2)));
10419 
10420   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10421   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10422   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10423               set_instruction_start,
10424               OpcP, RMopc_Mem(secondary,src1),
10425               Pop_Mem_FPR(dst) );
10426   ins_pipe( fpu_mem_mem_mem );
10427 %}
10428 
10429 // Spill to obtain 24-bit precision
10430 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10431   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10432   match(Set dst (AddF src1 src2));
10433 
10434   format %{ "FADD   $dst,$src1,$src2" %}
10435   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10436   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10437               set_instruction_start,
10438               OpcP, RMopc_Mem(secondary,src1),
10439               Pop_Mem_FPR(dst) );
10440   ins_pipe( fpu_mem_mem_mem );
10441 %}
10442 
10443 
10444 // Spill to obtain 24-bit precision
10445 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10446   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10447   match(Set dst (AddF src con));
10448   format %{ "FLD    $src\n\t"
10449             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10450             "FSTP_S $dst"  %}
10451   ins_encode %{
10452     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10453     __ fadd_s($constantaddress($con));
10454     __ fstp_s(Address(rsp, $dst$$disp));
10455   %}
10456   ins_pipe(fpu_mem_reg_con);
10457 %}
10458 //
10459 // This instruction does not round to 24-bits
10460 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10461   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10462   match(Set dst (AddF src con));
10463   format %{ "FLD    $src\n\t"
10464             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10465             "FSTP   $dst"  %}
10466   ins_encode %{
10467     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10468     __ fadd_s($constantaddress($con));
10469     __ fstp_d($dst$$reg);
10470   %}
10471   ins_pipe(fpu_reg_reg_con);
10472 %}
10473 
10474 // Spill to obtain 24-bit precision
10475 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10476   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10477   match(Set dst (MulF src1 src2));
10478 
10479   format %{ "FLD    $src1\n\t"
10480             "FMUL   $src2\n\t"
10481             "FSTP_S $dst"  %}
10482   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10483   ins_encode( Push_Reg_FPR(src1),
10484               OpcReg_FPR(src2),
10485               Pop_Mem_FPR(dst) );
10486   ins_pipe( fpu_mem_reg_reg );
10487 %}
10488 //
10489 // This instruction does not round to 24-bits
10490 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10491   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10492   match(Set dst (MulF src1 src2));
10493 
10494   format %{ "FLD    $src1\n\t"
10495             "FMUL   $src2\n\t"
10496             "FSTP_S $dst"  %}
10497   opcode(0xD8, 0x1); /* D8 C8+i */
10498   ins_encode( Push_Reg_FPR(src2),
10499               OpcReg_FPR(src1),
10500               Pop_Reg_FPR(dst) );
10501   ins_pipe( fpu_reg_reg_reg );
10502 %}
10503 
10504 
10505 // Spill to obtain 24-bit precision
10506 // Cisc-alternate to reg-reg multiply
10507 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10508   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10509   match(Set dst (MulF src1 (LoadF src2)));
10510 
10511   format %{ "FLD_S  $src2\n\t"
10512             "FMUL   $src1\n\t"
10513             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10515   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10516               OpcReg_FPR(src1),
10517               Pop_Mem_FPR(dst) );
10518   ins_pipe( fpu_mem_reg_mem );
10519 %}
10520 //
10521 // This instruction does not round to 24-bits
10522 // Cisc-alternate to reg-reg multiply
10523 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10524   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10525   match(Set dst (MulF src1 (LoadF src2)));
10526 
10527   format %{ "FMUL   $dst,$src1,$src2" %}
10528   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10529   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10530               OpcReg_FPR(src1),
10531               Pop_Reg_FPR(dst) );
10532   ins_pipe( fpu_reg_reg_mem );
10533 %}
10534 
10535 // Spill to obtain 24-bit precision
10536 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10537   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10538   match(Set dst (MulF src1 src2));
10539 
10540   format %{ "FMUL   $dst,$src1,$src2" %}
10541   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10542   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10543               set_instruction_start,
10544               OpcP, RMopc_Mem(secondary,src1),
10545               Pop_Mem_FPR(dst) );
10546   ins_pipe( fpu_mem_mem_mem );
10547 %}
10548 
10549 // Spill to obtain 24-bit precision
10550 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10551   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10552   match(Set dst (MulF src con));
10553 
10554   format %{ "FLD    $src\n\t"
10555             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10556             "FSTP_S $dst"  %}
10557   ins_encode %{
10558     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10559     __ fmul_s($constantaddress($con));
10560     __ fstp_s(Address(rsp, $dst$$disp));
10561   %}
10562   ins_pipe(fpu_mem_reg_con);
10563 %}
10564 //
10565 // This instruction does not round to 24-bits
10566 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10567   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10568   match(Set dst (MulF src con));
10569 
10570   format %{ "FLD    $src\n\t"
10571             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10572             "FSTP   $dst"  %}
10573   ins_encode %{
10574     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10575     __ fmul_s($constantaddress($con));
10576     __ fstp_d($dst$$reg);
10577   %}
10578   ins_pipe(fpu_reg_reg_con);
10579 %}
10580 
10581 
10582 //
10583 // MACRO1 -- subsume unshared load into mulFPR
10584 // This instruction does not round to 24-bits
10585 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10586   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10587   match(Set dst (MulF (LoadF mem1) src));
10588 
10589   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10590             "FMUL   ST,$src\n\t"
10591             "FSTP   $dst" %}
10592   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10593   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10594               OpcReg_FPR(src),
10595               Pop_Reg_FPR(dst) );
10596   ins_pipe( fpu_reg_reg_mem );
10597 %}
10598 //
10599 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10600 // This instruction does not round to 24-bits
10601 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10602   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10603   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10604   ins_cost(95);
10605 
10606   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10607             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10608             "FADD   ST,$src2\n\t"
10609             "FSTP   $dst" %}
10610   opcode(0xD9); /* LoadF D9 /0 */
10611   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10612               FMul_ST_reg(src1),
10613               FAdd_ST_reg(src2),
10614               Pop_Reg_FPR(dst) );
10615   ins_pipe( fpu_reg_mem_reg_reg );
10616 %}
10617 
10618 // MACRO3 -- addFPR a mulFPR
10619 // This instruction does not round to 24-bits.  It is a '2-address'
10620 // instruction in that the result goes back to src2.  This eliminates
10621 // a move from the macro; possibly the register allocator will have
10622 // to add it back (and maybe not).
10623 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10624   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10625   match(Set src2 (AddF (MulF src0 src1) src2));
10626 
10627   format %{ "FLD    $src0     ===MACRO3===\n\t"
10628             "FMUL   ST,$src1\n\t"
10629             "FADDP  $src2,ST" %}
10630   opcode(0xD9); /* LoadF D9 /0 */
10631   ins_encode( Push_Reg_FPR(src0),
10632               FMul_ST_reg(src1),
10633               FAddP_reg_ST(src2) );
10634   ins_pipe( fpu_reg_reg_reg );
10635 %}
10636 
10637 // MACRO4 -- divFPR subFPR
10638 // This instruction does not round to 24-bits
10639 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10640   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10641   match(Set dst (DivF (SubF src2 src1) src3));
10642 
10643   format %{ "FLD    $src2   ===MACRO4===\n\t"
10644             "FSUB   ST,$src1\n\t"
10645             "FDIV   ST,$src3\n\t"
10646             "FSTP  $dst" %}
10647   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10648   ins_encode( Push_Reg_FPR(src2),
10649               subFPR_divFPR_encode(src1,src3),
10650               Pop_Reg_FPR(dst) );
10651   ins_pipe( fpu_reg_reg_reg_reg );
10652 %}
10653 
10654 // Spill to obtain 24-bit precision
10655 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10656   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10657   match(Set dst (DivF src1 src2));
10658 
10659   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10661   ins_encode( Push_Reg_FPR(src1),
10662               OpcReg_FPR(src2),
10663               Pop_Mem_FPR(dst) );
10664   ins_pipe( fpu_mem_reg_reg );
10665 %}
10666 //
10667 // This instruction does not round to 24-bits
10668 instruct divFPR_reg(regFPR dst, regFPR src) %{
10669   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10670   match(Set dst (DivF dst src));
10671 
10672   format %{ "FDIV   $dst,$src" %}
10673   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10674   ins_encode( Push_Reg_FPR(src),
10675               OpcP, RegOpc(dst) );
10676   ins_pipe( fpu_reg_reg );
10677 %}
10678 
10679 
10680 // Spill to obtain 24-bit precision
10681 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10682   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10683   match(Set dst (ModF src1 src2));
10684   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10685 
10686   format %{ "FMOD   $dst,$src1,$src2" %}
10687   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10688               emitModDPR(),
10689               Push_Result_Mod_DPR(src2),
10690               Pop_Mem_FPR(dst));
10691   ins_pipe( pipe_slow );
10692 %}
10693 //
10694 // This instruction does not round to 24-bits
10695 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10696   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10697   match(Set dst (ModF dst src));
10698   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10699 
10700   format %{ "FMOD   $dst,$src" %}
10701   ins_encode(Push_Reg_Mod_DPR(dst, src),
10702               emitModDPR(),
10703               Push_Result_Mod_DPR(src),
10704               Pop_Reg_FPR(dst));
10705   ins_pipe( pipe_slow );
10706 %}
10707 
10708 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10709   predicate(UseSSE>=1);
10710   match(Set dst (ModF src0 src1));
10711   effect(KILL rax, KILL cr);
10712   format %{ "SUB    ESP,4\t # FMOD\n"
10713           "\tMOVSS  [ESP+0],$src1\n"
10714           "\tFLD_S  [ESP+0]\n"
10715           "\tMOVSS  [ESP+0],$src0\n"
10716           "\tFLD_S  [ESP+0]\n"
10717      "loop:\tFPREM\n"
10718           "\tFWAIT\n"
10719           "\tFNSTSW AX\n"
10720           "\tSAHF\n"
10721           "\tJP     loop\n"
10722           "\tFSTP_S [ESP+0]\n"
10723           "\tMOVSS  $dst,[ESP+0]\n"
10724           "\tADD    ESP,4\n"
10725           "\tFSTP   ST0\t # Restore FPU Stack"
10726     %}
10727   ins_cost(250);
10728   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10729   ins_pipe( pipe_slow );
10730 %}
10731 
10732 
10733 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10735 
10736 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10737   predicate(UseSSE==0);
10738   match(Set dst (RoundFloat src));
10739   ins_cost(125);
10740   format %{ "FST_S  $dst,$src\t# F-round" %}
10741   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10742   ins_pipe( fpu_mem_reg );
10743 %}
10744 
10745 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10746   predicate(UseSSE<=1);
10747   match(Set dst (RoundDouble src));
10748   ins_cost(125);
10749   format %{ "FST_D  $dst,$src\t# D-round" %}
10750   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10751   ins_pipe( fpu_mem_reg );
10752 %}
10753 
// Force rounding to 24-bit precision and 8-bit exponent
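// Note: the x87 precision-control field only narrows the significand; the
// store/reload through a 32-bit stack slot is what actually clamps the
// exponent range, which is why this expands to roundFloat_mem_reg.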
10755 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10756   predicate(UseSSE==0);
10757   match(Set dst (ConvD2F src));
10758   format %{ "FST_S  $dst,$src\t# F-round" %}
10759   expand %{
10760     roundFloat_mem_reg(dst,src);
10761   %}
10762 %}
10763 
// Force rounding to 24-bit precision and 8-bit exponent
10765 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10766   predicate(UseSSE==1);
10767   match(Set dst (ConvD2F src));
10768   effect( KILL cr );
10769   format %{ "SUB    ESP,4\n\t"
10770             "FST_S  [ESP],$src\t# F-round\n\t"
10771             "MOVSS  $dst,[ESP]\n\t"
10772             "ADD ESP,4" %}
10773   ins_encode %{
10774     __ subptr(rsp, 4);
10775     if ($src$$reg != FPR1L_enc) {
10776       __ fld_s($src$$reg-1);
10777       __ fstp_s(Address(rsp, 0));
10778     } else {
10779       __ fst_s(Address(rsp, 0));
10780     }
10781     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10782     __ addptr(rsp, 4);
10783   %}
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 // Force rounding double precision to single precision
10788 instruct convD2F_reg(regF dst, regD src) %{
10789   predicate(UseSSE>=2);
10790   match(Set dst (ConvD2F src));
10791   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10792   ins_encode %{
10793     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10794   %}
10795   ins_pipe( pipe_slow );
10796 %}
10797 
10798 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10799   predicate(UseSSE==0);
10800   match(Set dst (ConvF2D src));
10801   format %{ "FST_S  $dst,$src\t# D-round" %}
10802   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10803   ins_pipe( fpu_reg_reg );
10804 %}
10805 
10806 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10807   predicate(UseSSE==1);
10808   match(Set dst (ConvF2D src));
10809   format %{ "FST_D  $dst,$src\t# D-round" %}
10810   expand %{
10811     roundDouble_mem_reg(dst,src);
10812   %}
10813 %}
10814 
10815 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10816   predicate(UseSSE==1);
10817   match(Set dst (ConvF2D src));
10818   effect( KILL cr );
10819   format %{ "SUB    ESP,4\n\t"
10820             "MOVSS  [ESP] $src\n\t"
10821             "FLD_S  [ESP]\n\t"
10822             "ADD    ESP,4\n\t"
10823             "FSTP   $dst\t# D-round" %}
10824   ins_encode %{
10825     __ subptr(rsp, 4);
10826     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10827     __ fld_s(Address(rsp, 0));
10828     __ addptr(rsp, 4);
10829     __ fstp_d($dst$$reg);
10830   %}
10831   ins_pipe( pipe_slow );
10832 %}
10833 
10834 instruct convF2D_reg(regD dst, regF src) %{
10835   predicate(UseSSE>=2);
10836   match(Set dst (ConvF2D src));
10837   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10838   ins_encode %{
10839     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10840   %}
10841   ins_pipe( pipe_slow );
10842 %}
10843 
10844 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10845 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10846   predicate(UseSSE<=1);
10847   match(Set dst (ConvD2I src));
10848   effect( KILL tmp, KILL cr );
10849   format %{ "FLD    $src\t# Convert double to int \n\t"
10850             "FLDCW  trunc mode\n\t"
10851             "SUB    ESP,4\n\t"
10852             "FISTp  [ESP + #0]\n\t"
10853             "FLDCW  std/24-bit mode\n\t"
10854             "POP    EAX\n\t"
10855             "CMP    EAX,0x80000000\n\t"
10856             "JNE,s  fast\n\t"
10857             "FLD_D  $src\n\t"
10858             "CALL   d2i_wrapper\n"
10859       "fast:" %}
10860   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10861   ins_pipe( pipe_slow );
10862 %}
10863 
10864 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
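// CVTTSD2SI writes the "integer indefinite" value 0x80000000 on NaN or
// overflow, which is why that exact bit pattern selects the slow path.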
10865 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10866   predicate(UseSSE>=2);
10867   match(Set dst (ConvD2I src));
10868   effect( KILL tmp, KILL cr );
10869   format %{ "CVTTSD2SI $dst, $src\n\t"
10870             "CMP    $dst,0x80000000\n\t"
10871             "JNE,s  fast\n\t"
10872             "SUB    ESP, 8\n\t"
10873             "MOVSD  [ESP], $src\n\t"
10874             "FLD_D  [ESP]\n\t"
10875             "ADD    ESP, 8\n\t"
10876             "CALL   d2i_wrapper\n"
10877       "fast:" %}
10878   ins_encode %{
10879     Label fast;
10880     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10881     __ cmpl($dst$$Register, 0x80000000);
10882     __ jccb(Assembler::notEqual, fast);
10883     __ subptr(rsp, 8);
10884     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10885     __ fld_d(Address(rsp, 0));
10886     __ addptr(rsp, 8);
10887     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10888     __ bind(fast);
10889   %}
10890   ins_pipe( pipe_slow );
10891 %}
10892 
10893 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10894   predicate(UseSSE<=1);
10895   match(Set dst (ConvD2L src));
10896   effect( KILL cr );
10897   format %{ "FLD    $src\t# Convert double to long\n\t"
10898             "FLDCW  trunc mode\n\t"
10899             "SUB    ESP,8\n\t"
10900             "FISTp  [ESP + #0]\n\t"
10901             "FLDCW  std/24-bit mode\n\t"
10902             "POP    EAX\n\t"
10903             "POP    EDX\n\t"
10904             "CMP    EDX,0x80000000\n\t"
10905             "JNE,s  fast\n\t"
10906             "TEST   EAX,EAX\n\t"
10907             "JNE,s  fast\n\t"
10908             "FLD    $src\n\t"
10909             "CALL   d2l_wrapper\n"
10910       "fast:" %}
10911   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10912   ins_pipe( pipe_slow );
10913 %}
10914 
10915 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10916 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10917   predicate (UseSSE>=2);
10918   match(Set dst (ConvD2L src));
10919   effect( KILL cr );
10920   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10921             "MOVSD  [ESP],$src\n\t"
10922             "FLD_D  [ESP]\n\t"
10923             "FLDCW  trunc mode\n\t"
10924             "FISTp  [ESP + #0]\n\t"
10925             "FLDCW  std/24-bit mode\n\t"
10926             "POP    EAX\n\t"
10927             "POP    EDX\n\t"
10928             "CMP    EDX,0x80000000\n\t"
10929             "JNE,s  fast\n\t"
10930             "TEST   EAX,EAX\n\t"
10931             "JNE,s  fast\n\t"
10932             "SUB    ESP,8\n\t"
10933             "MOVSD  [ESP],$src\n\t"
10934             "FLD_D  [ESP]\n\t"
10935             "ADD    ESP,8\n\t"
10936             "CALL   d2l_wrapper\n"
10937       "fast:" %}
10938   ins_encode %{
10939     Label fast;
10940     __ subptr(rsp, 8);
10941     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10942     __ fld_d(Address(rsp, 0));
10943     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10944     __ fistp_d(Address(rsp, 0));
10945     // Restore the rounding mode, mask the exception
10946     if (Compile::current()->in_24_bit_fp_mode()) {
10947       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10948     } else {
10949       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10950     }
10951     // Load the converted long, adjust CPU stack
10952     __ pop(rax);
10953     __ pop(rdx);
10954     __ cmpl(rdx, 0x80000000);
10955     __ jccb(Assembler::notEqual, fast);
10956     __ testl(rax, rax);
10957     __ jccb(Assembler::notEqual, fast);
10958     __ subptr(rsp, 8);
10959     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10960     __ fld_d(Address(rsp, 0));
10961     __ addptr(rsp, 8);
10962     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10963     __ bind(fast);
10964   %}
10965   ins_pipe( pipe_slow );
10966 %}
10967 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
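// The trunc/std control-word dance and the 0x80000000 check are shared with
// the double conversions above; d2i_wrapper/d2l_wrapper handle the Java
// corner cases (NaN -> 0, saturation on overflow).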
10974 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10975   predicate(UseSSE==0);
10976   match(Set dst (ConvF2I src));
10977   effect( KILL tmp, KILL cr );
10978   format %{ "FLD    $src\t# Convert float to int \n\t"
10979             "FLDCW  trunc mode\n\t"
10980             "SUB    ESP,4\n\t"
10981             "FISTp  [ESP + #0]\n\t"
10982             "FLDCW  std/24-bit mode\n\t"
10983             "POP    EAX\n\t"
10984             "CMP    EAX,0x80000000\n\t"
10985             "JNE,s  fast\n\t"
10986             "FLD    $src\n\t"
10987             "CALL   d2i_wrapper\n"
10988       "fast:" %}
10989   // DPR2I_encoding works for FPR2I
10990   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10991   ins_pipe( pipe_slow );
10992 %}
10993 
10994 // Convert a float in xmm to an int reg.
10995 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10996   predicate(UseSSE>=1);
10997   match(Set dst (ConvF2I src));
10998   effect( KILL tmp, KILL cr );
10999   format %{ "CVTTSS2SI $dst, $src\n\t"
11000             "CMP    $dst,0x80000000\n\t"
11001             "JNE,s  fast\n\t"
11002             "SUB    ESP, 4\n\t"
11003             "MOVSS  [ESP], $src\n\t"
11004             "FLD    [ESP]\n\t"
11005             "ADD    ESP, 4\n\t"
11006             "CALL   d2i_wrapper\n"
11007       "fast:" %}
11008   ins_encode %{
11009     Label fast;
11010     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11011     __ cmpl($dst$$Register, 0x80000000);
11012     __ jccb(Assembler::notEqual, fast);
11013     __ subptr(rsp, 4);
11014     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11015     __ fld_s(Address(rsp, 0));
11016     __ addptr(rsp, 4);
11017     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11018     __ bind(fast);
11019   %}
11020   ins_pipe( pipe_slow );
11021 %}
11022 
11023 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11024   predicate(UseSSE==0);
11025   match(Set dst (ConvF2L src));
11026   effect( KILL cr );
11027   format %{ "FLD    $src\t# Convert float to long\n\t"
11028             "FLDCW  trunc mode\n\t"
11029             "SUB    ESP,8\n\t"
11030             "FISTp  [ESP + #0]\n\t"
11031             "FLDCW  std/24-bit mode\n\t"
11032             "POP    EAX\n\t"
11033             "POP    EDX\n\t"
11034             "CMP    EDX,0x80000000\n\t"
11035             "JNE,s  fast\n\t"
11036             "TEST   EAX,EAX\n\t"
11037             "JNE,s  fast\n\t"
11038             "FLD    $src\n\t"
11039             "CALL   d2l_wrapper\n"
11040       "fast:" %}
11041   // DPR2L_encoding works for FPR2L
11042   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11047 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11048   predicate (UseSSE>=1);
11049   match(Set dst (ConvF2L src));
11050   effect( KILL cr );
11051   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11052             "MOVSS  [ESP],$src\n\t"
11053             "FLD_S  [ESP]\n\t"
11054             "FLDCW  trunc mode\n\t"
11055             "FISTp  [ESP + #0]\n\t"
11056             "FLDCW  std/24-bit mode\n\t"
11057             "POP    EAX\n\t"
11058             "POP    EDX\n\t"
11059             "CMP    EDX,0x80000000\n\t"
11060             "JNE,s  fast\n\t"
11061             "TEST   EAX,EAX\n\t"
11062             "JNE,s  fast\n\t"
11063             "SUB    ESP,4\t# Convert float to long\n\t"
11064             "MOVSS  [ESP],$src\n\t"
11065             "FLD_S  [ESP]\n\t"
11066             "ADD    ESP,4\n\t"
11067             "CALL   d2l_wrapper\n"
11068       "fast:" %}
11069   ins_encode %{
11070     Label fast;
11071     __ subptr(rsp, 8);
11072     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11073     __ fld_s(Address(rsp, 0));
11074     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11075     __ fistp_d(Address(rsp, 0));
11076     // Restore the rounding mode, mask the exception
11077     if (Compile::current()->in_24_bit_fp_mode()) {
11078       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11079     } else {
11080       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11081     }
11082     // Load the converted long, adjust CPU stack
11083     __ pop(rax);
11084     __ pop(rdx);
11085     __ cmpl(rdx, 0x80000000);
11086     __ jccb(Assembler::notEqual, fast);
11087     __ testl(rax, rax);
11088     __ jccb(Assembler::notEqual, fast);
11089     __ subptr(rsp, 4);
11090     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11091     __ fld_s(Address(rsp, 0));
11092     __ addptr(rsp, 4);
11093     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11094     __ bind(fast);
11095   %}
11096   ins_pipe( pipe_slow );
11097 %}
11098 
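// Convert an int on the stack to a double on the FPU stack.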
11099 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11100   predicate( UseSSE<=1 );
11101   match(Set dst (ConvI2D src));
11102   format %{ "FILD   $src\n\t"
11103             "FSTP   $dst" %}
11104   opcode(0xDB, 0x0);  /* DB /0 */
11105   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11106   ins_pipe( fpu_reg_mem );
11107 %}
11108 
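// Convert an int in a register to a double in xmm.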
11109 instruct convI2D_reg(regD dst, rRegI src) %{
11110   predicate( UseSSE>=2 && !UseXmmI2D );
11111   match(Set dst (ConvI2D src));
11112   format %{ "CVTSI2SD $dst,$src" %}
11113   ins_encode %{
11114     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11115   %}
11116   ins_pipe( pipe_slow );
11117 %}
11118 
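// Convert an int loaded from memory to a double in xmm.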
11119 instruct convI2D_mem(regD dst, memory mem) %{
11120   predicate( UseSSE>=2 );
11121   match(Set dst (ConvI2D (LoadI mem)));
11122   format %{ "CVTSI2SD $dst,$mem" %}
11123   ins_encode %{
11124     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11125   %}
11126   ins_pipe( pipe_slow );
11127 %}
11128 
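// Convert an int to a double in xmm via MOVD/CVTDQ2PD (the UseXmmI2D path).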
11129 instruct convXI2D_reg(regD dst, rRegI src)
11130 %{
11131   predicate( UseSSE>=2 && UseXmmI2D );
11132   match(Set dst (ConvI2D src));
11133 
11134   format %{ "MOVD  $dst,$src\n\t"
11135             "CVTDQ2PD $dst,$dst\t# i2d" %}
11136   ins_encode %{
11137     __ movdl($dst$$XMMRegister, $src$$Register);
11138     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11139   %}
11140   ins_pipe(pipe_slow); // XXX
11141 %}
11142 
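// Convert an int loaded from memory to a double on the FPU stack.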
11143 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11144   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11145   match(Set dst (ConvI2D (LoadI mem)));
11146   format %{ "FILD   $mem\n\t"
11147             "FSTP   $dst" %}
11148   opcode(0xDB);      /* DB /0 */
11149   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11150               Pop_Reg_DPR(dst));
11151   ins_pipe( fpu_reg_mem );
11152 %}
11153 
11154 // Convert a byte to a float; no rounding step needed.
11155 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11156   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11157   match(Set dst (ConvI2F src));
11158   format %{ "FILD   $src\n\t"
11159             "FSTP   $dst" %}
11160 
11161   opcode(0xDB, 0x0);  /* DB /0 */
11162   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11163   ins_pipe( fpu_reg_mem );
11164 %}
11165 
11166 // In 24-bit mode, force exponent rounding by storing back out
11167 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11168   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11169   match(Set dst (ConvI2F src));
11170   ins_cost(200);
11171   format %{ "FILD   $src\n\t"
11172             "FSTP_S $dst" %}
11173   opcode(0xDB, 0x0);  /* DB /0 */
11174   ins_encode( Push_Mem_I(src),
11175               Pop_Mem_FPR(dst));
11176   ins_pipe( fpu_mem_mem );
11177 %}
11178 
11179 // In 24-bit mode, force exponent rounding by storing back out
11180 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11181   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11182   match(Set dst (ConvI2F (LoadI mem)));
11183   ins_cost(200);
11184   format %{ "FILD   $mem\n\t"
11185             "FSTP_S $dst" %}
11186   opcode(0xDB);  /* DB /0 */
11187   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11188               Pop_Mem_FPR(dst));
11189   ins_pipe( fpu_mem_mem );
11190 %}
11191 
11192 // This instruction does not round to 24-bit precision
11193 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11194   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11195   match(Set dst (ConvI2F src));
11196   format %{ "FILD   $src\n\t"
11197             "FSTP   $dst" %}
11198   opcode(0xDB, 0x0);  /* DB /0 */
11199   ins_encode( Push_Mem_I(src),
11200               Pop_Reg_FPR(dst));
11201   ins_pipe( fpu_reg_mem );
11202 %}
11203 
11204 // This instruction does not round to 24-bit precision
11205 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11206   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11207   match(Set dst (ConvI2F (LoadI mem)));
11208   format %{ "FILD   $mem\n\t"
11209             "FSTP   $dst" %}
11210   opcode(0xDB);      /* DB /0 */
11211   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11212               Pop_Reg_FPR(dst));
11213   ins_pipe( fpu_reg_mem );
11214 %}
11215 
11216 // Convert an int to a float in xmm; no rounding step needed.
11217 instruct convI2F_reg(regF dst, rRegI src) %{
11218   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11219   match(Set dst (ConvI2F src));
11220   format %{ "CVTSI2SS $dst, $src" %}
11221   ins_encode %{
11222     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11223   %}
11224   ins_pipe( pipe_slow );
11225 %}
11226 
11227 instruct convXI2F_reg(regF dst, rRegI src)
11228 %{
11229   predicate( UseSSE>=2 && UseXmmI2F );
11230   match(Set dst (ConvI2F src));
11231 
11232   format %{ "MOVD  $dst,$src\n\t"
11233             "CVTDQ2PS $dst,$dst\t# i2f" %}
11234   ins_encode %{
11235     __ movdl($dst$$XMMRegister, $src$$Register);
11236     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11237   %}
11238   ins_pipe(pipe_slow); // XXX
11239 %}
11240 
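// Sign-extend convert int to long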
11241 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11242   match(Set dst (ConvI2L src));
11243   effect(KILL cr);
11244   ins_cost(375);
11245   format %{ "MOV    $dst.lo,$src\n\t"
11246             "MOV    $dst.hi,$src\n\t"
11247             "SAR    $dst.hi,31" %}
11248   ins_encode(convert_int_long(dst,src));
11249   ins_pipe( ialu_reg_reg_long );
11250 %}
11251 
11252 // Zero-extend convert int to long
11253 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11254   match(Set dst (AndL (ConvI2L src) mask) );
11255   effect( KILL flags );
11256   ins_cost(250);
11257   format %{ "MOV    $dst.lo,$src\n\t"
11258             "XOR    $dst.hi,$dst.hi" %}
11259   opcode(0x33); // XOR
11260   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11261   ins_pipe( ialu_reg_reg_long );
11262 %}
11263 
11264 // Zero-extend long
11265 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11266   match(Set dst (AndL src mask) );
11267   effect( KILL flags );
11268   ins_cost(250);
11269   format %{ "MOV    $dst.lo,$src.lo\n\t"
11270             "XOR    $dst.hi,$dst.hi" %}
11271   opcode(0x33); // XOR
11272   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11273   ins_pipe( ialu_reg_reg_long );
11274 %}
11275 
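// Convert a long to a double on the FPU stack.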
11276 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11277   predicate (UseSSE<=1);
11278   match(Set dst (ConvL2D src));
11279   effect( KILL cr );
11280   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11281             "PUSH   $src.lo\n\t"
11282             "FILD   ST,[ESP + #0]\n\t"
11283             "ADD    ESP,8\n\t"
11284             "FSTP_D $dst\t# D-round" %}
11285   opcode(0xDF, 0x5);  /* DF /5 */
11286   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11287   ins_pipe( pipe_slow );
11288 %}
11289 
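// Convert a long to a double in xmm, going through the FPU stack and memory.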
11290 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11291   predicate (UseSSE>=2);
11292   match(Set dst (ConvL2D src));
11293   effect( KILL cr );
11294   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11295             "PUSH   $src.lo\n\t"
11296             "FILD_D [ESP]\n\t"
11297             "FSTP_D [ESP]\n\t"
11298             "MOVSD  $dst,[ESP]\n\t"
11299             "ADD    ESP,8" %}
11300   opcode(0xDF, 0x5);  /* DF /5 */
11301   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11302   ins_pipe( pipe_slow );
11303 %}
11304 
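// Convert a long to a float in xmm, going through the FPU stack and memory.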
11305 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11306   predicate (UseSSE>=1);
11307   match(Set dst (ConvL2F src));
11308   effect( KILL cr );
11309   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11310             "PUSH   $src.lo\n\t"
11311             "FILD_D [ESP]\n\t"
11312             "FSTP_S [ESP]\n\t"
11313             "MOVSS  $dst,[ESP]\n\t"
11314             "ADD    ESP,8" %}
11315   opcode(0xDF, 0x5);  /* DF /5 */
11316   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11317   ins_pipe( pipe_slow );
11318 %}
11319 
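// Convert a long to a float on the FPU stack.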
11320 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11321   match(Set dst (ConvL2F src));
11322   effect( KILL cr );
11323   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11324             "PUSH   $src.lo\n\t"
11325             "FILD   ST,[ESP + #0]\n\t"
11326             "ADD    ESP,8\n\t"
11327             "FSTP_S $dst\t# F-round" %}
11328   opcode(0xDF, 0x5);  /* DF /5 */
11329   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11330   ins_pipe( pipe_slow );
11331 %}
11332 
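// Convert a long to an int by taking the low 32 bits.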
11333 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11334   match(Set dst (ConvL2I src));
11335   effect( DEF dst, USE src );
11336   format %{ "MOV    $dst,$src.lo" %}
11337   ins_encode(enc_CopyL_Lo(dst,src));
11338   ins_pipe( ialu_reg_reg );
11339 %}
11340 
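// Move the raw bits of a float stack slot into an int register.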
11341 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11342   match(Set dst (MoveF2I src));
11343   effect( DEF dst, USE src );
11344   ins_cost(100);
11345   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11346   ins_encode %{
11347     __ movl($dst$$Register, Address(rsp, $src$$disp));
11348   %}
11349   ins_pipe( ialu_reg_mem );
11350 %}
11351 
11352 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11353   predicate(UseSSE==0);
11354   match(Set dst (MoveF2I src));
11355   effect( DEF dst, USE src );
11356 
11357   ins_cost(125);
11358   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11359   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11360   ins_pipe( fpu_mem_reg );
11361 %}
11362 
11363 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11364   predicate(UseSSE>=1);
11365   match(Set dst (MoveF2I src));
11366   effect( DEF dst, USE src );
11367 
11368   ins_cost(95);
11369   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11370   ins_encode %{
11371     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11372   %}
11373   ins_pipe( pipe_slow );
11374 %}
11375 
11376 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11377   predicate(UseSSE>=2);
11378   match(Set dst (MoveF2I src));
11379   effect( DEF dst, USE src );
11380   ins_cost(85);
11381   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11382   ins_encode %{
11383     __ movdl($dst$$Register, $src$$XMMRegister);
11384   %}
11385   ins_pipe( pipe_slow );
11386 %}
11387 
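// Move the raw bits of an int register into a float stack slot.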
11388 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11389   match(Set dst (MoveI2F src));
11390   effect( DEF dst, USE src );
11391 
11392   ins_cost(100);
11393   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11394   ins_encode %{
11395     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11396   %}
11397   ins_pipe( ialu_mem_reg );
11398 %}
11399 
11400 
11401 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11402   predicate(UseSSE==0);
11403   match(Set dst (MoveI2F src));
11404   effect(DEF dst, USE src);
11405 
11406   ins_cost(125);
11407   format %{ "FLD_S  $src\n\t"
11408             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11409   opcode(0xD9);               /* D9 /0, FLD m32real */
11410   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11411               Pop_Reg_FPR(dst) );
11412   ins_pipe( fpu_reg_mem );
11413 %}
11414 
11415 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11416   predicate(UseSSE>=1);
11417   match(Set dst (MoveI2F src));
11418   effect( DEF dst, USE src );
11419 
11420   ins_cost(95);
11421   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11422   ins_encode %{
11423     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11429   predicate(UseSSE>=2);
11430   match(Set dst (MoveI2F src));
11431   effect( DEF dst, USE src );
11432 
11433   ins_cost(85);
11434   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11435   ins_encode %{
11436     __ movdl($dst$$XMMRegister, $src$$Register);
11437   %}
11438   ins_pipe( pipe_slow );
11439 %}
11440 
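// Move the raw bits of a double stack slot into a long register pair.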
11441 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11442   match(Set dst (MoveD2L src));
11443   effect(DEF dst, USE src);
11444 
11445   ins_cost(250);
11446   format %{ "MOV    $dst.lo,$src\n\t"
11447             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11448   opcode(0x8B, 0x8B);
11449   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11450   ins_pipe( ialu_mem_long_reg );
11451 %}
11452 
11453 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11454   predicate(UseSSE<=1);
11455   match(Set dst (MoveD2L src));
11456   effect(DEF dst, USE src);
11457 
11458   ins_cost(125);
11459   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11460   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11461   ins_pipe( fpu_mem_reg );
11462 %}
11463 
11464 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11465   predicate(UseSSE>=2);
11466   match(Set dst (MoveD2L src));
11467   effect(DEF dst, USE src);
11468   ins_cost(95);
11469   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11470   ins_encode %{
11471     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11472   %}
11473   ins_pipe( pipe_slow );
11474 %}
11475 
11476 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11477   predicate(UseSSE>=2);
11478   match(Set dst (MoveD2L src));
11479   effect(DEF dst, USE src, TEMP tmp);
11480   ins_cost(85);
11481   format %{ "MOVD   $dst.lo,$src\n\t"
11482             "PSHUFLW $tmp,$src,0x4E\n\t"
11483             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11484   ins_encode %{
11485     __ movdl($dst$$Register, $src$$XMMRegister);
11486     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11487     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11488   %}
11489   ins_pipe( pipe_slow );
11490 %}
11491 
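// Move the raw bits of a long register pair into a double stack slot.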
11492 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11493   match(Set dst (MoveL2D src));
11494   effect(DEF dst, USE src);
11495 
11496   ins_cost(200);
11497   format %{ "MOV    $dst,$src.lo\n\t"
11498             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11499   opcode(0x89, 0x89);
11500   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11501   ins_pipe( ialu_mem_long_reg );
11502 %}
11503 
11504 
11505 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11506   predicate(UseSSE<=1);
11507   match(Set dst (MoveL2D src));
11508   effect(DEF dst, USE src);
11509   ins_cost(125);
11510 
11511   format %{ "FLD_D  $src\n\t"
11512             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11513   opcode(0xDD);               /* DD /0, FLD m64real */
11514   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11515               Pop_Reg_DPR(dst) );
11516   ins_pipe( fpu_reg_mem );
11517 %}
11518 
11519 
11520 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11521   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11522   match(Set dst (MoveL2D src));
11523   effect(DEF dst, USE src);
11524 
11525   ins_cost(95);
11526   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11527   ins_encode %{
11528     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11529   %}
11530   ins_pipe( pipe_slow );
11531 %}
11532 
11533 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11534   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11535   match(Set dst (MoveL2D src));
11536   effect(DEF dst, USE src);
11537 
11538   ins_cost(95);
11539   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11540   ins_encode %{
11541     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11542   %}
11543   ins_pipe( pipe_slow );
11544 %}
11545 
11546 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11547   predicate(UseSSE>=2);
11548   match(Set dst (MoveL2D src));
11549   effect(TEMP dst, USE src, TEMP tmp);
11550   ins_cost(85);
11551   format %{ "MOVD   $dst,$src.lo\n\t"
11552             "MOVD   $tmp,$src.hi\n\t"
11553             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11554   ins_encode %{
11555     __ movdl($dst$$XMMRegister, $src$$Register);
11556     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11557     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11558   %}
11559   ins_pipe( pipe_slow );
11560 %}
11561 
11562 
11563 // =======================================================================
11564 // fast clearing of an array
11565 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11566   predicate(!((ClearArrayNode*)n)->is_large());
11567   match(Set dummy (ClearArray cnt base));
11568   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11569 
11570   format %{ $$template
11571     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11572     $$emit$$"CMP    InitArrayShortSize,ECX\n\t"
11573     $$emit$$"JG     LARGE\n\t"
11574     $$emit$$"SHL    ECX, 1\n\t"
11575     $$emit$$"DEC    ECX\n\t"
11576     $$emit$$"JS     DONE\t# Zero length\n\t"
11577     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11578     $$emit$$"DEC    ECX\n\t"
11579     $$emit$$"JGE    LOOP\n\t"
11580     $$emit$$"JMP    DONE\n\t"
11581     $$emit$$"# LARGE:\n\t"
11582     if (UseFastStosb) {
11583        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11584        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11585     } else if (UseXMMForObjInit) {
11586        $$emit$$"MOV     RDI,RAX\n\t"
11587        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11588        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11589        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11590        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11591        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11592        $$emit$$"ADD     0x40,RAX\n\t"
11593        $$emit$$"# L_zero_64_bytes:\n\t"
11594        $$emit$$"SUB     0x8,RCX\n\t"
11595        $$emit$$"JGE     L_loop\n\t"
11596        $$emit$$"ADD     0x4,RCX\n\t"
11597        $$emit$$"JL      L_tail\n\t"
11598        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11599        $$emit$$"ADD     0x20,RAX\n\t"
11600        $$emit$$"SUB     0x4,RCX\n\t"
11601        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11602        $$emit$$"ADD     0x4,RCX\n\t"
11603        $$emit$$"JLE     L_end\n\t"
11604        $$emit$$"DEC     RCX\n\t"
11605        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11606        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11607        $$emit$$"ADD     0x8,RAX\n\t"
11608        $$emit$$"DEC     RCX\n\t"
11609        $$emit$$"JGE     L_sloop\n\t"
11610        $$emit$$"# L_end:\n\t"
11611     } else {
11612        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11613        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11614     }
11615     $$emit$$"# DONE"
11616   %}
11617   ins_encode %{
11618     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11619                  $tmp$$XMMRegister, false);
11620   %}
11621   ins_pipe( pipe_slow );
11622 %}
11623 
11624 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11625   predicate(((ClearArrayNode*)n)->is_large());
11626   match(Set dummy (ClearArray cnt base));
11627   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11628   format %{ $$template
11629     if (UseFastStosb) {
11630        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11631        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11632        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11633     } else if (UseXMMForObjInit) {
11634        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11635        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11636        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11637        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11638        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11639        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11640        $$emit$$"ADD     0x40,RAX\n\t"
11641        $$emit$$"# L_zero_64_bytes:\n\t"
11642        $$emit$$"SUB     0x8,RCX\n\t"
11643        $$emit$$"JGE     L_loop\n\t"
11644        $$emit$$"ADD     0x4,RCX\n\t"
11645        $$emit$$"JL      L_tail\n\t"
11646        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11647        $$emit$$"ADD     0x20,RAX\n\t"
11648        $$emit$$"SUB     0x4,RCX\n\t"
11649        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11650        $$emit$$"ADD     0x4,RCX\n\t"
11651        $$emit$$"JLE     L_end\n\t"
11652        $$emit$$"DEC     RCX\n\t"
11653        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11654        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11655        $$emit$$"ADD     0x8,RAX\n\t"
11656        $$emit$$"DEC     RCX\n\t"
11657        $$emit$$"JGE     L_sloop\n\t"
11658        $$emit$$"# L_end:\n\t"
11659     } else {
11660        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11661        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11662        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11663     }
11664     $$emit$$"# DONE"
11665   %}
11666   ins_encode %{
11667     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11668                  $tmp$$XMMRegister, true);
11669   %}
11670   ins_pipe( pipe_slow );
11671 %}
11672 
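// fast string compare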
11673 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11674                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11675   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11676   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11677   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11678 
11679   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11680   ins_encode %{
11681     __ string_compare($str1$$Register, $str2$$Register,
11682                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11683                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11684   %}
11685   ins_pipe( pipe_slow );
11686 %}
11687 
11688 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11689                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11690   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11691   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11692   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11693 
11694   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11695   ins_encode %{
11696     __ string_compare($str1$$Register, $str2$$Register,
11697                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11698                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11699   %}
11700   ins_pipe( pipe_slow );
11701 %}
11702 
11703 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11704                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11705   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11706   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11707   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11708 
11709   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11710   ins_encode %{
11711     __ string_compare($str1$$Register, $str2$$Register,
11712                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11713                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11714   %}
11715   ins_pipe( pipe_slow );
11716 %}
11717 
11718 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11719                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11720   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11721   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11722   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11723 
11724   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11725   ins_encode %{
11726     __ string_compare($str2$$Register, $str1$$Register,
11727                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11728                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 // fast string equals
11734 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11735                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11736   match(Set result (StrEquals (Binary str1 str2) cnt));
11737   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11738 
11739   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11740   ins_encode %{
11741     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11742                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11743                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11744   %}
11745 
11746   ins_pipe( pipe_slow );
11747 %}
11748 
11749 // fast search of substring with known size.
11750 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11751                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11752   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11753   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11754   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11755 
11756   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11757   ins_encode %{
11758     int icnt2 = (int)$int_cnt2$$constant;
11759     if (icnt2 >= 16) {
11760       // IndexOf for constant substrings with size >= 16 elements
11761       // which don't need to be loaded through the stack.
11762       __ string_indexofC8($str1$$Register, $str2$$Register,
11763                           $cnt1$$Register, $cnt2$$Register,
11764                           icnt2, $result$$Register,
11765                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11766     } else {
11767       // Small strings are loaded through the stack if they cross a page boundary.
11768       __ string_indexof($str1$$Register, $str2$$Register,
11769                         $cnt1$$Register, $cnt2$$Register,
11770                         icnt2, $result$$Register,
11771                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11772     }
11773   %}
11774   ins_pipe( pipe_slow );
11775 %}
11776 
11777 // fast search of substring with known size.
11778 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11779                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11780   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11781   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11782   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11783 
11784   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11785   ins_encode %{
11786     int icnt2 = (int)$int_cnt2$$constant;
11787     if (icnt2 >= 8) {
11788       // IndexOf for constant substrings with size >= 8 elements
11789       // which don't need to be loaded through the stack.
11790       __ string_indexofC8($str1$$Register, $str2$$Register,
11791                           $cnt1$$Register, $cnt2$$Register,
11792                           icnt2, $result$$Register,
11793                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11794     } else {
11795       // Small strings are loaded through the stack if they cross a page boundary.
11796       __ string_indexof($str1$$Register, $str2$$Register,
11797                         $cnt1$$Register, $cnt2$$Register,
11798                         icnt2, $result$$Register,
11799                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11800     }
11801   %}
11802   ins_pipe( pipe_slow );
11803 %}
11804 
11805 // fast search of substring with known size.
11806 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11807                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11808   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11809   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11810   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11811 
11812   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11813   ins_encode %{
11814     int icnt2 = (int)$int_cnt2$$constant;
11815     if (icnt2 >= 8) {
11816       // IndexOf for constant substrings with size >= 8 elements
11817       // which don't need to be loaded through the stack.
11818       __ string_indexofC8($str1$$Register, $str2$$Register,
11819                           $cnt1$$Register, $cnt2$$Register,
11820                           icnt2, $result$$Register,
11821                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11822     } else {
11823       // Small strings are loaded through the stack if they cross a page boundary.
11824       __ string_indexof($str1$$Register, $str2$$Register,
11825                         $cnt1$$Register, $cnt2$$Register,
11826                         icnt2, $result$$Register,
11827                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11828     }
11829   %}
11830   ins_pipe( pipe_slow );
11831 %}
11832 
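// fast search of substring with unknown size.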
11833 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11834                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11835   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11836   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11837   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11838 
11839   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11840   ins_encode %{
11841     __ string_indexof($str1$$Register, $str2$$Register,
11842                       $cnt1$$Register, $cnt2$$Register,
11843                       (-1), $result$$Register,
11844                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11845   %}
11846   ins_pipe( pipe_slow );
11847 %}
11848 
11849 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11850                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11851   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11852   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11853   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11854 
11855   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11856   ins_encode %{
11857     __ string_indexof($str1$$Register, $str2$$Register,
11858                       $cnt1$$Register, $cnt2$$Register,
11859                       (-1), $result$$Register,
11860                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11861   %}
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11866                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11867   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11868   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11869   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11870 
11871   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11872   ins_encode %{
11873     __ string_indexof($str1$$Register, $str2$$Register,
11874                       $cnt1$$Register, $cnt2$$Register,
11875                       (-1), $result$$Register,
11876                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11877   %}
11878   ins_pipe( pipe_slow );
11879 %}
11880 
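// fast search of a char in a char[]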
11881 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11882                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11883   predicate(UseSSE42Intrinsics);
11884   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11885   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11886   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11887   ins_encode %{
11888     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11889                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11890   %}
11891   ins_pipe( pipe_slow );
11892 %}
11893 
11894 // fast array equals
11895 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11896                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11897 %{
11898   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11899   match(Set result (AryEq ary1 ary2));
11900   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11901   //ins_cost(300);
11902 
11903   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11904   ins_encode %{
11905     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11906                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11907                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11908   %}
11909   ins_pipe( pipe_slow );
11910 %}
11911 
11912 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11913                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11914 %{
11915   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11916   match(Set result (AryEq ary1 ary2));
11917   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11918   //ins_cost(300);
11919 
11920   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11921   ins_encode %{
11922     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11923                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11924                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11925   %}
11926   ins_pipe( pipe_slow );
11927 %}
11928 
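// fast check of a byte[] for negative bytes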
11929 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11930                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11931 %{
11932   match(Set result (HasNegatives ary1 len));
11933   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11934 
11935   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11936   ins_encode %{
11937     __ has_negatives($ary1$$Register, $len$$Register,
11938                      $result$$Register, $tmp3$$Register,
11939                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11940   %}
11941   ins_pipe( pipe_slow );
11942 %}
11943 
11944 // fast char[] to byte[] compression
11945 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11946                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11947   match(Set result (StrCompressedCopy src (Binary dst len)));
11948   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11949 
11950   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11951   ins_encode %{
11952     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11953                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11954                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11955   %}
11956   ins_pipe( pipe_slow );
11957 %}
11958 
11959 // fast byte[] to char[] inflation
11960 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11961                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11962   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11963   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11964 
11965   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11966   ins_encode %{
11967     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11968                           $tmp1$$XMMRegister, $tmp2$$Register);
11969   %}
11970   ins_pipe( pipe_slow );
11971 %}
11972 
11973 // encode char[] to byte[] in ISO_8859_1
11974 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11975                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11976                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11977   match(Set result (EncodeISOArray src (Binary dst len)));
11978   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11979 
11980   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11981   ins_encode %{
11982     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11983                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11984                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11985   %}
11986   ins_pipe( pipe_slow );
11987 %}
11988 
11989 
11990 //----------Control Flow Instructions------------------------------------------
11991 // Signed compare Instructions
11992 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11993   match(Set cr (CmpI op1 op2));
11994   effect( DEF cr, USE op1, USE op2 );
11995   format %{ "CMP    $op1,$op2" %}
11996   opcode(0x3B);  /* Opcode 3B /r */
11997   ins_encode( OpcP, RegReg( op1, op2) );
11998   ins_pipe( ialu_cr_reg_reg );
11999 %}
12000 
12001 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12002   match(Set cr (CmpI op1 op2));
12003   effect( DEF cr, USE op1 );
12004   format %{ "CMP    $op1,$op2" %}
12005   opcode(0x81,0x07);  /* Opcode 81 /7 */
12006   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12007   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12008   ins_pipe( ialu_cr_reg_imm );
12009 %}
12010 
12011 // Cisc-spilled version of compI_eReg
12012 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12013   match(Set cr (CmpI op1 (LoadI op2)));
12014 
12015   format %{ "CMP    $op1,$op2" %}
12016   ins_cost(500);
12017   opcode(0x3B);  /* Opcode 3B /r */
12018   ins_encode( OpcP, RegMem( op1, op2) );
12019   ins_pipe( ialu_cr_reg_mem );
12020 %}
12021 
12022 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12023   match(Set cr (CmpI src zero));
12024   effect( DEF cr, USE src );
12025 
12026   format %{ "TEST   $src,$src" %}
12027   opcode(0x85);
12028   ins_encode( OpcP, RegReg( src, src ) );
12029   ins_pipe( ialu_cr_reg_imm );
12030 %}
12031 
12032 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12033   match(Set cr (CmpI (AndI src con) zero));
12034 
12035   format %{ "TEST   $src,$con" %}
12036   opcode(0xF7,0x00);
12037   ins_encode( OpcP, RegOpc(src), Con32(con) );
12038   ins_pipe( ialu_cr_reg_imm );
12039 %}
12040 
12041 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12042   match(Set cr (CmpI (AndI src mem) zero));
12043 
12044   format %{ "TEST   $src,$mem" %}
12045   opcode(0x85);
12046   ins_encode( OpcP, RegMem( src, mem ) );
12047   ins_pipe( ialu_cr_reg_mem );
12048 %}
12049 
12050 // Unsigned compare Instructions; really, same as signed except they
12051 // produce an eFlagsRegU instead of eFlagsReg.
12052 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12053   match(Set cr (CmpU op1 op2));
12054 
12055   format %{ "CMPu   $op1,$op2" %}
12056   opcode(0x3B);  /* Opcode 3B /r */
12057   ins_encode( OpcP, RegReg( op1, op2) );
12058   ins_pipe( ialu_cr_reg_reg );
12059 %}
12060 
12061 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12062   match(Set cr (CmpU op1 op2));
12063 
12064   format %{ "CMPu   $op1,$op2" %}
12065   opcode(0x81,0x07);  /* Opcode 81 /7 */
12066   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12067   ins_pipe( ialu_cr_reg_imm );
12068 %}
12069 
12070 // Cisc-spilled version of compU_eReg
12071 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12072   match(Set cr (CmpU op1 (LoadI op2)));
12073 
12074   format %{ "CMPu   $op1,$op2" %}
12075   ins_cost(500);
12076   opcode(0x3B);  /* Opcode 3B /r */
12077   ins_encode( OpcP, RegMem( op1, op2) );
12078   ins_pipe( ialu_cr_reg_mem );
12079 %}
12080 
12081 // // Cisc-spilled version of cmpU_eReg
12082 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12083 //  match(Set cr (CmpU (LoadI op1) op2));
12084 //
12085 //  format %{ "CMPu   $op1,$op2" %}
12086 //  ins_cost(500);
12087 //  opcode(0x39);  /* Opcode 39 /r */
12088 //  ins_encode( OpcP, RegMem( op1, op2) );
12089 //%}
12090 
12091 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12092   match(Set cr (CmpU src zero));
12093 
12094   format %{ "TESTu  $src,$src" %}
12095   opcode(0x85);
12096   ins_encode( OpcP, RegReg( src, src ) );
12097   ins_pipe( ialu_cr_reg_imm );
12098 %}
12099 
12100 // Unsigned pointer compare Instructions
12101 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12102   match(Set cr (CmpP op1 op2));
12103 
12104   format %{ "CMPu   $op1,$op2" %}
12105   opcode(0x3B);  /* Opcode 3B /r */
12106   ins_encode( OpcP, RegReg( op1, op2) );
12107   ins_pipe( ialu_cr_reg_reg );
12108 %}
12109 
12110 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12111   match(Set cr (CmpP op1 op2));
12112 
12113   format %{ "CMPu   $op1,$op2" %}
12114   opcode(0x81,0x07);  /* Opcode 81 /7 */
12115   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12116   ins_pipe( ialu_cr_reg_imm );
12117 %}
12118 
12119 // Cisc-spilled version of compP_eReg
12120 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12121   match(Set cr (CmpP op1 (LoadP op2)));
12122 
12123   format %{ "CMPu   $op1,$op2" %}
12124   ins_cost(500);
12125   opcode(0x3B);  /* Opcode 3B /r */
12126   ins_encode( OpcP, RegMem( op1, op2) );
12127   ins_pipe( ialu_cr_reg_mem );
12128 %}
12129 
12130 // // Cisc-spilled version of cmpP_eReg
12131 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12132 //  match(Set cr (CmpP (LoadP op1) op2));
12133 //
12134 //  format %{ "CMPu   $op1,$op2" %}
12135 //  ins_cost(500);
12136 //  opcode(0x39);  /* Opcode 39 /r */
12137 //  ins_encode( OpcP, RegMem( op1, op2) );
12138 //%}
12139 
12140 // Compare raw pointer (used in out-of-heap check).
12141 // Only works because non-oop pointers must be raw pointers
12142 // and raw pointers have no anti-dependencies.
12143 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12144   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12145   match(Set cr (CmpP op1 (LoadP op2)));
12146 
12147   format %{ "CMPu   $op1,$op2" %}
12148   opcode(0x3B);  /* Opcode 3B /r */
12149   ins_encode( OpcP, RegMem( op1, op2) );
12150   ins_pipe( ialu_cr_reg_mem );
12151 %}
12152 
12153 //
12154 // This will generate a signed flags result. This should be ok
12155 // since any compare to a zero should be eq/neq.
12156 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12157   match(Set cr (CmpP src zero));
12158 
12159   format %{ "TEST   $src,$src" %}
12160   opcode(0x85);
12161   ins_encode( OpcP, RegReg( src, src ) );
12162   ins_pipe( ialu_cr_reg_imm );
12163 %}
12164 
12165 // Cisc-spilled version of testP_reg
12166 // This will generate a signed flags result. This should be ok
12167 // since any compare to a zero should be eq/neq.
12168 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12169   match(Set cr (CmpP (LoadP op) zero));
12170 
12171   format %{ "TEST   $op,0xFFFFFFFF" %}
12172   ins_cost(500);
12173   opcode(0xF7);               /* Opcode F7 /0 */
12174   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12175   ins_pipe( ialu_cr_reg_imm );
12176 %}
12177 
12178 // Yanked all unsigned pointer compare operations.
12179 // Pointer compares are done with CmpP which is already unsigned.
12180 
12181 //----------Max and Min--------------------------------------------------------
12182 // Min Instructions
12183 ////
12184 //   *** Min and Max using the conditional move are slower than the
12185 //   *** branch version on a Pentium III.
12186 // // Conditional move for min
12187 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12188 //  effect( USE_DEF op2, USE op1, USE cr );
12189 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12190 //  opcode(0x4C,0x0F);
12191 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12192 //  ins_pipe( pipe_cmov_reg );
12193 //%}
12194 //
12195 //// Min Register with Register (P6 version)
12196 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12197 //  predicate(VM_Version::supports_cmov() );
12198 //  match(Set op2 (MinI op1 op2));
12199 //  ins_cost(200);
12200 //  expand %{
12201 //    eFlagsReg cr;
12202 //    compI_eReg(cr,op1,op2);
12203 //    cmovI_reg_lt(op2,op1,cr);
12204 //  %}
12205 //%}
12206 
12207 // Min Register with Register (generic version)
12208 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12209   match(Set dst (MinI dst src));
12210   effect(KILL flags);
12211   ins_cost(300);
12212 
12213   format %{ "MIN    $dst,$src" %}
12214   opcode(0xCC);
12215   ins_encode( min_enc(dst,src) );
12216   ins_pipe( pipe_slow );
12217 %}
12218 
12219 // Max Register with Register
12220 //   *** Min and Max using the conditional move are slower than the
12221 //   *** branch version on a Pentium III.
12222 // // Conditional move for max
12223 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12224 //  effect( USE_DEF op2, USE op1, USE cr );
12225 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12226 //  opcode(0x4F,0x0F);
12227 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12228 //  ins_pipe( pipe_cmov_reg );
12229 //%}
12230 //
12231 // // Max Register with Register (P6 version)
12232 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12233 //  predicate(VM_Version::supports_cmov() );
12234 //  match(Set op2 (MaxI op1 op2));
12235 //  ins_cost(200);
12236 //  expand %{
12237 //    eFlagsReg cr;
12238 //    compI_eReg(cr,op1,op2);
12239 //    cmovI_reg_gt(op2,op1,cr);
12240 //  %}
12241 //%}
12242 
12243 // Max Register with Register (generic version)
12244 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12245   match(Set dst (MaxI dst src));
12246   effect(KILL flags);
12247   ins_cost(300);
12248 
12249   format %{ "MAX    $dst,$src" %}
12250   opcode(0xCC);
12251   ins_encode( max_enc(dst,src) );
12252   ins_pipe( pipe_slow );
12253 %}
12254 
12255 // ============================================================================
12256 // Counted Loop limit node which represents exact final iterator value.
12257 // Note: the resulting value should fit into integer range since
12258 // counted loops have limit check on overflow.
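// For example, init == 0, limit == 10, stride == 3 gives 0 + 3*((10 - 0 + 3 - 1)/3) = 12.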
12259 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12260   match(Set limit (LoopLimit (Binary init limit) stride));
12261   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12262   ins_cost(300);
12263 
12264   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12265   ins_encode %{
12266     int strd = (int)$stride$$constant;
12267     assert(strd != 1 && strd != -1, "sanity");
12268     int m1 = (strd > 0) ? 1 : -1;
12269     // Convert limit to long (EAX:EDX)
12270     __ cdql();
12271     // Convert init to long (init:tmp)
12272     __ movl($tmp$$Register, $init$$Register);
12273     __ sarl($tmp$$Register, 31);
12274     // $limit - $init
12275     __ subl($limit$$Register, $init$$Register);
12276     __ sbbl($limit_hi$$Register, $tmp$$Register);
12277     // + ($stride - 1)
12278     if (strd > 0) {
12279       __ addl($limit$$Register, (strd - 1));
12280       __ adcl($limit_hi$$Register, 0);
12281       __ movl($tmp$$Register, strd);
12282     } else {
12283       __ addl($limit$$Register, (strd + 1));
12284       __ adcl($limit_hi$$Register, -1);
12285       __ lneg($limit_hi$$Register, $limit$$Register);
12286       __ movl($tmp$$Register, -strd);
12287     }
12288     // signed division: (EAX:EDX) / pos_stride
12289     __ idivl($tmp$$Register);
12290     if (strd < 0) {
12291       // restore sign
12292       __ negl($tmp$$Register);
12293     }
12294     // (EAX) * stride
12295     __ mull($tmp$$Register);
12296     // + init (ignore upper bits)
12297     __ addl($limit$$Register, $init$$Register);
12298   %}
12299   ins_pipe( pipe_slow );
12300 %}
12301 
12302 // ============================================================================
12303 // Branch Instructions
12304 // Jump Table
12305 instruct jumpXtnd(rRegI switch_val) %{
12306   match(Jump switch_val);
12307   ins_cost(350);
12308   format %{ "JMP    [$constantaddress](,$switch_val,1)" %}
12309   ins_encode %{
12310     // Jump to Address(table_base + switch_reg)
12311     Address index(noreg, $switch_val$$Register, Address::times_1);
12312     __ jump(ArrayAddress($constantaddress, index));
12313   %}
12314   ins_pipe(pipe_jmp);
12315 %}
12316 
12317 // Jump Direct - Label defines a relative address from JMP+1
12318 instruct jmpDir(label labl) %{
12319   match(Goto);
12320   effect(USE labl);
12321 
12322   ins_cost(300);
12323   format %{ "JMP    $labl" %}
12324   size(5);
12325   ins_encode %{
12326     Label* L = $labl$$label;
12327     __ jmp(*L, false); // Always long jump
12328   %}
12329   ins_pipe( pipe_jmp );
12330 %}
12331 
12332 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12333 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12334   match(If cop cr);
12335   effect(USE labl);
12336 
12337   ins_cost(300);
12338   format %{ "J$cop    $labl" %}
12339   size(6);
12340   ins_encode %{
12341     Label* L = $labl$$label;
12342     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12343   %}
12344   ins_pipe( pipe_jcc );
12345 %}
12346 
12347 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12348 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12349   predicate(!n->has_vector_mask_set());
12350   match(CountedLoopEnd cop cr);
12351   effect(USE labl);
12352 
12353   ins_cost(300);
12354   format %{ "J$cop    $labl\t# Loop end" %}
12355   size(6);
12356   ins_encode %{
12357     Label* L = $labl$$label;
12358     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12359   %}
12360   ins_pipe( pipe_jcc );
12361 %}
12362 
12363 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12364 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12365   predicate(!n->has_vector_mask_set());
12366   match(CountedLoopEnd cop cmp);
12367   effect(USE labl);
12368 
12369   ins_cost(300);
12370   format %{ "J$cop,u  $labl\t# Loop end" %}
12371   size(6);
12372   ins_encode %{
12373     Label* L = $labl$$label;
12374     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12375   %}
12376   ins_pipe( pipe_jcc );
12377 %}
12378 
12379 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12380   predicate(!n->has_vector_mask_set());
12381   match(CountedLoopEnd cop cmp);
12382   effect(USE labl);
12383 
12384   ins_cost(200);
12385   format %{ "J$cop,u  $labl\t# Loop end" %}
12386   size(6);
12387   ins_encode %{
12388     Label* L = $labl$$label;
12389     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12390   %}
12391   ins_pipe( pipe_jcc );
12392 %}
12393 
12394 // mask version
12395 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12396 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12397   predicate(n->has_vector_mask_set());
12398   match(CountedLoopEnd cop cr);
12399   effect(USE labl);
12400 
12401   ins_cost(400);
12402   format %{ "J$cop    $labl\t# Loop end\n\t"
12403             "restorevectmask \t# vector mask restore for loops" %}
12404   size(10);
12405   ins_encode %{
12406     Label* L = $labl$$label;
12407     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12408     __ restorevectmask();
12409   %}
12410   ins_pipe( pipe_jcc );
12411 %}
12412 
12413 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12414 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12415   predicate(n->has_vector_mask_set());
12416   match(CountedLoopEnd cop cmp);
12417   effect(USE labl);
12418 
12419   ins_cost(400);
12420   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12421             "restorevectmask \t# vector mask restore for loops" %}
12422   size(10);
12423   ins_encode %{
12424     Label* L = $labl$$label;
12425     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12426     __ restorevectmask();
12427   %}
12428   ins_pipe( pipe_jcc );
12429 %}
12430 
12431 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12432   predicate(n->has_vector_mask_set());
12433   match(CountedLoopEnd cop cmp);
12434   effect(USE labl);
12435 
12436   ins_cost(300);
12437   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12438             "restorevectmask \t# vector mask restore for loops" %}
12439   size(10);
12440   ins_encode %{
12441     Label* L = $labl$$label;
12442     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12443     __ restorevectmask();
12444   %}
12445   ins_pipe( pipe_jcc );
12446 %}
12447 
12448 // Jump Direct Conditional - using unsigned comparison
12449 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12450   match(If cop cmp);
12451   effect(USE labl);
12452 
12453   ins_cost(300);
12454   format %{ "J$cop,u  $labl" %}
12455   size(6);
12456   ins_encode %{
12457     Label* L = $labl$$label;
12458     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12459   %}
12460   ins_pipe(pipe_jcc);
12461 %}
12462 
12463 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12464   match(If cop cmp);
12465   effect(USE labl);
12466 
12467   ins_cost(200);
12468   format %{ "J$cop,u  $labl" %}
12469   size(6);
12470   ins_encode %{
12471     Label* L = $labl$$label;
12472     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12473   %}
12474   ins_pipe(pipe_jcc);
12475 %}
12476 
12477 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12478   match(If cop cmp);
12479   effect(USE labl);
12480 
12481   ins_cost(200);
12482   format %{ $$template
12483     if ($cop$$cmpcode == Assembler::notEqual) {
12484       $$emit$$"JP,u   $labl\n\t"
12485       $$emit$$"J$cop,u   $labl"
12486     } else {
12487       $$emit$$"JP,u   done\n\t"
12488       $$emit$$"J$cop,u   $labl\n\t"
12489       $$emit$$"done:"
12490     }
12491   %}
12492   ins_encode %{
12493     Label* l = $labl$$label;
12494     if ($cop$$cmpcode == Assembler::notEqual) {
12495       __ jcc(Assembler::parity, *l, false);
12496       __ jcc(Assembler::notEqual, *l, false);
12497     } else if ($cop$$cmpcode == Assembler::equal) {
12498       Label done;
12499       __ jccb(Assembler::parity, done);
12500       __ jcc(Assembler::equal, *l, false);
12501       __ bind(done);
12502     } else {
12503        ShouldNotReachHere();
12504     }
12505   %}
12506   ins_pipe(pipe_jcc);
12507 %}
12508 
12509 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superclass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
12514 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12515   match(Set result (PartialSubtypeCheck sub super));
12516   effect( KILL rcx, KILL cr );
12517 
12518   ins_cost(1100);  // slightly larger than the next version
12519   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12520             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12521             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12522             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12523             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12524             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12525             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12526      "miss:\t" %}
12527 
12528   opcode(0x1); // Force a XOR of EDI
12529   ins_encode( enc_PartialSubtypeCheck() );
12530   ins_pipe( pipe_slow );
12531 %}
12532 
12533 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12534   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12535   effect( KILL rcx, KILL result );
12536 
12537   ins_cost(1000);
12538   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12539             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12540             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12541             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12542             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12543             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12544      "miss:\t" %}
12545 
12546   opcode(0x0);  // No need to XOR EDI
12547   ins_encode( enc_PartialSubtypeCheck() );
12548   ins_pipe( pipe_slow );
12549 %}
12550 
12551 // ============================================================================
12552 // Branch Instructions -- short offset versions
12553 //
12554 // These instructions are used to replace jumps of a long offset (the default
12555 // match) with jumps of a shorter offset.  These instructions are all tagged
12556 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12557 // match rules in general matching.  Instead, the ADLC generates a conversion
12558 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
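//
// Illustration only: a long conditional jump is 6 bytes (0x0F 0x8<cc> rel32)
// while the short form is 2 bytes (0x7<cc> rel8), which is why the short rules
// below declare size(2) versus size(6) in the long forms above.  A rough,
// hypothetical sketch of the test applied (simplified, not the actual
// implementation):
//
//   bool fits_short(intptr_t branch_pc, intptr_t target_pc) {
//     intptr_t offset = target_pc - (branch_pc + 2);  // rel8 is relative to the end of the 2-byte insn
//     return -128 <= offset && offset <= 127;
//   }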
12562 
12563 // Jump Direct - Label defines a relative address from JMP+1
12564 instruct jmpDir_short(label labl) %{
12565   match(Goto);
12566   effect(USE labl);
12567 
12568   ins_cost(300);
12569   format %{ "JMP,s  $labl" %}
12570   size(2);
12571   ins_encode %{
12572     Label* L = $labl$$label;
12573     __ jmpb(*L);
12574   %}
12575   ins_pipe( pipe_jmp );
12576   ins_short_branch(1);
12577 %}
12578 
12579 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12580 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12581   match(If cop cr);
12582   effect(USE labl);
12583 
12584   ins_cost(300);
12585   format %{ "J$cop,s  $labl" %}
12586   size(2);
12587   ins_encode %{
12588     Label* L = $labl$$label;
12589     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12590   %}
12591   ins_pipe( pipe_jcc );
12592   ins_short_branch(1);
12593 %}
12594 
12595 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12596 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12597   match(CountedLoopEnd cop cr);
12598   effect(USE labl);
12599 
12600   ins_cost(300);
12601   format %{ "J$cop,s  $labl\t# Loop end" %}
12602   size(2);
12603   ins_encode %{
12604     Label* L = $labl$$label;
12605     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12606   %}
12607   ins_pipe( pipe_jcc );
12608   ins_short_branch(1);
12609 %}
12610 
12611 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12612 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12613   match(CountedLoopEnd cop cmp);
12614   effect(USE labl);
12615 
12616   ins_cost(300);
12617   format %{ "J$cop,us $labl\t# Loop end" %}
12618   size(2);
12619   ins_encode %{
12620     Label* L = $labl$$label;
12621     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12622   %}
12623   ins_pipe( pipe_jcc );
12624   ins_short_branch(1);
12625 %}
12626 
12627 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12628   match(CountedLoopEnd cop cmp);
12629   effect(USE labl);
12630 
12631   ins_cost(300);
12632   format %{ "J$cop,us $labl\t# Loop end" %}
12633   size(2);
12634   ins_encode %{
12635     Label* L = $labl$$label;
12636     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12637   %}
12638   ins_pipe( pipe_jcc );
12639   ins_short_branch(1);
12640 %}
12641 
12642 // Jump Direct Conditional - using unsigned comparison
12643 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12644   match(If cop cmp);
12645   effect(USE labl);
12646 
12647   ins_cost(300);
12648   format %{ "J$cop,us $labl" %}
12649   size(2);
12650   ins_encode %{
12651     Label* L = $labl$$label;
12652     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12653   %}
12654   ins_pipe( pipe_jcc );
12655   ins_short_branch(1);
12656 %}
12657 
12658 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12659   match(If cop cmp);
12660   effect(USE labl);
12661 
12662   ins_cost(300);
12663   format %{ "J$cop,us $labl" %}
12664   size(2);
12665   ins_encode %{
12666     Label* L = $labl$$label;
12667     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12668   %}
12669   ins_pipe( pipe_jcc );
12670   ins_short_branch(1);
12671 %}
12672 
12673 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12674   match(If cop cmp);
12675   effect(USE labl);
12676 
12677   ins_cost(300);
12678   format %{ $$template
12679     if ($cop$$cmpcode == Assembler::notEqual) {
12680       $$emit$$"JP,u,s   $labl\n\t"
12681       $$emit$$"J$cop,u,s   $labl"
12682     } else {
12683       $$emit$$"JP,u,s   done\n\t"
12684       $$emit$$"J$cop,u,s  $labl\n\t"
12685       $$emit$$"done:"
12686     }
12687   %}
12688   size(4);
12689   ins_encode %{
12690     Label* l = $labl$$label;
12691     if ($cop$$cmpcode == Assembler::notEqual) {
12692       __ jccb(Assembler::parity, *l);
12693       __ jccb(Assembler::notEqual, *l);
12694     } else if ($cop$$cmpcode == Assembler::equal) {
12695       Label done;
12696       __ jccb(Assembler::parity, done);
12697       __ jccb(Assembler::equal, *l);
12698       __ bind(done);
12699     } else {
12700        ShouldNotReachHere();
12701     }
12702   %}
12703   ins_pipe(pipe_jcc);
12704   ins_short_branch(1);
12705 %}
12706 
12707 // ============================================================================
12708 // Long Compare
12709 //
12710 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12711 // is tricky.  The flavor of compare used depends on whether we are testing
12712 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12713 // The GE test is the negated LT test.  The LE test can be had by commuting
12714 // the operands (yielding a GE test) and then negating; negate again for the
12715 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12716 // NE test is negated from that.
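//
// As a rough illustration only (C-style sketch, not emitted code; the helper
// names are hypothetical), testing a long held as {hi, lo} 32-bit halves
// against zero reduces to:
//
//   bool lt0(int32_t hi, uint32_t lo) { return hi < 0;         }  // just the sign bit of hi
//   bool ge0(int32_t hi, uint32_t lo) { return !(hi < 0);      }  // negated LT
//   bool eq0(int32_t hi, uint32_t lo) { return (hi | lo) == 0; }  // OR the halves, test Z
//   bool ne0(int32_t hi, uint32_t lo) { return (hi | lo) != 0; }  // negated EQ
//
// LE and GT against a general value come from commuting the operands of the
// GE and LT tests (a <= b iff b >= a), which is what the *_LEGT rules below
// do via cmpOp_commute.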
12717 
12718 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12719 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12720 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12721 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12722 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12723 // foo match ends up with the wrong leaf.  One fix is to not match both
12724 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12725 // both forms beat the trinary form of long-compare and both are very useful
12726 // on Intel which has so few registers.
12727 
12728 // Manifest a CmpL result in an integer register.  Very painful.
12729 // This is the test to avoid.
12730 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12731   match(Set dst (CmpL3 src1 src2));
12732   effect( KILL flags );
12733   ins_cost(1000);
12734   format %{ "XOR    $dst,$dst\n\t"
12735             "CMP    $src1.hi,$src2.hi\n\t"
12736             "JLT,s  m_one\n\t"
12737             "JGT,s  p_one\n\t"
12738             "CMP    $src1.lo,$src2.lo\n\t"
12739             "JB,s   m_one\n\t"
12740             "JEQ,s  done\n"
12741     "p_one:\tINC    $dst\n\t"
12742             "JMP,s  done\n"
12743     "m_one:\tDEC    $dst\n"
12744      "done:" %}
12745   ins_encode %{
12746     Label p_one, m_one, done;
12747     __ xorptr($dst$$Register, $dst$$Register);
12748     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12749     __ jccb(Assembler::less,    m_one);
12750     __ jccb(Assembler::greater, p_one);
12751     __ cmpl($src1$$Register, $src2$$Register);
12752     __ jccb(Assembler::below,   m_one);
12753     __ jccb(Assembler::equal,   done);
12754     __ bind(p_one);
12755     __ incrementl($dst$$Register);
12756     __ jmpb(done);
12757     __ bind(m_one);
12758     __ decrementl($dst$$Register);
12759     __ bind(done);
12760   %}
12761   ins_pipe( pipe_slow );
12762 %}
12763 
12764 //======
12765 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12766 // compares.  Can be used for LE or GT compares by reversing arguments.
12767 // NOT GOOD FOR EQ/NE tests.
12768 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12769   match( Set flags (CmpL src zero ));
12770   ins_cost(100);
12771   format %{ "TEST   $src.hi,$src.hi" %}
12772   opcode(0x85);
12773   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12774   ins_pipe( ialu_cr_reg_reg );
12775 %}
12776 
12777 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12778 // compares.  Can be used for LE or GT compares by reversing arguments.
12779 // NOT GOOD FOR EQ/NE tests.
12780 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12781   match( Set flags (CmpL src1 src2 ));
12782   effect( TEMP tmp );
12783   ins_cost(300);
12784   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12785             "MOV    $tmp,$src1.hi\n\t"
12786             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12787   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12788   ins_pipe( ialu_cr_reg_reg );
12789 %}
12790 
// Long compares reg < zero/reg OR reg >= zero/reg.
12792 // Just a wrapper for a normal branch, plus the predicate test.
12793 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12794   match(If cmp flags);
12795   effect(USE labl);
12796   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12797   expand %{
12798     jmpCon(cmp,flags,labl);    // JLT or JGE...
12799   %}
12800 %}
12801 
12802 //======
12803 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12804 // compares.  Can be used for LE or GT compares by reversing arguments.
12805 // NOT GOOD FOR EQ/NE tests.
12806 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12807   match(Set flags (CmpUL src zero));
12808   ins_cost(100);
12809   format %{ "TEST   $src.hi,$src.hi" %}
12810   opcode(0x85);
12811   ins_encode(OpcP, RegReg_Hi2(src, src));
12812   ins_pipe(ialu_cr_reg_reg);
12813 %}
12814 
12815 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12816 // compares.  Can be used for LE or GT compares by reversing arguments.
12817 // NOT GOOD FOR EQ/NE tests.
12818 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12819   match(Set flags (CmpUL src1 src2));
12820   effect(TEMP tmp);
12821   ins_cost(300);
12822   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12823             "MOV    $tmp,$src1.hi\n\t"
12824             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12825   ins_encode(long_cmp_flags2(src1, src2, tmp));
12826   ins_pipe(ialu_cr_reg_reg);
12827 %}
12828 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12830 // Just a wrapper for a normal branch, plus the predicate test.
12831 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12832   match(If cmp flags);
12833   effect(USE labl);
12834   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12835   expand %{
12836     jmpCon(cmp, flags, labl);    // JLT or JGE...
12837   %}
12838 %}
12839 
12840 // Compare 2 longs and CMOVE longs.
12841 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12842   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12843   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12844   ins_cost(400);
12845   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12846             "CMOV$cmp $dst.hi,$src.hi" %}
12847   opcode(0x0F,0x40);
12848   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12849   ins_pipe( pipe_cmov_reg_long );
12850 %}
12851 
12852 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12853   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12854   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12855   ins_cost(500);
12856   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12857             "CMOV$cmp $dst.hi,$src.hi" %}
12858   opcode(0x0F,0x40);
12859   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12860   ins_pipe( pipe_cmov_reg_long );
12861 %}
12862 
12863 // Compare 2 longs and CMOVE ints.
12864 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12865   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12866   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12867   ins_cost(200);
12868   format %{ "CMOV$cmp $dst,$src" %}
12869   opcode(0x0F,0x40);
12870   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12871   ins_pipe( pipe_cmov_reg );
12872 %}
12873 
12874 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12875   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12876   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12877   ins_cost(250);
12878   format %{ "CMOV$cmp $dst,$src" %}
12879   opcode(0x0F,0x40);
12880   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12881   ins_pipe( pipe_cmov_mem );
12882 %}
12883 
// Compare 2 longs and CMOVE ptrs.
12885 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12886   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12887   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12888   ins_cost(200);
12889   format %{ "CMOV$cmp $dst,$src" %}
12890   opcode(0x0F,0x40);
12891   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12892   ins_pipe( pipe_cmov_reg );
12893 %}
12894 
12895 // Compare 2 longs and CMOVE doubles
12896 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12898   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12899   ins_cost(200);
12900   expand %{
12901     fcmovDPR_regS(cmp,flags,dst,src);
12902   %}
12903 %}
12904 
12905 // Compare 2 longs and CMOVE doubles
12906 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12908   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12909   ins_cost(200);
12910   expand %{
12911     fcmovD_regS(cmp,flags,dst,src);
12912   %}
12913 %}
12914 
12915 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12917   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12918   ins_cost(200);
12919   expand %{
12920     fcmovFPR_regS(cmp,flags,dst,src);
12921   %}
12922 %}
12923 
12924 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12926   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12927   ins_cost(200);
12928   expand %{
12929     fcmovF_regS(cmp,flags,dst,src);
12930   %}
12931 %}
12932 
12933 //======
12934 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12935 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12936   match( Set flags (CmpL src zero ));
12937   effect(TEMP tmp);
12938   ins_cost(200);
12939   format %{ "MOV    $tmp,$src.lo\n\t"
12940             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12941   ins_encode( long_cmp_flags0( src, tmp ) );
12942   ins_pipe( ialu_reg_reg_long );
12943 %}
12944 
12945 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12946 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12947   match( Set flags (CmpL src1 src2 ));
12948   ins_cost(200+300);
12949   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12950             "JNE,s  skip\n\t"
12951             "CMP    $src1.hi,$src2.hi\n\t"
12952      "skip:\t" %}
12953   ins_encode( long_cmp_flags1( src1, src2 ) );
12954   ins_pipe( ialu_cr_reg_reg );
12955 %}
12956 
12957 // Long compare reg == zero/reg OR reg != zero/reg
12958 // Just a wrapper for a normal branch, plus the predicate test.
12959 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12960   match(If cmp flags);
12961   effect(USE labl);
12962   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12963   expand %{
12964     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12965   %}
12966 %}
12967 
12968 //======
12969 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12970 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12971   match(Set flags (CmpUL src zero));
12972   effect(TEMP tmp);
12973   ins_cost(200);
12974   format %{ "MOV    $tmp,$src.lo\n\t"
12975             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12976   ins_encode(long_cmp_flags0(src, tmp));
12977   ins_pipe(ialu_reg_reg_long);
12978 %}
12979 
12980 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12981 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12982   match(Set flags (CmpUL src1 src2));
12983   ins_cost(200+300);
12984   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12985             "JNE,s  skip\n\t"
12986             "CMP    $src1.hi,$src2.hi\n\t"
12987      "skip:\t" %}
12988   ins_encode(long_cmp_flags1(src1, src2));
12989   ins_pipe(ialu_cr_reg_reg);
12990 %}
12991 
12992 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12993 // Just a wrapper for a normal branch, plus the predicate test.
12994 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12995   match(If cmp flags);
12996   effect(USE labl);
12997   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12998   expand %{
12999     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13000   %}
13001 %}
13002 
13003 // Compare 2 longs and CMOVE longs.
13004 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13005   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13006   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13007   ins_cost(400);
13008   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13009             "CMOV$cmp $dst.hi,$src.hi" %}
13010   opcode(0x0F,0x40);
13011   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13012   ins_pipe( pipe_cmov_reg_long );
13013 %}
13014 
13015 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13016   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13017   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13018   ins_cost(500);
13019   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13020             "CMOV$cmp $dst.hi,$src.hi" %}
13021   opcode(0x0F,0x40);
13022   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13023   ins_pipe( pipe_cmov_reg_long );
13024 %}
13025 
13026 // Compare 2 longs and CMOVE ints.
13027 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13028   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13029   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13030   ins_cost(200);
13031   format %{ "CMOV$cmp $dst,$src" %}
13032   opcode(0x0F,0x40);
13033   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13034   ins_pipe( pipe_cmov_reg );
13035 %}
13036 
13037 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13038   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13039   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13040   ins_cost(250);
13041   format %{ "CMOV$cmp $dst,$src" %}
13042   opcode(0x0F,0x40);
13043   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13044   ins_pipe( pipe_cmov_mem );
13045 %}
13046 
// Compare 2 longs and CMOVE ptrs.
13048 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13049   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13050   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13051   ins_cost(200);
13052   format %{ "CMOV$cmp $dst,$src" %}
13053   opcode(0x0F,0x40);
13054   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13055   ins_pipe( pipe_cmov_reg );
13056 %}
13057 
13058 // Compare 2 longs and CMOVE doubles
13059 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13061   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13062   ins_cost(200);
13063   expand %{
13064     fcmovDPR_regS(cmp,flags,dst,src);
13065   %}
13066 %}
13067 
13068 // Compare 2 longs and CMOVE doubles
13069 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13071   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13072   ins_cost(200);
13073   expand %{
13074     fcmovD_regS(cmp,flags,dst,src);
13075   %}
13076 %}
13077 
13078 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13080   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13081   ins_cost(200);
13082   expand %{
13083     fcmovFPR_regS(cmp,flags,dst,src);
13084   %}
13085 %}
13086 
13087 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13089   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13090   ins_cost(200);
13091   expand %{
13092     fcmovF_regS(cmp,flags,dst,src);
13093   %}
13094 %}
13095 
13096 //======
13097 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13098 // Same as cmpL_reg_flags_LEGT except must negate src
13099 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13100   match( Set flags (CmpL src zero ));
13101   effect( TEMP tmp );
13102   ins_cost(300);
13103   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13104             "CMP    $tmp,$src.lo\n\t"
13105             "SBB    $tmp,$src.hi\n\t" %}
13106   ins_encode( long_cmp_flags3(src, tmp) );
13107   ins_pipe( ialu_reg_reg_long );
13108 %}
13109 
13110 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13111 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13112 // requires a commuted test to get the same result.
13113 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13114   match( Set flags (CmpL src1 src2 ));
13115   effect( TEMP tmp );
13116   ins_cost(300);
13117   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13118             "MOV    $tmp,$src2.hi\n\t"
13119             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13120   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13121   ins_pipe( ialu_cr_reg_reg );
13122 %}
13123 
// Long compares reg <= zero/reg OR reg > zero/reg.
13125 // Just a wrapper for a normal branch, plus the predicate test
13126 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13127   match(If cmp flags);
13128   effect(USE labl);
13129   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13130   ins_cost(300);
13131   expand %{
13132     jmpCon(cmp,flags,labl);    // JGT or JLE...
13133   %}
13134 %}
13135 
13136 //======
13137 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13138 // Same as cmpUL_reg_flags_LEGT except must negate src
13139 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13140   match(Set flags (CmpUL src zero));
13141   effect(TEMP tmp);
13142   ins_cost(300);
13143   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13144             "CMP    $tmp,$src.lo\n\t"
13145             "SBB    $tmp,$src.hi\n\t" %}
13146   ins_encode(long_cmp_flags3(src, tmp));
13147   ins_pipe(ialu_reg_reg_long);
13148 %}
13149 
13150 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13151 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13152 // requires a commuted test to get the same result.
13153 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13154   match(Set flags (CmpUL src1 src2));
13155   effect(TEMP tmp);
13156   ins_cost(300);
13157   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13158             "MOV    $tmp,$src2.hi\n\t"
13159             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13160   ins_encode(long_cmp_flags2( src2, src1, tmp));
13161   ins_pipe(ialu_cr_reg_reg);
13162 %}
13163 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13165 // Just a wrapper for a normal branch, plus the predicate test
13166 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13167   match(If cmp flags);
13168   effect(USE labl);
13169   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13170   ins_cost(300);
13171   expand %{
13172     jmpCon(cmp, flags, labl);    // JGT or JLE...
13173   %}
13174 %}
13175 
13176 // Compare 2 longs and CMOVE longs.
13177 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13178   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13179   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13180   ins_cost(400);
13181   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13182             "CMOV$cmp $dst.hi,$src.hi" %}
13183   opcode(0x0F,0x40);
13184   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13185   ins_pipe( pipe_cmov_reg_long );
13186 %}
13187 
13188 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13189   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13190   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13191   ins_cost(500);
13192   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13193             "CMOV$cmp $dst.hi,$src.hi+4" %}
13194   opcode(0x0F,0x40);
13195   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13196   ins_pipe( pipe_cmov_reg_long );
13197 %}
13198 
13199 // Compare 2 longs and CMOVE ints.
13200 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13201   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13202   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13203   ins_cost(200);
13204   format %{ "CMOV$cmp $dst,$src" %}
13205   opcode(0x0F,0x40);
13206   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13207   ins_pipe( pipe_cmov_reg );
13208 %}
13209 
13210 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13211   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13212   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13213   ins_cost(250);
13214   format %{ "CMOV$cmp $dst,$src" %}
13215   opcode(0x0F,0x40);
13216   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13217   ins_pipe( pipe_cmov_mem );
13218 %}
13219 
13220 // Compare 2 longs and CMOVE ptrs.
13221 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13222   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13223   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13224   ins_cost(200);
13225   format %{ "CMOV$cmp $dst,$src" %}
13226   opcode(0x0F,0x40);
13227   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13228   ins_pipe( pipe_cmov_reg );
13229 %}
13230 
13231 // Compare 2 longs and CMOVE doubles
13232 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13234   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13235   ins_cost(200);
13236   expand %{
13237     fcmovDPR_regS(cmp,flags,dst,src);
13238   %}
13239 %}
13240 
13241 // Compare 2 longs and CMOVE doubles
13242 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13244   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13245   ins_cost(200);
13246   expand %{
13247     fcmovD_regS(cmp,flags,dst,src);
13248   %}
13249 %}
13250 
13251 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13253   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13254   ins_cost(200);
13255   expand %{
13256     fcmovFPR_regS(cmp,flags,dst,src);
13257   %}
13258 %}
13259 
13260 
13261 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13263   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13264   ins_cost(200);
13265   expand %{
13266     fcmovF_regS(cmp,flags,dst,src);
13267   %}
13268 %}
13269 
13270 
13271 // ============================================================================
13272 // Procedure Call/Return Instructions
13273 // Call Java Static Instruction
13274 // Note: If this code changes, the corresponding ret_addr_offset() and
13275 //       compute_padding() functions will have to be adjusted.
13276 instruct CallStaticJavaDirect(method meth) %{
13277   match(CallStaticJava);
13278   effect(USE meth);
13279 
13280   ins_cost(300);
13281   format %{ "CALL,static " %}
13282   opcode(0xE8); /* E8 cd */
13283   ins_encode( pre_call_resets,
13284               Java_Static_Call( meth ),
13285               call_epilog,
13286               post_call_FPU );
13287   ins_pipe( pipe_slow );
13288   ins_alignment(4);
13289 %}
13290 
13291 // Call Java Dynamic Instruction
13292 // Note: If this code changes, the corresponding ret_addr_offset() and
13293 //       compute_padding() functions will have to be adjusted.
13294 instruct CallDynamicJavaDirect(method meth) %{
13295   match(CallDynamicJava);
13296   effect(USE meth);
13297 
13298   ins_cost(300);
13299   format %{ "MOV    EAX,(oop)-1\n\t"
13300             "CALL,dynamic" %}
13301   opcode(0xE8); /* E8 cd */
13302   ins_encode( pre_call_resets,
13303               Java_Dynamic_Call( meth ),
13304               call_epilog,
13305               post_call_FPU );
13306   ins_pipe( pipe_slow );
13307   ins_alignment(4);
13308 %}
13309 
13310 // Call Runtime Instruction
13311 instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
13313   effect(USE meth);
13314 
13315   ins_cost(300);
13316   format %{ "CALL,runtime " %}
13317   opcode(0xE8); /* E8 cd */
13318   // Use FFREEs to clear entries in float stack
13319   ins_encode( pre_call_resets,
13320               FFree_Float_Stack_All,
13321               Java_To_Runtime( meth ),
13322               post_call_FPU );
13323   ins_pipe( pipe_slow );
13324 %}
13325 
13326 // Call runtime without safepoint
13327 instruct CallLeafDirect(method meth) %{
13328   match(CallLeaf);
13329   effect(USE meth);
13330 
13331   ins_cost(300);
13332   format %{ "CALL_LEAF,runtime " %}
13333   opcode(0xE8); /* E8 cd */
13334   ins_encode( pre_call_resets,
13335               FFree_Float_Stack_All,
13336               Java_To_Runtime( meth ),
13337               Verify_FPU_For_Leaf, post_call_FPU );
13338   ins_pipe( pipe_slow );
13339 %}
13340 
13341 instruct CallLeafNoFPDirect(method meth) %{
13342   match(CallLeafNoFP);
13343   effect(USE meth);
13344 
13345   ins_cost(300);
13346   format %{ "CALL_LEAF_NOFP,runtime " %}
13347   opcode(0xE8); /* E8 cd */
13348   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13349   ins_pipe( pipe_slow );
13350 %}
13351 
13352 
13353 // Return Instruction
13354 // Remove the return address & jump to it.
13355 instruct Ret() %{
13356   match(Return);
13357   format %{ "RET" %}
13358   opcode(0xC3);
13359   ins_encode(OpcP);
13360   ins_pipe( pipe_jmp );
13361 %}
13362 
13363 // Tail Call; Jump from runtime stub to Java code.
13364 // Also known as an 'interprocedural jump'.
13365 // Target of jump will eventually return to caller.
13366 // TailJump below removes the return address.
13367 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13368   match(TailCall jump_target method_oop );
13369   ins_cost(300);
13370   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13371   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13372   ins_encode( OpcP, RegOpc(jump_target) );
13373   ins_pipe( pipe_jmp );
13374 %}
13375 
13376 
13377 // Tail Jump; remove the return address; jump to target.
13378 // TailCall above leaves the return address around.
13379 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13380   match( TailJump jump_target ex_oop );
13381   ins_cost(300);
13382   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13383             "JMP    $jump_target " %}
13384   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13385   ins_encode( enc_pop_rdx,
13386               OpcP, RegOpc(jump_target) );
13387   ins_pipe( pipe_jmp );
13388 %}
13389 
// Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler.  No code is emitted.
13393 instruct CreateException( eAXRegP ex_oop )
13394 %{
13395   match(Set ex_oop (CreateEx));
13396 
13397   size(0);
13398   // use the following format syntax
13399   format %{ "# exception oop is in EAX; no code emitted" %}
13400   ins_encode();
13401   ins_pipe( empty );
13402 %}
13403 
13404 
13405 // Rethrow exception:
13406 // The exception oop will come in the first argument position.
13407 // Then JUMP (not call) to the rethrow stub code.
13408 instruct RethrowException()
13409 %{
13410   match(Rethrow);
13411 
13412   // use the following format syntax
13413   format %{ "JMP    rethrow_stub" %}
13414   ins_encode(enc_rethrow);
13415   ins_pipe( pipe_jmp );
13416 %}
13417 
13418 // inlined locking and unlocking
13419 
13420 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13421   predicate(Compile::current()->use_rtm());
13422   match(Set cr (FastLock object box));
13423   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13424   ins_cost(300);
13425   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13426   ins_encode %{
13427     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13428                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13429                  _counters, _rtm_counters, _stack_rtm_counters,
13430                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13431                  true, ra_->C->profile_rtm());
13432   %}
13433   ins_pipe(pipe_slow);
13434 %}
13435 
13436 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13437   predicate(!Compile::current()->use_rtm());
13438   match(Set cr (FastLock object box));
13439   effect(TEMP tmp, TEMP scr, USE_KILL box);
13440   ins_cost(300);
13441   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13442   ins_encode %{
13443     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13444                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13445   %}
13446   ins_pipe(pipe_slow);
13447 %}
13448 
13449 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13450   match(Set cr (FastUnlock object box));
13451   effect(TEMP tmp, USE_KILL box);
13452   ins_cost(300);
13453   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13454   ins_encode %{
13455     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13456   %}
13457   ins_pipe(pipe_slow);
13458 %}
13459 
13460 
13461 
13462 // ============================================================================
13463 // Safepoint Instruction
13464 instruct safePoint_poll(eFlagsReg cr) %{
13465   predicate(SafepointMechanism::uses_global_page_poll());
13466   match(SafePoint);
13467   effect(KILL cr);
13468 
13469   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13470   // On SPARC that might be acceptable as we can generate the address with
13471   // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on index 0 of the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It would be better to change the polling address
  // to offset 0 of the last cache line in the polling page.
13476 
13477   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13478   ins_cost(125);
  size(6);
13480   ins_encode( Safepoint_Poll() );
13481   ins_pipe( ialu_reg_mem );
13482 %}
13483 
13484 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13485   predicate(SafepointMechanism::uses_thread_local_poll());
13486   match(SafePoint poll);
13487   effect(KILL cr, USE poll);
13488 
13489   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13490   ins_cost(125);
13491   // EBP would need size(3)
13492   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13493   ins_encode %{
13494     __ relocate(relocInfo::poll_type);
13495     address pre_pc = __ pc();
13496     __ testl(rax, Address($poll$$Register, 0));
13497     address post_pc = __ pc();
13498     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13499   %}
13500   ins_pipe(ialu_reg_mem);
13501 %}
13502 
13503 
13504 // ============================================================================
13505 // This name is KNOWN by the ADLC and cannot be changed.
13506 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13507 // for this guy.
13508 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13509   match(Set dst (ThreadLocal));
13510   effect(DEF dst, KILL cr);
13511 
13512   format %{ "MOV    $dst, Thread::current()" %}
13513   ins_encode %{
13514     Register dstReg = as_Register($dst$$reg);
13515     __ get_thread(dstReg);
13516   %}
13517   ins_pipe( ialu_reg_fat );
13518 %}
13519 
13520 
13521 
13522 //----------PEEPHOLE RULES-----------------------------------------------------
13523 // These must follow all instruction definitions as they use the names
13524 // defined in the instructions definitions.
13525 //
13526 // peepmatch ( root_instr_name [preceding_instruction]* );
13527 //
13528 // peepconstraint %{
13529 // (instruction_number.operand_name relational_op instruction_number.operand_name
13530 //  [, ...] );
13531 // // instruction numbers are zero-based using left to right order in peepmatch
13532 //
13533 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13534 // // provide an instruction_number.operand_name for each operand that appears
13535 // // in the replacement instruction's match rule
13536 //
13537 // ---------VM FLAGS---------------------------------------------------------
13538 //
13539 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13540 //
13541 // Each peephole rule is given an identifying number starting with zero and
13542 // increasing by one in the order seen by the parser.  An individual peephole
13543 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13544 // on the command-line.
13545 //
13546 // ---------CURRENT LIMITATIONS----------------------------------------------
13547 //
13548 // Only match adjacent instructions in same basic block
13549 // Only equality constraints
13550 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13551 // Only one replacement instruction
13552 //
13553 // ---------EXAMPLE----------------------------------------------------------
13554 //
13555 // // pertinent parts of existing instructions in architecture description
13556 // instruct movI(rRegI dst, rRegI src) %{
13557 //   match(Set dst (CopyI src));
13558 // %}
13559 //
13560 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13561 //   match(Set dst (AddI dst src));
13562 //   effect(KILL cr);
13563 // %}
13564 //
13565 // // Change (inc mov) to lea
13566 // peephole %{
//   // increment preceded by register-register move
13568 //   peepmatch ( incI_eReg movI );
13569 //   // require that the destination register of the increment
13570 //   // match the destination register of the move
13571 //   peepconstraint ( 0.dst == 1.dst );
13572 //   // construct a replacement instruction that sets
13573 //   // the destination to ( move's source register + one )
13574 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13575 // %}
13576 //
13577 // Implementation no longer uses movX instructions since
13578 // machine-independent system no longer uses CopyX nodes.
13579 //
13580 // peephole %{
13581 //   peepmatch ( incI_eReg movI );
13582 //   peepconstraint ( 0.dst == 1.dst );
13583 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13584 // %}
13585 //
13586 // peephole %{
13587 //   peepmatch ( decI_eReg movI );
13588 //   peepconstraint ( 0.dst == 1.dst );
13589 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13590 // %}
13591 //
13592 // peephole %{
13593 //   peepmatch ( addI_eReg_imm movI );
13594 //   peepconstraint ( 0.dst == 1.dst );
13595 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13596 // %}
13597 //
13598 // peephole %{
13599 //   peepmatch ( addP_eReg_imm movP );
13600 //   peepconstraint ( 0.dst == 1.dst );
13601 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13602 // %}
13603 
13604 // // Change load of spilled value to only a spill
13605 // instruct storeI(memory mem, rRegI src) %{
13606 //   match(Set mem (StoreI mem src));
13607 // %}
13608 //
13609 // instruct loadI(rRegI dst, memory mem) %{
13610 //   match(Set dst (LoadI mem));
13611 // %}
13612 //
13613 peephole %{
13614   peepmatch ( loadI storeI );
13615   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13616   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13617 %}
13618 
13619 //----------SMARTSPILL RULES---------------------------------------------------
13620 // These must follow all instruction definitions as they use the names
13621 // defined in the instructions definitions.