1 //
   2 // Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
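     // For example, EAX is defined below with encoding 0 and EBP with encoding 5; these are the
     // hardware register numbers that the emit helpers in the source block place into ModRM/SIB bytes.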
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code, then SOE was
  64 // turned off in Java code due to the frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned on as SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Ok, so here's the trick: FPR1 is really st(0), except in the midst of
  82 // emitting assembly for a machnode.  During emission the FPU stack is
  83 // pushed, making FPR1 == st(1) temporarily.  However, at any safepoint the
  84 // stack will not have this element, so FPR1 == st(0) from the oopMap
  85 // viewpoint.  This same numbering weirdness forces the instruction encoding
  86 // to play games with the register encode to correct for the 0/1 issue.  See
  87 // MachSpillCopyNode::implementation, where it does flt->flt moves, for an
  88 // example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
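     // With the hardware encodings above (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5, EDI=7) this maps
     // EAX to EDX, ECX to EBX, and EBP to EDI, matching the long pairs EDX:EAX, EBX:ECX, and
     // EDI:EBP noted in the register definition block.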
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
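     // For example, AbsF can be implemented as ANDPS with *float_signmask_pool (clearing the
     // sign bits) and NegF as XORPS with *float_signflip_pool (flipping them); the double_*
     // pools do the same for the 64-bit lanes.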
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 has absolute addressing it doesn't, except when thread-local polling is in use.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
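     // For example, emit_rm(cbuf, 0x3, dst, src) produces the mod=11 (register-to-register)
     // ModRM byte used with opcode 0x8B (MOV reg,reg) in encode_Copy and impl_mov_helper below.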
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
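     // For example, with rm_field == 0x0 and disp == 8 this emits: opcode, 0x44 (ModRM: mod=01,
     // r/m=ESP), 0x24 (SIB: base=ESP, no index), 0x08; displacements outside [-128, 127] take
     // the 32-bit form instead.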
 414 
 415 // Emit a register-memory operand (emit_reg_mem): ModRM/SIB/displacement for [base + index*scale + displace].
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
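     // For example, encode_RegMem(cbuf, reg, EAX_enc, 0x4, 0, 16, relocInfo::none) takes the
     // no-SIB path above and emits a mod=01 ModRM with base EAX followed by the 8-bit
     // displacement 16.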
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
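     // e.g., encode_Copy(cbuf, EAX_enc, ECX_enc) emits 8B C1 (MOV EAX,ECX), while a copy of a
     // register onto itself emits nothing.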
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
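     // Net effect: dst holds the canonical three-way result, -1 if less-than or unordered (NaN),
     // 0 if equal, and +1 if greater-than.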
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because code that uses the
 628     // constant table might be emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for the return address and EBP.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for the return address and EBP.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
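     // e.g., a register bound to ECX classifies as rc_int, an x87 register such as FPR2L as
     // rc_float (only expected when UseSSE < 2), an XMM register as rc_xmm, and a spill slot
     // as rc_stack.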
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
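       // Size: one opcode byte plus ModRM and SIB, plus 0, 1, or 4 displacement bytes.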
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
 789     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
 790     //                          since it maps more cases to a single-byte displacement.
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }   
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
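       // The emission below mirrors the size calculation above: VecS/VecD copies go through
       // PUSH/POP of stack slots, while the wider vectors bounce through xmm0, which is saved
       // to and restored from memory just below ESP.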
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popl    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
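  // LEA reg,[ESP+offset] needs opcode + ModRM + SIB = 3 bytes, plus a
  // 4-byte disp32 when offset >= 128 (7 total) or a 1-byte disp8 (4 total).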
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
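  // CMP EAX,[ECX+disp8] is 3 bytes and JNE rel32 is 6 bytes; add the
  // 2 NOPs (11 bytes) or 3 NOPs (12 bytes) emitted above for alignment.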
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// This is UltraSparc-specific; true just means we have a fast l2f conversion.
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
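  // For example: a 2-byte short JMP whose target is the byte immediately
  // following it is passed offset == 2 here, which becomes an encoded
  // displacement of 0 after the adjustment below.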
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Needs 2 CMOV's for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
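  // Walk the operands, accumulating their leaf (edge) counts, until we
  // reach the operand that owns input edge 'idx' -- the memory operand.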
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
    // These operands never use EBP as the address register (register class
    // is { EDX, EBX, EDI, ESI }), so no transformation is needed.
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value is known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : rax,: dividend                         min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: rax,: quotient  (= rax, idiv reg)       min_int
1670     //         rdx: remainder (= rax, irem reg)       0
1671     //
    //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         rdx,edx
1677     //  83 F9 FF             cmp         rcx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
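  // For example: with a $primary of 0x81 (the group-1 ALU opcode taking a
  // 32-bit immediate), OR-ing in 0x02 yields 0x83, the form that takes a
  // sign-extended 8-bit immediate.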
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
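    // When the low word is zero, a 2-byte XOR dst,dst is emitted instead
    // of the 5-byte MOV dst,imm32.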
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     if( os::is_MP() )
2091       emit_opcode(cbuf,0xF0);         // [Lock]
2092   %}
2093 
2094   // Cmp-xchg long value.
2095   // Note: we need to swap rbx, and rcx before and after the
2096   //       cmpxchg8 instruction because the instruction uses
2097   //       rcx as the high order word of the new value to store but
2098   //       our register encoding uses rbx,.
2099   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2100 
2101     // XCHG  rbx,ecx
2102     emit_opcode(cbuf,0x87);
2103     emit_opcode(cbuf,0xD9);
2104     // [Lock]
2105     if( os::is_MP() )
2106       emit_opcode(cbuf,0xF0);
2107     // CMPXCHG8 [Eptr]
2108     emit_opcode(cbuf,0x0F);
2109     emit_opcode(cbuf,0xC7);
2110     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2111     // XCHG  rbx,ecx
2112     emit_opcode(cbuf,0x87);
2113     emit_opcode(cbuf,0xD9);
2114   %}
2115 
2116   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2117     // [Lock]
2118     if( os::is_MP() )
2119       emit_opcode(cbuf,0xF0);
2120 
2121     // CMPXCHG [Eptr]
2122     emit_opcode(cbuf,0x0F);
2123     emit_opcode(cbuf,0xB1);
2124     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2125   %}
2126 
2127   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2128     // [Lock]
2129     if( os::is_MP() )
2130       emit_opcode(cbuf,0xF0);
2131 
2132     // CMPXCHGB [Eptr]
2133     emit_opcode(cbuf,0x0F);
2134     emit_opcode(cbuf,0xB0);
2135     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2136   %}
2137 
2138   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2139     // [Lock]
2140     if( os::is_MP() )
2141       emit_opcode(cbuf,0xF0);
2142 
2143     // 16-bit mode
2144     emit_opcode(cbuf, 0x66);
2145 
2146     // CMPXCHGW [Eptr]
2147     emit_opcode(cbuf,0x0F);
2148     emit_opcode(cbuf,0xB1);
2149     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2150   %}
2151 
2152   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
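    // Materialize the flags as 0/1: the 5-byte short-jump distance skips the
    // second MOV (B8+r plus a 4-byte immediate), leaving res == 0 when the
    // flags say not-equal and res == 1 otherwise.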
2153     int res_encoding = $res$$reg;
2154 
2155     // MOV  res,0
2156     emit_opcode( cbuf, 0xB8 + res_encoding);
2157     emit_d32( cbuf, 0 );
2158     // JNE,s  fail
2159     emit_opcode(cbuf,0x75);
2160     emit_d8(cbuf, 5 );
2161     // MOV  res,1
2162     emit_opcode( cbuf, 0xB8 + res_encoding);
2163     emit_d32( cbuf, 1 );
2164     // fail:
2165   %}
2166 
2167   enc_class set_instruction_start( ) %{
2168     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2169   %}
2170 
2171   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2172     int reg_encoding = $ereg$$reg;
2173     int base  = $mem$$base;
2174     int index = $mem$$index;
2175     int scale = $mem$$scale;
2176     int displace = $mem$$disp;
2177     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2178     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2179   %}
2180 
2181   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2182     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2183     int base  = $mem$$base;
2184     int index = $mem$$index;
2185     int scale = $mem$$scale;
2186     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2187     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2188     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2189   %}
2190 
2191   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
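    // $tertiary selects the double-shift opcode byte: 0xA4 is SHLD (left
    // shift, the high half receives bits from the low half); otherwise
    // (SHRD, for right shifts) the roles of the two halves are swapped.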
2192     int r1, r2;
2193     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2194     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2195     emit_opcode(cbuf,0x0F);
2196     emit_opcode(cbuf,$tertiary);
2197     emit_rm(cbuf, 0x3, r1, r2);
2198     emit_d8(cbuf,$cnt$$constant);
2199     emit_d8(cbuf,$primary);
2200     emit_rm(cbuf, 0x3, $secondary, r1);
2201     emit_d8(cbuf,$cnt$$constant);
2202   %}
2203 
2204   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2205     emit_opcode( cbuf, 0x8B ); // Move
2206     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2207     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2208       emit_d8(cbuf,$primary);
2209       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2210       emit_d8(cbuf,$cnt$$constant-32);
2211     }
2212     emit_d8(cbuf,$primary);
2213     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2214     emit_d8(cbuf,31);
2215   %}
2216 
2217   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2218     int r1, r2;
2219     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2220     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2221 
2222     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2223     emit_rm(cbuf, 0x3, r1, r2);
2224     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2225       emit_opcode(cbuf,$primary);
2226       emit_rm(cbuf, 0x3, $secondary, r1);
2227       emit_d8(cbuf,$cnt$$constant-32);
2228     }
2229     emit_opcode(cbuf,0x33);  // XOR r2,r2
2230     emit_rm(cbuf, 0x3, r2, r2);
2231   %}
2232 
2233   // Clone of RegMem but accepts an extra parameter to access each
2234   // half of a double in memory; it never needs relocation info.
2235   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2236     emit_opcode(cbuf,$opcode$$constant);
2237     int reg_encoding = $rm_reg$$reg;
2238     int base     = $mem$$base;
2239     int index    = $mem$$index;
2240     int scale    = $mem$$scale;
2241     int displace = $mem$$disp + $disp_for_half$$constant;
2242     relocInfo::relocType disp_reloc = relocInfo::none;
2243     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2244   %}
2245 
2246   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2247   //
2248   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2249   // and it never needs relocation information.
2250   // Frequently used to move data between FPU's Stack Top and memory.
2251   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2252     int rm_byte_opcode = $rm_opcode$$constant;
2253     int base     = $mem$$base;
2254     int index    = $mem$$index;
2255     int scale    = $mem$$scale;
2256     int displace = $mem$$disp;
2257     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2258     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2259   %}
2260 
2261   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2262     int rm_byte_opcode = $rm_opcode$$constant;
2263     int base     = $mem$$base;
2264     int index    = $mem$$index;
2265     int scale    = $mem$$scale;
2266     int displace = $mem$$disp;
2267     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2268     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2269   %}
2270 
2271   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2272     int reg_encoding = $dst$$reg;
2273     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2274     int index        = 0x04;            // 0x04 indicates no index
2275     int scale        = 0x00;            // 0x00 indicates no scale
2276     int displace     = $src1$$constant; // 0x00 indicates no displacement
2277     relocInfo::relocType disp_reloc = relocInfo::none;
2278     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2279   %}
2280 
2281   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
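    // The 2-byte jump distance skips the MOV (8B /r) below, so dst is left
    // unchanged when it is already the smaller value (max_enc is analogous).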
2282     // Compare dst,src
2283     emit_opcode(cbuf,0x3B);
2284     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2285     // jmp dst < src around move
2286     emit_opcode(cbuf,0x7C);
2287     emit_d8(cbuf,2);
2288     // move dst,src
2289     emit_opcode(cbuf,0x8B);
2290     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2291   %}
2292 
2293   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2294     // Compare dst,src
2295     emit_opcode(cbuf,0x3B);
2296     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2297     // jmp dst > src around move
2298     emit_opcode(cbuf,0x7F);
2299     emit_d8(cbuf,2);
2300     // move dst,src
2301     emit_opcode(cbuf,0x8B);
2302     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2303   %}
2304 
2305   enc_class enc_FPR_store(memory mem, regDPR src) %{
2306     // If src is FPR1, we can just FST to store it.
2307     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2308     int reg_encoding = 0x2; // Just store
2309     int base  = $mem$$base;
2310     int index = $mem$$index;
2311     int scale = $mem$$scale;
2312     int displace = $mem$$disp;
2313     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2314     if( $src$$reg != FPR1L_enc ) {
2315       reg_encoding = 0x3;  // Store & pop
2316       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2317       emit_d8( cbuf, 0xC0-1+$src$$reg );
2318     }
2319     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2320     emit_opcode(cbuf,$primary);
2321     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2322   %}
2323 
2324   enc_class neg_reg(rRegI dst) %{
2325     // NEG $dst
2326     emit_opcode(cbuf,0xF7);
2327     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2328   %}
2329 
2330   enc_class setLT_reg(eCXRegI dst) %{
2331     // SETLT $dst
2332     emit_opcode(cbuf,0x0F);
2333     emit_opcode(cbuf,0x9C);
2334     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2335   %}
2336 
2337   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2338     int tmpReg = $tmp$$reg;
2339 
2340     // SUB $p,$q
2341     emit_opcode(cbuf,0x2B);
2342     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2343     // SBB $tmp,$tmp
2344     emit_opcode(cbuf,0x1B);
2345     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2346     // AND $tmp,$y
2347     emit_opcode(cbuf,0x23);
2348     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2349     // ADD $p,$tmp
2350     emit_opcode(cbuf,0x03);
2351     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2352   %}
2353 
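  // Long shift-left by a variable count in ECX (0..63).  If bit 5 of the
  // count is set (count >= 32), the low word is first moved into the high
  // word and the low word cleared; the SHLD/SHL pair then shifts by the
  // remaining (count & 31), which the hardware masks automatically.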
2354   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2355     // TEST shift,32
2356     emit_opcode(cbuf,0xF7);
2357     emit_rm(cbuf, 0x3, 0, ECX_enc);
2358     emit_d32(cbuf,0x20);
2359     // JEQ,s small
2360     emit_opcode(cbuf, 0x74);
2361     emit_d8(cbuf, 0x04);
2362     // MOV    $dst.hi,$dst.lo
2363     emit_opcode( cbuf, 0x8B );
2364     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2365     // CLR    $dst.lo
2366     emit_opcode(cbuf, 0x33);
2367     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2368 // small:
2369     // SHLD   $dst.hi,$dst.lo,$shift
2370     emit_opcode(cbuf,0x0F);
2371     emit_opcode(cbuf,0xA5);
2372     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2373     // SHL    $dst.lo,$shift"
2374     emit_opcode(cbuf,0xD3);
2375     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2376   %}
2377 
2378   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2379     // TEST shift,32
2380     emit_opcode(cbuf,0xF7);
2381     emit_rm(cbuf, 0x3, 0, ECX_enc);
2382     emit_d32(cbuf,0x20);
2383     // JEQ,s small
2384     emit_opcode(cbuf, 0x74);
2385     emit_d8(cbuf, 0x04);
2386     // MOV    $dst.lo,$dst.hi
2387     emit_opcode( cbuf, 0x8B );
2388     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2389     // CLR    $dst.hi
2390     emit_opcode(cbuf, 0x33);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2392 // small:
2393     // SHRD   $dst.lo,$dst.hi,$shift
2394     emit_opcode(cbuf,0x0F);
2395     emit_opcode(cbuf,0xAD);
2396     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2397     // SHR    $dst.hi,$shift"
2398     emit_opcode(cbuf,0xD3);
2399     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2400   %}
2401 
2402   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2403     // TEST shift,32
2404     emit_opcode(cbuf,0xF7);
2405     emit_rm(cbuf, 0x3, 0, ECX_enc);
2406     emit_d32(cbuf,0x20);
2407     // JEQ,s small
2408     emit_opcode(cbuf, 0x74);
2409     emit_d8(cbuf, 0x05);
2410     // MOV    $dst.lo,$dst.hi
2411     emit_opcode( cbuf, 0x8B );
2412     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2413     // SAR    $dst.hi,31
2414     emit_opcode(cbuf, 0xC1);
2415     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2416     emit_d8(cbuf, 0x1F );
2417 // small:
2418     // SHRD   $dst.lo,$dst.hi,$shift
2419     emit_opcode(cbuf,0x0F);
2420     emit_opcode(cbuf,0xAD);
2421     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2422     // SAR    $dst.hi,$shift"
2423     emit_opcode(cbuf,0xD3);
2424     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2425   %}
2426 
2427 
2428   // ----------------- Encodings for floating point unit -----------------
2429   // May leave result in FPU-TOS or FPU reg depending on opcodes
2430   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2431     $$$emit8$primary;
2432     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2433   %}
2434 
2435   // Pop argument in FPR0 with FSTP ST(0)
2436   enc_class PopFPU() %{
2437     emit_opcode( cbuf, 0xDD );
2438     emit_d8( cbuf, 0xD8 );
2439   %}
2440 
2441   // !!!!! equivalent to Pop_Reg_F
2442   enc_class Pop_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2444     emit_d8( cbuf, 0xD8+$dst$$reg );
2445   %}
2446 
2447   enc_class Push_Reg_DPR( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xD9 );
2449     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2450   %}
2451 
2452   enc_class strictfp_bias1( regDPR dst ) %{
2453     emit_opcode( cbuf, 0xDB );           // FLD m80real
2454     emit_opcode( cbuf, 0x2D );
2455     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2456     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2457     emit_opcode( cbuf, 0xC8+$dst$$reg );
2458   %}
2459 
2460   enc_class strictfp_bias2( regDPR dst ) %{
2461     emit_opcode( cbuf, 0xDB );           // FLD m80real
2462     emit_opcode( cbuf, 0x2D );
2463     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2464     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2465     emit_opcode( cbuf, 0xC8+$dst$$reg );
2466   %}
2467 
2468   // Special case for moving an integer register to a stack slot.
2469   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2471   %}
2472 
2473   // Special case for moving a register to a stack slot.
2474   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2475     // Opcode already emitted
2476     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2477     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2478     emit_d32(cbuf, $dst$$disp);   // Displacement
2479   %}
2480 
2481   // Push the integer in stackSlot 'src' onto FP-stack
2482   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2483     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2484   %}
2485 
2486   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2487   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2488     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2489   %}
2490 
2491   // Same as Pop_Mem_F except for opcode
2492   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2493   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2494     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2495   %}
2496 
2497   enc_class Pop_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2499     emit_d8( cbuf, 0xD8+$dst$$reg );
2500   %}
2501 
2502   enc_class Push_Reg_FPR( regFPR dst ) %{
2503     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2504     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2505   %}
2506 
2507   // Push FPU's float to a stack-slot, and pop FPU-stack
2508   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2509     int pop = 0x02;
2510     if ($src$$reg != FPR1L_enc) {
2511       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2512       emit_d8( cbuf, 0xC0-1+$src$$reg );
2513       pop = 0x03;
2514     }
2515     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2516   %}
2517 
2518   // Push FPU's double to a stack-slot, and pop FPU-stack
2519   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2520     int pop = 0x02;
2521     if ($src$$reg != FPR1L_enc) {
2522       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2523       emit_d8( cbuf, 0xC0-1+$src$$reg );
2524       pop = 0x03;
2525     }
2526     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2527   %}
2528 
2529   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2530   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2531     int pop = 0xD0 - 1; // -1 since we skip FLD
2532     if ($src$$reg != FPR1L_enc) {
2533       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2534       emit_d8( cbuf, 0xC0-1+$src$$reg );
2535       pop = 0xD8;
2536     }
2537     emit_opcode( cbuf, 0xDD );
2538     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2539   %}
2540 
2541 
2542   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2543     // load dst in FPR0
2544     emit_opcode( cbuf, 0xD9 );
2545     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2546     if ($src$$reg != FPR1L_enc) {
2547       // fincstp
2548       emit_opcode (cbuf, 0xD9);
2549       emit_opcode (cbuf, 0xF7);
2550       // swap src with FPR1:
2551       // FXCH FPR1 with src
2552       emit_opcode(cbuf, 0xD9);
2553       emit_d8(cbuf, 0xC8-1+$src$$reg );
2554       // fdecstp
2555       emit_opcode (cbuf, 0xD9);
2556       emit_opcode (cbuf, 0xF6);
2557     }
2558   %}
2559 
2560   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2561     MacroAssembler _masm(&cbuf);
2562     __ subptr(rsp, 8);
2563     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2564     __ fld_d(Address(rsp, 0));
2565     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2566     __ fld_d(Address(rsp, 0));
2567   %}
2568 
2569   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2570     MacroAssembler _masm(&cbuf);
2571     __ subptr(rsp, 4);
2572     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2573     __ fld_s(Address(rsp, 0));
2574     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2575     __ fld_s(Address(rsp, 0));
2576   %}
2577 
2578   enc_class Push_ResultD(regD dst) %{
2579     MacroAssembler _masm(&cbuf);
2580     __ fstp_d(Address(rsp, 0));
2581     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2582     __ addptr(rsp, 8);
2583   %}
2584 
2585   enc_class Push_ResultF(regF dst, immI d8) %{
2586     MacroAssembler _masm(&cbuf);
2587     __ fstp_s(Address(rsp, 0));
2588     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2589     __ addptr(rsp, $d8$$constant);
2590   %}
2591 
2592   enc_class Push_SrcD(regD src) %{
2593     MacroAssembler _masm(&cbuf);
2594     __ subptr(rsp, 8);
2595     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2596     __ fld_d(Address(rsp, 0));
2597   %}
2598 
2599   enc_class push_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ subptr(rsp, 8);
2602   %}
2603 
2604   enc_class pop_stack_temp_qword() %{
2605     MacroAssembler _masm(&cbuf);
2606     __ addptr(rsp, 8);
2607   %}
2608 
2609   enc_class push_xmm_to_fpr1(regD src) %{
2610     MacroAssembler _masm(&cbuf);
2611     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2612     __ fld_d(Address(rsp, 0));
2613   %}
2614 
2615   enc_class Push_Result_Mod_DPR( regDPR src) %{
2616     if ($src$$reg != FPR1L_enc) {
2617       // fincstp
2618       emit_opcode (cbuf, 0xD9);
2619       emit_opcode (cbuf, 0xF7);
2620       // FXCH FPR1 with src
2621       emit_opcode(cbuf, 0xD9);
2622       emit_d8(cbuf, 0xC8-1+$src$$reg );
2623       // fdecstp
2624       emit_opcode (cbuf, 0xD9);
2625       emit_opcode (cbuf, 0xF6);
2626     }
2627     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2628     // // FSTP   FPR$dst$$reg
2629     // emit_opcode( cbuf, 0xDD );
2630     // emit_d8( cbuf, 0xD8+$dst$$reg );
2631   %}
2632 
2633   enc_class fnstsw_sahf_skip_parity() %{
2634     // fnstsw ax
2635     emit_opcode( cbuf, 0xDF );
2636     emit_opcode( cbuf, 0xE0 );
2637     // sahf
2638     emit_opcode( cbuf, 0x9E );
2639     // jnp  ::skip
2640     emit_opcode( cbuf, 0x7B );
2641     emit_opcode( cbuf, 0x05 );
2642   %}
2643 
2644   enc_class emitModDPR() %{
2645     // fprem must be iterative
2646     // :: loop
2647     // fprem
2648     emit_opcode( cbuf, 0xD9 );
2649     emit_opcode( cbuf, 0xF8 );
2650     // wait
2651     emit_opcode( cbuf, 0x9b );
2652     // fnstsw ax
2653     emit_opcode( cbuf, 0xDF );
2654     emit_opcode( cbuf, 0xE0 );
2655     // sahf
2656     emit_opcode( cbuf, 0x9E );
2657     // jp  ::loop
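    // (rel32 = -12, branching back to the FPREM at the top of the loop)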
2658     emit_opcode( cbuf, 0x0F );
2659     emit_opcode( cbuf, 0x8A );
2660     emit_opcode( cbuf, 0xF4 );
2661     emit_opcode( cbuf, 0xFF );
2662     emit_opcode( cbuf, 0xFF );
2663     emit_opcode( cbuf, 0xFF );
2664   %}
2665 
2666   enc_class fpu_flags() %{
2667     // fnstsw_ax
2668     emit_opcode( cbuf, 0xDF);
2669     emit_opcode( cbuf, 0xE0);
2670     // test ax,0x0400
2671     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2672     emit_opcode( cbuf, 0xA9 );
2673     emit_d16   ( cbuf, 0x0400 );
2674     // // // This sequence works, but stalls for 12-16 cycles on PPro
2675     // // test rax,0x0400
2676     // emit_opcode( cbuf, 0xA9 );
2677     // emit_d32   ( cbuf, 0x00000400 );
2678     //
2679     // jz exit (no unordered comparison)
2680     emit_opcode( cbuf, 0x74 );
2681     emit_d8    ( cbuf, 0x02 );
2682     // mov ah,1 - treat as LT case (set carry flag)
2683     emit_opcode( cbuf, 0xB4 );
2684     emit_d8    ( cbuf, 0x01 );
2685     // sahf
2686     emit_opcode( cbuf, 0x9E);
2687   %}
2688 
2689   enc_class cmpF_P6_fixup() %{
2690     // Fixup the integer flags in case comparison involved a NaN
2691     //
2692     // JNP exit (no unordered comparison, P-flag is set by NaN)
2693     emit_opcode( cbuf, 0x7B );
2694     emit_d8    ( cbuf, 0x03 );
2695     // MOV AH,1 - treat as LT case (set carry flag)
2696     emit_opcode( cbuf, 0xB4 );
2697     emit_d8    ( cbuf, 0x01 );
2698     // SAHF
2699     emit_opcode( cbuf, 0x9E);
2700     // NOP     // target for branch to avoid branch to branch
2701     emit_opcode( cbuf, 0x90);
2702   %}
2703 
2704 //     fnstsw_ax();
2705 //     sahf();
2706 //     movl(dst, nan_result);
2707 //     jcc(Assembler::parity, exit);
2708 //     movl(dst, less_result);
2709 //     jcc(Assembler::below, exit);
2710 //     movl(dst, equal_result);
2711 //     jcc(Assembler::equal, exit);
2712 //     movl(dst, greater_result);
2713 
2714 // less_result     =  1;
2715 // greater_result  = -1;
2716 // equal_result    = 0;
2717 // nan_result      = -1;
2718 
2719   enc_class CmpF_Result(rRegI dst) %{
2720     // fnstsw_ax();
2721     emit_opcode( cbuf, 0xDF);
2722     emit_opcode( cbuf, 0xE0);
2723     // sahf
2724     emit_opcode( cbuf, 0x9E);
2725     // movl(dst, nan_result);
2726     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2727     emit_d32( cbuf, -1 );
2728     // jcc(Assembler::parity, exit);
2729     emit_opcode( cbuf, 0x7A );
2730     emit_d8    ( cbuf, 0x13 );
2731     // movl(dst, less_result);
2732     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2733     emit_d32( cbuf, -1 );
2734     // jcc(Assembler::below, exit);
2735     emit_opcode( cbuf, 0x72 );
2736     emit_d8    ( cbuf, 0x0C );
2737     // movl(dst, equal_result);
2738     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2739     emit_d32( cbuf, 0 );
2740     // jcc(Assembler::equal, exit);
2741     emit_opcode( cbuf, 0x74 );
2742     emit_d8    ( cbuf, 0x05 );
2743     // movl(dst, greater_result);
2744     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2745     emit_d32( cbuf, 1 );
2746   %}
2747 
2748 
2749   // Compare the longs and set flags
2750   // BROKEN!  Do Not use as-is
2751   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2752     // CMP    $src1.hi,$src2.hi
2753     emit_opcode( cbuf, 0x3B );
2754     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2755     // JNE,s  done
2756     emit_opcode(cbuf,0x75);
2757     emit_d8(cbuf, 2 );
2758     // CMP    $src1.lo,$src2.lo
2759     emit_opcode( cbuf, 0x3B );
2760     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2761 // done:
2762   %}
2763 
2764   enc_class convert_int_long( regL dst, rRegI src ) %{
2765     // mov $dst.lo,$src
2766     int dst_encoding = $dst$$reg;
2767     int src_encoding = $src$$reg;
2768     encode_Copy( cbuf, dst_encoding  , src_encoding );
2769     // mov $dst.hi,$src
2770     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2771     // sar $dst.hi,31
2772     emit_opcode( cbuf, 0xC1 );
2773     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2774     emit_d8(cbuf, 0x1F );
2775   %}
2776 
2777   enc_class convert_long_double( eRegL src ) %{
2778     // push $src.hi
2779     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2780     // push $src.lo
2781     emit_opcode(cbuf, 0x50+$src$$reg  );
2782     // fild 64-bits at [SP]
2783     emit_opcode(cbuf,0xdf);
2784     emit_d8(cbuf, 0x6C);
2785     emit_d8(cbuf, 0x24);
2786     emit_d8(cbuf, 0x00);
2787     // pop stack
2788     emit_opcode(cbuf, 0x83); // add  SP, #8
2789     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2790     emit_d8(cbuf, 0x8);
2791   %}
2792 
2793   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2794     // IMUL   EDX:EAX,$src1
2795     emit_opcode( cbuf, 0xF7 );
2796     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2797     // SAR    EDX,$cnt-32
2798     int shift_count = ((int)$cnt$$constant) - 32;
2799     if (shift_count > 0) {
2800       emit_opcode(cbuf, 0xC1);
2801       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2802       emit_d8(cbuf, shift_count);
2803     }
2804   %}
2805 
  // This version doesn't have the trailing ADD ESP, 8 to pop the operands
2807   enc_class convert_long_double2( eRegL src ) %{
2808     // push $src.hi
2809     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2810     // push $src.lo
2811     emit_opcode(cbuf, 0x50+$src$$reg  );
2812     // fild 64-bits at [SP]
2813     emit_opcode(cbuf,0xdf);
2814     emit_d8(cbuf, 0x6C);
2815     emit_d8(cbuf, 0x24);
2816     emit_d8(cbuf, 0x00);
2817   %}
2818 
2819   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2820     // Basic idea: long = (long)int * (long)int
2821     // IMUL EDX:EAX, src
2822     emit_opcode( cbuf, 0xF7 );
2823     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2824   %}
2825 
2826   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2827     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2828     // MUL EDX:EAX, src
2829     emit_opcode( cbuf, 0xF7 );
2830     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2831   %}
2832 
2833   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2834     // Basic idea: lo(result) = lo(x_lo * y_lo)
2835     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2836     // MOV    $tmp,$src.lo
2837     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2838     // IMUL   $tmp,EDX
2839     emit_opcode( cbuf, 0x0F );
2840     emit_opcode( cbuf, 0xAF );
2841     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2842     // MOV    EDX,$src.hi
2843     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2844     // IMUL   EDX,EAX
2845     emit_opcode( cbuf, 0x0F );
2846     emit_opcode( cbuf, 0xAF );
2847     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2848     // ADD    $tmp,EDX
2849     emit_opcode( cbuf, 0x03 );
2850     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2851     // MUL   EDX:EAX,$src.lo
2852     emit_opcode( cbuf, 0xF7 );
2853     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2854     // ADD    EDX,ESI
2855     emit_opcode( cbuf, 0x03 );
2856     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2857   %}
2858 
2859   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2860     // Basic idea: lo(result) = lo(src * y_lo)
2861     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2862     // IMUL   $tmp,EDX,$src
2863     emit_opcode( cbuf, 0x6B );
2864     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2865     emit_d8( cbuf, (int)$src$$constant );
2866     // MOV    EDX,$src
2867     emit_opcode(cbuf, 0xB8 + EDX_enc);
2868     emit_d32( cbuf, (int)$src$$constant );
2869     // MUL   EDX:EAX,EDX
2870     emit_opcode( cbuf, 0xF7 );
2871     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2872     // ADD    EDX,ESI
2873     emit_opcode( cbuf, 0x03 );
2874     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2875   %}
2876 
2877   enc_class long_div( eRegL src1, eRegL src2 ) %{
2878     // PUSH src1.hi
2879     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2880     // PUSH src1.lo
2881     emit_opcode(cbuf,               0x50+$src1$$reg  );
2882     // PUSH src2.hi
2883     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2884     // PUSH src2.lo
2885     emit_opcode(cbuf,               0x50+$src2$$reg  );
2886     // CALL directly to the runtime
2887     cbuf.set_insts_mark();
2888     emit_opcode(cbuf,0xE8);       // Call into runtime
2889     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2890     // Restore stack
2891     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2892     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2893     emit_d8(cbuf, 4*4);
2894   %}
2895 
2896   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2897     // PUSH src1.hi
2898     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2899     // PUSH src1.lo
2900     emit_opcode(cbuf,               0x50+$src1$$reg  );
2901     // PUSH src2.hi
2902     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2903     // PUSH src2.lo
2904     emit_opcode(cbuf,               0x50+$src2$$reg  );
2905     // CALL directly to the runtime
2906     cbuf.set_insts_mark();
2907     emit_opcode(cbuf,0xE8);       // Call into runtime
2908     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2909     // Restore stack
2910     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2911     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2912     emit_d8(cbuf, 4*4);
2913   %}
2914 
2915   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2916     // MOV   $tmp,$src.lo
2917     emit_opcode(cbuf, 0x8B);
2918     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2919     // OR    $tmp,$src.hi
2920     emit_opcode(cbuf, 0x0B);
2921     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2922   %}
2923 
2924   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2925     // CMP    $src1.lo,$src2.lo
2926     emit_opcode( cbuf, 0x3B );
2927     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2928     // JNE,s  skip
2929     emit_cc(cbuf, 0x70, 0x5);
2930     emit_d8(cbuf,2);
2931     // CMP    $src1.hi,$src2.hi
2932     emit_opcode( cbuf, 0x3B );
2933     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2934   %}
2935 
2936   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2937     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2938     emit_opcode( cbuf, 0x3B );
2939     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2940     // MOV    $tmp,$src1.hi
2941     emit_opcode( cbuf, 0x8B );
2942     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2943     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2944     emit_opcode( cbuf, 0x1B );
2945     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2946   %}
2947 
2948   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2949     // XOR    $tmp,$tmp
2950     emit_opcode(cbuf,0x33);  // XOR
2951     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2952     // CMP    $tmp,$src.lo
2953     emit_opcode( cbuf, 0x3B );
2954     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2955     // SBB    $tmp,$src.hi
2956     emit_opcode( cbuf, 0x1B );
2957     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2958   %}
2959 
2960  // Sniff, sniff... smells like Gnu Superoptimizer
2961   enc_class neg_long( eRegL dst ) %{
2962     emit_opcode(cbuf,0xF7);    // NEG hi
2963     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2964     emit_opcode(cbuf,0xF7);    // NEG lo
2965     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2966     emit_opcode(cbuf,0x83);    // SBB hi,0
2967     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2968     emit_d8    (cbuf,0 );
2969   %}
2970 
2971   enc_class enc_pop_rdx() %{
2972     emit_opcode(cbuf,0x5A);
2973   %}
2974 
2975   enc_class enc_rethrow() %{
2976     cbuf.set_insts_mark();
2977     emit_opcode(cbuf, 0xE9);        // jmp    entry
2978     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2979                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2980   %}
2981 
2982 
2983   // Convert a double to an int.  Java semantics require we do complex
2984   // manglelations in the corner cases.  So we set the rounding mode to
2985   // 'zero', store the darned double down as an int, and reset the
2986   // rounding mode to 'nearest'.  The hardware throws an exception which
2987   // patches up the correct value directly to the stack.
2988   enc_class DPR2I_encoding( regDPR src ) %{
2989     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2992     // However, I2C adapters and other float-stack manglers leave pending
2993     // invalid-op exceptions hanging.  We would have to clear them before
2994     // enabling them and that is more expensive than just testing for the
2995     // invalid value Intel stores down in the corner cases.
2996     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2997     emit_opcode(cbuf,0x2D);
2998     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2999     // Allocate a word
3000     emit_opcode(cbuf,0x83);            // SUB ESP,4
3001     emit_opcode(cbuf,0xEC);
3002     emit_d8(cbuf,0x04);
3003     // Encoding assumes a double has been pushed into FPR0.
3004     // Store down the double as an int, popping the FPU stack
3005     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3006     emit_opcode(cbuf,0x1C);
3007     emit_d8(cbuf,0x24);
3008     // Restore the rounding mode; mask the exception
3009     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3010     emit_opcode(cbuf,0x2D);
3011     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3012         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3013         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3014 
3015     // Load the converted int; adjust CPU stack
3016     emit_opcode(cbuf,0x58);       // POP EAX
3017     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3018     emit_d32   (cbuf,0x80000000); //         0x80000000
3019     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3020     emit_d8    (cbuf,0x07);       // Size of slow_call
3021     // Push src onto stack slow-path
3022     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3023     emit_d8    (cbuf,0xC0-1+$src$$reg );
3024     // CALL directly to the runtime
3025     cbuf.set_insts_mark();
3026     emit_opcode(cbuf,0xE8);       // Call into runtime
3027     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3028     // Carry on here...
3029   %}
3030 
3031   enc_class DPR2L_encoding( regDPR src ) %{
3032     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3033     emit_opcode(cbuf,0x2D);
3034     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
3036     emit_opcode(cbuf,0x83);            // SUB ESP,8
3037     emit_opcode(cbuf,0xEC);
3038     emit_d8(cbuf,0x08);
3039     // Encoding assumes a double has been pushed into FPR0.
3040     // Store down the double as a long, popping the FPU stack
3041     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3042     emit_opcode(cbuf,0x3C);
3043     emit_d8(cbuf,0x24);
3044     // Restore the rounding mode; mask the exception
3045     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3046     emit_opcode(cbuf,0x2D);
3047     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3048         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3049         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3050 
    // Load the converted long; adjust CPU stack
3052     emit_opcode(cbuf,0x58);       // POP EAX
3053     emit_opcode(cbuf,0x5A);       // POP EDX
3054     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3055     emit_d8    (cbuf,0xFA);       // rdx
3056     emit_d32   (cbuf,0x80000000); //         0x80000000
3057     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3058     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3059     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3060     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3061     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3062     emit_d8    (cbuf,0x07);       // Size of slow_call
3063     // Push src onto stack slow-path
3064     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3065     emit_d8    (cbuf,0xC0-1+$src$$reg );
3066     // CALL directly to the runtime
3067     cbuf.set_insts_mark();
3068     emit_opcode(cbuf,0xE8);       // Call into runtime
3069     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3070     // Carry on here...
3071   %}
3072 
3073   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3074     // Operand was loaded from memory into fp ST (stack top)
3075     // FMUL   ST,$src  /* D8 C8+i */
3076     emit_opcode(cbuf, 0xD8);
3077     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3078   %}
3079 
3080   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3084     //could use FADDP  src2,fpST  /* DE C0+i */
3085   %}
3086 
3087   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3088     // FADDP  src2,ST  /* DE C0+i */
3089     emit_opcode(cbuf, 0xDE);
3090     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3091   %}
3092 
3093   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3094     // Operand has been loaded into fp ST (stack top)
3095       // FSUB   ST,$src1
3096       emit_opcode(cbuf, 0xD8);
3097       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3098 
3099       // FDIV
3100       emit_opcode(cbuf, 0xD8);
3101       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3102   %}
3103 
3104   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3105     // Operand was loaded from memory into fp ST (stack top)
3106     // FADD   ST,$src  /* D8 C0+i */
3107     emit_opcode(cbuf, 0xD8);
3108     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3109 
    // FMUL  ST,src2  /* D8 C8+i */
3111     emit_opcode(cbuf, 0xD8);
3112     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3113   %}
3114 
3115 
3116   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3117     // Operand was loaded from memory into fp ST (stack top)
3118     // FADD   ST,$src  /* D8 C0+i */
3119     emit_opcode(cbuf, 0xD8);
3120     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3121 
3122     // FMULP  src2,ST  /* DE C8+i */
3123     emit_opcode(cbuf, 0xDE);
3124     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3125   %}
3126 
3127   // Atomically load the volatile long
3128   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3129     emit_opcode(cbuf,0xDF);
3130     int rm_byte_opcode = 0x05;
3131     int base     = $mem$$base;
3132     int index    = $mem$$index;
3133     int scale    = $mem$$scale;
3134     int displace = $mem$$disp;
3135     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3136     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3137     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3138   %}
3139 
3140   // Volatile Store Long.  Must be atomic, so move it into
3141   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3142   // target address before the store (for null-ptr checks)
3143   // so the memory operand is used twice in the encoding.
3144   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3145     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3146     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3147     emit_opcode(cbuf,0xDF);
3148     int rm_byte_opcode = 0x07;
3149     int base     = $mem$$base;
3150     int index    = $mem$$index;
3151     int scale    = $mem$$scale;
3152     int displace = $mem$$disp;
3153     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3154     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3155   %}
3156 
3157   // Safepoint Poll.  This polls the safepoint page, and causes an
3158   // exception if it is not readable. Unfortunately, it kills the condition code
3159   // in the process
// We currently use TESTL [spp],EDI
3161   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3162 
3163   enc_class Safepoint_Poll() %{
3164     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3165     emit_opcode(cbuf,0x85);
3166     emit_rm (cbuf, 0x0, 0x7, 0x5);
3167     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3168   %}
3169 %}
3170 
3171 
3172 //----------FRAME--------------------------------------------------------------
3173 // Definition of frame structure and management information.
3174 //
3175 //  S T A C K   L A Y O U T    Allocators stack-slot number
3176 //                             |   (to get allocators register number
3177 //  G  Owned by    |        |  v    add OptoReg::stack0())
3178 //  r   CALLER     |        |
3179 //  o     |        +--------+      pad to even-align allocators stack-slot
3180 //  w     V        |  pad0  |        numbers; owned by CALLER
3181 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3182 //  h     ^        |   in   |  5
3183 //        |        |  args  |  4   Holes in incoming args owned by SELF
3184 //  |     |        |        |  3
3185 //  |     |        +--------+
3186 //  V     |        | old out|      Empty on Intel, window on Sparc
3187 //        |    old |preserve|      Must be even aligned.
3188 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3189 //        |        |   in   |  3   area for Intel ret address
3190 //     Owned by    |preserve|      Empty on Sparc.
3191 //       SELF      +--------+
3192 //        |        |  pad2  |  2   pad to align old SP
3193 //        |        +--------+  1
3194 //        |        | locks  |  0
3195 //        |        +--------+----> OptoReg::stack0(), even aligned
3196 //        |        |  pad1  | 11   pad to align new SP
3197 //        |        +--------+
3198 //        |        |        | 10
3199 //        |        | spills |  9   spills
3200 //        V        |        |  8   (pad0 slot for callee)
3201 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3202 //        ^        |  out   |  7
3203 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3204 //     Owned by    +--------+
3205 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3206 //        |    new |preserve|      Must be even-aligned.
3207 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3208 //        |        |        |
3209 //
3210 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3211 //         known from SELF's arguments and the Java calling convention.
3212 //         Region 6-7 is determined per call site.
3213 // Note 2: If the calling convention leaves holes in the incoming argument
3214 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3216 //         incoming area, as the Java calling convention is completely under
3217 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3219 //         varargs C calling conventions.
3220 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3221 //         even aligned with pad0 as needed.
3222 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3223 //         region 6-11 is even aligned; it may be padded out more so that
3224 //         the region from SP to FP meets the minimum stack alignment.
3225 
3226 frame %{
3227   // What direction does stack grow in (assumed to be same for C & Java)
3228   stack_direction(TOWARDS_LOW);
3229 
3230   // These three registers define part of the calling convention
3231   // between compiled code and the interpreter.
3232   inline_cache_reg(EAX);                // Inline Cache Register
3233   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3234 
3235   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3236   cisc_spilling_operand_name(indOffset32);
3237 
3238   // Number of stack slots consumed by locking an object
3239   sync_stack_slots(1);
3240 
3241   // Compiled code's Frame Pointer
3242   frame_pointer(ESP);
3243   // Interpreter stores its frame pointer in a register which is
3244   // stored to the stack by I2CAdaptors.
3245   // I2CAdaptors convert from interpreted java to compiled java.
3246   interpreter_frame_pointer(EBP);
3247 
3248   // Stack alignment requirement
3249   // Alignment size in bytes (128-bit -> 16 bytes)
3250   stack_alignment(StackAlignmentInBytes);
3251 
3252   // Number of stack slots between incoming argument block and the start of
3253   // a new frame.  The PROLOG must add this many slots to the stack.  The
3254   // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp (which must be saved)
3256   in_preserve_stack_slots(2+VerifyStackAtCalls);
3257 
3258   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3259   // for calls to C.  Supports the var-args backing area for register parms.
3260   varargs_C_out_slots_killed(0);
3261 
3262   // The after-PROLOG location of the return address.  Location of
3263   // return address specifies a type (REG or STACK) and a number
3264   // representing the register number (i.e. - use a register name) or
3265   // stack slot.
3266   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3267   // Otherwise, it is above the locks and verification slot and alignment word
3268   return_addr(STACK - 1 +
3269               align_up((Compile::current()->in_preserve_stack_slots() +
3270                         Compile::current()->fixed_slots()),
3271                        stack_alignment_in_slots()));
3272 
3273   // Body of function which returns an integer array locating
3274   // arguments either in registers or in stack slots.  Passed an array
3275   // of ideal registers called "sig" and a "length" count.  Stack-slot
3276   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3277   // arguments for a CALLEE.  Incoming stack arguments are
3278   // automatically biased by the preserve_stack_slots field above.
3279   calling_convention %{
    // No difference between incoming and outgoing, so just pass false
3281     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3282   %}
3283 
3284 
3285   // Body of function which returns an integer array locating
3286   // arguments either in registers or in stack slots.  Passed an array
3287   // of ideal registers called "sig" and a "length" count.  Stack-slot
3288   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3289   // arguments for a CALLEE.  Incoming stack arguments are
3290   // automatically biased by the preserve_stack_slots field above.
3291   c_calling_convention %{
3292     // This is obviously always outgoing
3293     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3294   %}
3295 
3296   // Location of C & interpreter return values
3297   c_return_value %{
3298     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3299     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3300     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3301 
3302     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3303     // that C functions return float and double results in XMM0.
3304     if( ideal_reg == Op_RegD && UseSSE>=2 )
3305       return OptoRegPair(XMM0b_num,XMM0_num);
3306     if( ideal_reg == Op_RegF && UseSSE>=2 )
3307       return OptoRegPair(OptoReg::Bad,XMM0_num);
3308 
3309     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3310   %}
3311 
3312   // Location of return values
3313   return_value %{
3314     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3315     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3316     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3317     if( ideal_reg == Op_RegD && UseSSE>=2 )
3318       return OptoRegPair(XMM0b_num,XMM0_num);
3319     if( ideal_reg == Op_RegF && UseSSE>=1 )
3320       return OptoRegPair(OptoReg::Bad,XMM0_num);
3321     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3322   %}
3323 
3324 %}
3325 
3326 //----------ATTRIBUTES---------------------------------------------------------
3327 //----------Operand Attributes-------------------------------------------------
3328 op_attrib op_cost(0);        // Required cost attribute
3329 
3330 //----------Instruction Attributes---------------------------------------------
3331 ins_attrib ins_cost(100);       // Required cost attribute
3332 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
3336 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3337                                 // specifies the alignment that some part of the instruction (not
3338                                 // necessarily the start) requires.  If > 1, a compute_padding()
3339                                 // function must be provided for the instruction
3340 
3341 //----------OPERANDS-----------------------------------------------------------
3342 // Operand definitions must precede instruction definitions for correct parsing
3343 // in the ADLC because operands constitute user defined types which are used in
3344 // instruction definitions.
3345 
3346 //----------Simple Operands----------------------------------------------------
3347 // Immediate Operands
3348 // Integer Immediate
3349 operand immI() %{
3350   match(ConI);
3351 
3352   op_cost(10);
3353   format %{ %}
3354   interface(CONST_INTER);
3355 %}
3356 
3357 // Constant for test vs zero
3358 operand immI0() %{
3359   predicate(n->get_int() == 0);
3360   match(ConI);
3361 
3362   op_cost(0);
3363   format %{ %}
3364   interface(CONST_INTER);
3365 %}
3366 
3367 // Constant for increment
3368 operand immI1() %{
3369   predicate(n->get_int() == 1);
3370   match(ConI);
3371 
3372   op_cost(0);
3373   format %{ %}
3374   interface(CONST_INTER);
3375 %}
3376 
3377 // Constant for decrement
3378 operand immI_M1() %{
3379   predicate(n->get_int() == -1);
3380   match(ConI);
3381 
3382   op_cost(0);
3383   format %{ %}
3384   interface(CONST_INTER);
3385 %}
3386 
3387 // Valid scale values for addressing modes
3388 operand immI2() %{
3389   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3390   match(ConI);
3391 
3392   format %{ %}
3393   interface(CONST_INTER);
3394 %}
3395 
3396 operand immI8() %{
3397   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3398   match(ConI);
3399 
3400   op_cost(5);
3401   format %{ %}
3402   interface(CONST_INTER);
3403 %}
3404 
3405 operand immI16() %{
3406   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3407   match(ConI);
3408 
3409   op_cost(10);
3410   format %{ %}
3411   interface(CONST_INTER);
3412 %}
3413 
3414 // Int Immediate non-negative
3415 operand immU31()
3416 %{
3417   predicate(n->get_int() >= 0);
3418   match(ConI);
3419 
3420   op_cost(0);
3421   format %{ %}
3422   interface(CONST_INTER);
3423 %}
3424 
3425 // Constant for long shifts
3426 operand immI_32() %{
3427   predicate( n->get_int() == 32 );
3428   match(ConI);
3429 
3430   op_cost(0);
3431   format %{ %}
3432   interface(CONST_INTER);
3433 %}
3434 
3435 operand immI_1_31() %{
3436   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3437   match(ConI);
3438 
3439   op_cost(0);
3440   format %{ %}
3441   interface(CONST_INTER);
3442 %}
3443 
3444 operand immI_32_63() %{
3445   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3446   match(ConI);
3447   op_cost(0);
3448 
3449   format %{ %}
3450   interface(CONST_INTER);
3451 %}
3452 
3453 operand immI_1() %{
3454   predicate( n->get_int() == 1 );
3455   match(ConI);
3456 
3457   op_cost(0);
3458   format %{ %}
3459   interface(CONST_INTER);
3460 %}
3461 
3462 operand immI_2() %{
3463   predicate( n->get_int() == 2 );
3464   match(ConI);
3465 
3466   op_cost(0);
3467   format %{ %}
3468   interface(CONST_INTER);
3469 %}
3470 
3471 operand immI_3() %{
3472   predicate( n->get_int() == 3 );
3473   match(ConI);
3474 
3475   op_cost(0);
3476   format %{ %}
3477   interface(CONST_INTER);
3478 %}
3479 
3480 // Pointer Immediate
3481 operand immP() %{
3482   match(ConP);
3483 
3484   op_cost(10);
3485   format %{ %}
3486   interface(CONST_INTER);
3487 %}
3488 
3489 // NULL Pointer Immediate
3490 operand immP0() %{
3491   predicate( n->get_ptr() == 0 );
3492   match(ConP);
3493   op_cost(0);
3494 
3495   format %{ %}
3496   interface(CONST_INTER);
3497 %}
3498 
3499 // Long Immediate
3500 operand immL() %{
3501   match(ConL);
3502 
3503   op_cost(20);
3504   format %{ %}
3505   interface(CONST_INTER);
3506 %}
3507 
3508 // Long Immediate zero
3509 operand immL0() %{
3510   predicate( n->get_long() == 0L );
3511   match(ConL);
3512   op_cost(0);
3513 
3514   format %{ %}
3515   interface(CONST_INTER);
3516 %}
3517 
// Long Immediate -1
3519 operand immL_M1() %{
3520   predicate( n->get_long() == -1L );
3521   match(ConL);
3522   op_cost(0);
3523 
3524   format %{ %}
3525   interface(CONST_INTER);
3526 %}
3527 
3528 // Long immediate from 0 to 127.
3529 // Used for a shorter form of long mul by 10.
3530 operand immL_127() %{
3531   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3532   match(ConL);
3533   op_cost(0);
3534 
3535   format %{ %}
3536   interface(CONST_INTER);
3537 %}
3538 
3539 // Long Immediate: low 32-bit mask
3540 operand immL_32bits() %{
3541   predicate(n->get_long() == 0xFFFFFFFFL);
3542   match(ConL);
3543   op_cost(0);
3544 
3545   format %{ %}
3546   interface(CONST_INTER);
3547 %}
3548 
// Long Immediate: value fits in a signed 32-bit immediate
3550 operand immL32() %{
3551   predicate(n->get_long() == (int)(n->get_long()));
3552   match(ConL);
3553   op_cost(20);
3554 
3555   format %{ %}
3556   interface(CONST_INTER);
3557 %}
3558 
// Double Immediate zero
3560 operand immDPR0() %{
3561   // Do additional (and counter-intuitive) test against NaN to work around VC++
3562   // bug that generates code such that NaNs compare equal to 0.0
3563   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3564   match(ConD);
3565 
3566   op_cost(5);
3567   format %{ %}
3568   interface(CONST_INTER);
3569 %}
3570 
3571 // Double Immediate one
3572 operand immDPR1() %{
3573   predicate( UseSSE<=1 && n->getd() == 1.0 );
3574   match(ConD);
3575 
3576   op_cost(5);
3577   format %{ %}
3578   interface(CONST_INTER);
3579 %}
3580 
3581 // Double Immediate
3582 operand immDPR() %{
3583   predicate(UseSSE<=1);
3584   match(ConD);
3585 
3586   op_cost(5);
3587   format %{ %}
3588   interface(CONST_INTER);
3589 %}
3590 
3591 operand immD() %{
3592   predicate(UseSSE>=2);
3593   match(ConD);
3594 
3595   op_cost(5);
3596   format %{ %}
3597   interface(CONST_INTER);
3598 %}
3599 
3600 // Double Immediate zero
3601 operand immD0() %{
3602   // Do additional (and counter-intuitive) test against NaN to work around VC++
3603   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3604   // compare equal to -0.0.
3605   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3606   match(ConD);
3607 
3608   format %{ %}
3609   interface(CONST_INTER);
3610 %}
3611 
3612 // Float Immediate zero
3613 operand immFPR0() %{
3614   predicate(UseSSE == 0 && n->getf() == 0.0F);
3615   match(ConF);
3616 
3617   op_cost(5);
3618   format %{ %}
3619   interface(CONST_INTER);
3620 %}
3621 
3622 // Float Immediate one
3623 operand immFPR1() %{
3624   predicate(UseSSE == 0 && n->getf() == 1.0F);
3625   match(ConF);
3626 
3627   op_cost(5);
3628   format %{ %}
3629   interface(CONST_INTER);
3630 %}
3631 
3632 // Float Immediate
3633 operand immFPR() %{
3634   predicate( UseSSE == 0 );
3635   match(ConF);
3636 
3637   op_cost(5);
3638   format %{ %}
3639   interface(CONST_INTER);
3640 %}
3641 
3642 // Float Immediate
3643 operand immF() %{
3644   predicate(UseSSE >= 1);
3645   match(ConF);
3646 
3647   op_cost(5);
3648   format %{ %}
3649   interface(CONST_INTER);
3650 %}
3651 
3652 // Float Immediate zero.  Zero and not -0.0
3653 operand immF0() %{
3654   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3655   match(ConF);
3656 
3657   op_cost(5);
3658   format %{ %}
3659   interface(CONST_INTER);
3660 %}
3661 
3662 // Immediates for special shifts (sign extend)
3663 
3664 // Constants for increment
3665 operand immI_16() %{
3666   predicate( n->get_int() == 16 );
3667   match(ConI);
3668 
3669   format %{ %}
3670   interface(CONST_INTER);
3671 %}
3672 
3673 operand immI_24() %{
3674   predicate( n->get_int() == 24 );
3675   match(ConI);
3676 
3677   format %{ %}
3678   interface(CONST_INTER);
3679 %}
3680 
3681 // Constant for byte-wide masking
3682 operand immI_255() %{
3683   predicate( n->get_int() == 255 );
3684   match(ConI);
3685 
3686   format %{ %}
3687   interface(CONST_INTER);
3688 %}
3689 
3690 // Constant for short-wide masking
3691 operand immI_65535() %{
3692   predicate(n->get_int() == 65535);
3693   match(ConI);
3694 
3695   format %{ %}
3696   interface(CONST_INTER);
3697 %}
3698 
3699 // Register Operands
3700 // Integer Register
3701 operand rRegI() %{
3702   constraint(ALLOC_IN_RC(int_reg));
3703   match(RegI);
3704   match(xRegI);
3705   match(eAXRegI);
3706   match(eBXRegI);
3707   match(eCXRegI);
3708   match(eDXRegI);
3709   match(eDIRegI);
3710   match(eSIRegI);
3711 
3712   format %{ %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 // Subset of Integer Register
3717 operand xRegI(rRegI reg) %{
3718   constraint(ALLOC_IN_RC(int_x_reg));
3719   match(reg);
3720   match(eAXRegI);
3721   match(eBXRegI);
3722   match(eCXRegI);
3723   match(eDXRegI);
3724 
3725   format %{ %}
3726   interface(REG_INTER);
3727 %}
3728 
3729 // Special Registers
3730 operand eAXRegI(xRegI reg) %{
3731   constraint(ALLOC_IN_RC(eax_reg));
3732   match(reg);
3733   match(rRegI);
3734 
3735   format %{ "EAX" %}
3736   interface(REG_INTER);
3737 %}
3738 
3739 // Special Registers
3740 operand eBXRegI(xRegI reg) %{
3741   constraint(ALLOC_IN_RC(ebx_reg));
3742   match(reg);
3743   match(rRegI);
3744 
3745   format %{ "EBX" %}
3746   interface(REG_INTER);
3747 %}
3748 
3749 operand eCXRegI(xRegI reg) %{
3750   constraint(ALLOC_IN_RC(ecx_reg));
3751   match(reg);
3752   match(rRegI);
3753 
3754   format %{ "ECX" %}
3755   interface(REG_INTER);
3756 %}
3757 
3758 operand eDXRegI(xRegI reg) %{
3759   constraint(ALLOC_IN_RC(edx_reg));
3760   match(reg);
3761   match(rRegI);
3762 
3763   format %{ "EDX" %}
3764   interface(REG_INTER);
3765 %}
3766 
3767 operand eDIRegI(xRegI reg) %{
3768   constraint(ALLOC_IN_RC(edi_reg));
3769   match(reg);
3770   match(rRegI);
3771 
3772   format %{ "EDI" %}
3773   interface(REG_INTER);
3774 %}
3775 
3776 operand naxRegI() %{
3777   constraint(ALLOC_IN_RC(nax_reg));
3778   match(RegI);
3779   match(eCXRegI);
3780   match(eDXRegI);
3781   match(eSIRegI);
3782   match(eDIRegI);
3783 
3784   format %{ %}
3785   interface(REG_INTER);
3786 %}
3787 
3788 operand nadxRegI() %{
3789   constraint(ALLOC_IN_RC(nadx_reg));
3790   match(RegI);
3791   match(eBXRegI);
3792   match(eCXRegI);
3793   match(eSIRegI);
3794   match(eDIRegI);
3795 
3796   format %{ %}
3797   interface(REG_INTER);
3798 %}
3799 
3800 operand ncxRegI() %{
3801   constraint(ALLOC_IN_RC(ncx_reg));
3802   match(RegI);
3803   match(eAXRegI);
3804   match(eDXRegI);
3805   match(eSIRegI);
3806   match(eDIRegI);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
3812 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3813 // //
3814 operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
3821 %}
3822 
3823 // Pointer Register
3824 operand anyRegP() %{
3825   constraint(ALLOC_IN_RC(any_reg));
3826   match(RegP);
3827   match(eAXRegP);
3828   match(eBXRegP);
3829   match(eCXRegP);
3830   match(eDIRegP);
3831   match(eRegP);
3832 
3833   format %{ %}
3834   interface(REG_INTER);
3835 %}
3836 
3837 operand eRegP() %{
3838   constraint(ALLOC_IN_RC(int_reg));
3839   match(RegP);
3840   match(eAXRegP);
3841   match(eBXRegP);
3842   match(eCXRegP);
3843   match(eDIRegP);
3844 
3845   format %{ %}
3846   interface(REG_INTER);
3847 %}
3848 
3849 // On windows95, EBP is not safe to use for implicit null tests.
3850 operand eRegP_no_EBP() %{
3851   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3852   match(RegP);
3853   match(eAXRegP);
3854   match(eBXRegP);
3855   match(eCXRegP);
3856   match(eDIRegP);
3857 
3858   op_cost(100);
3859   format %{ %}
3860   interface(REG_INTER);
3861 %}
3862 
3863 operand naxRegP() %{
3864   constraint(ALLOC_IN_RC(nax_reg));
3865   match(RegP);
3866   match(eBXRegP);
3867   match(eDXRegP);
3868   match(eCXRegP);
3869   match(eSIRegP);
3870   match(eDIRegP);
3871 
3872   format %{ %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 operand nabxRegP() %{
3877   constraint(ALLOC_IN_RC(nabx_reg));
3878   match(RegP);
3879   match(eCXRegP);
3880   match(eDXRegP);
3881   match(eSIRegP);
3882   match(eDIRegP);
3883 
3884   format %{ %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 operand pRegP() %{
3889   constraint(ALLOC_IN_RC(p_reg));
3890   match(RegP);
3891   match(eBXRegP);
3892   match(eDXRegP);
3893   match(eSIRegP);
3894   match(eDIRegP);
3895 
3896   format %{ %}
3897   interface(REG_INTER);
3898 %}
3899 
3900 // Special Registers
3901 // Return a pointer value
3902 operand eAXRegP(eRegP reg) %{
3903   constraint(ALLOC_IN_RC(eax_reg));
3904   match(reg);
3905   format %{ "EAX" %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Used in AtomicAdd
3910 operand eBXRegP(eRegP reg) %{
3911   constraint(ALLOC_IN_RC(ebx_reg));
3912   match(reg);
3913   format %{ "EBX" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 // Tail-call (interprocedural jump) to interpreter
3918 operand eCXRegP(eRegP reg) %{
3919   constraint(ALLOC_IN_RC(ecx_reg));
3920   match(reg);
3921   format %{ "ECX" %}
3922   interface(REG_INTER);
3923 %}
3924 
3925 operand eSIRegP(eRegP reg) %{
3926   constraint(ALLOC_IN_RC(esi_reg));
3927   match(reg);
3928   format %{ "ESI" %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 // Used in rep stosw
3933 operand eDIRegP(eRegP reg) %{
3934   constraint(ALLOC_IN_RC(edi_reg));
3935   match(reg);
3936   format %{ "EDI" %}
3937   interface(REG_INTER);
3938 %}
3939 
3940 operand eRegL() %{
3941   constraint(ALLOC_IN_RC(long_reg));
3942   match(RegL);
3943   match(eADXRegL);
3944 
3945   format %{ %}
3946   interface(REG_INTER);
3947 %}
3948 
3949 operand eADXRegL( eRegL reg ) %{
3950   constraint(ALLOC_IN_RC(eadx_reg));
3951   match(reg);
3952 
3953   format %{ "EDX:EAX" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 operand eBCXRegL( eRegL reg ) %{
3958   constraint(ALLOC_IN_RC(ebcx_reg));
3959   match(reg);
3960 
3961   format %{ "EBX:ECX" %}
3962   interface(REG_INTER);
3963 %}
3964 
3965 // Special case for integer high multiply
3966 operand eADXRegL_low_only() %{
3967   constraint(ALLOC_IN_RC(eadx_reg));
3968   match(RegL);
3969 
3970   format %{ "EAX" %}
3971   interface(REG_INTER);
3972 %}
3973 
3974 // Flags register, used as output of compare instructions
3975 operand eFlagsReg() %{
3976   constraint(ALLOC_IN_RC(int_flags));
3977   match(RegFlags);
3978 
3979   format %{ "EFLAGS" %}
3980   interface(REG_INTER);
3981 %}
3982 
3983 // Flags register, used as output of FLOATING POINT compare instructions
3984 operand eFlagsRegU() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987 
3988   format %{ "EFLAGS_U" %}
3989   interface(REG_INTER);
3990 %}
3991 
3992 operand eFlagsRegUCF() %{
3993   constraint(ALLOC_IN_RC(int_flags));
3994   match(RegFlags);
3995   predicate(false);
3996 
3997   format %{ "EFLAGS_U_CF" %}
3998   interface(REG_INTER);
3999 %}
4000 
4001 // Condition Code Register used by long compare
4002 operand flagsReg_long_LTGE() %{
4003   constraint(ALLOC_IN_RC(int_flags));
4004   match(RegFlags);
4005   format %{ "FLAGS_LTGE" %}
4006   interface(REG_INTER);
4007 %}
4008 operand flagsReg_long_EQNE() %{
4009   constraint(ALLOC_IN_RC(int_flags));
4010   match(RegFlags);
4011   format %{ "FLAGS_EQNE" %}
4012   interface(REG_INTER);
4013 %}
4014 operand flagsReg_long_LEGT() %{
4015   constraint(ALLOC_IN_RC(int_flags));
4016   match(RegFlags);
4017   format %{ "FLAGS_LEGT" %}
4018   interface(REG_INTER);
4019 %}
4020 
4021 // Condition Code Register used by unsigned long compare
4022 operand flagsReg_ulong_LTGE() %{
4023   constraint(ALLOC_IN_RC(int_flags));
4024   match(RegFlags);
4025   format %{ "FLAGS_U_LTGE" %}
4026   interface(REG_INTER);
4027 %}
4028 operand flagsReg_ulong_EQNE() %{
4029   constraint(ALLOC_IN_RC(int_flags));
4030   match(RegFlags);
4031   format %{ "FLAGS_U_EQNE" %}
4032   interface(REG_INTER);
4033 %}
4034 operand flagsReg_ulong_LEGT() %{
4035   constraint(ALLOC_IN_RC(int_flags));
4036   match(RegFlags);
4037   format %{ "FLAGS_U_LEGT" %}
4038   interface(REG_INTER);
4039 %}
4040 
4041 // Float register operands
4042 operand regDPR() %{
4043   predicate( UseSSE < 2 );
4044   constraint(ALLOC_IN_RC(fp_dbl_reg));
4045   match(RegD);
4046   match(regDPR1);
4047   match(regDPR2);
4048   format %{ %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 operand regDPR1(regDPR reg) %{
4053   predicate( UseSSE < 2 );
4054   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4055   match(reg);
4056   format %{ "FPR1" %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 operand regDPR2(regDPR reg) %{
4061   predicate( UseSSE < 2 );
4062   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4063   match(reg);
4064   format %{ "FPR2" %}
4065   interface(REG_INTER);
4066 %}
4067 
4068 operand regnotDPR1(regDPR reg) %{
4069   predicate( UseSSE < 2 );
4070   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4071   match(reg);
4072   format %{ %}
4073   interface(REG_INTER);
4074 %}
4075 
4076 // Float register operands
4077 operand regFPR() %{
4078   predicate( UseSSE < 2 );
4079   constraint(ALLOC_IN_RC(fp_flt_reg));
4080   match(RegF);
4081   match(regFPR1);
4082   format %{ %}
4083   interface(REG_INTER);
4084 %}
4085 
4086 // Float register operands
4087 operand regFPR1(regFPR reg) %{
4088   predicate( UseSSE < 2 );
4089   constraint(ALLOC_IN_RC(fp_flt_reg0));
4090   match(reg);
4091   format %{ "FPR1" %}
4092   interface(REG_INTER);
4093 %}
4094 
4095 // XMM Float register operands
4096 operand regF() %{
4097   predicate( UseSSE>=1 );
4098   constraint(ALLOC_IN_RC(float_reg_legacy));
4099   match(RegF);
4100   format %{ %}
4101   interface(REG_INTER);
4102 %}
4103 
4104 // XMM Double register operands
4105 operand regD() %{
4106   predicate( UseSSE>=2 );
4107   constraint(ALLOC_IN_RC(double_reg_legacy));
4108   match(RegD);
4109   format %{ %}
4110   interface(REG_INTER);
4111 %}
4112 
4113 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4114 // runtime code generation via reg_class_dynamic.
4115 operand vecS() %{
4116   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4117   match(VecS);
4118 
4119   format %{ %}
4120   interface(REG_INTER);
4121 %}
4122 
4123 operand vecD() %{
4124   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4125   match(VecD);
4126 
4127   format %{ %}
4128   interface(REG_INTER);
4129 %}
4130 
4131 operand vecX() %{
4132   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4133   match(VecX);
4134 
4135   format %{ %}
4136   interface(REG_INTER);
4137 %}
4138 
4139 operand vecY() %{
4140   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4141   match(VecY);
4142 
4143   format %{ %}
4144   interface(REG_INTER);
4145 %}
4146 
4147 //----------Memory Operands----------------------------------------------------
4148 // Direct Memory Operand
4149 operand direct(immP addr) %{
4150   match(addr);
4151 
4152   format %{ "[$addr]" %}
4153   interface(MEMORY_INTER) %{
4154     base(0xFFFFFFFF);
4155     index(0x4);
4156     scale(0x0);
4157     disp($addr);
4158   %}
4159 %}
4160 
4161 // Indirect Memory Operand
4162 operand indirect(eRegP reg) %{
4163   constraint(ALLOC_IN_RC(int_reg));
4164   match(reg);
4165 
4166   format %{ "[$reg]" %}
4167   interface(MEMORY_INTER) %{
4168     base($reg);
4169     index(0x4);
4170     scale(0x0);
4171     disp(0x0);
4172   %}
4173 %}
4174 
4175 // Indirect Memory Plus Short Offset Operand
4176 operand indOffset8(eRegP reg, immI8 off) %{
4177   match(AddP reg off);
4178 
4179   format %{ "[$reg + $off]" %}
4180   interface(MEMORY_INTER) %{
4181     base($reg);
4182     index(0x4);
4183     scale(0x0);
4184     disp($off);
4185   %}
4186 %}
4187 
4188 // Indirect Memory Plus Long Offset Operand
4189 operand indOffset32(eRegP reg, immI off) %{
4190   match(AddP reg off);
4191 
4192   format %{ "[$reg + $off]" %}
4193   interface(MEMORY_INTER) %{
4194     base($reg);
4195     index(0x4);
4196     scale(0x0);
4197     disp($off);
4198   %}
4199 %}
4200 
4201 // Indirect Memory Plus Long Offset Operand
4202 operand indOffset32X(rRegI reg, immP off) %{
4203   match(AddP off reg);
4204 
4205   format %{ "[$reg + $off]" %}
4206   interface(MEMORY_INTER) %{
4207     base($reg);
4208     index(0x4);
4209     scale(0x0);
4210     disp($off);
4211   %}
4212 %}
4213 
4214 // Indirect Memory Plus Index Register Plus Offset Operand
4215 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4216   match(AddP (AddP reg ireg) off);
4217 
4218   op_cost(10);
4219   format %{"[$reg + $off + $ireg]" %}
4220   interface(MEMORY_INTER) %{
4221     base($reg);
4222     index($ireg);
4223     scale(0x0);
4224     disp($off);
4225   %}
4226 %}
4227 
4228 // Indirect Memory Plus Index Register Plus Offset Operand
4229 operand indIndex(eRegP reg, rRegI ireg) %{
4230   match(AddP reg ireg);
4231 
4232   op_cost(10);
4233   format %{"[$reg + $ireg]" %}
4234   interface(MEMORY_INTER) %{
4235     base($reg);
4236     index($ireg);
4237     scale(0x0);
4238     disp(0x0);
4239   %}
4240 %}
4241 
4242 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4244 // // -------------------------------------------------------------------------
4245 // // Scaled Memory Operands
4246 // // Indirect Memory Times Scale Plus Offset Operand
4247 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4248 //   match(AddP off (LShiftI ireg scale));
4249 //
4250 //   op_cost(10);
4251 //   format %{"[$off + $ireg << $scale]" %}
4252 //   interface(MEMORY_INTER) %{
4253 //     base(0x4);
4254 //     index($ireg);
4255 //     scale($scale);
4256 //     disp($off);
4257 //   %}
4258 // %}
4259 
4260 // Indirect Memory Times Scale Plus Index Register
4261 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4262   match(AddP reg (LShiftI ireg scale));
4263 
4264   op_cost(10);
4265   format %{"[$reg + $ireg << $scale]" %}
4266   interface(MEMORY_INTER) %{
4267     base($reg);
4268     index($ireg);
4269     scale($scale);
4270     disp(0x0);
4271   %}
4272 %}
4273 
4274 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4275 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4276   match(AddP (AddP reg (LShiftI ireg scale)) off);
4277 
4278   op_cost(10);
4279   format %{"[$reg + $off + $ireg << $scale]" %}
4280   interface(MEMORY_INTER) %{
4281     base($reg);
4282     index($ireg);
4283     scale($scale);
4284     disp($off);
4285   %}
4286 %}
4287 
4288 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4290 // the first word of the long.  If the load-long destination overlaps with
4291 // registers used in the addressing expression, the 2nd half will be loaded
4292 // from a clobbered address.  Fix this by requiring that load-long use
4293 // address registers that do not overlap with the load-long target.
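// Illustrative sketch (commented out) of the hazard being avoided: if the
// long destination could overlap the address register, e.g.
//   MOV EAX, [EAX]      // load the low word, clobbering the base register
//   MOV EDX, [EAX+4]    // the high word is now fetched from a bogus address
// the second load would read garbage.  Restricting the base to ESI (see
// load_long_RegP below) keeps it disjoint from any long register pair.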
4294 
4295 // load-long support
4296 operand load_long_RegP() %{
4297   constraint(ALLOC_IN_RC(esi_reg));
4298   match(RegP);
4299   match(eSIRegP);
4300   op_cost(100);
4301   format %{  %}
4302   interface(REG_INTER);
4303 %}
4304 
4305 // Indirect Memory Operand Long
4306 operand load_long_indirect(load_long_RegP reg) %{
4307   constraint(ALLOC_IN_RC(esi_reg));
4308   match(reg);
4309 
4310   format %{ "[$reg]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index(0x4);
4314     scale(0x0);
4315     disp(0x0);
4316   %}
4317 %}
4318 
4319 // Indirect Memory Plus Long Offset Operand
4320 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4321   match(AddP reg off);
4322 
4323   format %{ "[$reg + $off]" %}
4324   interface(MEMORY_INTER) %{
4325     base($reg);
4326     index(0x4);
4327     scale(0x0);
4328     disp($off);
4329   %}
4330 %}
4331 
4332 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4333 
4334 
4335 //----------Special Memory Operands--------------------------------------------
4336 // Stack Slot Operand - This operand is used for loading and storing temporary
4337 //                      values on the stack where a match requires a value to
4338 //                      flow through memory.
4339 operand stackSlotP(sRegP reg) %{
4340   constraint(ALLOC_IN_RC(stack_slots));
4341   // No match rule because this operand is only generated in matching
4342   format %{ "[$reg]" %}
4343   interface(MEMORY_INTER) %{
4344     base(0x4);   // ESP
4345     index(0x4);  // No Index
4346     scale(0x0);  // No Scale
4347     disp($reg);  // Stack Offset
4348   %}
4349 %}
4350 
4351 operand stackSlotI(sRegI reg) %{
4352   constraint(ALLOC_IN_RC(stack_slots));
4353   // No match rule because this operand is only generated in matching
4354   format %{ "[$reg]" %}
4355   interface(MEMORY_INTER) %{
4356     base(0x4);   // ESP
4357     index(0x4);  // No Index
4358     scale(0x0);  // No Scale
4359     disp($reg);  // Stack Offset
4360   %}
4361 %}
4362 
4363 operand stackSlotF(sRegF reg) %{
4364   constraint(ALLOC_IN_RC(stack_slots));
4365   // No match rule because this operand is only generated in matching
4366   format %{ "[$reg]" %}
4367   interface(MEMORY_INTER) %{
4368     base(0x4);   // ESP
4369     index(0x4);  // No Index
4370     scale(0x0);  // No Scale
4371     disp($reg);  // Stack Offset
4372   %}
4373 %}
4374 
4375 operand stackSlotD(sRegD reg) %{
4376   constraint(ALLOC_IN_RC(stack_slots));
4377   // No match rule because this operand is only generated in matching
4378   format %{ "[$reg]" %}
4379   interface(MEMORY_INTER) %{
4380     base(0x4);   // ESP
4381     index(0x4);  // No Index
4382     scale(0x0);  // No Scale
4383     disp($reg);  // Stack Offset
4384   %}
4385 %}
4386 
4387 operand stackSlotL(sRegL reg) %{
4388   constraint(ALLOC_IN_RC(stack_slots));
4389   // No match rule because this operand is only generated in matching
4390   format %{ "[$reg]" %}
4391   interface(MEMORY_INTER) %{
4392     base(0x4);   // ESP
4393     index(0x4);  // No Index
4394     scale(0x0);  // No Scale
4395     disp($reg);  // Stack Offset
4396   %}
4397 %}
4398 
4399 //----------Memory Operands - Win95 Implicit Null Variants----------------
4400 // Indirect Memory Operand
4401 operand indirect_win95_safe(eRegP_no_EBP reg)
4402 %{
4403   constraint(ALLOC_IN_RC(int_reg));
4404   match(reg);
4405 
4406   op_cost(100);
4407   format %{ "[$reg]" %}
4408   interface(MEMORY_INTER) %{
4409     base($reg);
4410     index(0x4);
4411     scale(0x0);
4412     disp(0x0);
4413   %}
4414 %}
4415 
4416 // Indirect Memory Plus Short Offset Operand
4417 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4418 %{
4419   match(AddP reg off);
4420 
4421   op_cost(100);
4422   format %{ "[$reg + $off]" %}
4423   interface(MEMORY_INTER) %{
4424     base($reg);
4425     index(0x4);
4426     scale(0x0);
4427     disp($off);
4428   %}
4429 %}
4430 
4431 // Indirect Memory Plus Long Offset Operand
4432 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4433 %{
4434   match(AddP reg off);
4435 
4436   op_cost(100);
4437   format %{ "[$reg + $off]" %}
4438   interface(MEMORY_INTER) %{
4439     base($reg);
4440     index(0x4);
4441     scale(0x0);
4442     disp($off);
4443   %}
4444 %}
4445 
4446 // Indirect Memory Plus Index Register Plus Offset Operand
4447 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4448 %{
4449   match(AddP (AddP reg ireg) off);
4450 
4451   op_cost(100);
4452   format %{"[$reg + $off + $ireg]" %}
4453   interface(MEMORY_INTER) %{
4454     base($reg);
4455     index($ireg);
4456     scale(0x0);
4457     disp($off);
4458   %}
4459 %}
4460 
4461 // Indirect Memory Times Scale Plus Index Register
4462 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4463 %{
4464   match(AddP reg (LShiftI ireg scale));
4465 
4466   op_cost(100);
4467   format %{"[$reg + $ireg << $scale]" %}
4468   interface(MEMORY_INTER) %{
4469     base($reg);
4470     index($ireg);
4471     scale($scale);
4472     disp(0x0);
4473   %}
4474 %}
4475 
4476 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4477 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4478 %{
4479   match(AddP (AddP reg (LShiftI ireg scale)) off);
4480 
4481   op_cost(100);
4482   format %{"[$reg + $off + $ireg << $scale]" %}
4483   interface(MEMORY_INTER) %{
4484     base($reg);
4485     index($ireg);
4486     scale($scale);
4487     disp($off);
4488   %}
4489 %}
4490 
4491 //----------Conditional Branch Operands----------------------------------------
4492 // Comparison Op  - This is the operation of the comparison, and is limited to
4493 //                  the following set of codes:
4494 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4495 //
4496 // Other attributes of the comparison, such as unsignedness, are specified
4497 // by the comparison instruction that sets a condition code flags register.
4498 // That result is represented by a flags operand whose subtype is appropriate
4499 // to the unsignedness (etc.) of the comparison.
4500 //
4501 // Later, the instruction which matches both the Comparison Op (a Bool) and
4502 // the flags (produced by the Cmp) specifies the coding of the comparison op
4503 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4504 
// Comparison Code
4506 operand cmpOp() %{
4507   match(Bool);
4508 
4509   format %{ "" %}
4510   interface(COND_INTER) %{
4511     equal(0x4, "e");
4512     not_equal(0x5, "ne");
4513     less(0xC, "l");
4514     greater_equal(0xD, "ge");
4515     less_equal(0xE, "le");
4516     greater(0xF, "g");
4517     overflow(0x0, "o");
4518     no_overflow(0x1, "no");
4519   %}
4520 %}
4521 
4522 // Comparison Code, unsigned compare.  Used by FP also, with
4523 // C2 (unordered) turned into GT or LT already.  The other bits
4524 // C0 and C3 are turned into Carry & Zero flags.
4525 operand cmpOpU() %{
4526   match(Bool);
4527 
4528   format %{ "" %}
4529   interface(COND_INTER) %{
4530     equal(0x4, "e");
4531     not_equal(0x5, "ne");
4532     less(0x2, "b");
4533     greater_equal(0x3, "nb");
4534     less_equal(0x6, "be");
4535     greater(0x7, "nbe");
4536     overflow(0x0, "o");
4537     no_overflow(0x1, "no");
4538   %}
4539 %}
4540 
4541 // Floating comparisons that don't require any fixup for the unordered case
4542 operand cmpOpUCF() %{
4543   match(Bool);
4544   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4545             n->as_Bool()->_test._test == BoolTest::ge ||
4546             n->as_Bool()->_test._test == BoolTest::le ||
4547             n->as_Bool()->_test._test == BoolTest::gt);
4548   format %{ "" %}
4549   interface(COND_INTER) %{
4550     equal(0x4, "e");
4551     not_equal(0x5, "ne");
4552     less(0x2, "b");
4553     greater_equal(0x3, "nb");
4554     less_equal(0x6, "be");
4555     greater(0x7, "nbe");
4556     overflow(0x0, "o");
4557     no_overflow(0x1, "no");
4558   %}
4559 %}
4560 
4561 
4562 // Floating comparisons that can be fixed up with extra conditional jumps
4563 operand cmpOpUCF2() %{
4564   match(Bool);
4565   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4566             n->as_Bool()->_test._test == BoolTest::eq);
4567   format %{ "" %}
4568   interface(COND_INTER) %{
4569     equal(0x4, "e");
4570     not_equal(0x5, "ne");
4571     less(0x2, "b");
4572     greater_equal(0x3, "nb");
4573     less_equal(0x6, "be");
4574     greater(0x7, "nbe");
4575     overflow(0x0, "o");
4576     no_overflow(0x1, "no");
4577   %}
4578 %}
4579 
4580 // Comparison Code for FP conditional move
4581 operand cmpOp_fcmov() %{
4582   match(Bool);
4583 
4584   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4585             n->as_Bool()->_test._test != BoolTest::no_overflow);
4586   format %{ "" %}
4587   interface(COND_INTER) %{
4588     equal        (0x0C8);
4589     not_equal    (0x1C8);
4590     less         (0x0C0);
4591     greater_equal(0x1C0);
4592     less_equal   (0x0D0);
4593     greater      (0x1D0);
4594     overflow(0x0, "o"); // not really supported by the instruction
4595     no_overflow(0x1, "no"); // not really supported by the instruction
4596   %}
4597 %}
4598 
4599 // Comparison Code used in long compares
4600 operand cmpOp_commute() %{
4601   match(Bool);
4602 
4603   format %{ "" %}
4604   interface(COND_INTER) %{
4605     equal(0x4, "e");
4606     not_equal(0x5, "ne");
4607     less(0xF, "g");
4608     greater_equal(0xE, "le");
4609     less_equal(0xD, "ge");
4610     greater(0xC, "l");
4611     overflow(0x0, "o");
4612     no_overflow(0x1, "no");
4613   %}
4614 %}
4615 
4616 // Comparison Code used in unsigned long compares
4617 operand cmpOpU_commute() %{
4618   match(Bool);
4619 
4620   format %{ "" %}
4621   interface(COND_INTER) %{
4622     equal(0x4, "e");
4623     not_equal(0x5, "ne");
4624     less(0x7, "nbe");
4625     greater_equal(0x6, "be");
4626     less_equal(0x3, "nb");
4627     greater(0x2, "b");
4628     overflow(0x0, "o");
4629     no_overflow(0x1, "no");
4630   %}
4631 %}
4632 
4633 //----------OPERAND CLASSES----------------------------------------------------
4634 // Operand Classes are groups of operands that are used to simplify
4635 // instruction definitions by not requiring the AD writer to specify separate
4636 // instructions for every form of operand when the instruction accepts
4637 // multiple operand types with the same basic encoding and format.  The classic
4638 // case of this is memory operands.
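// For example, the loadI instruction later in this file is written once
// against the 'memory' class below, and the matcher instantiates it for every
// member form -- [reg], [reg+off8], [reg+off32], [reg+index*scale+off], and
// so on -- instead of requiring a separate instruct per addressing mode.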
4639 
4640 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4641                indIndex, indIndexScale, indIndexScaleOffset);
4642 
4643 // Long memory operations are encoded in 2 instructions and a +4 offset.
4644 // This means some kind of offset is always required and you cannot use
4645 // an oop as the offset (done when working on static globals).
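// For example, loadL below expands its single 'mem' operand into two 32-bit
// loads, one at $mem and one at $mem+4 (see the Address::make_raw calls with
// disp and disp + 4 in its ins_encode block); storeL does the same on the
// store side.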
4646 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4647                     indIndex, indIndexScale, indIndexScaleOffset);
4648 
4649 
4650 //----------PIPELINE-----------------------------------------------------------
4651 // Rules which define the behavior of the target architecture's pipeline.
4652 pipeline %{
4653 
4654 //----------ATTRIBUTES---------------------------------------------------------
4655 attributes %{
4656   variable_size_instructions;        // Variable-sized instructions
4657   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4658   instruction_unit_size = 1;         // Instruction sizes are counted in units of 1 byte
4659   instruction_fetch_unit_size = 16;  // The processor fetches one line
4660   instruction_fetch_units = 1;       // of 16 bytes
4661 
4662   // List of nop instructions
4663   nops( MachNop );
4664 %}
4665 
4666 //----------RESOURCES----------------------------------------------------------
4667 // Resources are the functional units available to the machine
4668 
4669 // Generic P2/P3 pipeline
4670 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4671 // 3 instructions decoded per cycle.
4672 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4673 // 2 ALU ops; only ALU0 handles mul/div instructions.
4674 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4675            MS0, MS1, MEM = MS0 | MS1,
4676            BR, FPU,
4677            ALU0, ALU1, ALU = ALU0 | ALU1 );
4678 
4679 //----------PIPELINE DESCRIPTION-----------------------------------------------
4680 // Pipeline Description specifies the stages in the machine's pipeline
4681 
4682 // Generic P2/P3 pipeline
4683 pipe_desc(S0, S1, S2, S3, S4, S5);
4684 
4685 //----------PIPELINE CLASSES---------------------------------------------------
4686 // Pipeline Classes describe the stages in which input and output are
4687 // referenced by the hardware pipeline.
4688 
4689 // Naming convention: ialu or fpu
4690 // Then: _reg
4691 // Then: _reg if there is a 2nd register
4692 // Then: _long if it's a pair of instructions implementing a long
4693 // Then: _fat if it requires the big decoder
4694 //   Or: _mem if it requires the big decoder and a memory unit.
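//
// For example, among the classes below: ialu_reg_mem is an integer ALU op
// with a register result and a memory source, so it claims the big decoder
// D0 plus a MEM unit; ialu_reg_reg_long is the two-instruction pair
// implementing a long reg-reg op and claims two decode and two ALU slots;
// fpu_reg_mem is an FPU op with a memory source.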
4695 
4696 // Integer ALU reg operation
4697 pipe_class ialu_reg(rRegI dst) %{
4698     single_instruction;
4699     dst    : S4(write);
4700     dst    : S3(read);
4701     DECODE : S0;        // any decoder
4702     ALU    : S3;        // any alu
4703 %}
4704 
4705 // Long ALU reg operation
4706 pipe_class ialu_reg_long(eRegL dst) %{
4707     instruction_count(2);
4708     dst    : S4(write);
4709     dst    : S3(read);
4710     DECODE : S0(2);     // any 2 decoders
4711     ALU    : S3(2);     // both alus
4712 %}
4713 
4714 // Integer ALU reg operation using big decoder
4715 pipe_class ialu_reg_fat(rRegI dst) %{
4716     single_instruction;
4717     dst    : S4(write);
4718     dst    : S3(read);
4719     D0     : S0;        // big decoder only
4720     ALU    : S3;        // any alu
4721 %}
4722 
4723 // Long ALU reg operation using big decoder
4724 pipe_class ialu_reg_long_fat(eRegL dst) %{
4725     instruction_count(2);
4726     dst    : S4(write);
4727     dst    : S3(read);
4728     D0     : S0(2);     // big decoder only; twice
4729     ALU    : S3(2);     // any 2 alus
4730 %}
4731 
4732 // Integer ALU reg-reg operation
4733 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4734     single_instruction;
4735     dst    : S4(write);
4736     src    : S3(read);
4737     DECODE : S0;        // any decoder
4738     ALU    : S3;        // any alu
4739 %}
4740 
4741 // Long ALU reg-reg operation
4742 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4743     instruction_count(2);
4744     dst    : S4(write);
4745     src    : S3(read);
4746     DECODE : S0(2);     // any 2 decoders
4747     ALU    : S3(2);     // both alus
4748 %}
4749 
4750 // Integer ALU reg-reg operation
4751 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4752     single_instruction;
4753     dst    : S4(write);
4754     src    : S3(read);
4755     D0     : S0;        // big decoder only
4756     ALU    : S3;        // any alu
4757 %}
4758 
4759 // Long ALU reg-reg operation
4760 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4761     instruction_count(2);
4762     dst    : S4(write);
4763     src    : S3(read);
4764     D0     : S0(2);     // big decoder only; twice
4765     ALU    : S3(2);     // both alus
4766 %}
4767 
4768 // Integer ALU reg-mem operation
4769 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4770     single_instruction;
4771     dst    : S5(write);
4772     mem    : S3(read);
4773     D0     : S0;        // big decoder only
4774     ALU    : S4;        // any alu
4775     MEM    : S3;        // any mem
4776 %}
4777 
4778 // Long ALU reg-mem operation
4779 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4780     instruction_count(2);
4781     dst    : S5(write);
4782     mem    : S3(read);
4783     D0     : S0(2);     // big decoder only; twice
4784     ALU    : S4(2);     // any 2 alus
4785     MEM    : S3(2);     // both mems
4786 %}
4787 
4788 // Integer mem operation (prefetch)
4789 pipe_class ialu_mem(memory mem)
4790 %{
4791     single_instruction;
4792     mem    : S3(read);
4793     D0     : S0;        // big decoder only
4794     MEM    : S3;        // any mem
4795 %}
4796 
4797 // Integer Store to Memory
4798 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4799     single_instruction;
4800     mem    : S3(read);
4801     src    : S5(read);
4802     D0     : S0;        // big decoder only
4803     ALU    : S4;        // any alu
4804     MEM    : S3;
4805 %}
4806 
4807 // Long Store to Memory
4808 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4809     instruction_count(2);
4810     mem    : S3(read);
4811     src    : S5(read);
4812     D0     : S0(2);     // big decoder only; twice
4813     ALU    : S4(2);     // any 2 alus
4814     MEM    : S3(2);     // Both mems
4815 %}
4816 
4817 // Integer Store to Memory
4818 pipe_class ialu_mem_imm(memory mem) %{
4819     single_instruction;
4820     mem    : S3(read);
4821     D0     : S0;        // big decoder only
4822     ALU    : S4;        // any alu
4823     MEM    : S3;
4824 %}
4825 
4826 // Integer ALU0 reg-reg operation
4827 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4828     single_instruction;
4829     dst    : S4(write);
4830     src    : S3(read);
4831     D0     : S0;        // Big decoder only
4832     ALU0   : S3;        // only alu0
4833 %}
4834 
4835 // Integer ALU0 reg-mem operation
4836 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4837     single_instruction;
4838     dst    : S5(write);
4839     mem    : S3(read);
4840     D0     : S0;        // big decoder only
4841     ALU0   : S4;        // ALU0 only
4842     MEM    : S3;        // any mem
4843 %}
4844 
4845 // Integer ALU reg-reg operation
4846 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4847     single_instruction;
4848     cr     : S4(write);
4849     src1   : S3(read);
4850     src2   : S3(read);
4851     DECODE : S0;        // any decoder
4852     ALU    : S3;        // any alu
4853 %}
4854 
4855 // Integer ALU reg-imm operation
4856 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4857     single_instruction;
4858     cr     : S4(write);
4859     src1   : S3(read);
4860     DECODE : S0;        // any decoder
4861     ALU    : S3;        // any alu
4862 %}
4863 
4864 // Integer ALU reg-mem operation
4865 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4866     single_instruction;
4867     cr     : S4(write);
4868     src1   : S3(read);
4869     src2   : S3(read);
4870     D0     : S0;        // big decoder only
4871     ALU    : S4;        // any alu
4872     MEM    : S3;
4873 %}
4874 
4875 // Conditional move reg-reg
4876 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4877     instruction_count(4);
4878     y      : S4(read);
4879     q      : S3(read);
4880     p      : S3(read);
4881     DECODE : S0(4);     // any decoder
4882 %}
4883 
4884 // Conditional move reg-reg
4885 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4886     single_instruction;
4887     dst    : S4(write);
4888     src    : S3(read);
4889     cr     : S3(read);
4890     DECODE : S0;        // any decoder
4891 %}
4892 
4893 // Conditional move reg-mem
4894 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4895     single_instruction;
4896     dst    : S4(write);
4897     src    : S3(read);
4898     cr     : S3(read);
4899     DECODE : S0;        // any decoder
4900     MEM    : S3;
4901 %}
4902 
4903 // Conditional move reg-reg long
4904 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4905     single_instruction;
4906     dst    : S4(write);
4907     src    : S3(read);
4908     cr     : S3(read);
4909     DECODE : S0(2);     // any 2 decoders
4910 %}
4911 
4912 // Conditional move double reg-reg
4913 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4914     single_instruction;
4915     dst    : S4(write);
4916     src    : S3(read);
4917     cr     : S3(read);
4918     DECODE : S0;        // any decoder
4919 %}
4920 
4921 // Float reg-reg operation
4922 pipe_class fpu_reg(regDPR dst) %{
4923     instruction_count(2);
4924     dst    : S3(read);
4925     DECODE : S0(2);     // any 2 decoders
4926     FPU    : S3;
4927 %}
4928 
4929 // Float reg-reg operation
4930 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4931     instruction_count(2);
4932     dst    : S4(write);
4933     src    : S3(read);
4934     DECODE : S0(2);     // any 2 decoders
4935     FPU    : S3;
4936 %}
4937 
4938 // Float reg-reg operation
4939 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4940     instruction_count(3);
4941     dst    : S4(write);
4942     src1   : S3(read);
4943     src2   : S3(read);
4944     DECODE : S0(3);     // any 3 decoders
4945     FPU    : S3(2);
4946 %}
4947 
4948 // Float reg-reg operation
4949 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4950     instruction_count(4);
4951     dst    : S4(write);
4952     src1   : S3(read);
4953     src2   : S3(read);
4954     src3   : S3(read);
4955     DECODE : S0(4);     // 4 decode slots
4956     FPU    : S3(2);
4957 %}
4958 
4959 // Float reg-reg operation
4960 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4961     instruction_count(4);
4962     dst    : S4(write);
4963     src1   : S3(read);
4964     src2   : S3(read);
4965     src3   : S3(read);
4966     DECODE : S1(3);     // any 3 decoders
4967     D0     : S0;        // Big decoder only
4968     FPU    : S3(2);
4969     MEM    : S3;
4970 %}
4971 
4972 // Float reg-mem operation
4973 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4974     instruction_count(2);
4975     dst    : S5(write);
4976     mem    : S3(read);
4977     D0     : S0;        // big decoder only
4978     DECODE : S1;        // any decoder for FPU POP
4979     FPU    : S4;
4980     MEM    : S3;        // any mem
4981 %}
4982 
4983 // Float reg-mem operation
4984 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4985     instruction_count(3);
4986     dst    : S5(write);
4987     src1   : S3(read);
4988     mem    : S3(read);
4989     D0     : S0;        // big decoder only
4990     DECODE : S1(2);     // any decoder for FPU POP
4991     FPU    : S4;
4992     MEM    : S3;        // any mem
4993 %}
4994 
4995 // Float mem-reg operation
4996 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4997     instruction_count(2);
4998     src    : S5(read);
4999     mem    : S3(read);
5000     DECODE : S0;        // any decoder for FPU PUSH
5001     D0     : S1;        // big decoder only
5002     FPU    : S4;
5003     MEM    : S3;        // any mem
5004 %}
5005 
5006 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5007     instruction_count(3);
5008     src1   : S3(read);
5009     src2   : S3(read);
5010     mem    : S3(read);
5011     DECODE : S0(2);     // any decoder for FPU PUSH
5012     D0     : S1;        // big decoder only
5013     FPU    : S4;
5014     MEM    : S3;        // any mem
5015 %}
5016 
5017 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5018     instruction_count(3);
5019     src1   : S3(read);
5020     src2   : S3(read);
5021     mem    : S4(read);
5022     DECODE : S0;        // any decoder for FPU PUSH
5023     D0     : S0(2);     // big decoder only
5024     FPU    : S4;
5025     MEM    : S3(2);     // any mem
5026 %}
5027 
5028 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5029     instruction_count(2);
5030     src1   : S3(read);
5031     dst    : S4(read);
5032     D0     : S0(2);     // big decoder only
5033     MEM    : S3(2);     // any mem
5034 %}
5035 
5036 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5037     instruction_count(3);
5038     src1   : S3(read);
5039     src2   : S3(read);
5040     dst    : S4(read);
5041     D0     : S0(3);     // big decoder only
5042     FPU    : S4;
5043     MEM    : S3(3);     // any mem
5044 %}
5045 
5046 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5047     instruction_count(3);
5048     src1   : S4(read);
5049     mem    : S4(read);
5050     DECODE : S0;        // any decoder for FPU PUSH
5051     D0     : S0(2);     // big decoder only
5052     FPU    : S4;
5053     MEM    : S3(2);     // any mem
5054 %}
5055 
5056 // Float load constant
5057 pipe_class fpu_reg_con(regDPR dst) %{
5058     instruction_count(2);
5059     dst    : S5(write);
5060     D0     : S0;        // big decoder only for the load
5061     DECODE : S1;        // any decoder for FPU POP
5062     FPU    : S4;
5063     MEM    : S3;        // any mem
5064 %}
5065 
5066 // Float load constant
5067 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5068     instruction_count(3);
5069     dst    : S5(write);
5070     src    : S3(read);
5071     D0     : S0;        // big decoder only for the load
5072     DECODE : S1(2);     // any decoder for FPU POP
5073     FPU    : S4;
5074     MEM    : S3;        // any mem
5075 %}
5076 
5077 // UnConditional branch
5078 pipe_class pipe_jmp( label labl ) %{
5079     single_instruction;
5080     BR   : S3;
5081 %}
5082 
5083 // Conditional branch
5084 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5085     single_instruction;
5086     cr    : S1(read);
5087     BR    : S3;
5088 %}
5089 
5090 // Allocation idiom
5091 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5092     instruction_count(1); force_serialization;
5093     fixed_latency(6);
5094     heap_ptr : S3(read);
5095     DECODE   : S0(3);
5096     D0       : S2;
5097     MEM      : S3;
5098     ALU      : S3(2);
5099     dst      : S5(write);
5100     BR       : S5;
5101 %}
5102 
5103 // Generic big/slow expanded idiom
5104 pipe_class pipe_slow(  ) %{
5105     instruction_count(10); multiple_bundles; force_serialization;
5106     fixed_latency(100);
5107     D0  : S0(2);
5108     MEM : S3(2);
5109 %}
5110 
5111 // The real do-nothing guy
5112 pipe_class empty( ) %{
5113     instruction_count(0);
5114 %}
5115 
5116 // Define the class for the Nop node
5117 define %{
5118    MachNop = empty;
5119 %}
5120 
5121 %}
5122 
5123 //----------INSTRUCTIONS-------------------------------------------------------
5124 //
5125 // match      -- States which machine-independent subtree may be replaced
5126 //               by this instruction.
5127 // ins_cost   -- The estimated cost of this instruction is used by instruction
5128 //               selection to identify a minimum cost tree of machine
5129 //               instructions that matches a tree of machine-independent
5130 //               instructions.
5131 // format     -- A string providing the disassembly for this instruction.
5132 //               The value of an instruction's operand may be inserted
5133 //               by referring to it with a '$' prefix.
5134 // opcode     -- Three instruction opcodes may be provided.  These are referred
5135 //               to within an encode class as $primary, $secondary, and $tertiary
5136 //               respectively.  The primary opcode is commonly used to
5137 //               indicate the type of machine instruction, while secondary
5138 //               and tertiary are often used for prefix options or addressing
5139 //               modes.
5140 // ins_encode -- A list of encode classes with parameters. The encode class
5141 //               name must have been defined in an 'enc_class' specification
5142 //               in the encode section of the architecture description.
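//
// As an illustrative sketch only (not a definition added by this section), a
// minimal register-register add written in this style would read:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));     // replaces the ideal AddI subtree
//     effect(KILL cr);                   // the ALU op clobbers EFLAGS
//     format %{ "ADD    $dst,$src" %}    // disassembly string
//     opcode(0x03);                      // $primary opcode
//     ins_encode( OpcP, RegReg(dst,src) );
//     ins_pipe( ialu_reg_reg );          // pipeline class defined above
//   %}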
5143 
5144 //----------BSWAP-Instruction--------------------------------------------------
5145 instruct bytes_reverse_int(rRegI dst) %{
5146   match(Set dst (ReverseBytesI dst));
5147 
5148   format %{ "BSWAP  $dst" %}
5149   opcode(0x0F, 0xC8);
5150   ins_encode( OpcP, OpcSReg(dst) );
5151   ins_pipe( ialu_reg );
5152 %}
5153 
5154 instruct bytes_reverse_long(eRegL dst) %{
5155   match(Set dst (ReverseBytesL dst));
5156 
5157   format %{ "BSWAP  $dst.lo\n\t"
5158             "BSWAP  $dst.hi\n\t"
5159             "XCHG   $dst.lo $dst.hi" %}
5160 
5161   ins_cost(125);
5162   ins_encode( bswap_long_bytes(dst) );
5163   ins_pipe( ialu_reg_reg);
5164 %}
5165 
5166 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5167   match(Set dst (ReverseBytesUS dst));
5168   effect(KILL cr);
5169 
5170   format %{ "BSWAP  $dst\n\t"
5171             "SHR    $dst,16\n\t" %}
5172   ins_encode %{
5173     __ bswapl($dst$$Register);
5174     __ shrl($dst$$Register, 16);
5175   %}
5176   ins_pipe( ialu_reg );
5177 %}
5178 
5179 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5180   match(Set dst (ReverseBytesS dst));
5181   effect(KILL cr);
5182 
5183   format %{ "BSWAP  $dst\n\t"
5184             "SAR    $dst,16\n\t" %}
5185   ins_encode %{
5186     __ bswapl($dst$$Register);
5187     __ sarl($dst$$Register, 16);
5188   %}
5189   ins_pipe( ialu_reg );
5190 %}
5191 
5192 
5193 //---------- Zeros Count Instructions ------------------------------------------
5194 
5195 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5196   predicate(UseCountLeadingZerosInstruction);
5197   match(Set dst (CountLeadingZerosI src));
5198   effect(KILL cr);
5199 
5200   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5201   ins_encode %{
5202     __ lzcntl($dst$$Register, $src$$Register);
5203   %}
5204   ins_pipe(ialu_reg);
5205 %}
5206 
5207 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5208   predicate(!UseCountLeadingZerosInstruction);
5209   match(Set dst (CountLeadingZerosI src));
5210   effect(KILL cr);
5211 
5212   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5213             "JNZ    skip\n\t"
5214             "MOV    $dst, -1\n"
5215       "skip:\n\t"
5216             "NEG    $dst\n\t"
5217             "ADD    $dst, 31" %}
5218   ins_encode %{
5219     Register Rdst = $dst$$Register;
5220     Register Rsrc = $src$$Register;
5221     Label skip;
5222     __ bsrl(Rdst, Rsrc);
5223     __ jccb(Assembler::notZero, skip);
5224     __ movl(Rdst, -1);
5225     __ bind(skip);
5226     __ negl(Rdst);
5227     __ addl(Rdst, BitsPerInt - 1);
5228   %}
5229   ins_pipe(ialu_reg);
5230 %}
5231 
5232 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5233   predicate(UseCountLeadingZerosInstruction);
5234   match(Set dst (CountLeadingZerosL src));
5235   effect(TEMP dst, KILL cr);
5236 
5237   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5238             "JNC    done\n\t"
5239             "LZCNT  $dst, $src.lo\n\t"
5240             "ADD    $dst, 32\n"
5241       "done:" %}
5242   ins_encode %{
5243     Register Rdst = $dst$$Register;
5244     Register Rsrc = $src$$Register;
5245     Label done;
5246     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5247     __ jccb(Assembler::carryClear, done);
5248     __ lzcntl(Rdst, Rsrc);
5249     __ addl(Rdst, BitsPerInt);
5250     __ bind(done);
5251   %}
5252   ins_pipe(ialu_reg);
5253 %}
5254 
5255 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5256   predicate(!UseCountLeadingZerosInstruction);
5257   match(Set dst (CountLeadingZerosL src));
5258   effect(TEMP dst, KILL cr);
5259 
5260   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5261             "JZ     msw_is_zero\n\t"
5262             "ADD    $dst, 32\n\t"
5263             "JMP    not_zero\n"
5264       "msw_is_zero:\n\t"
5265             "BSR    $dst, $src.lo\n\t"
5266             "JNZ    not_zero\n\t"
5267             "MOV    $dst, -1\n"
5268       "not_zero:\n\t"
5269             "NEG    $dst\n\t"
5270             "ADD    $dst, 63\n" %}
5271   ins_encode %{
5272     Register Rdst = $dst$$Register;
5273     Register Rsrc = $src$$Register;
5274     Label msw_is_zero;
5275     Label not_zero;
5276     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5277     __ jccb(Assembler::zero, msw_is_zero);
5278     __ addl(Rdst, BitsPerInt);
5279     __ jmpb(not_zero);
5280     __ bind(msw_is_zero);
5281     __ bsrl(Rdst, Rsrc);
5282     __ jccb(Assembler::notZero, not_zero);
5283     __ movl(Rdst, -1);
5284     __ bind(not_zero);
5285     __ negl(Rdst);
5286     __ addl(Rdst, BitsPerLong - 1);
5287   %}
5288   ins_pipe(ialu_reg);
5289 %}
5290 
5291 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5292   predicate(UseCountTrailingZerosInstruction);
5293   match(Set dst (CountTrailingZerosI src));
5294   effect(KILL cr);
5295 
5296   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5297   ins_encode %{
5298     __ tzcntl($dst$$Register, $src$$Register);
5299   %}
5300   ins_pipe(ialu_reg);
5301 %}
5302 
5303 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5304   predicate(!UseCountTrailingZerosInstruction);
5305   match(Set dst (CountTrailingZerosI src));
5306   effect(KILL cr);
5307 
5308   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5309             "JNZ    done\n\t"
5310             "MOV    $dst, 32\n"
5311       "done:" %}
5312   ins_encode %{
5313     Register Rdst = $dst$$Register;
5314     Label done;
5315     __ bsfl(Rdst, $src$$Register);
5316     __ jccb(Assembler::notZero, done);
5317     __ movl(Rdst, BitsPerInt);
5318     __ bind(done);
5319   %}
5320   ins_pipe(ialu_reg);
5321 %}
5322 
5323 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5324   predicate(UseCountTrailingZerosInstruction);
5325   match(Set dst (CountTrailingZerosL src));
5326   effect(TEMP dst, KILL cr);
5327 
5328   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5329             "JNC    done\n\t"
5330             "TZCNT  $dst, $src.hi\n\t"
5331             "ADD    $dst, 32\n"
5332             "done:" %}
5333   ins_encode %{
5334     Register Rdst = $dst$$Register;
5335     Register Rsrc = $src$$Register;
5336     Label done;
5337     __ tzcntl(Rdst, Rsrc);
5338     __ jccb(Assembler::carryClear, done);
5339     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5340     __ addl(Rdst, BitsPerInt);
5341     __ bind(done);
5342   %}
5343   ins_pipe(ialu_reg);
5344 %}
5345 
5346 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5347   predicate(!UseCountTrailingZerosInstruction);
5348   match(Set dst (CountTrailingZerosL src));
5349   effect(TEMP dst, KILL cr);
5350 
5351   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5352             "JNZ    done\n\t"
5353             "BSF    $dst, $src.hi\n\t"
5354             "JNZ    msw_not_zero\n\t"
5355             "MOV    $dst, 32\n"
5356       "msw_not_zero:\n\t"
5357             "ADD    $dst, 32\n"
5358       "done:" %}
5359   ins_encode %{
5360     Register Rdst = $dst$$Register;
5361     Register Rsrc = $src$$Register;
5362     Label msw_not_zero;
5363     Label done;
5364     __ bsfl(Rdst, Rsrc);
5365     __ jccb(Assembler::notZero, done);
5366     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5367     __ jccb(Assembler::notZero, msw_not_zero);
5368     __ movl(Rdst, BitsPerInt);
5369     __ bind(msw_not_zero);
5370     __ addl(Rdst, BitsPerInt);
5371     __ bind(done);
5372   %}
5373   ins_pipe(ialu_reg);
5374 %}
5375 
5376 
5377 //---------- Population Count Instructions -------------------------------------
5378 
5379 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5380   predicate(UsePopCountInstruction);
5381   match(Set dst (PopCountI src));
5382   effect(KILL cr);
5383 
5384   format %{ "POPCNT $dst, $src" %}
5385   ins_encode %{
5386     __ popcntl($dst$$Register, $src$$Register);
5387   %}
5388   ins_pipe(ialu_reg);
5389 %}
5390 
5391 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5392   predicate(UsePopCountInstruction);
5393   match(Set dst (PopCountI (LoadI mem)));
5394   effect(KILL cr);
5395 
5396   format %{ "POPCNT $dst, $mem" %}
5397   ins_encode %{
5398     __ popcntl($dst$$Register, $mem$$Address);
5399   %}
5400   ins_pipe(ialu_reg);
5401 %}
5402 
5403 // Note: Long.bitCount(long) returns an int.
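// On 32-bit x86 the 64-bit count is formed as POPCNT($src.lo) + POPCNT($src.hi),
// with the 32-bit sum left in $dst.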
5404 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5405   predicate(UsePopCountInstruction);
5406   match(Set dst (PopCountL src));
5407   effect(KILL cr, TEMP tmp, TEMP dst);
5408 
5409   format %{ "POPCNT $dst, $src.lo\n\t"
5410             "POPCNT $tmp, $src.hi\n\t"
5411             "ADD    $dst, $tmp" %}
5412   ins_encode %{
5413     __ popcntl($dst$$Register, $src$$Register);
5414     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5415     __ addl($dst$$Register, $tmp$$Register);
5416   %}
5417   ins_pipe(ialu_reg);
5418 %}
5419 
5420 // Note: Long.bitCount(long) returns an int.
5421 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5422   predicate(UsePopCountInstruction);
5423   match(Set dst (PopCountL (LoadL mem)));
5424   effect(KILL cr, TEMP tmp, TEMP dst);
5425 
5426   format %{ "POPCNT $dst, $mem\n\t"
5427             "POPCNT $tmp, $mem+4\n\t"
5428             "ADD    $dst, $tmp" %}
5429   ins_encode %{
5430     //__ popcntl($dst$$Register, $mem$$Address$$first);
5431     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5432     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5433     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5434     __ addl($dst$$Register, $tmp$$Register);
5435   %}
5436   ins_pipe(ialu_reg);
5437 %}
5438 
5439 
5440 //----------Load/Store/Move Instructions---------------------------------------
5441 //----------Load Instructions--------------------------------------------------
5442 // Load Byte (8bit signed)
5443 instruct loadB(xRegI dst, memory mem) %{
5444   match(Set dst (LoadB mem));
5445 
5446   ins_cost(125);
5447   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5448 
5449   ins_encode %{
5450     __ movsbl($dst$$Register, $mem$$Address);
5451   %}
5452 
5453   ins_pipe(ialu_reg_mem);
5454 %}
5455 
5456 // Load Byte (8bit signed) into Long Register
5457 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5458   match(Set dst (ConvI2L (LoadB mem)));
5459   effect(KILL cr);
5460 
5461   ins_cost(375);
5462   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5463             "MOV    $dst.hi,$dst.lo\n\t"
5464             "SAR    $dst.hi,7" %}
5465 
5466   ins_encode %{
5467     __ movsbl($dst$$Register, $mem$$Address);
5468     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5469     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5470   %}
5471 
5472   ins_pipe(ialu_reg_mem);
5473 %}
5474 
5475 // Load Unsigned Byte (8bit UNsigned)
5476 instruct loadUB(xRegI dst, memory mem) %{
5477   match(Set dst (LoadUB mem));
5478 
5479   ins_cost(125);
5480   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5481 
5482   ins_encode %{
5483     __ movzbl($dst$$Register, $mem$$Address);
5484   %}
5485 
5486   ins_pipe(ialu_reg_mem);
5487 %}
5488 
5489 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5490 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5491   match(Set dst (ConvI2L (LoadUB mem)));
5492   effect(KILL cr);
5493 
5494   ins_cost(250);
5495   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5496             "XOR    $dst.hi,$dst.hi" %}
5497 
5498   ins_encode %{
5499     Register Rdst = $dst$$Register;
5500     __ movzbl(Rdst, $mem$$Address);
5501     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5502   %}
5503 
5504   ins_pipe(ialu_reg_mem);
5505 %}
5506 
5507 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5508 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5509   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5510   effect(KILL cr);
5511 
5512   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5513             "XOR    $dst.hi,$dst.hi\n\t"
5514             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5515   ins_encode %{
5516     Register Rdst = $dst$$Register;
5517     __ movzbl(Rdst, $mem$$Address);
5518     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5519     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5520   %}
5521   ins_pipe(ialu_reg_mem);
5522 %}
5523 
5524 // Load Short (16bit signed)
5525 instruct loadS(rRegI dst, memory mem) %{
5526   match(Set dst (LoadS mem));
5527 
5528   ins_cost(125);
5529   format %{ "MOVSX  $dst,$mem\t# short" %}
5530 
5531   ins_encode %{
5532     __ movswl($dst$$Register, $mem$$Address);
5533   %}
5534 
5535   ins_pipe(ialu_reg_mem);
5536 %}
5537 
5538 // Load Short (16 bit signed) to Byte (8 bit signed)
5539 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5540   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5541 
5542   ins_cost(125);
5543   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5544   ins_encode %{
5545     __ movsbl($dst$$Register, $mem$$Address);
5546   %}
5547   ins_pipe(ialu_reg_mem);
5548 %}
5549 
5550 // Load Short (16bit signed) into Long Register
5551 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5552   match(Set dst (ConvI2L (LoadS mem)));
5553   effect(KILL cr);
5554 
5555   ins_cost(375);
5556   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5557             "MOV    $dst.hi,$dst.lo\n\t"
5558             "SAR    $dst.hi,15" %}
5559 
5560   ins_encode %{
5561     __ movswl($dst$$Register, $mem$$Address);
5562     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5563     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5564   %}
5565 
5566   ins_pipe(ialu_reg_mem);
5567 %}
5568 
5569 // Load Unsigned Short/Char (16bit unsigned)
5570 instruct loadUS(rRegI dst, memory mem) %{
5571   match(Set dst (LoadUS mem));
5572 
5573   ins_cost(125);
5574   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5575 
5576   ins_encode %{
5577     __ movzwl($dst$$Register, $mem$$Address);
5578   %}
5579 
5580   ins_pipe(ialu_reg_mem);
5581 %}
5582 
5583 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5584 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5585   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5586 
5587   ins_cost(125);
5588   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5589   ins_encode %{
5590     __ movsbl($dst$$Register, $mem$$Address);
5591   %}
5592   ins_pipe(ialu_reg_mem);
5593 %}
5594 
5595 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5596 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5597   match(Set dst (ConvI2L (LoadUS mem)));
5598   effect(KILL cr);
5599 
5600   ins_cost(250);
5601   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5602             "XOR    $dst.hi,$dst.hi" %}
5603 
5604   ins_encode %{
5605     __ movzwl($dst$$Register, $mem$$Address);
5606     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5607   %}
5608 
5609   ins_pipe(ialu_reg_mem);
5610 %}
5611 
5612 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5613 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5614   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5615   effect(KILL cr);
5616 
5617   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5618             "XOR    $dst.hi,$dst.hi" %}
5619   ins_encode %{
5620     Register Rdst = $dst$$Register;
5621     __ movzbl(Rdst, $mem$$Address);
5622     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5623   %}
5624   ins_pipe(ialu_reg_mem);
5625 %}
5626 
5627 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5628 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5629   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5630   effect(KILL cr);
5631 
5632   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5633             "XOR    $dst.hi,$dst.hi\n\t"
5634             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5635   ins_encode %{
5636     Register Rdst = $dst$$Register;
5637     __ movzwl(Rdst, $mem$$Address);
5638     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5639     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5640   %}
5641   ins_pipe(ialu_reg_mem);
5642 %}
5643 
5644 // Load Integer
5645 instruct loadI(rRegI dst, memory mem) %{
5646   match(Set dst (LoadI mem));
5647 
5648   ins_cost(125);
5649   format %{ "MOV    $dst,$mem\t# int" %}
5650 
5651   ins_encode %{
5652     __ movl($dst$$Register, $mem$$Address);
5653   %}
5654 
5655   ins_pipe(ialu_reg_mem);
5656 %}
5657 
5658 // Load Integer (32 bit signed) to Byte (8 bit signed)
5659 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5660   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5661 
5662   ins_cost(125);
5663   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5664   ins_encode %{
5665     __ movsbl($dst$$Register, $mem$$Address);
5666   %}
5667   ins_pipe(ialu_reg_mem);
5668 %}
5669 
5670 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5671 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5672   match(Set dst (AndI (LoadI mem) mask));
5673 
5674   ins_cost(125);
5675   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5676   ins_encode %{
5677     __ movzbl($dst$$Register, $mem$$Address);
5678   %}
5679   ins_pipe(ialu_reg_mem);
5680 %}
5681 
5682 // Load Integer (32 bit signed) to Short (16 bit signed)
5683 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5684   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5685 
5686   ins_cost(125);
5687   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5688   ins_encode %{
5689     __ movswl($dst$$Register, $mem$$Address);
5690   %}
5691   ins_pipe(ialu_reg_mem);
5692 %}
5693 
5694 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5695 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5696   match(Set dst (AndI (LoadI mem) mask));
5697 
5698   ins_cost(125);
5699   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5700   ins_encode %{
5701     __ movzwl($dst$$Register, $mem$$Address);
5702   %}
5703   ins_pipe(ialu_reg_mem);
5704 %}
5705 
5706 // Load Integer into Long Register
5707 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5708   match(Set dst (ConvI2L (LoadI mem)));
5709   effect(KILL cr);
5710 
5711   ins_cost(375);
5712   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5713             "MOV    $dst.hi,$dst.lo\n\t"
5714             "SAR    $dst.hi,31" %}
5715 
5716   ins_encode %{
5717     __ movl($dst$$Register, $mem$$Address);
5718     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5719     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5720   %}
5721 
5722   ins_pipe(ialu_reg_mem);
5723 %}
5724 
5725 // Load Integer with mask 0xFF into Long Register
5726 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5727   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5728   effect(KILL cr);
5729 
5730   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5731             "XOR    $dst.hi,$dst.hi" %}
5732   ins_encode %{
5733     Register Rdst = $dst$$Register;
5734     __ movzbl(Rdst, $mem$$Address);
5735     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5736   %}
5737   ins_pipe(ialu_reg_mem);
5738 %}
5739 
5740 // Load Integer with mask 0xFFFF into Long Register
5741 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5742   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5743   effect(KILL cr);
5744 
5745   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5746             "XOR    $dst.hi,$dst.hi" %}
5747   ins_encode %{
5748     Register Rdst = $dst$$Register;
5749     __ movzwl(Rdst, $mem$$Address);
5750     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5751   %}
5752   ins_pipe(ialu_reg_mem);
5753 %}
5754 
5755 // Load Integer with 31-bit mask into Long Register
5756 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5757   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5758   effect(KILL cr);
5759 
5760   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5761             "XOR    $dst.hi,$dst.hi\n\t"
5762             "AND    $dst.lo,$mask" %}
5763   ins_encode %{
5764     Register Rdst = $dst$$Register;
5765     __ movl(Rdst, $mem$$Address);
5766     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5767     __ andl(Rdst, $mask$$constant);
5768   %}
5769   ins_pipe(ialu_reg_mem);
5770 %}
5771 
5772 // Load Unsigned Integer into Long Register
5773 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5774   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5775   effect(KILL cr);
5776 
5777   ins_cost(250);
5778   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5779             "XOR    $dst.hi,$dst.hi" %}
5780 
5781   ins_encode %{
5782     __ movl($dst$$Register, $mem$$Address);
5783     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5784   %}
5785 
5786   ins_pipe(ialu_reg_mem);
5787 %}
5788 
5789 // Load Long.  Cannot clobber address while loading, so restrict address
5790 // register to ESI
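// (The first MOV writes $dst.lo before the second MOV reads $mem+4, so the
// base register of $mem must not be part of the destination pair.)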
5791 instruct loadL(eRegL dst, load_long_memory mem) %{
5792   predicate(!((LoadLNode*)n)->require_atomic_access());
5793   match(Set dst (LoadL mem));
5794 
5795   ins_cost(250);
5796   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5797             "MOV    $dst.hi,$mem+4" %}
5798 
5799   ins_encode %{
5800     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5801     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5802     __ movl($dst$$Register, Amemlo);
5803     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5804   %}
5805 
5806   ins_pipe(ialu_reg_long_mem);
5807 %}
5808 
5809 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5810 // then store it down to the stack and reload on the int
5811 // side.
5812 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5813   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5814   match(Set dst (LoadL mem));
5815 
5816   ins_cost(200);
5817   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5818             "FISTp  $dst" %}
5819   ins_encode(enc_loadL_volatile(mem,dst));
5820   ins_pipe( fpu_reg_mem );
5821 %}
5822 
5823 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5824   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5825   match(Set dst (LoadL mem));
5826   effect(TEMP tmp);
5827   ins_cost(180);
5828   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5829             "MOVSD  $dst,$tmp" %}
5830   ins_encode %{
5831     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5832     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5833   %}
5834   ins_pipe( pipe_slow );
5835 %}
5836 
5837 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5838   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5839   match(Set dst (LoadL mem));
5840   effect(TEMP tmp);
5841   ins_cost(160);
5842   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5843             "MOVD   $dst.lo,$tmp\n\t"
5844             "PSRLQ  $tmp,32\n\t"
5845             "MOVD   $dst.hi,$tmp" %}
5846   ins_encode %{
5847     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5848     __ movdl($dst$$Register, $tmp$$XMMRegister);
5849     __ psrlq($tmp$$XMMRegister, 32);
5850     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5851   %}
5852   ins_pipe( pipe_slow );
5853 %}
5854 
5855 // Load Range
5856 instruct loadRange(rRegI dst, memory mem) %{
5857   match(Set dst (LoadRange mem));
5858 
5859   ins_cost(125);
5860   format %{ "MOV    $dst,$mem" %}
5861   opcode(0x8B);
5862   ins_encode( OpcP, RegMem(dst,mem));
5863   ins_pipe( ialu_reg_mem );
5864 %}
5865 
5866 
5867 // Load Pointer
5868 instruct loadP(eRegP dst, memory mem) %{
5869   match(Set dst (LoadP mem));
5870 
5871   ins_cost(125);
5872   format %{ "MOV    $dst,$mem" %}
5873   opcode(0x8B);
5874   ins_encode( OpcP, RegMem(dst,mem));
5875   ins_pipe( ialu_reg_mem );
5876 %}
5877 
5878 // Load Klass Pointer
5879 instruct loadKlass(eRegP dst, memory mem) %{
5880   match(Set dst (LoadKlass mem));
5881 
5882   ins_cost(125);
5883   format %{ "MOV    $dst,$mem" %}
5884   opcode(0x8B);
5885   ins_encode( OpcP, RegMem(dst,mem));
5886   ins_pipe( ialu_reg_mem );
5887 %}
5888 
5889 // Load Double
5890 instruct loadDPR(regDPR dst, memory mem) %{
5891   predicate(UseSSE<=1);
5892   match(Set dst (LoadD mem));
5893 
5894   ins_cost(150);
5895   format %{ "FLD_D  ST,$mem\n\t"
5896             "FSTP   $dst" %}
5897   opcode(0xDD);               /* DD /0 */
5898   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5899               Pop_Reg_DPR(dst) );
5900   ins_pipe( fpu_reg_mem );
5901 %}
5902 
5903 // Load Double to XMM
5904 instruct loadD(regD dst, memory mem) %{
5905   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5906   match(Set dst (LoadD mem));
5907   ins_cost(145);
5908   format %{ "MOVSD  $dst,$mem" %}
5909   ins_encode %{
5910     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5911   %}
5912   ins_pipe( pipe_slow );
5913 %}
5914 
5915 instruct loadD_partial(regD dst, memory mem) %{
5916   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5917   match(Set dst (LoadD mem));
5918   ins_cost(145);
5919   format %{ "MOVLPD $dst,$mem" %}
5920   ins_encode %{
5921     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5922   %}
5923   ins_pipe( pipe_slow );
5924 %}
5925 
5926 // Load to XMM register (single-precision floating point)
5927 // MOVSS instruction
5928 instruct loadF(regF dst, memory mem) %{
5929   predicate(UseSSE>=1);
5930   match(Set dst (LoadF mem));
5931   ins_cost(145);
5932   format %{ "MOVSS  $dst,$mem" %}
5933   ins_encode %{
5934     __ movflt ($dst$$XMMRegister, $mem$$Address);
5935   %}
5936   ins_pipe( pipe_slow );
5937 %}
5938 
5939 // Load Float
5940 instruct loadFPR(regFPR dst, memory mem) %{
5941   predicate(UseSSE==0);
5942   match(Set dst (LoadF mem));
5943 
5944   ins_cost(150);
5945   format %{ "FLD_S  ST,$mem\n\t"
5946             "FSTP   $dst" %}
5947   opcode(0xD9);               /* D9 /0 */
5948   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5949               Pop_Reg_FPR(dst) );
5950   ins_pipe( fpu_reg_mem );
5951 %}
5952 
5953 // Load Effective Address
5954 instruct leaP8(eRegP dst, indOffset8 mem) %{
5955   match(Set dst mem);
5956 
5957   ins_cost(110);
5958   format %{ "LEA    $dst,$mem" %}
5959   opcode(0x8D);
5960   ins_encode( OpcP, RegMem(dst,mem));
5961   ins_pipe( ialu_reg_reg_fat );
5962 %}
5963 
5964 instruct leaP32(eRegP dst, indOffset32 mem) %{
5965   match(Set dst mem);
5966 
5967   ins_cost(110);
5968   format %{ "LEA    $dst,$mem" %}
5969   opcode(0x8D);
5970   ins_encode( OpcP, RegMem(dst,mem));
5971   ins_pipe( ialu_reg_reg_fat );
5972 %}
5973 
5974 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5975   match(Set dst mem);
5976 
5977   ins_cost(110);
5978   format %{ "LEA    $dst,$mem" %}
5979   opcode(0x8D);
5980   ins_encode( OpcP, RegMem(dst,mem));
5981   ins_pipe( ialu_reg_reg_fat );
5982 %}
5983 
5984 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5985   match(Set dst mem);
5986 
5987   ins_cost(110);
5988   format %{ "LEA    $dst,$mem" %}
5989   opcode(0x8D);
5990   ins_encode( OpcP, RegMem(dst,mem));
5991   ins_pipe( ialu_reg_reg_fat );
5992 %}
5993 
5994 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5995   match(Set dst mem);
5996 
5997   ins_cost(110);
5998   format %{ "LEA    $dst,$mem" %}
5999   opcode(0x8D);
6000   ins_encode( OpcP, RegMem(dst,mem));
6001   ins_pipe( ialu_reg_reg_fat );
6002 %}
6003 
6004 // Load Constant
6005 instruct loadConI(rRegI dst, immI src) %{
6006   match(Set dst src);
6007 
6008   format %{ "MOV    $dst,$src" %}
6009   ins_encode( LdImmI(dst, src) );
6010   ins_pipe( ialu_reg_fat );
6011 %}
6012 
6013 // Load Constant zero
6014 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6015   match(Set dst src);
6016   effect(KILL cr);
6017 
6018   ins_cost(50);
6019   format %{ "XOR    $dst,$dst" %}
6020   opcode(0x33);  /* + rd */
6021   ins_encode( OpcP, RegReg( dst, dst ) );
6022   ins_pipe( ialu_reg );
6023 %}
6024 
6025 instruct loadConP(eRegP dst, immP src) %{
6026   match(Set dst src);
6027 
6028   format %{ "MOV    $dst,$src" %}
6029   opcode(0xB8);  /* + rd */
6030   ins_encode( LdImmP(dst, src) );
6031   ins_pipe( ialu_reg_fat );
6032 %}
6033 
6034 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6035   match(Set dst src);
6036   effect(KILL cr);
6037   ins_cost(200);
6038   format %{ "MOV    $dst.lo,$src.lo\n\t"
6039             "MOV    $dst.hi,$src.hi" %}
6040   opcode(0xB8);
6041   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6042   ins_pipe( ialu_reg_long_fat );
6043 %}
6044 
6045 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6046   match(Set dst src);
6047   effect(KILL cr);
6048   ins_cost(150);
6049   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6050             "XOR    $dst.hi,$dst.hi" %}
6051   opcode(0x33,0x33);
6052   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6053   ins_pipe( ialu_reg_long );
6054 %}
6055 
6056 // The instruction usage is guarded by predicate in operand immFPR().
6057 instruct loadConFPR(regFPR dst, immFPR con) %{
6058   match(Set dst con);
6059   ins_cost(125);
6060   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6061             "FSTP   $dst" %}
6062   ins_encode %{
6063     __ fld_s($constantaddress($con));
6064     __ fstp_d($dst$$reg);
6065   %}
6066   ins_pipe(fpu_reg_con);
6067 %}
6068 
6069 // The instruction usage is guarded by predicate in operand immFPR0().
6070 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6071   match(Set dst con);
6072   ins_cost(125);
6073   format %{ "FLDZ   ST\n\t"
6074             "FSTP   $dst" %}
6075   ins_encode %{
6076     __ fldz();
6077     __ fstp_d($dst$$reg);
6078   %}
6079   ins_pipe(fpu_reg_con);
6080 %}
6081 
6082 // The instruction usage is guarded by predicate in operand immFPR1().
6083 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6084   match(Set dst con);
6085   ins_cost(125);
6086   format %{ "FLD1   ST\n\t"
6087             "FSTP   $dst" %}
6088   ins_encode %{
6089     __ fld1();
6090     __ fstp_d($dst$$reg);
6091   %}
6092   ins_pipe(fpu_reg_con);
6093 %}
6094 
6095 // The instruction usage is guarded by predicate in operand immF().
6096 instruct loadConF(regF dst, immF con) %{
6097   match(Set dst con);
6098   ins_cost(125);
6099   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6100   ins_encode %{
6101     __ movflt($dst$$XMMRegister, $constantaddress($con));
6102   %}
6103   ins_pipe(pipe_slow);
6104 %}
6105 
6106 // The instruction usage is guarded by predicate in operand immF0().
6107 instruct loadConF0(regF dst, immF0 src) %{
6108   match(Set dst src);
6109   ins_cost(100);
6110   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6111   ins_encode %{
6112     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6113   %}
6114   ins_pipe(pipe_slow);
6115 %}
6116 
6117 // The instruction usage is guarded by predicate in operand immDPR().
6118 instruct loadConDPR(regDPR dst, immDPR con) %{
6119   match(Set dst con);
6120   ins_cost(125);
6121 
6122   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6123             "FSTP   $dst" %}
6124   ins_encode %{
6125     __ fld_d($constantaddress($con));
6126     __ fstp_d($dst$$reg);
6127   %}
6128   ins_pipe(fpu_reg_con);
6129 %}
6130 
6131 // The instruction usage is guarded by predicate in operand immDPR0().
6132 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6133   match(Set dst con);
6134   ins_cost(125);
6135 
6136   format %{ "FLDZ   ST\n\t"
6137             "FSTP   $dst" %}
6138   ins_encode %{
6139     __ fldz();
6140     __ fstp_d($dst$$reg);
6141   %}
6142   ins_pipe(fpu_reg_con);
6143 %}
6144 
6145 // The instruction usage is guarded by predicate in operand immDPR1().
6146 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6147   match(Set dst con);
6148   ins_cost(125);
6149 
6150   format %{ "FLD1   ST\n\t"
6151             "FSTP   $dst" %}
6152   ins_encode %{
6153     __ fld1();
6154     __ fstp_d($dst$$reg);
6155   %}
6156   ins_pipe(fpu_reg_con);
6157 %}
6158 
6159 // The instruction usage is guarded by predicate in operand immD().
6160 instruct loadConD(regD dst, immD con) %{
6161   match(Set dst con);
6162   ins_cost(125);
6163   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6164   ins_encode %{
6165     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6166   %}
6167   ins_pipe(pipe_slow);
6168 %}
6169 
6170 // The instruction usage is guarded by predicate in operand immD0().
6171 instruct loadConD0(regD dst, immD0 src) %{
6172   match(Set dst src);
6173   ins_cost(100);
6174   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6175   ins_encode %{
6176     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6177   %}
6178   ins_pipe( pipe_slow );
6179 %}
6180 
6181 // Load Stack Slot
6182 instruct loadSSI(rRegI dst, stackSlotI src) %{
6183   match(Set dst src);
6184   ins_cost(125);
6185 
6186   format %{ "MOV    $dst,$src" %}
6187   opcode(0x8B);
6188   ins_encode( OpcP, RegMem(dst,src));
6189   ins_pipe( ialu_reg_mem );
6190 %}
6191 
6192 instruct loadSSL(eRegL dst, stackSlotL src) %{
6193   match(Set dst src);
6194 
6195   ins_cost(200);
6196   format %{ "MOV    $dst,$src.lo\n\t"
6197             "MOV    $dst+4,$src.hi" %}
6198   opcode(0x8B, 0x8B);
6199   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6200   ins_pipe( ialu_mem_long_reg );
6201 %}
6202 
6203 // Load Stack Slot
6204 instruct loadSSP(eRegP dst, stackSlotP src) %{
6205   match(Set dst src);
6206   ins_cost(125);
6207 
6208   format %{ "MOV    $dst,$src" %}
6209   opcode(0x8B);
6210   ins_encode( OpcP, RegMem(dst,src));
6211   ins_pipe( ialu_reg_mem );
6212 %}
6213 
6214 // Load Stack Slot
6215 instruct loadSSF(regFPR dst, stackSlotF src) %{
6216   match(Set dst src);
6217   ins_cost(125);
6218 
6219   format %{ "FLD_S  $src\n\t"
6220             "FSTP   $dst" %}
6221   opcode(0xD9);               /* D9 /0, FLD m32real */
6222   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6223               Pop_Reg_FPR(dst) );
6224   ins_pipe( fpu_reg_mem );
6225 %}
6226 
6227 // Load Stack Slot
6228 instruct loadSSD(regDPR dst, stackSlotD src) %{
6229   match(Set dst src);
6230   ins_cost(125);
6231 
6232   format %{ "FLD_D  $src\n\t"
6233             "FSTP   $dst" %}
6234   opcode(0xDD);               /* DD /0, FLD m64real */
6235   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6236               Pop_Reg_DPR(dst) );
6237   ins_pipe( fpu_reg_mem );
6238 %}
6239 
6240 // Prefetch instructions for allocation.
6241 // Must be safe to execute with invalid address (cannot fault).
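// AllocatePrefetchInstr selects the variant below: 0 = PREFETCHNTA,
// 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW; with UseSSE==0 and
// AllocatePrefetchInstr!=3 the prefetch degenerates to an empty encoding.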
6242 
6243 instruct prefetchAlloc0( memory mem ) %{
6244   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6245   match(PrefetchAllocation mem);
6246   ins_cost(0);
6247   size(0);
6248   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6249   ins_encode();
6250   ins_pipe(empty);
6251 %}
6252 
6253 instruct prefetchAlloc( memory mem ) %{
6254   predicate(AllocatePrefetchInstr==3);
6255   match( PrefetchAllocation mem );
6256   ins_cost(100);
6257 
6258   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6259   ins_encode %{
6260     __ prefetchw($mem$$Address);
6261   %}
6262   ins_pipe(ialu_mem);
6263 %}
6264 
6265 instruct prefetchAllocNTA( memory mem ) %{
6266   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6267   match(PrefetchAllocation mem);
6268   ins_cost(100);
6269 
6270   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6271   ins_encode %{
6272     __ prefetchnta($mem$$Address);
6273   %}
6274   ins_pipe(ialu_mem);
6275 %}
6276 
6277 instruct prefetchAllocT0( memory mem ) %{
6278   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6279   match(PrefetchAllocation mem);
6280   ins_cost(100);
6281 
6282   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6283   ins_encode %{
6284     __ prefetcht0($mem$$Address);
6285   %}
6286   ins_pipe(ialu_mem);
6287 %}
6288 
6289 instruct prefetchAllocT2( memory mem ) %{
6290   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6291   match(PrefetchAllocation mem);
6292   ins_cost(100);
6293 
6294   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6295   ins_encode %{
6296     __ prefetcht2($mem$$Address);
6297   %}
6298   ins_pipe(ialu_mem);
6299 %}
6300 
6301 //----------Store Instructions-------------------------------------------------
6302 
6303 // Store Byte
6304 instruct storeB(memory mem, xRegI src) %{
6305   match(Set mem (StoreB mem src));
6306 
6307   ins_cost(125);
6308   format %{ "MOV8   $mem,$src" %}
6309   opcode(0x88);
6310   ins_encode( OpcP, RegMem( src, mem ) );
6311   ins_pipe( ialu_mem_reg );
6312 %}
6313 
6314 // Store Char/Short
6315 instruct storeC(memory mem, rRegI src) %{
6316   match(Set mem (StoreC mem src));
6317 
6318   ins_cost(125);
6319   format %{ "MOV16  $mem,$src" %}
6320   opcode(0x89, 0x66);
6321   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6322   ins_pipe( ialu_mem_reg );
6323 %}
6324 
6325 // Store Integer
6326 instruct storeI(memory mem, rRegI src) %{
6327   match(Set mem (StoreI mem src));
6328 
6329   ins_cost(125);
6330   format %{ "MOV    $mem,$src" %}
6331   opcode(0x89);
6332   ins_encode( OpcP, RegMem( src, mem ) );
6333   ins_pipe( ialu_mem_reg );
6334 %}
6335 
6336 // Store Long
6337 instruct storeL(long_memory mem, eRegL src) %{
6338   predicate(!((StoreLNode*)n)->require_atomic_access());
6339   match(Set mem (StoreL mem src));
6340 
6341   ins_cost(200);
6342   format %{ "MOV    $mem,$src.lo\n\t"
6343             "MOV    $mem+4,$src.hi" %}
6344   opcode(0x89, 0x89);
6345   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6346   ins_pipe( ialu_mem_long_reg );
6347 %}
6348 
6349 // Store Long to Integer
6350 instruct storeL2I(memory mem, eRegL src) %{
6351   match(Set mem (StoreI mem (ConvL2I src)));
6352 
6353   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6354   ins_encode %{
6355     __ movl($mem$$Address, $src$$Register);
6356   %}
6357   ins_pipe(ialu_mem_reg);
6358 %}
6359 
6360 // Volatile Store Long.  Must be atomic, so move it into
6361 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6362 // target address before the store (for null-ptr checks)
6363 // so the memory operand is used twice in the encoding.
6364 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6365   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6366   match(Set mem (StoreL mem src));
6367   effect( KILL cr );
6368   ins_cost(400);
6369   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6370             "FILD   $src\n\t"
6371             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6372   opcode(0x3B);
6373   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6374   ins_pipe( fpu_reg_mem );
6375 %}
6376 
6377 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6378   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6379   match(Set mem (StoreL mem src));
6380   effect( TEMP tmp, KILL cr );
6381   ins_cost(380);
6382   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6383             "MOVSD  $tmp,$src\n\t"
6384             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6385   ins_encode %{
6386     __ cmpl(rax, $mem$$Address);
6387     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6388     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6389   %}
6390   ins_pipe( pipe_slow );
6391 %}
6392 
6393 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6394   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6395   match(Set mem (StoreL mem src));
6396   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6397   ins_cost(360);
6398   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6399             "MOVD   $tmp,$src.lo\n\t"
6400             "MOVD   $tmp2,$src.hi\n\t"
6401             "PUNPCKLDQ $tmp,$tmp2\n\t"
6402             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6403   ins_encode %{
6404     __ cmpl(rax, $mem$$Address);
6405     __ movdl($tmp$$XMMRegister, $src$$Register);
6406     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6407     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6408     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6409   %}
6410   ins_pipe( pipe_slow );
6411 %}
6412 
6413 // Store Pointer; for storing unknown oops and raw pointers
6414 instruct storeP(memory mem, anyRegP src) %{
6415   match(Set mem (StoreP mem src));
6416 
6417   ins_cost(125);
6418   format %{ "MOV    $mem,$src" %}
6419   opcode(0x89);
6420   ins_encode( OpcP, RegMem( src, mem ) );
6421   ins_pipe( ialu_mem_reg );
6422 %}
6423 
6424 // Store Integer Immediate
6425 instruct storeImmI(memory mem, immI src) %{
6426   match(Set mem (StoreI mem src));
6427 
6428   ins_cost(150);
6429   format %{ "MOV    $mem,$src" %}
6430   opcode(0xC7);               /* C7 /0 */
6431   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6432   ins_pipe( ialu_mem_imm );
6433 %}
6434 
6435 // Store Short/Char Immediate
6436 instruct storeImmI16(memory mem, immI16 src) %{
6437   predicate(UseStoreImmI16);
6438   match(Set mem (StoreC mem src));
6439 
6440   ins_cost(150);
6441   format %{ "MOV16  $mem,$src" %}
6442   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6443   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6444   ins_pipe( ialu_mem_imm );
6445 %}
6446 
6447 // Store Pointer Immediate; null pointers or constant oops that do not
6448 // need card-mark barriers.
6449 instruct storeImmP(memory mem, immP src) %{
6450   match(Set mem (StoreP mem src));
6451 
6452   ins_cost(150);
6453   format %{ "MOV    $mem,$src" %}
6454   opcode(0xC7);               /* C7 /0 */
6455   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6456   ins_pipe( ialu_mem_imm );
6457 %}
6458 
6459 // Store Byte Immediate
6460 instruct storeImmB(memory mem, immI8 src) %{
6461   match(Set mem (StoreB mem src));
6462 
6463   ins_cost(150);
6464   format %{ "MOV8   $mem,$src" %}
6465   opcode(0xC6);               /* C6 /0 */
6466   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6467   ins_pipe( ialu_mem_imm );
6468 %}
6469 
6470 // Store CMS card-mark Immediate
6471 instruct storeImmCM(memory mem, immI8 src) %{
6472   match(Set mem (StoreCM mem src));
6473 
6474   ins_cost(150);
6475   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6476   opcode(0xC6);               /* C6 /0 */
6477   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6478   ins_pipe( ialu_mem_imm );
6479 %}
6480 
6481 // Store Double
6482 instruct storeDPR( memory mem, regDPR1 src) %{
6483   predicate(UseSSE<=1);
6484   match(Set mem (StoreD mem src));
6485 
6486   ins_cost(100);
6487   format %{ "FST_D  $mem,$src" %}
6488   opcode(0xDD);       /* DD /2 */
6489   ins_encode( enc_FPR_store(mem,src) );
6490   ins_pipe( fpu_mem_reg );
6491 %}
6492 
6493 // Store double does rounding on x86
6494 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6495   predicate(UseSSE<=1);
6496   match(Set mem (StoreD mem (RoundDouble src)));
6497 
6498   ins_cost(100);
6499   format %{ "FST_D  $mem,$src\t# round" %}
6500   opcode(0xDD);       /* DD /2 */
6501   ins_encode( enc_FPR_store(mem,src) );
6502   ins_pipe( fpu_mem_reg );
6503 %}
6504 
// Store XMM register to memory (double-precision floating point)
6506 // MOVSD instruction
6507 instruct storeD(memory mem, regD src) %{
6508   predicate(UseSSE>=2);
6509   match(Set mem (StoreD mem src));
6510   ins_cost(95);
6511   format %{ "MOVSD  $mem,$src" %}
6512   ins_encode %{
6513     __ movdbl($mem$$Address, $src$$XMMRegister);
6514   %}
6515   ins_pipe( pipe_slow );
6516 %}
6517 
6518 // Store XMM register to memory (single-precision floating point)
6519 // MOVSS instruction
6520 instruct storeF(memory mem, regF src) %{
6521   predicate(UseSSE>=1);
6522   match(Set mem (StoreF mem src));
6523   ins_cost(95);
6524   format %{ "MOVSS  $mem,$src" %}
6525   ins_encode %{
6526     __ movflt($mem$$Address, $src$$XMMRegister);
6527   %}
6528   ins_pipe( pipe_slow );
6529 %}
6530 
6531 // Store Float
6532 instruct storeFPR( memory mem, regFPR1 src) %{
6533   predicate(UseSSE==0);
6534   match(Set mem (StoreF mem src));
6535 
6536   ins_cost(100);
6537   format %{ "FST_S  $mem,$src" %}
6538   opcode(0xD9);       /* D9 /2 */
6539   ins_encode( enc_FPR_store(mem,src) );
6540   ins_pipe( fpu_mem_reg );
6541 %}
6542 
6543 // Store Float does rounding on x86
6544 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6545   predicate(UseSSE==0);
6546   match(Set mem (StoreF mem (RoundFloat src)));
6547 
6548   ins_cost(100);
6549   format %{ "FST_S  $mem,$src\t# round" %}
6550   opcode(0xD9);       /* D9 /2 */
6551   ins_encode( enc_FPR_store(mem,src) );
6552   ins_pipe( fpu_mem_reg );
6553 %}
6554 
// Store Float from a double register; the store rounds the double to float precision on x86
6556 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6557   predicate(UseSSE<=1);
6558   match(Set mem (StoreF mem (ConvD2F src)));
6559 
6560   ins_cost(100);
6561   format %{ "FST_S  $mem,$src\t# D-round" %}
6562   opcode(0xD9);       /* D9 /2 */
6563   ins_encode( enc_FPR_store(mem,src) );
6564   ins_pipe( fpu_mem_reg );
6565 %}
6566 
6567 // Store immediate Float value (it is faster than store from FPU register)
6568 // The instruction usage is guarded by predicate in operand immFPR().
6569 instruct storeFPR_imm( memory mem, immFPR src) %{
6570   match(Set mem (StoreF mem src));
6571 
6572   ins_cost(50);
6573   format %{ "MOV    $mem,$src\t# store float" %}
6574   opcode(0xC7);               /* C7 /0 */
6575   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6576   ins_pipe( ialu_mem_imm );
6577 %}
6578 
6579 // Store immediate Float value (it is faster than store from XMM register)
6580 // The instruction usage is guarded by predicate in operand immF().
6581 instruct storeF_imm( memory mem, immF src) %{
6582   match(Set mem (StoreF mem src));
6583 
6584   ins_cost(50);
6585   format %{ "MOV    $mem,$src\t# store float" %}
6586   opcode(0xC7);               /* C7 /0 */
6587   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6588   ins_pipe( ialu_mem_imm );
6589 %}
6590 
6591 // Store Integer to stack slot
6592 instruct storeSSI(stackSlotI dst, rRegI src) %{
6593   match(Set dst src);
6594 
6595   ins_cost(100);
6596   format %{ "MOV    $dst,$src" %}
6597   opcode(0x89);
6598   ins_encode( OpcPRegSS( dst, src ) );
6599   ins_pipe( ialu_mem_reg );
6600 %}
6601 
// Store Pointer to stack slot
6603 instruct storeSSP(stackSlotP dst, eRegP src) %{
6604   match(Set dst src);
6605 
6606   ins_cost(100);
6607   format %{ "MOV    $dst,$src" %}
6608   opcode(0x89);
6609   ins_encode( OpcPRegSS( dst, src ) );
6610   ins_pipe( ialu_mem_reg );
6611 %}
6612 
6613 // Store Long to stack slot
6614 instruct storeSSL(stackSlotL dst, eRegL src) %{
6615   match(Set dst src);
6616 
6617   ins_cost(200);
6618   format %{ "MOV    $dst,$src.lo\n\t"
6619             "MOV    $dst+4,$src.hi" %}
6620   opcode(0x89, 0x89);
6621   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6622   ins_pipe( ialu_mem_long_reg );
6623 %}
6624 
6625 //----------MemBar Instructions-----------------------------------------------
6626 // Memory barrier flavors
6627 
6628 instruct membar_acquire() %{
6629   match(MemBarAcquire);
6630   match(LoadFence);
6631   ins_cost(400);
6632 
6633   size(0);
6634   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6635   ins_encode();
6636   ins_pipe(empty);
6637 %}
6638 
6639 instruct membar_acquire_lock() %{
6640   match(MemBarAcquireLock);
6641   ins_cost(0);
6642 
6643   size(0);
6644   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6645   ins_encode( );
6646   ins_pipe(empty);
6647 %}
6648 
6649 instruct membar_release() %{
6650   match(MemBarRelease);
6651   match(StoreFence);
6652   ins_cost(400);
6653 
6654   size(0);
6655   format %{ "MEMBAR-release ! (empty encoding)" %}
6656   ins_encode( );
6657   ins_pipe(empty);
6658 %}
6659 
6660 instruct membar_release_lock() %{
6661   match(MemBarReleaseLock);
6662   ins_cost(0);
6663 
6664   size(0);
6665   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6666   ins_encode( );
6667   ins_pipe(empty);
6668 %}
6669 
6670 instruct membar_volatile(eFlagsReg cr) %{
6671   match(MemBarVolatile);
6672   effect(KILL cr);
6673   ins_cost(400);
6674 
6675   format %{
6676     $$template
6677     if (os::is_MP()) {
6678       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6679     } else {
6680       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6681     }
6682   %}
6683   ins_encode %{
6684     __ membar(Assembler::StoreLoad);
6685   %}
6686   ins_pipe(pipe_slow);
6687 %}
6688 
6689 instruct unnecessary_membar_volatile() %{
6690   match(MemBarVolatile);
6691   predicate(Matcher::post_store_load_barrier(n));
6692   ins_cost(0);
6693 
6694   size(0);
6695   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6696   ins_encode( );
6697   ins_pipe(empty);
6698 %}
6699 
6700 instruct membar_storestore() %{
6701   match(MemBarStoreStore);
6702   ins_cost(0);
6703 
6704   size(0);
6705   format %{ "MEMBAR-storestore (empty encoding)" %}
6706   ins_encode( );
6707   ins_pipe(empty);
6708 %}
6709 
6710 //----------Move Instructions--------------------------------------------------
6711 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6712   match(Set dst (CastX2P src));
6713   format %{ "# X2P  $dst, $src" %}
6714   ins_encode( /*empty encoding*/ );
6715   ins_cost(0);
6716   ins_pipe(empty);
6717 %}
6718 
6719 instruct castP2X(rRegI dst, eRegP src ) %{
6720   match(Set dst (CastP2X src));
6721   ins_cost(50);
6722   format %{ "MOV    $dst, $src\t# CastP2X" %}
6723   ins_encode( enc_Copy( dst, src) );
6724   ins_pipe( ialu_reg_reg );
6725 %}
6726 
6727 //----------Conditional Move---------------------------------------------------
6728 // Conditional move
6729 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6730   predicate(!VM_Version::supports_cmov() );
6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6732   ins_cost(200);
6733   format %{ "J$cop,us skip\t# signed cmove\n\t"
6734             "MOV    $dst,$src\n"
6735       "skip:" %}
6736   ins_encode %{
6737     Label Lskip;
6738     // Invert sense of branch from sense of CMOV
6739     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6740     __ movl($dst$$Register, $src$$Register);
6741     __ bind(Lskip);
6742   %}
6743   ins_pipe( pipe_cmov_reg );
6744 %}
6745 
6746 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6747   predicate(!VM_Version::supports_cmov() );
6748   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6749   ins_cost(200);
6750   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6751             "MOV    $dst,$src\n"
6752       "skip:" %}
6753   ins_encode %{
6754     Label Lskip;
6755     // Invert sense of branch from sense of CMOV
6756     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6757     __ movl($dst$$Register, $src$$Register);
6758     __ bind(Lskip);
6759   %}
6760   ins_pipe( pipe_cmov_reg );
6761 %}
6762 
6763 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6764   predicate(VM_Version::supports_cmov() );
6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6766   ins_cost(200);
6767   format %{ "CMOV$cop $dst,$src" %}
6768   opcode(0x0F,0x40);
6769   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6770   ins_pipe( pipe_cmov_reg );
6771 %}
6772 
6773 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6774   predicate(VM_Version::supports_cmov() );
6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6776   ins_cost(200);
6777   format %{ "CMOV$cop $dst,$src" %}
6778   opcode(0x0F,0x40);
6779   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6780   ins_pipe( pipe_cmov_reg );
6781 %}
6782 
6783 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6784   predicate(VM_Version::supports_cmov() );
6785   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6786   ins_cost(200);
6787   expand %{
6788     cmovI_regU(cop, cr, dst, src);
6789   %}
6790 %}
6791 
6792 // Conditional move
6793 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6794   predicate(VM_Version::supports_cmov() );
6795   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6796   ins_cost(250);
6797   format %{ "CMOV$cop $dst,$src" %}
6798   opcode(0x0F,0x40);
6799   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6800   ins_pipe( pipe_cmov_mem );
6801 %}
6802 
6803 // Conditional move
6804 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6805   predicate(VM_Version::supports_cmov() );
6806   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6807   ins_cost(250);
6808   format %{ "CMOV$cop $dst,$src" %}
6809   opcode(0x0F,0x40);
6810   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6811   ins_pipe( pipe_cmov_mem );
6812 %}
6813 
6814 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6815   predicate(VM_Version::supports_cmov() );
6816   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6817   ins_cost(250);
6818   expand %{
6819     cmovI_memU(cop, cr, dst, src);
6820   %}
6821 %}
6822 
6823 // Conditional move
6824 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6825   predicate(VM_Version::supports_cmov() );
6826   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6827   ins_cost(200);
6828   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6829   opcode(0x0F,0x40);
6830   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6831   ins_pipe( pipe_cmov_reg );
6832 %}
6833 
6834 // Conditional move (non-P6 version)
6835 // Note:  a CMoveP is generated for  stubs and native wrappers
6836 //        regardless of whether we are on a P6, so we
6837 //        emulate a cmov here
6838 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6840   ins_cost(300);
6841   format %{ "Jn$cop   skip\n\t"
6842           "MOV    $dst,$src\t# pointer\n"
6843       "skip:" %}
6844   opcode(0x8b);
6845   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6846   ins_pipe( pipe_cmov_reg );
6847 %}
6848 
6849 // Conditional move
6850 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6851   predicate(VM_Version::supports_cmov() );
6852   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6853   ins_cost(200);
6854   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6855   opcode(0x0F,0x40);
6856   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6857   ins_pipe( pipe_cmov_reg );
6858 %}
6859 
6860 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6861   predicate(VM_Version::supports_cmov() );
6862   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6863   ins_cost(200);
6864   expand %{
6865     cmovP_regU(cop, cr, dst, src);
6866   %}
6867 %}
6868 
6869 // DISABLED: Requires the ADLC to emit a bottom_type call that
6870 // correctly meets the two pointer arguments; one is an incoming
6871 // register but the other is a memory operand.  ALSO appears to
6872 // be buggy with implicit null checks.
6873 //
6874 //// Conditional move
6875 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6876 //  predicate(VM_Version::supports_cmov() );
6877 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6878 //  ins_cost(250);
6879 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6880 //  opcode(0x0F,0x40);
6881 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6882 //  ins_pipe( pipe_cmov_mem );
6883 //%}
6884 //
6885 //// Conditional move
6886 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6887 //  predicate(VM_Version::supports_cmov() );
6888 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6889 //  ins_cost(250);
6890 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6891 //  opcode(0x0F,0x40);
6892 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6893 //  ins_pipe( pipe_cmov_mem );
6894 //%}
6895 
6896 // Conditional move
6897 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6898   predicate(UseSSE<=1);
6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6900   ins_cost(200);
6901   format %{ "FCMOV$cop $dst,$src\t# double" %}
6902   opcode(0xDA);
6903   ins_encode( enc_cmov_dpr(cop,src) );
6904   ins_pipe( pipe_cmovDPR_reg );
6905 %}
6906 
6907 // Conditional move
6908 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6909   predicate(UseSSE==0);
6910   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6911   ins_cost(200);
6912   format %{ "FCMOV$cop $dst,$src\t# float" %}
6913   opcode(0xDA);
6914   ins_encode( enc_cmov_dpr(cop,src) );
6915   ins_pipe( pipe_cmovDPR_reg );
6916 %}
6917 
6918 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6919 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6920   predicate(UseSSE<=1);
6921   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6922   ins_cost(200);
6923   format %{ "Jn$cop   skip\n\t"
6924             "MOV    $dst,$src\t# double\n"
6925       "skip:" %}
6926   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6927   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6928   ins_pipe( pipe_cmovDPR_reg );
6929 %}
6930 
6931 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6932 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6933   predicate(UseSSE==0);
6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6935   ins_cost(200);
6936   format %{ "Jn$cop    skip\n\t"
6937             "MOV    $dst,$src\t# float\n"
6938       "skip:" %}
6939   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6940   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6941   ins_pipe( pipe_cmovDPR_reg );
6942 %}
6943 
6944 // No CMOVE with SSE/SSE2
6945 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6946   predicate (UseSSE>=1);
6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6948   ins_cost(200);
6949   format %{ "Jn$cop   skip\n\t"
6950             "MOVSS  $dst,$src\t# float\n"
6951       "skip:" %}
6952   ins_encode %{
6953     Label skip;
6954     // Invert sense of branch from sense of CMOV
6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6956     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6957     __ bind(skip);
6958   %}
6959   ins_pipe( pipe_slow );
6960 %}
6961 
6962 // No CMOVE with SSE/SSE2
6963 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6964   predicate (UseSSE>=2);
6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6966   ins_cost(200);
6967   format %{ "Jn$cop   skip\n\t"
6968             "MOVSD  $dst,$src\t# float\n"
6969       "skip:" %}
6970   ins_encode %{
6971     Label skip;
6972     // Invert sense of branch from sense of CMOV
6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6974     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6975     __ bind(skip);
6976   %}
6977   ins_pipe( pipe_slow );
6978 %}
6979 
6980 // unsigned version
6981 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6982   predicate (UseSSE>=1);
6983   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6984   ins_cost(200);
6985   format %{ "Jn$cop   skip\n\t"
6986             "MOVSS  $dst,$src\t# float\n"
6987       "skip:" %}
6988   ins_encode %{
6989     Label skip;
6990     // Invert sense of branch from sense of CMOV
6991     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6992     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6993     __ bind(skip);
6994   %}
6995   ins_pipe( pipe_slow );
6996 %}
6997 
6998 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6999   predicate (UseSSE>=1);
7000   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7001   ins_cost(200);
7002   expand %{
7003     fcmovF_regU(cop, cr, dst, src);
7004   %}
7005 %}
7006 
7007 // unsigned version
7008 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7009   predicate (UseSSE>=2);
7010   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7011   ins_cost(200);
7012   format %{ "Jn$cop   skip\n\t"
7013             "MOVSD  $dst,$src\t# float\n"
7014       "skip:" %}
7015   ins_encode %{
7016     Label skip;
7017     // Invert sense of branch from sense of CMOV
7018     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7019     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7020     __ bind(skip);
7021   %}
7022   ins_pipe( pipe_slow );
7023 %}
7024 
7025 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7026   predicate (UseSSE>=2);
7027   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7028   ins_cost(200);
7029   expand %{
7030     fcmovD_regU(cop, cr, dst, src);
7031   %}
7032 %}
7033 
7034 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7035   predicate(VM_Version::supports_cmov() );
7036   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7037   ins_cost(200);
7038   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7039             "CMOV$cop $dst.hi,$src.hi" %}
7040   opcode(0x0F,0x40);
7041   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7042   ins_pipe( pipe_cmov_reg_long );
7043 %}
7044 
7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7046   predicate(VM_Version::supports_cmov() );
7047   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7048   ins_cost(200);
7049   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7050             "CMOV$cop $dst.hi,$src.hi" %}
7051   opcode(0x0F,0x40);
7052   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7053   ins_pipe( pipe_cmov_reg_long );
7054 %}
7055 
7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7057   predicate(VM_Version::supports_cmov() );
7058   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7059   ins_cost(200);
7060   expand %{
7061     cmovL_regU(cop, cr, dst, src);
7062   %}
7063 %}
7064 
7065 //----------Arithmetic Instructions--------------------------------------------
7066 //----------Addition Instructions----------------------------------------------
7067 
7068 // Integer Addition Instructions
7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7070   match(Set dst (AddI dst src));
7071   effect(KILL cr);
7072 
7073   size(2);
7074   format %{ "ADD    $dst,$src" %}
7075   opcode(0x03);
7076   ins_encode( OpcP, RegReg( dst, src) );
7077   ins_pipe( ialu_reg_reg );
7078 %}
7079 
7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7081   match(Set dst (AddI dst src));
7082   effect(KILL cr);
7083 
7084   format %{ "ADD    $dst,$src" %}
7085   opcode(0x81, 0x00); /* /0 id */
7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7087   ins_pipe( ialu_reg );
7088 %}
7089 
7090 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7091   predicate(UseIncDec);
7092   match(Set dst (AddI dst src));
7093   effect(KILL cr);
7094 
7095   size(1);
7096   format %{ "INC    $dst" %}
7097   opcode(0x40); /*  */
7098   ins_encode( Opc_plus( primary, dst ) );
7099   ins_pipe( ialu_reg );
7100 %}
7101 
7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7103   match(Set dst (AddI src0 src1));
7104   ins_cost(110);
7105 
7106   format %{ "LEA    $dst,[$src0 + $src1]" %}
7107   opcode(0x8D); /* 0x8D /r */
7108   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7109   ins_pipe( ialu_reg_reg );
7110 %}
7111 
7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7113   match(Set dst (AddP src0 src1));
7114   ins_cost(110);
7115 
7116   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7117   opcode(0x8D); /* 0x8D /r */
7118   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7119   ins_pipe( ialu_reg_reg );
7120 %}
7121 
7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7123   predicate(UseIncDec);
7124   match(Set dst (AddI dst src));
7125   effect(KILL cr);
7126 
7127   size(1);
7128   format %{ "DEC    $dst" %}
7129   opcode(0x48); /*  */
7130   ins_encode( Opc_plus( primary, dst ) );
7131   ins_pipe( ialu_reg );
7132 %}
7133 
7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7135   match(Set dst (AddP dst src));
7136   effect(KILL cr);
7137 
7138   size(2);
7139   format %{ "ADD    $dst,$src" %}
7140   opcode(0x03);
7141   ins_encode( OpcP, RegReg( dst, src) );
7142   ins_pipe( ialu_reg_reg );
7143 %}
7144 
7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7146   match(Set dst (AddP dst src));
7147   effect(KILL cr);
7148 
7149   format %{ "ADD    $dst,$src" %}
7150   opcode(0x81,0x00); /* Opcode 81 /0 id */
7151   // ins_encode( RegImm( dst, src) );
7152   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7153   ins_pipe( ialu_reg );
7154 %}
7155 
7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7157   match(Set dst (AddI dst (LoadI src)));
7158   effect(KILL cr);
7159 
7160   ins_cost(125);
7161   format %{ "ADD    $dst,$src" %}
7162   opcode(0x03);
7163   ins_encode( OpcP, RegMem( dst, src) );
7164   ins_pipe( ialu_reg_mem );
7165 %}
7166 
7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7169   effect(KILL cr);
7170 
7171   ins_cost(150);
7172   format %{ "ADD    $dst,$src" %}
7173   opcode(0x01);  /* Opcode 01 /r */
7174   ins_encode( OpcP, RegMem( src, dst ) );
7175   ins_pipe( ialu_mem_reg );
7176 %}
7177 
7178 // Add Memory with Immediate
7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7180   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7181   effect(KILL cr);
7182 
7183   ins_cost(125);
7184   format %{ "ADD    $dst,$src" %}
7185   opcode(0x81);               /* Opcode 81 /0 id */
7186   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7187   ins_pipe( ialu_mem_imm );
7188 %}
7189 
7190 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7191   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7192   effect(KILL cr);
7193 
7194   ins_cost(125);
7195   format %{ "INC    $dst" %}
7196   opcode(0xFF);               /* Opcode FF /0 */
7197   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7198   ins_pipe( ialu_mem_imm );
7199 %}
7200 
7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7202   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7203   effect(KILL cr);
7204 
7205   ins_cost(125);
7206   format %{ "DEC    $dst" %}
7207   opcode(0xFF);               /* Opcode FF /1 */
7208   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7209   ins_pipe( ialu_mem_imm );
7210 %}
7211 
7212 
7213 instruct checkCastPP( eRegP dst ) %{
7214   match(Set dst (CheckCastPP dst));
7215 
7216   size(0);
7217   format %{ "#checkcastPP of $dst" %}
7218   ins_encode( /*empty encoding*/ );
7219   ins_pipe( empty );
7220 %}
7221 
7222 instruct castPP( eRegP dst ) %{
7223   match(Set dst (CastPP dst));
7224   format %{ "#castPP of $dst" %}
7225   ins_encode( /*empty encoding*/ );
7226   ins_pipe( empty );
7227 %}
7228 
7229 instruct castII( rRegI dst ) %{
7230   match(Set dst (CastII dst));
7231   format %{ "#castII of $dst" %}
7232   ins_encode( /*empty encoding*/ );
7233   ins_cost(0);
7234   ins_pipe( empty );
7235 %}
7236 
7237 
7238 // Load-locked - same as a regular pointer load when used with compare-swap
7239 instruct loadPLocked(eRegP dst, memory mem) %{
7240   match(Set dst (LoadPLocked mem));
7241 
7242   ins_cost(125);
7243   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7244   opcode(0x8B);
7245   ins_encode( OpcP, RegMem(dst,mem));
7246   ins_pipe( ialu_reg_mem );
7247 %}
7248 
7249 // Conditional-store of the updated heap-top.
7250 // Used during allocation of the shared heap.
7251 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7252 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7253   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7254   // EAX is killed if there is contention, but then it's also unused.
7255   // In the common case of no contention, EAX holds the new oop address.
7256   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7257   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7258   ins_pipe( pipe_cmpxchg );
7259 %}
7260 
7261 // Conditional-store of an int value.
7262 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7263 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7264   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7265   effect(KILL oldval);
7266   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7267   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7268   ins_pipe( pipe_cmpxchg );
7269 %}
7270 
7271 // Conditional-store of a long value.
7272 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7273 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7274   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7275   effect(KILL oldval);
7276   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7277             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7278             "XCHG   EBX,ECX"
7279   %}
7280   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7282     //       cmpxchg8 instruction because the instruction uses
7283     //       rcx as the high order word of the new value to store but
7284     //       our register encoding uses rbx.
7285     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7286     if( os::is_MP() )
7287       __ lock();
7288     __ cmpxchg8($mem$$Address);
7289     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7290   %}
7291   ins_pipe( pipe_cmpxchg );
7292 %}
7293 
7294 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7295 
7296 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7297   predicate(VM_Version::supports_cx8());
7298   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7299   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7300   effect(KILL cr, KILL oldval);
7301   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7302             "MOV    $res,0\n\t"
7303             "JNE,s  fail\n\t"
7304             "MOV    $res,1\n"
7305           "fail:" %}
7306   ins_encode( enc_cmpxchg8(mem_ptr),
7307               enc_flags_ne_to_boolean(res) );
7308   ins_pipe( pipe_cmpxchg );
7309 %}
7310 
7311 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7312   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7313   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7314   effect(KILL cr, KILL oldval);
7315   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7316             "MOV    $res,0\n\t"
7317             "JNE,s  fail\n\t"
7318             "MOV    $res,1\n"
7319           "fail:" %}
7320   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7321   ins_pipe( pipe_cmpxchg );
7322 %}
7323 
7324 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7325   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7326   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7327   effect(KILL cr, KILL oldval);
7328   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7329             "MOV    $res,0\n\t"
7330             "JNE,s  fail\n\t"
7331             "MOV    $res,1\n"
7332           "fail:" %}
7333   ins_encode( enc_cmpxchgb(mem_ptr),
7334               enc_flags_ne_to_boolean(res) );
7335   ins_pipe( pipe_cmpxchg );
7336 %}
7337 
7338 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7339   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7340   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7341   effect(KILL cr, KILL oldval);
7342   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7343             "MOV    $res,0\n\t"
7344             "JNE,s  fail\n\t"
7345             "MOV    $res,1\n"
7346           "fail:" %}
7347   ins_encode( enc_cmpxchgw(mem_ptr),
7348               enc_flags_ne_to_boolean(res) );
7349   ins_pipe( pipe_cmpxchg );
7350 %}
7351 
7352 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7353   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7354   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7355   effect(KILL cr, KILL oldval);
7356   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7357             "MOV    $res,0\n\t"
7358             "JNE,s  fail\n\t"
7359             "MOV    $res,1\n"
7360           "fail:" %}
7361   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7362   ins_pipe( pipe_cmpxchg );
7363 %}
7364 
7365 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7366   predicate(VM_Version::supports_cx8());
7367   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7368   effect(KILL cr);
7369   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7370   ins_encode( enc_cmpxchg8(mem_ptr) );
7371   ins_pipe( pipe_cmpxchg );
7372 %}
7373 
7374 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7375   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7376   effect(KILL cr);
7377   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7378   ins_encode( enc_cmpxchg(mem_ptr) );
7379   ins_pipe( pipe_cmpxchg );
7380 %}
7381 
7382 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7383   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7384   effect(KILL cr);
7385   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7386   ins_encode( enc_cmpxchgb(mem_ptr) );
7387   ins_pipe( pipe_cmpxchg );
7388 %}
7389 
7390 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7391   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7392   effect(KILL cr);
7393   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7394   ins_encode( enc_cmpxchgw(mem_ptr) );
7395   ins_pipe( pipe_cmpxchg );
7396 %}
7397 
7398 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7399   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7400   effect(KILL cr);
7401   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7402   ins_encode( enc_cmpxchg(mem_ptr) );
7403   ins_pipe( pipe_cmpxchg );
7404 %}
7405 
7406 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7407   predicate(n->as_LoadStore()->result_not_used());
7408   match(Set dummy (GetAndAddB mem add));
7409   effect(KILL cr);
7410   format %{ "ADDB  [$mem],$add" %}
7411   ins_encode %{
7412     if (os::is_MP()) { __ lock(); }
7413     __ addb($mem$$Address, $add$$constant);
7414   %}
7415   ins_pipe( pipe_cmpxchg );
7416 %}
7417 
7418 // Important to match to xRegI: only 8-bit regs.
7419 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7420   match(Set newval (GetAndAddB mem newval));
7421   effect(KILL cr);
7422   format %{ "XADDB  [$mem],$newval" %}
7423   ins_encode %{
7424     if (os::is_MP()) { __ lock(); }
7425     __ xaddb($mem$$Address, $newval$$Register);
7426   %}
7427   ins_pipe( pipe_cmpxchg );
7428 %}
7429 
7430 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7431   predicate(n->as_LoadStore()->result_not_used());
7432   match(Set dummy (GetAndAddS mem add));
7433   effect(KILL cr);
7434   format %{ "ADDS  [$mem],$add" %}
7435   ins_encode %{
7436     if (os::is_MP()) { __ lock(); }
7437     __ addw($mem$$Address, $add$$constant);
7438   %}
7439   ins_pipe( pipe_cmpxchg );
7440 %}
7441 
7442 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7443   match(Set newval (GetAndAddS mem newval));
7444   effect(KILL cr);
7445   format %{ "XADDS  [$mem],$newval" %}
7446   ins_encode %{
7447     if (os::is_MP()) { __ lock(); }
7448     __ xaddw($mem$$Address, $newval$$Register);
7449   %}
7450   ins_pipe( pipe_cmpxchg );
7451 %}
7452 
7453 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7454   predicate(n->as_LoadStore()->result_not_used());
7455   match(Set dummy (GetAndAddI mem add));
7456   effect(KILL cr);
7457   format %{ "ADDL  [$mem],$add" %}
7458   ins_encode %{
7459     if (os::is_MP()) { __ lock(); }
7460     __ addl($mem$$Address, $add$$constant);
7461   %}
7462   ins_pipe( pipe_cmpxchg );
7463 %}
7464 
7465 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7466   match(Set newval (GetAndAddI mem newval));
7467   effect(KILL cr);
7468   format %{ "XADDL  [$mem],$newval" %}
7469   ins_encode %{
7470     if (os::is_MP()) { __ lock(); }
7471     __ xaddl($mem$$Address, $newval$$Register);
7472   %}
7473   ins_pipe( pipe_cmpxchg );
7474 %}
7475 
7476 // Important to match to xRegI: only 8-bit regs.
7477 instruct xchgB( memory mem, xRegI newval) %{
7478   match(Set newval (GetAndSetB mem newval));
7479   format %{ "XCHGB  $newval,[$mem]" %}
7480   ins_encode %{
7481     __ xchgb($newval$$Register, $mem$$Address);
7482   %}
7483   ins_pipe( pipe_cmpxchg );
7484 %}
7485 
7486 instruct xchgS( memory mem, rRegI newval) %{
7487   match(Set newval (GetAndSetS mem newval));
7488   format %{ "XCHGW  $newval,[$mem]" %}
7489   ins_encode %{
7490     __ xchgw($newval$$Register, $mem$$Address);
7491   %}
7492   ins_pipe( pipe_cmpxchg );
7493 %}
7494 
7495 instruct xchgI( memory mem, rRegI newval) %{
7496   match(Set newval (GetAndSetI mem newval));
7497   format %{ "XCHGL  $newval,[$mem]" %}
7498   ins_encode %{
7499     __ xchgl($newval$$Register, $mem$$Address);
7500   %}
7501   ins_pipe( pipe_cmpxchg );
7502 %}
7503 
7504 instruct xchgP( memory mem, pRegP newval) %{
7505   match(Set newval (GetAndSetP mem newval));
7506   format %{ "XCHGL  $newval,[$mem]" %}
7507   ins_encode %{
7508     __ xchgl($newval$$Register, $mem$$Address);
7509   %}
7510   ins_pipe( pipe_cmpxchg );
7511 %}
7512 
7513 //----------Subtraction Instructions-------------------------------------------
7514 
7515 // Integer Subtraction Instructions
7516 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7517   match(Set dst (SubI dst src));
7518   effect(KILL cr);
7519 
7520   size(2);
7521   format %{ "SUB    $dst,$src" %}
7522   opcode(0x2B);
7523   ins_encode( OpcP, RegReg( dst, src) );
7524   ins_pipe( ialu_reg_reg );
7525 %}
7526 
7527 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7528   match(Set dst (SubI dst src));
7529   effect(KILL cr);
7530 
7531   format %{ "SUB    $dst,$src" %}
7532   opcode(0x81,0x05);  /* Opcode 81 /5 */
7533   // ins_encode( RegImm( dst, src) );
7534   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7535   ins_pipe( ialu_reg );
7536 %}
7537 
7538 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7539   match(Set dst (SubI dst (LoadI src)));
7540   effect(KILL cr);
7541 
7542   ins_cost(125);
7543   format %{ "SUB    $dst,$src" %}
7544   opcode(0x2B);
7545   ins_encode( OpcP, RegMem( dst, src) );
7546   ins_pipe( ialu_reg_mem );
7547 %}
7548 
7549 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7550   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7551   effect(KILL cr);
7552 
7553   ins_cost(150);
7554   format %{ "SUB    $dst,$src" %}
7555   opcode(0x29);  /* Opcode 29 /r */
7556   ins_encode( OpcP, RegMem( src, dst ) );
7557   ins_pipe( ialu_mem_reg );
7558 %}
7559 
7560 // Subtract from a pointer
7561 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7562   match(Set dst (AddP dst (SubI zero src)));
7563   effect(KILL cr);
7564 
7565   size(2);
7566   format %{ "SUB    $dst,$src" %}
7567   opcode(0x2B);
7568   ins_encode( OpcP, RegReg( dst, src) );
7569   ins_pipe( ialu_reg_reg );
7570 %}
7571 
7572 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7573   match(Set dst (SubI zero dst));
7574   effect(KILL cr);
7575 
7576   size(2);
7577   format %{ "NEG    $dst" %}
7578   opcode(0xF7,0x03);  // Opcode F7 /3
7579   ins_encode( OpcP, RegOpc( dst ) );
7580   ins_pipe( ialu_reg );
7581 %}
7582 
7583 //----------Multiplication/Division Instructions-------------------------------
7584 // Integer Multiplication Instructions
7585 // Multiply Register
7586 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7587   match(Set dst (MulI dst src));
7588   effect(KILL cr);
7589 
7590   size(3);
7591   ins_cost(300);
7592   format %{ "IMUL   $dst,$src" %}
7593   opcode(0xAF, 0x0F);
7594   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7595   ins_pipe( ialu_reg_reg_alu0 );
7596 %}
7597 
7598 // Multiply 32-bit Immediate
7599 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7600   match(Set dst (MulI src imm));
7601   effect(KILL cr);
7602 
7603   ins_cost(300);
7604   format %{ "IMUL   $dst,$src,$imm" %}
7605   opcode(0x69);  /* 69 /r id */
7606   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7607   ins_pipe( ialu_reg_reg_alu0 );
7608 %}
7609 
7610 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7611   match(Set dst src);
7612   effect(KILL cr);
7613 
7614   // Note that this is artificially increased to make it more expensive than loadConL
7615   ins_cost(250);
7616   format %{ "MOV    EAX,$src\t// low word only" %}
7617   opcode(0xB8);
7618   ins_encode( LdImmL_Lo(dst, src) );
7619   ins_pipe( ialu_reg_fat );
7620 %}
7621 
7622 // Multiply by 32-bit Immediate, taking the shifted high order results
7623 //  (special case for shift by 32)
7624 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7625   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7626   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7629   effect(USE src1, KILL cr);
7630 
7631   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7632   ins_cost(0*100 + 1*400 - 150);
7633   format %{ "IMUL   EDX:EAX,$src1" %}
7634   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7635   ins_pipe( pipe_slow );
7636 %}
7637 
7638 // Multiply by 32-bit Immediate, taking the shifted high order results
7639 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7640   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7641   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7642              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7643              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7644   effect(USE src1, KILL cr);
7645 
7646   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7647   ins_cost(1*100 + 1*400 - 150);
7648   format %{ "IMUL   EDX:EAX,$src1\n\t"
7649             "SAR    EDX,$cnt-32" %}
7650   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7651   ins_pipe( pipe_slow );
7652 %}
7653 
7654 // Multiply Memory 32-bit Immediate
7655 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7656   match(Set dst (MulI (LoadI src) imm));
7657   effect(KILL cr);
7658 
7659   ins_cost(300);
7660   format %{ "IMUL   $dst,$src,$imm" %}
7661   opcode(0x69);  /* 69 /r id */
7662   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7663   ins_pipe( ialu_reg_mem_alu0 );
7664 %}
7665 
7666 // Multiply Memory
7667 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7668   match(Set dst (MulI dst (LoadI src)));
7669   effect(KILL cr);
7670 
7671   ins_cost(350);
7672   format %{ "IMUL   $dst,$src" %}
7673   opcode(0xAF, 0x0F);
7674   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7675   ins_pipe( ialu_reg_mem_alu0 );
7676 %}
7677 
7678 // Multiply Register Int to Long
7679 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7680   // Basic Idea: long = (long)int * (long)int
7681   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7682   effect(DEF dst, USE src, USE src1, KILL flags);
7683 
7684   ins_cost(300);
7685   format %{ "IMUL   $dst,$src1" %}
7686 
7687   ins_encode( long_int_multiply( dst, src1 ) );
7688   ins_pipe( ialu_reg_reg_alu0 );
7689 %}
7690 
7691 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7692   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7693   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7694   effect(KILL flags);
7695 
7696   ins_cost(300);
7697   format %{ "MUL    $dst,$src1" %}
7698 
7699   ins_encode( long_uint_multiply(dst, src1) );
7700   ins_pipe( ialu_reg_reg_alu0 );
7701 %}
7702 
7703 // Multiply Register Long
7704 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7705   match(Set dst (MulL dst src));
7706   effect(KILL cr, TEMP tmp);
7707   ins_cost(4*100+3*400);
7708 // Basic idea: lo(result) = lo(x_lo * y_lo)
7709 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7710   format %{ "MOV    $tmp,$src.lo\n\t"
7711             "IMUL   $tmp,EDX\n\t"
7712             "MOV    EDX,$src.hi\n\t"
7713             "IMUL   EDX,EAX\n\t"
7714             "ADD    $tmp,EDX\n\t"
7715             "MUL    EDX:EAX,$src.lo\n\t"
7716             "ADD    EDX,$tmp" %}
7717   ins_encode( long_multiply( dst, src, tmp ) );
7718   ins_pipe( pipe_slow );
7719 %}
7720 
7721 // Multiply Register Long where the left operand's high 32 bits are zero
7722 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7723   predicate(is_operand_hi32_zero(n->in(1)));
7724   match(Set dst (MulL dst src));
7725   effect(KILL cr, TEMP tmp);
7726   ins_cost(2*100+2*400);
7727 // Basic idea: lo(result) = lo(x_lo * y_lo)
7728 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7729   format %{ "MOV    $tmp,$src.hi\n\t"
7730             "IMUL   $tmp,EAX\n\t"
7731             "MUL    EDX:EAX,$src.lo\n\t"
7732             "ADD    EDX,$tmp" %}
7733   ins_encode %{
7734     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7735     __ imull($tmp$$Register, rax);
7736     __ mull($src$$Register);
7737     __ addl(rdx, $tmp$$Register);
7738   %}
7739   ins_pipe( pipe_slow );
7740 %}
7741 
7742 // Multiply Register Long where the right operand's high 32 bits are zero
7743 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7744   predicate(is_operand_hi32_zero(n->in(2)));
7745   match(Set dst (MulL dst src));
7746   effect(KILL cr, TEMP tmp);
7747   ins_cost(2*100+2*400);
7748 // Basic idea: lo(result) = lo(x_lo * y_lo)
7749 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7750   format %{ "MOV    $tmp,$src.lo\n\t"
7751             "IMUL   $tmp,EDX\n\t"
7752             "MUL    EDX:EAX,$src.lo\n\t"
7753             "ADD    EDX,$tmp" %}
7754   ins_encode %{
7755     __ movl($tmp$$Register, $src$$Register);
7756     __ imull($tmp$$Register, rdx);
7757     __ mull($src$$Register);
7758     __ addl(rdx, $tmp$$Register);
7759   %}
7760   ins_pipe( pipe_slow );
7761 %}
7762 
7763 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7764 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7765   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7766   match(Set dst (MulL dst src));
7767   effect(KILL cr);
7768   ins_cost(1*400);
7769 // Basic idea: lo(result) = lo(x_lo * y_lo)
7770 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7771   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7772   ins_encode %{
7773     __ mull($src$$Register);
7774   %}
7775   ins_pipe( pipe_slow );
7776 %}
7777 
7778 // Multiply Register Long by small constant
7779 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7780   match(Set dst (MulL dst src));
7781   effect(KILL cr, TEMP tmp);
7782   ins_cost(2*100+2*400);
7783   size(12);
7784 // Basic idea: lo(result) = lo(src * EAX)
7785 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7786   format %{ "IMUL   $tmp,EDX,$src\n\t"
7787             "MOV    EDX,$src\n\t"
7788             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7789             "ADD    EDX,$tmp" %}
7790   ins_encode( long_multiply_con( dst, src, tmp ) );
7791   ins_pipe( pipe_slow );
7792 %}
7793 
7794 // Integer DIV with Register
7795 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7796   match(Set rax (DivI rax div));
7797   effect(KILL rdx, KILL cr);
7798   size(26);
7799   ins_cost(30*100+10*100);
7800   format %{ "CMP    EAX,0x80000000\n\t"
7801             "JNE,s  normal\n\t"
7802             "XOR    EDX,EDX\n\t"
7803             "CMP    ECX,-1\n\t"
7804             "JE,s   done\n"
7805     "normal: CDQ\n\t"
7806             "IDIV   $div\n\t"
7807     "done:"        %}
7808   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7809   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7810   ins_pipe( ialu_reg_reg_alu0 );
7811 %}
7812 
7813 // Divide Register Long
7814 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7815   match(Set dst (DivL src1 src2));
7816   effect( KILL cr, KILL cx, KILL bx );
7817   ins_cost(10000);
7818   format %{ "PUSH   $src1.hi\n\t"
7819             "PUSH   $src1.lo\n\t"
7820             "PUSH   $src2.hi\n\t"
7821             "PUSH   $src2.lo\n\t"
7822             "CALL   SharedRuntime::ldiv\n\t"
7823             "ADD    ESP,16" %}
7824   ins_encode( long_div(src1,src2) );
7825   ins_pipe( pipe_slow );
7826 %}
7827 
7828 // Integer DIVMOD with Register, both quotient and mod results
7829 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7830   match(DivModI rax div);
7831   effect(KILL cr);
7832   size(26);
7833   ins_cost(30*100+10*100);
7834   format %{ "CMP    EAX,0x80000000\n\t"
7835             "JNE,s  normal\n\t"
7836             "XOR    EDX,EDX\n\t"
7837             "CMP    ECX,-1\n\t"
7838             "JE,s   done\n"
7839     "normal: CDQ\n\t"
7840             "IDIV   $div\n\t"
7841     "done:"        %}
7842   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7843   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7844   ins_pipe( pipe_slow );
7845 %}
7846 
7847 // Integer MOD with Register
7848 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7849   match(Set rdx (ModI rax div));
7850   effect(KILL rax, KILL cr);
7851 
7852   size(26);
7853   ins_cost(300);
7854   format %{ "CDQ\n\t"
7855             "IDIV   $div" %}
7856   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7857   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7858   ins_pipe( ialu_reg_reg_alu0 );
7859 %}
7860 
7861 // Remainder Register Long
7862 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7863   match(Set dst (ModL src1 src2));
7864   effect( KILL cr, KILL cx, KILL bx );
7865   ins_cost(10000);
7866   format %{ "PUSH   $src1.hi\n\t"
7867             "PUSH   $src1.lo\n\t"
7868             "PUSH   $src2.hi\n\t"
7869             "PUSH   $src2.lo\n\t"
7870             "CALL   SharedRuntime::lrem\n\t"
7871             "ADD    ESP,16" %}
7872   ins_encode( long_mod(src1,src2) );
7873   ins_pipe( pipe_slow );
7874 %}
7875 
7876 // Divide Register Long (no special case since divisor != -1)
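     // The encoding below is unsigned schoolbook division in base 2^32 applied to the
     // magnitude of the dividend.  Illustrative sketch (assumed pseudocode, not the
     // generated code), with d = abs($imm) and the dividend split as hi:lo:
     //   if (hi < d)  quotient = (hi:lo) / d;                 // single DIV, "fast" path
     //   else       { q_hi = hi / d;  r = hi % d;             // first DIV
     //                q_lo = (r:lo) / d;  quotient = q_hi:q_lo; }
     //   negate the quotient afterwards if dividend and divisor had opposite signs.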
7877 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7878   match(Set dst (DivL dst imm));
7879   effect( TEMP tmp, TEMP tmp2, KILL cr );
7880   ins_cost(1000);
7881   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7882             "XOR    $tmp2,$tmp2\n\t"
7883             "CMP    $tmp,EDX\n\t"
7884             "JA,s   fast\n\t"
7885             "MOV    $tmp2,EAX\n\t"
7886             "MOV    EAX,EDX\n\t"
7887             "MOV    EDX,0\n\t"
7888             "JLE,s  pos\n\t"
7889             "LNEG   EAX : $tmp2\n\t"
7890             "DIV    $tmp # unsigned division\n\t"
7891             "XCHG   EAX,$tmp2\n\t"
7892             "DIV    $tmp\n\t"
7893             "LNEG   $tmp2 : EAX\n\t"
7894             "JMP,s  done\n"
7895     "pos:\n\t"
7896             "DIV    $tmp\n\t"
7897             "XCHG   EAX,$tmp2\n"
7898     "fast:\n\t"
7899             "DIV    $tmp\n"
7900     "done:\n\t"
7901             "MOV    EDX,$tmp2\n\t"
7902             "NEG    EDX:EAX # if $imm < 0" %}
7903   ins_encode %{
7904     int con = (int)$imm$$constant;
7905     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7906     int pcon = (con > 0) ? con : -con;
7907     Label Lfast, Lpos, Ldone;
7908 
7909     __ movl($tmp$$Register, pcon);
7910     __ xorl($tmp2$$Register,$tmp2$$Register);
7911     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7912     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7913 
7914     __ movl($tmp2$$Register, $dst$$Register); // save
7915     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7916     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7917     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7918 
7919     // Negative dividend.
7920     // convert value to positive to use unsigned division
7921     __ lneg($dst$$Register, $tmp2$$Register);
7922     __ divl($tmp$$Register);
7923     __ xchgl($dst$$Register, $tmp2$$Register);
7924     __ divl($tmp$$Register);
7925     // revert result back to negative
7926     __ lneg($tmp2$$Register, $dst$$Register);
7927     __ jmpb(Ldone);
7928 
7929     __ bind(Lpos);
7930     __ divl($tmp$$Register); // Use unsigned division
7931     __ xchgl($dst$$Register, $tmp2$$Register);
7932     // Fall through for final divide, tmp2 has the 32-bit hi result
7933 
7934     __ bind(Lfast);
7935     // fast path: src is positive
7936     __ divl($tmp$$Register); // Use unsigned division
7937 
7938     __ bind(Ldone);
7939     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7940     if (con < 0) {
7941       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7942     }
7943   %}
7944   ins_pipe( pipe_slow );
7945 %}
7946 
7947 // Remainder Register Long (remainder fits into 32 bits)
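     // Same two-DIV scheme as divL_eReg_imm32 above, but only the final 32-bit remainder
     // (EDX after the last DIV) is kept and sign-extended; as in Java, the remainder takes
     // the sign of the dividend:  rem = sign(dividend) * (abs(dividend) % abs(imm)).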
7948 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7949   match(Set dst (ModL dst imm));
7950   effect( TEMP tmp, TEMP tmp2, KILL cr );
7951   ins_cost(1000);
7952   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7953             "CMP    $tmp,EDX\n\t"
7954             "JA,s   fast\n\t"
7955             "MOV    $tmp2,EAX\n\t"
7956             "MOV    EAX,EDX\n\t"
7957             "MOV    EDX,0\n\t"
7958             "JLE,s  pos\n\t"
7959             "LNEG   EAX : $tmp2\n\t"
7960             "DIV    $tmp # unsigned division\n\t"
7961             "MOV    EAX,$tmp2\n\t"
7962             "DIV    $tmp\n\t"
7963             "NEG    EDX\n\t"
7964             "JMP,s  done\n"
7965     "pos:\n\t"
7966             "DIV    $tmp\n\t"
7967             "MOV    EAX,$tmp2\n"
7968     "fast:\n\t"
7969             "DIV    $tmp\n"
7970     "done:\n\t"
7971             "MOV    EAX,EDX\n\t"
7972             "SAR    EDX,31\n\t" %}
7973   ins_encode %{
7974     int con = (int)$imm$$constant;
7975     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7976     int pcon = (con > 0) ? con : -con;
7977     Label  Lfast, Lpos, Ldone;
7978 
7979     __ movl($tmp$$Register, pcon);
7980     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7981     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7982 
7983     __ movl($tmp2$$Register, $dst$$Register); // save
7984     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7985     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7986     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7987 
7988     // Negative dividend.
7989     // convert value to positive to use unsigned division
7990     __ lneg($dst$$Register, $tmp2$$Register);
7991     __ divl($tmp$$Register);
7992     __ movl($dst$$Register, $tmp2$$Register);
7993     __ divl($tmp$$Register);
7994     // revert remainder back to negative
7995     __ negl(HIGH_FROM_LOW($dst$$Register));
7996     __ jmpb(Ldone);
7997 
7998     __ bind(Lpos);
7999     __ divl($tmp$$Register);
8000     __ movl($dst$$Register, $tmp2$$Register);
8001 
8002     __ bind(Lfast);
8003     // fast path: src is positive
8004     __ divl($tmp$$Register);
8005 
8006     __ bind(Ldone);
8007     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8008     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8009 
8010   %}
8011   ins_pipe( pipe_slow );
8012 %}
8013 
8014 // Integer Shift Instructions
8015 // Shift Left by one
8016 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8017   match(Set dst (LShiftI dst shift));
8018   effect(KILL cr);
8019 
8020   size(2);
8021   format %{ "SHL    $dst,$shift" %}
8022   opcode(0xD1, 0x4);  /* D1 /4 */
8023   ins_encode( OpcP, RegOpc( dst ) );
8024   ins_pipe( ialu_reg );
8025 %}
8026 
8027 // Shift Left by 8-bit immediate
8028 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8029   match(Set dst (LShiftI dst shift));
8030   effect(KILL cr);
8031 
8032   size(3);
8033   format %{ "SHL    $dst,$shift" %}
8034   opcode(0xC1, 0x4);  /* C1 /4 ib */
8035   ins_encode( RegOpcImm( dst, shift) );
8036   ins_pipe( ialu_reg );
8037 %}
8038 
8039 // Shift Left by variable
8040 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8041   match(Set dst (LShiftI dst shift));
8042   effect(KILL cr);
8043 
8044   size(2);
8045   format %{ "SHL    $dst,$shift" %}
8046   opcode(0xD3, 0x4);  /* D3 /4 */
8047   ins_encode( OpcP, RegOpc( dst ) );
8048   ins_pipe( ialu_reg_reg );
8049 %}
8050 
8051 // Arithmetic shift right by one
8052 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8053   match(Set dst (RShiftI dst shift));
8054   effect(KILL cr);
8055 
8056   size(2);
8057   format %{ "SAR    $dst,$shift" %}
8058   opcode(0xD1, 0x7);  /* D1 /7 */
8059   ins_encode( OpcP, RegOpc( dst ) );
8060   ins_pipe( ialu_reg );
8061 %}
8062 
8063 // Arithmetic shift right by one
8064 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8065   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8066   effect(KILL cr);
8067   format %{ "SAR    $dst,$shift" %}
8068   opcode(0xD1, 0x7);  /* D1 /7 */
8069   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8070   ins_pipe( ialu_mem_imm );
8071 %}
8072 
8073 // Arithmetic Shift Right by 8-bit immediate
8074 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8075   match(Set dst (RShiftI dst shift));
8076   effect(KILL cr);
8077 
8078   size(3);
8079   format %{ "SAR    $dst,$shift" %}
8080   opcode(0xC1, 0x7);  /* C1 /7 ib */
8081   ins_encode( RegOpcImm( dst, shift ) );
8082   ins_pipe( ialu_mem_imm );
8083 %}
8084 
8085 // Arithmetic Shift Right by 8-bit immediate
8086 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8087   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8088   effect(KILL cr);
8089 
8090   format %{ "SAR    $dst,$shift" %}
8091   opcode(0xC1, 0x7);  /* C1 /7 ib */
8092   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8093   ins_pipe( ialu_mem_imm );
8094 %}
8095 
8096 // Arithmetic Shift Right by variable
8097 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8098   match(Set dst (RShiftI dst shift));
8099   effect(KILL cr);
8100 
8101   size(2);
8102   format %{ "SAR    $dst,$shift" %}
8103   opcode(0xD3, 0x7);  /* D3 /7 */
8104   ins_encode( OpcP, RegOpc( dst ) );
8105   ins_pipe( ialu_reg_reg );
8106 %}
8107 
8108 // Logical shift right by one
8109 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8110   match(Set dst (URShiftI dst shift));
8111   effect(KILL cr);
8112 
8113   size(2);
8114   format %{ "SHR    $dst,$shift" %}
8115   opcode(0xD1, 0x5);  /* D1 /5 */
8116   ins_encode( OpcP, RegOpc( dst ) );
8117   ins_pipe( ialu_reg );
8118 %}
8119 
8120 // Logical Shift Right by 8-bit immediate
8121 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8122   match(Set dst (URShiftI dst shift));
8123   effect(KILL cr);
8124 
8125   size(3);
8126   format %{ "SHR    $dst,$shift" %}
8127   opcode(0xC1, 0x5);  /* C1 /5 ib */
8128   ins_encode( RegOpcImm( dst, shift) );
8129   ins_pipe( ialu_reg );
8130 %}
8131 
8132 
8133 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8134 // This idiom is used by the compiler for the i2b bytecode.
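     // The underlying identity (illustrative, not generated code): for a 32-bit int x,
     //   (x << 24) >> 24  ==  (int)(byte)x
     // so the shift pair collapses into a single sign-extending byte move.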
8135 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8136   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8137 
8138   size(3);
8139   format %{ "MOVSX  $dst,$src :8" %}
8140   ins_encode %{
8141     __ movsbl($dst$$Register, $src$$Register);
8142   %}
8143   ins_pipe(ialu_reg_reg);
8144 %}
8145 
8146 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8147 // This idiom is used by the compiler for the i2s bytecode.
8148 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8149   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8150 
8151   size(3);
8152   format %{ "MOVSX  $dst,$src :16" %}
8153   ins_encode %{
8154     __ movswl($dst$$Register, $src$$Register);
8155   %}
8156   ins_pipe(ialu_reg_reg);
8157 %}
8158 
8159 
8160 // Logical Shift Right by variable
8161 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8162   match(Set dst (URShiftI dst shift));
8163   effect(KILL cr);
8164 
8165   size(2);
8166   format %{ "SHR    $dst,$shift" %}
8167   opcode(0xD3, 0x5);  /* D3 /5 */
8168   ins_encode( OpcP, RegOpc( dst ) );
8169   ins_pipe( ialu_reg_reg );
8170 %}
8171 
8172 
8173 //----------Logical Instructions-----------------------------------------------
8174 //----------Integer Logical Instructions---------------------------------------
8175 // And Instructions
8176 // And Register with Register
8177 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8178   match(Set dst (AndI dst src));
8179   effect(KILL cr);
8180 
8181   size(2);
8182   format %{ "AND    $dst,$src" %}
8183   opcode(0x23);
8184   ins_encode( OpcP, RegReg( dst, src) );
8185   ins_pipe( ialu_reg_reg );
8186 %}
8187 
8188 // And Register with Immediate
8189 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8190   match(Set dst (AndI dst src));
8191   effect(KILL cr);
8192 
8193   format %{ "AND    $dst,$src" %}
8194   opcode(0x81,0x04);  /* Opcode 81 /4 */
8195   // ins_encode( RegImm( dst, src) );
8196   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8197   ins_pipe( ialu_reg );
8198 %}
8199 
8200 // And Register with Memory
8201 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8202   match(Set dst (AndI dst (LoadI src)));
8203   effect(KILL cr);
8204 
8205   ins_cost(125);
8206   format %{ "AND    $dst,$src" %}
8207   opcode(0x23);
8208   ins_encode( OpcP, RegMem( dst, src) );
8209   ins_pipe( ialu_reg_mem );
8210 %}
8211 
8212 // And Memory with Register
8213 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8214   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8215   effect(KILL cr);
8216 
8217   ins_cost(150);
8218   format %{ "AND    $dst,$src" %}
8219   opcode(0x21);  /* Opcode 21 /r */
8220   ins_encode( OpcP, RegMem( src, dst ) );
8221   ins_pipe( ialu_mem_reg );
8222 %}
8223 
8224 // And Memory with Immediate
8225 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8226   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8227   effect(KILL cr);
8228 
8229   ins_cost(125);
8230   format %{ "AND    $dst,$src" %}
8231   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8232   // ins_encode( MemImm( dst, src) );
8233   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8234   ins_pipe( ialu_mem_imm );
8235 %}
8236 
8237 // BMI1 instructions
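     // These rules match the tree shapes the compiler produces for the BMI1 bit idioms.
     // The identities being recognized (illustrative, in C terms):
     //   ANDN:   (~x) & y
     //   BLSI:   x & (-x)        // isolate lowest set bit
     //   BLSMSK: x ^ (x - 1)     // mask up to and including lowest set bit
     //   BLSR:   x & (x - 1)     // clear lowest set bit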
8238 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8239   match(Set dst (AndI (XorI src1 minus_1) src2));
8240   predicate(UseBMI1Instructions);
8241   effect(KILL cr);
8242 
8243   format %{ "ANDNL  $dst, $src1, $src2" %}
8244 
8245   ins_encode %{
8246     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8247   %}
8248   ins_pipe(ialu_reg);
8249 %}
8250 
8251 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8252   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8253   predicate(UseBMI1Instructions);
8254   effect(KILL cr);
8255 
8256   ins_cost(125);
8257   format %{ "ANDNL  $dst, $src1, $src2" %}
8258 
8259   ins_encode %{
8260     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8261   %}
8262   ins_pipe(ialu_reg_mem);
8263 %}
8264 
8265 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8266   match(Set dst (AndI (SubI imm_zero src) src));
8267   predicate(UseBMI1Instructions);
8268   effect(KILL cr);
8269 
8270   format %{ "BLSIL  $dst, $src" %}
8271 
8272   ins_encode %{
8273     __ blsil($dst$$Register, $src$$Register);
8274   %}
8275   ins_pipe(ialu_reg);
8276 %}
8277 
8278 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8279   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8280   predicate(UseBMI1Instructions);
8281   effect(KILL cr);
8282 
8283   ins_cost(125);
8284   format %{ "BLSIL  $dst, $src" %}
8285 
8286   ins_encode %{
8287     __ blsil($dst$$Register, $src$$Address);
8288   %}
8289   ins_pipe(ialu_reg_mem);
8290 %}
8291 
8292 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8293 %{
8294   match(Set dst (XorI (AddI src minus_1) src));
8295   predicate(UseBMI1Instructions);
8296   effect(KILL cr);
8297 
8298   format %{ "BLSMSKL $dst, $src" %}
8299 
8300   ins_encode %{
8301     __ blsmskl($dst$$Register, $src$$Register);
8302   %}
8303 
8304   ins_pipe(ialu_reg);
8305 %}
8306 
8307 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8308 %{
8309   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8310   predicate(UseBMI1Instructions);
8311   effect(KILL cr);
8312 
8313   ins_cost(125);
8314   format %{ "BLSMSKL $dst, $src" %}
8315 
8316   ins_encode %{
8317     __ blsmskl($dst$$Register, $src$$Address);
8318   %}
8319 
8320   ins_pipe(ialu_reg_mem);
8321 %}
8322 
8323 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8324 %{
8325   match(Set dst (AndI (AddI src minus_1) src) );
8326   predicate(UseBMI1Instructions);
8327   effect(KILL cr);
8328 
8329   format %{ "BLSRL  $dst, $src" %}
8330 
8331   ins_encode %{
8332     __ blsrl($dst$$Register, $src$$Register);
8333   %}
8334 
8335   ins_pipe(ialu_reg);
8336 %}
8337 
8338 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8339 %{
8340   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8341   predicate(UseBMI1Instructions);
8342   effect(KILL cr);
8343 
8344   ins_cost(125);
8345   format %{ "BLSRL  $dst, $src" %}
8346 
8347   ins_encode %{
8348     __ blsrl($dst$$Register, $src$$Address);
8349   %}
8350 
8351   ins_pipe(ialu_reg_mem);
8352 %}
8353 
8354 // Or Instructions
8355 // Or Register with Register
8356 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8357   match(Set dst (OrI dst src));
8358   effect(KILL cr);
8359 
8360   size(2);
8361   format %{ "OR     $dst,$src" %}
8362   opcode(0x0B);
8363   ins_encode( OpcP, RegReg( dst, src) );
8364   ins_pipe( ialu_reg_reg );
8365 %}
8366 
8367 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8368   match(Set dst (OrI dst (CastP2X src)));
8369   effect(KILL cr);
8370 
8371   size(2);
8372   format %{ "OR     $dst,$src" %}
8373   opcode(0x0B);
8374   ins_encode( OpcP, RegReg( dst, src) );
8375   ins_pipe( ialu_reg_reg );
8376 %}
8377 
8378 
8379 // Or Register with Immediate
8380 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8381   match(Set dst (OrI dst src));
8382   effect(KILL cr);
8383 
8384   format %{ "OR     $dst,$src" %}
8385   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8386   // ins_encode( RegImm( dst, src) );
8387   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8388   ins_pipe( ialu_reg );
8389 %}
8390 
8391 // Or Register with Memory
8392 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8393   match(Set dst (OrI dst (LoadI src)));
8394   effect(KILL cr);
8395 
8396   ins_cost(125);
8397   format %{ "OR     $dst,$src" %}
8398   opcode(0x0B);
8399   ins_encode( OpcP, RegMem( dst, src) );
8400   ins_pipe( ialu_reg_mem );
8401 %}
8402 
8403 // Or Memory with Register
8404 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8405   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8406   effect(KILL cr);
8407 
8408   ins_cost(150);
8409   format %{ "OR     $dst,$src" %}
8410   opcode(0x09);  /* Opcode 09 /r */
8411   ins_encode( OpcP, RegMem( src, dst ) );
8412   ins_pipe( ialu_mem_reg );
8413 %}
8414 
8415 // Or Memory with Immediate
8416 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8417   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8418   effect(KILL cr);
8419 
8420   ins_cost(125);
8421   format %{ "OR     $dst,$src" %}
8422   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8423   // ins_encode( MemImm( dst, src) );
8424   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8425   ins_pipe( ialu_mem_imm );
8426 %}
8427 
8428 // ROL/ROR
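     // Rotates are recognized from their shift/or idioms, roughly (illustrative, with
     // Java-style >>> for the unsigned right shift):
     //   ROL: (x << s) | (x >>> (32 - s))       ROR: (x >>> s) | (x << (32 - s))
     // The expand rules below rewrite the matched tree into the plain ROL/ROR encodings.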
8429 // ROL expand
8430 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8431   effect(USE_DEF dst, USE shift, KILL cr);
8432 
8433   format %{ "ROL    $dst, $shift" %}
8434   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8435   ins_encode( OpcP, RegOpc( dst ));
8436   ins_pipe( ialu_reg );
8437 %}
8438 
8439 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8440   effect(USE_DEF dst, USE shift, KILL cr);
8441 
8442   format %{ "ROL    $dst, $shift" %}
8443   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8444   ins_encode( RegOpcImm(dst, shift) );
8445   ins_pipe(ialu_reg);
8446 %}
8447 
8448 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8449   effect(USE_DEF dst, USE shift, KILL cr);
8450 
8451   format %{ "ROL    $dst, $shift" %}
8452   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8453   ins_encode(OpcP, RegOpc(dst));
8454   ins_pipe( ialu_reg_reg );
8455 %}
8456 // end of ROL expand
8457 
8458 // ROL 32bit by one once
8459 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8460   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8461 
8462   expand %{
8463     rolI_eReg_imm1(dst, lshift, cr);
8464   %}
8465 %}
8466 
8467 // ROL 32bit var by imm8 once
8468 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8469   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8470   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8471 
8472   expand %{
8473     rolI_eReg_imm8(dst, lshift, cr);
8474   %}
8475 %}
8476 
8477 // ROL 32bit var by var once
8478 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8479   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8480 
8481   expand %{
8482     rolI_eReg_CL(dst, shift, cr);
8483   %}
8484 %}
8485 
8486 // ROL 32bit var by var once
8487 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8488   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8489 
8490   expand %{
8491     rolI_eReg_CL(dst, shift, cr);
8492   %}
8493 %}
8494 
8495 // ROR expand
8496 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8497   effect(USE_DEF dst, USE shift, KILL cr);
8498 
8499   format %{ "ROR    $dst, $shift" %}
8500   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8501   ins_encode( OpcP, RegOpc( dst ) );
8502   ins_pipe( ialu_reg );
8503 %}
8504 
8505 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8506   effect (USE_DEF dst, USE shift, KILL cr);
8507 
8508   format %{ "ROR    $dst, $shift" %}
8509   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8510   ins_encode( RegOpcImm(dst, shift) );
8511   ins_pipe( ialu_reg );
8512 %}
8513 
8514 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8515   effect(USE_DEF dst, USE shift, KILL cr);
8516 
8517   format %{ "ROR    $dst, $shift" %}
8518   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8519   ins_encode(OpcP, RegOpc(dst));
8520   ins_pipe( ialu_reg_reg );
8521 %}
8522 // end of ROR expand
8523 
8524 // ROR right once
8525 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8526   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8527 
8528   expand %{
8529     rorI_eReg_imm1(dst, rshift, cr);
8530   %}
8531 %}
8532 
8533 // ROR 32bit by immI8 once
8534 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8535   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8536   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8537 
8538   expand %{
8539     rorI_eReg_imm8(dst, rshift, cr);
8540   %}
8541 %}
8542 
8543 // ROR 32bit var by var once
8544 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8545   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8546 
8547   expand %{
8548     rorI_eReg_CL(dst, shift, cr);
8549   %}
8550 %}
8551 
8552 // ROR 32bit var by var once
8553 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8554   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8555 
8556   expand %{
8557     rorI_eReg_CL(dst, shift, cr);
8558   %}
8559 %}
8560 
8561 // Xor Instructions
8562 // Xor Register with Register
8563 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8564   match(Set dst (XorI dst src));
8565   effect(KILL cr);
8566 
8567   size(2);
8568   format %{ "XOR    $dst,$src" %}
8569   opcode(0x33);
8570   ins_encode( OpcP, RegReg( dst, src) );
8571   ins_pipe( ialu_reg_reg );
8572 %}
8573 
8574 // Xor Register with Immediate -1
8575 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8576   match(Set dst (XorI dst imm));
8577 
8578   size(2);
8579   format %{ "NOT    $dst" %}
8580   ins_encode %{
8581      __ notl($dst$$Register);
8582   %}
8583   ins_pipe( ialu_reg );
8584 %}
8585 
8586 // Xor Register with Immediate
8587 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8588   match(Set dst (XorI dst src));
8589   effect(KILL cr);
8590 
8591   format %{ "XOR    $dst,$src" %}
8592   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8593   // ins_encode( RegImm( dst, src) );
8594   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8595   ins_pipe( ialu_reg );
8596 %}
8597 
8598 // Xor Register with Memory
8599 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8600   match(Set dst (XorI dst (LoadI src)));
8601   effect(KILL cr);
8602 
8603   ins_cost(125);
8604   format %{ "XOR    $dst,$src" %}
8605   opcode(0x33);
8606   ins_encode( OpcP, RegMem(dst, src) );
8607   ins_pipe( ialu_reg_mem );
8608 %}
8609 
8610 // Xor Memory with Register
8611 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8612   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8613   effect(KILL cr);
8614 
8615   ins_cost(150);
8616   format %{ "XOR    $dst,$src" %}
8617   opcode(0x31);  /* Opcode 31 /r */
8618   ins_encode( OpcP, RegMem( src, dst ) );
8619   ins_pipe( ialu_mem_reg );
8620 %}
8621 
8622 // Xor Memory with Immediate
8623 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8624   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8625   effect(KILL cr);
8626 
8627   ins_cost(125);
8628   format %{ "XOR    $dst,$src" %}
8629   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8630   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8631   ins_pipe( ialu_mem_imm );
8632 %}
8633 
8634 //----------Convert Int to Boolean---------------------------------------------
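     // Conv2B maps zero to 0 and any non-zero value to 1.  The NEG/ADC pair used below
     // relies on NEG setting CF exactly when its operand was non-zero; with dst holding a
     // copy of src (illustrative sketch, not generated code):
     //   NEG dst        // dst = -src, CF = (src != 0)
     //   ADC dst, src   // dst = -src + src + CF = CF, i.e. 0 or 1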
8635 
8636 instruct movI_nocopy(rRegI dst, rRegI src) %{
8637   effect( DEF dst, USE src );
8638   format %{ "MOV    $dst,$src" %}
8639   ins_encode( enc_Copy( dst, src) );
8640   ins_pipe( ialu_reg_reg );
8641 %}
8642 
8643 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8644   effect( USE_DEF dst, USE src, KILL cr );
8645 
8646   size(4);
8647   format %{ "NEG    $dst\n\t"
8648             "ADC    $dst,$src" %}
8649   ins_encode( neg_reg(dst),
8650               OpcRegReg(0x13,dst,src) );
8651   ins_pipe( ialu_reg_reg_long );
8652 %}
8653 
8654 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8655   match(Set dst (Conv2B src));
8656 
8657   expand %{
8658     movI_nocopy(dst,src);
8659     ci2b(dst,src,cr);
8660   %}
8661 %}
8662 
8663 instruct movP_nocopy(rRegI dst, eRegP src) %{
8664   effect( DEF dst, USE src );
8665   format %{ "MOV    $dst,$src" %}
8666   ins_encode( enc_Copy( dst, src) );
8667   ins_pipe( ialu_reg_reg );
8668 %}
8669 
8670 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8671   effect( USE_DEF dst, USE src, KILL cr );
8672   format %{ "NEG    $dst\n\t"
8673             "ADC    $dst,$src" %}
8674   ins_encode( neg_reg(dst),
8675               OpcRegReg(0x13,dst,src) );
8676   ins_pipe( ialu_reg_reg_long );
8677 %}
8678 
8679 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8680   match(Set dst (Conv2B src));
8681 
8682   expand %{
8683     movP_nocopy(dst,src);
8684     cp2b(dst,src,cr);
8685   %}
8686 %}
8687 
8688 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8689   match(Set dst (CmpLTMask p q));
8690   effect(KILL cr);
8691   ins_cost(400);
8692 
8693   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
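       // CmpLTMask yields an all-ones mask (-1) when p < q and 0 otherwise; the sequence
       // below builds it branch-free, roughly:  dst = -(p < q ? 1 : 0).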
8694   format %{ "XOR    $dst,$dst\n\t"
8695             "CMP    $p,$q\n\t"
8696             "SETlt  $dst\n\t"
8697             "NEG    $dst" %}
8698   ins_encode %{
8699     Register Rp = $p$$Register;
8700     Register Rq = $q$$Register;
8701     Register Rd = $dst$$Register;
8702     Label done;
8703     __ xorl(Rd, Rd);
8704     __ cmpl(Rp, Rq);
8705     __ setb(Assembler::less, Rd);
8706     __ negl(Rd);
8707   %}
8708 
8709   ins_pipe(pipe_slow);
8710 %}
8711 
8712 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8713   match(Set dst (CmpLTMask dst zero));
8714   effect(DEF dst, KILL cr);
8715   ins_cost(100);
8716 
8717   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8718   ins_encode %{
8719   __ sarl($dst$$Register, 31);
8720   %}
8721   ins_pipe(ialu_reg);
8722 %}
8723 
8724 /* better to save a register than avoid a branch */
8725 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8726   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8727   effect(KILL cr);
8728   ins_cost(400);
8729   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8730             "JGE    done\n\t"
8731             "ADD    $p,$y\n"
8732             "done:  " %}
8733   ins_encode %{
8734     Register Rp = $p$$Register;
8735     Register Rq = $q$$Register;
8736     Register Ry = $y$$Register;
8737     Label done;
8738     __ subl(Rp, Rq);
8739     __ jccb(Assembler::greaterEqual, done);
8740     __ addl(Rp, Ry);
8741     __ bind(done);
8742   %}
8743 
8744   ins_pipe(pipe_cmplt);
8745 %}
8746 
8747 /* better to save a register than avoid a branch */
8748 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8749   match(Set y (AndI (CmpLTMask p q) y));
8750   effect(KILL cr);
8751 
8752   ins_cost(300);
8753 
8754   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8755             "JLT      done\n\t"
8756             "XORL     $y, $y\n"
8757             "done:  " %}
8758   ins_encode %{
8759     Register Rp = $p$$Register;
8760     Register Rq = $q$$Register;
8761     Register Ry = $y$$Register;
8762     Label done;
8763     __ cmpl(Rp, Rq);
8764     __ jccb(Assembler::less, done);
8765     __ xorl(Ry, Ry);
8766     __ bind(done);
8767   %}
8768 
8769   ins_pipe(pipe_cmplt);
8770 %}
8771 
8772 /* If I enable this, I encourage spilling in the inner loop of compress.
8773 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8774   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8775 */
8776 //----------Overflow Math Instructions-----------------------------------------
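     // These flag-producing nodes support overflow-checked arithmetic (for example the
     // Math.*Exact intrinsics): the instruction is emitted for its overflow-flag effect
     // and the consumer branches on it, roughly (illustrative only):
     //   ADD/SUB/NEG/IMUL op1, op2   // OF set  <=>  the signed result overflowed
     //   JO  <slow path / deoptimize>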
8777 
8778 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8779 %{
8780   match(Set cr (OverflowAddI op1 op2));
8781   effect(DEF cr, USE_KILL op1, USE op2);
8782 
8783   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8784 
8785   ins_encode %{
8786     __ addl($op1$$Register, $op2$$Register);
8787   %}
8788   ins_pipe(ialu_reg_reg);
8789 %}
8790 
8791 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8792 %{
8793   match(Set cr (OverflowAddI op1 op2));
8794   effect(DEF cr, USE_KILL op1, USE op2);
8795 
8796   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8797 
8798   ins_encode %{
8799     __ addl($op1$$Register, $op2$$constant);
8800   %}
8801   ins_pipe(ialu_reg_reg);
8802 %}
8803 
8804 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8805 %{
8806   match(Set cr (OverflowSubI op1 op2));
8807 
8808   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8809   ins_encode %{
8810     __ cmpl($op1$$Register, $op2$$Register);
8811   %}
8812   ins_pipe(ialu_reg_reg);
8813 %}
8814 
8815 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8816 %{
8817   match(Set cr (OverflowSubI op1 op2));
8818 
8819   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8820   ins_encode %{
8821     __ cmpl($op1$$Register, $op2$$constant);
8822   %}
8823   ins_pipe(ialu_reg_reg);
8824 %}
8825 
8826 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8827 %{
8828   match(Set cr (OverflowSubI zero op2));
8829   effect(DEF cr, USE_KILL op2);
8830 
8831   format %{ "NEG    $op2\t# overflow check int" %}
8832   ins_encode %{
8833     __ negl($op2$$Register);
8834   %}
8835   ins_pipe(ialu_reg_reg);
8836 %}
8837 
8838 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8839 %{
8840   match(Set cr (OverflowMulI op1 op2));
8841   effect(DEF cr, USE_KILL op1, USE op2);
8842 
8843   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8844   ins_encode %{
8845     __ imull($op1$$Register, $op2$$Register);
8846   %}
8847   ins_pipe(ialu_reg_reg_alu0);
8848 %}
8849 
8850 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8851 %{
8852   match(Set cr (OverflowMulI op1 op2));
8853   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8854 
8855   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8856   ins_encode %{
8857     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8858   %}
8859   ins_pipe(ialu_reg_reg_alu0);
8860 %}
8861 
8862 //----------Long Instructions------------------------------------------------
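     // On 32-bit x86 a long value occupies a register pair ($reg.lo / $reg.hi), so 64-bit
     // add and subtract chain the carry through the high half, e.g. (illustrative):
     //   ADD dst.lo, src.lo      // may set CF
     //   ADC dst.hi, src.hi      // folds the carry into the high word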
8863 // Add Long Register with Register
8864 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8865   match(Set dst (AddL dst src));
8866   effect(KILL cr);
8867   ins_cost(200);
8868   format %{ "ADD    $dst.lo,$src.lo\n\t"
8869             "ADC    $dst.hi,$src.hi" %}
8870   opcode(0x03, 0x13);
8871   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8872   ins_pipe( ialu_reg_reg_long );
8873 %}
8874 
8875 // Add Long Register with Immediate
8876 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8877   match(Set dst (AddL dst src));
8878   effect(KILL cr);
8879   format %{ "ADD    $dst.lo,$src.lo\n\t"
8880             "ADC    $dst.hi,$src.hi" %}
8881   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8882   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8883   ins_pipe( ialu_reg_long );
8884 %}
8885 
8886 // Add Long Register with Memory
8887 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8888   match(Set dst (AddL dst (LoadL mem)));
8889   effect(KILL cr);
8890   ins_cost(125);
8891   format %{ "ADD    $dst.lo,$mem\n\t"
8892             "ADC    $dst.hi,$mem+4" %}
8893   opcode(0x03, 0x13);
8894   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8895   ins_pipe( ialu_reg_long_mem );
8896 %}
8897 
8898 // Subtract Long Register with Register.
8899 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8900   match(Set dst (SubL dst src));
8901   effect(KILL cr);
8902   ins_cost(200);
8903   format %{ "SUB    $dst.lo,$src.lo\n\t"
8904             "SBB    $dst.hi,$src.hi" %}
8905   opcode(0x2B, 0x1B);
8906   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8907   ins_pipe( ialu_reg_reg_long );
8908 %}
8909 
8910 // Subtract Long Register with Immediate
8911 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8912   match(Set dst (SubL dst src));
8913   effect(KILL cr);
8914   format %{ "SUB    $dst.lo,$src.lo\n\t"
8915             "SBB    $dst.hi,$src.hi" %}
8916   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8917   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8918   ins_pipe( ialu_reg_long );
8919 %}
8920 
8921 // Subtract Long Register with Memory
8922 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8923   match(Set dst (SubL dst (LoadL mem)));
8924   effect(KILL cr);
8925   ins_cost(125);
8926   format %{ "SUB    $dst.lo,$mem\n\t"
8927             "SBB    $dst.hi,$mem+4" %}
8928   opcode(0x2B, 0x1B);
8929   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8930   ins_pipe( ialu_reg_long_mem );
8931 %}
8932 
8933 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8934   match(Set dst (SubL zero dst));
8935   effect(KILL cr);
8936   ins_cost(300);
8937   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8938   ins_encode( neg_long(dst) );
8939   ins_pipe( ialu_reg_reg_long );
8940 %}
8941 
8942 // And Long Register with Register
8943 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8944   match(Set dst (AndL dst src));
8945   effect(KILL cr);
8946   format %{ "AND    $dst.lo,$src.lo\n\t"
8947             "AND    $dst.hi,$src.hi" %}
8948   opcode(0x23,0x23);
8949   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8950   ins_pipe( ialu_reg_reg_long );
8951 %}
8952 
8953 // And Long Register with Immediate
8954 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8955   match(Set dst (AndL dst src));
8956   effect(KILL cr);
8957   format %{ "AND    $dst.lo,$src.lo\n\t"
8958             "AND    $dst.hi,$src.hi" %}
8959   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8960   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8961   ins_pipe( ialu_reg_long );
8962 %}
8963 
8964 // And Long Register with Memory
8965 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8966   match(Set dst (AndL dst (LoadL mem)));
8967   effect(KILL cr);
8968   ins_cost(125);
8969   format %{ "AND    $dst.lo,$mem\n\t"
8970             "AND    $dst.hi,$mem+4" %}
8971   opcode(0x23, 0x23);
8972   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8973   ins_pipe( ialu_reg_long_mem );
8974 %}
8975 
8976 // BMI1 instructions
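     // 64-bit forms of the BMI1 idioms, built from two 32-bit operations.  The JNZ / JNC
     // test after the low-word op skips the high-word op whenever the interesting (lowest
     // set) bit was already found in the low 32 bits, so BLSI / BLSMSK / BLSR is applied
     // to the high word only when the low source word was zero.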
8977 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8978   match(Set dst (AndL (XorL src1 minus_1) src2));
8979   predicate(UseBMI1Instructions);
8980   effect(KILL cr, TEMP dst);
8981 
8982   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8983             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8984          %}
8985 
8986   ins_encode %{
8987     Register Rdst = $dst$$Register;
8988     Register Rsrc1 = $src1$$Register;
8989     Register Rsrc2 = $src2$$Register;
8990     __ andnl(Rdst, Rsrc1, Rsrc2);
8991     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8992   %}
8993   ins_pipe(ialu_reg_reg_long);
8994 %}
8995 
8996 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8997   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8998   predicate(UseBMI1Instructions);
8999   effect(KILL cr, TEMP dst);
9000 
9001   ins_cost(125);
9002   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9003             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9004          %}
9005 
9006   ins_encode %{
9007     Register Rdst = $dst$$Register;
9008     Register Rsrc1 = $src1$$Register;
9009     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9010 
9011     __ andnl(Rdst, Rsrc1, $src2$$Address);
9012     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9013   %}
9014   ins_pipe(ialu_reg_mem);
9015 %}
9016 
9017 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9018   match(Set dst (AndL (SubL imm_zero src) src));
9019   predicate(UseBMI1Instructions);
9020   effect(KILL cr, TEMP dst);
9021 
9022   format %{ "MOVL   $dst.hi, 0\n\t"
9023             "BLSIL  $dst.lo, $src.lo\n\t"
9024             "JNZ    done\n\t"
9025             "BLSIL  $dst.hi, $src.hi\n"
9026             "done:"
9027          %}
9028 
9029   ins_encode %{
9030     Label done;
9031     Register Rdst = $dst$$Register;
9032     Register Rsrc = $src$$Register;
9033     __ movl(HIGH_FROM_LOW(Rdst), 0);
9034     __ blsil(Rdst, Rsrc);
9035     __ jccb(Assembler::notZero, done);
9036     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9037     __ bind(done);
9038   %}
9039   ins_pipe(ialu_reg);
9040 %}
9041 
9042 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9043   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9044   predicate(UseBMI1Instructions);
9045   effect(KILL cr, TEMP dst);
9046 
9047   ins_cost(125);
9048   format %{ "MOVL   $dst.hi, 0\n\t"
9049             "BLSIL  $dst.lo, $src\n\t"
9050             "JNZ    done\n\t"
9051             "BLSIL  $dst.hi, $src+4\n"
9052             "done:"
9053          %}
9054 
9055   ins_encode %{
9056     Label done;
9057     Register Rdst = $dst$$Register;
9058     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9059 
9060     __ movl(HIGH_FROM_LOW(Rdst), 0);
9061     __ blsil(Rdst, $src$$Address);
9062     __ jccb(Assembler::notZero, done);
9063     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9064     __ bind(done);
9065   %}
9066   ins_pipe(ialu_reg_mem);
9067 %}
9068 
9069 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9070 %{
9071   match(Set dst (XorL (AddL src minus_1) src));
9072   predicate(UseBMI1Instructions);
9073   effect(KILL cr, TEMP dst);
9074 
9075   format %{ "MOVL    $dst.hi, 0\n\t"
9076             "BLSMSKL $dst.lo, $src.lo\n\t"
9077             "JNC     done\n\t"
9078             "BLSMSKL $dst.hi, $src.hi\n"
9079             "done:"
9080          %}
9081 
9082   ins_encode %{
9083     Label done;
9084     Register Rdst = $dst$$Register;
9085     Register Rsrc = $src$$Register;
9086     __ movl(HIGH_FROM_LOW(Rdst), 0);
9087     __ blsmskl(Rdst, Rsrc);
9088     __ jccb(Assembler::carryClear, done);
9089     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9090     __ bind(done);
9091   %}
9092 
9093   ins_pipe(ialu_reg);
9094 %}
9095 
9096 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9097 %{
9098   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9099   predicate(UseBMI1Instructions);
9100   effect(KILL cr, TEMP dst);
9101 
9102   ins_cost(125);
9103   format %{ "MOVL    $dst.hi, 0\n\t"
9104             "BLSMSKL $dst.lo, $src\n\t"
9105             "JNC     done\n\t"
9106             "BLSMSKL $dst.hi, $src+4\n"
9107             "done:"
9108          %}
9109 
9110   ins_encode %{
9111     Label done;
9112     Register Rdst = $dst$$Register;
9113     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9114 
9115     __ movl(HIGH_FROM_LOW(Rdst), 0);
9116     __ blsmskl(Rdst, $src$$Address);
9117     __ jccb(Assembler::carryClear, done);
9118     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9119     __ bind(done);
9120   %}
9121 
9122   ins_pipe(ialu_reg_mem);
9123 %}
9124 
9125 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9126 %{
9127   match(Set dst (AndL (AddL src minus_1) src) );
9128   predicate(UseBMI1Instructions);
9129   effect(KILL cr, TEMP dst);
9130 
9131   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9132             "BLSRL  $dst.lo, $src.lo\n\t"
9133             "JNC    done\n\t"
9134             "BLSRL  $dst.hi, $src.hi\n"
9135             "done:"
9136   %}
9137 
9138   ins_encode %{
9139     Label done;
9140     Register Rdst = $dst$$Register;
9141     Register Rsrc = $src$$Register;
9142     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9143     __ blsrl(Rdst, Rsrc);
9144     __ jccb(Assembler::carryClear, done);
9145     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9146     __ bind(done);
9147   %}
9148 
9149   ins_pipe(ialu_reg);
9150 %}
9151 
9152 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9153 %{
9154   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9155   predicate(UseBMI1Instructions);
9156   effect(KILL cr, TEMP dst);
9157 
9158   ins_cost(125);
9159   format %{ "MOVL   $dst.hi, $src+4\n\t"
9160             "BLSRL  $dst.lo, $src\n\t"
9161             "JNC    done\n\t"
9162             "BLSRL  $dst.hi, $src+4\n"
9163             "done:"
9164   %}
9165 
9166   ins_encode %{
9167     Label done;
9168     Register Rdst = $dst$$Register;
9169     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9170     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9171     __ blsrl(Rdst, $src$$Address);
9172     __ jccb(Assembler::carryClear, done);
9173     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9174     __ bind(done);
9175   %}
9176 
9177   ins_pipe(ialu_reg_mem);
9178 %}
9179 
9180 // Or Long Register with Register
9181 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9182   match(Set dst (OrL dst src));
9183   effect(KILL cr);
9184   format %{ "OR     $dst.lo,$src.lo\n\t"
9185             "OR     $dst.hi,$src.hi" %}
9186   opcode(0x0B,0x0B);
9187   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9188   ins_pipe( ialu_reg_reg_long );
9189 %}
9190 
9191 // Or Long Register with Immediate
9192 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9193   match(Set dst (OrL dst src));
9194   effect(KILL cr);
9195   format %{ "OR     $dst.lo,$src.lo\n\t"
9196             "OR     $dst.hi,$src.hi" %}
9197   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9198   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9199   ins_pipe( ialu_reg_long );
9200 %}
9201 
9202 // Or Long Register with Memory
9203 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9204   match(Set dst (OrL dst (LoadL mem)));
9205   effect(KILL cr);
9206   ins_cost(125);
9207   format %{ "OR     $dst.lo,$mem\n\t"
9208             "OR     $dst.hi,$mem+4" %}
9209   opcode(0x0B,0x0B);
9210   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9211   ins_pipe( ialu_reg_long_mem );
9212 %}
9213 
9214 // Xor Long Register with Register
9215 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9216   match(Set dst (XorL dst src));
9217   effect(KILL cr);
9218   format %{ "XOR    $dst.lo,$src.lo\n\t"
9219             "XOR    $dst.hi,$src.hi" %}
9220   opcode(0x33,0x33);
9221   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9222   ins_pipe( ialu_reg_reg_long );
9223 %}
9224 
9225 // Xor Long Register with Immediate -1
9226 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9227   match(Set dst (XorL dst imm));
9228   format %{ "NOT    $dst.lo\n\t"
9229             "NOT    $dst.hi" %}
9230   ins_encode %{
9231      __ notl($dst$$Register);
9232      __ notl(HIGH_FROM_LOW($dst$$Register));
9233   %}
9234   ins_pipe( ialu_reg_long );
9235 %}
9236 
9237 // Xor Long Register with Immediate
9238 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9239   match(Set dst (XorL dst src));
9240   effect(KILL cr);
9241   format %{ "XOR    $dst.lo,$src.lo\n\t"
9242             "XOR    $dst.hi,$src.hi" %}
9243   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9244   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9245   ins_pipe( ialu_reg_long );
9246 %}
9247 
9248 // Xor Long Register with Memory
9249 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9250   match(Set dst (XorL dst (LoadL mem)));
9251   effect(KILL cr);
9252   ins_cost(125);
9253   format %{ "XOR    $dst.lo,$mem\n\t"
9254             "XOR    $dst.hi,$mem+4" %}
9255   opcode(0x33,0x33);
9256   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9257   ins_pipe( ialu_reg_long_mem );
9258 %}
9259 
9260 // Shift Left Long by 1
9261 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9262   predicate(UseNewLongLShift);
9263   match(Set dst (LShiftL dst cnt));
9264   effect(KILL cr);
9265   ins_cost(100);
9266   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9267             "ADC    $dst.hi,$dst.hi" %}
9268   ins_encode %{
9269     __ addl($dst$$Register,$dst$$Register);
9270     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9271   %}
9272   ins_pipe( ialu_reg_long );
9273 %}
9274 
9275 // Shift Left Long by 2
9276 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9277   predicate(UseNewLongLShift);
9278   match(Set dst (LShiftL dst cnt));
9279   effect(KILL cr);
9280   ins_cost(100);
9281   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9282             "ADC    $dst.hi,$dst.hi\n\t"
9283             "ADD    $dst.lo,$dst.lo\n\t"
9284             "ADC    $dst.hi,$dst.hi" %}
9285   ins_encode %{
9286     __ addl($dst$$Register,$dst$$Register);
9287     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9288     __ addl($dst$$Register,$dst$$Register);
9289     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9290   %}
9291   ins_pipe( ialu_reg_long );
9292 %}
9293 
9294 // Shift Left Long by 3
9295 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9296   predicate(UseNewLongLShift);
9297   match(Set dst (LShiftL dst cnt));
9298   effect(KILL cr);
9299   ins_cost(100);
9300   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9301             "ADC    $dst.hi,$dst.hi\n\t"
9302             "ADD    $dst.lo,$dst.lo\n\t"
9303             "ADC    $dst.hi,$dst.hi\n\t"
9304             "ADD    $dst.lo,$dst.lo\n\t"
9305             "ADC    $dst.hi,$dst.hi" %}
9306   ins_encode %{
9307     __ addl($dst$$Register,$dst$$Register);
9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9309     __ addl($dst$$Register,$dst$$Register);
9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9311     __ addl($dst$$Register,$dst$$Register);
9312     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9313   %}
9314   ins_pipe( ialu_reg_long );
9315 %}
9316 
9317 // Shift Left Long by 1-31
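     // SHLD shifts the high word left while filling the vacated bits from the low word,
     // so together with SHL on the low word it gives the 64-bit shift.  Illustrative C
     // sketch for a count n in 1..31:
     //   hi = (hi << n) | (lo >> (32 - n));
     //   lo =  lo << n;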
9318 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9319   match(Set dst (LShiftL dst cnt));
9320   effect(KILL cr);
9321   ins_cost(200);
9322   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9323             "SHL    $dst.lo,$cnt" %}
9324   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9325   ins_encode( move_long_small_shift(dst,cnt) );
9326   ins_pipe( ialu_reg_long );
9327 %}
9328 
9329 // Shift Left Long by 32-63
9330 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9331   match(Set dst (LShiftL dst cnt));
9332   effect(KILL cr);
9333   ins_cost(300);
9334   format %{ "MOV    $dst.hi,$dst.lo\n"
9335           "\tSHL    $dst.hi,$cnt-32\n"
9336           "\tXOR    $dst.lo,$dst.lo" %}
9337   opcode(0xC1, 0x4);  /* C1 /4 ib */
9338   ins_encode( move_long_big_shift_clr(dst,cnt) );
9339   ins_pipe( ialu_reg_long );
9340 %}
9341 
9342 // Shift Left Long by variable
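     // The hardware masks a CL shift count to 5 bits, so counts of 32..63 are handled by
     // first moving the low word into the high word and clearing the low word; the
     // following SHLD/SHL by (count & 31) then completes the 64-bit shift.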
9343 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9344   match(Set dst (LShiftL dst shift));
9345   effect(KILL cr);
9346   ins_cost(500+200);
9347   size(17);
9348   format %{ "TEST   $shift,32\n\t"
9349             "JEQ,s  small\n\t"
9350             "MOV    $dst.hi,$dst.lo\n\t"
9351             "XOR    $dst.lo,$dst.lo\n"
9352     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9353             "SHL    $dst.lo,$shift" %}
9354   ins_encode( shift_left_long( dst, shift ) );
9355   ins_pipe( pipe_slow );
9356 %}
9357 
9358 // Shift Right Long by 1-31
9359 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9360   match(Set dst (URShiftL dst cnt));
9361   effect(KILL cr);
9362   ins_cost(200);
9363   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9364             "SHR    $dst.hi,$cnt" %}
9365   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9366   ins_encode( move_long_small_shift(dst,cnt) );
9367   ins_pipe( ialu_reg_long );
9368 %}
9369 
9370 // Shift Right Long by 32-63
9371 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9372   match(Set dst (URShiftL dst cnt));
9373   effect(KILL cr);
9374   ins_cost(300);
9375   format %{ "MOV    $dst.lo,$dst.hi\n"
9376           "\tSHR    $dst.lo,$cnt-32\n"
9377           "\tXOR    $dst.hi,$dst.hi" %}
9378   opcode(0xC1, 0x5);  /* C1 /5 ib */
9379   ins_encode( move_long_big_shift_clr(dst,cnt) );
9380   ins_pipe( ialu_reg_long );
9381 %}
9382 
9383 // Shift Right Long by variable
9384 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9385   match(Set dst (URShiftL dst shift));
9386   effect(KILL cr);
9387   ins_cost(600);
9388   size(17);
9389   format %{ "TEST   $shift,32\n\t"
9390             "JEQ,s  small\n\t"
9391             "MOV    $dst.lo,$dst.hi\n\t"
9392             "XOR    $dst.hi,$dst.hi\n"
9393     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9394             "SHR    $dst.hi,$shift" %}
9395   ins_encode( shift_right_long( dst, shift ) );
9396   ins_pipe( pipe_slow );
9397 %}
9398 
9399 // Shift Right arithmetic Long by 1-31
9400 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9401   match(Set dst (RShiftL dst cnt));
9402   effect(KILL cr);
9403   ins_cost(200);
9404   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9405             "SAR    $dst.hi,$cnt" %}
9406   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9407   ins_encode( move_long_small_shift(dst,cnt) );
9408   ins_pipe( ialu_reg_long );
9409 %}
9410 
9411 // Shift Right arithmetic Long by 32-63
9412 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9413   match(Set dst (RShiftL dst cnt));
9414   effect(KILL cr);
9415   ins_cost(300);
9416   format %{ "MOV    $dst.lo,$dst.hi\n"
9417           "\tSAR    $dst.lo,$cnt-32\n"
9418           "\tSAR    $dst.hi,31" %}
9419   opcode(0xC1, 0x7);  /* C1 /7 ib */
9420   ins_encode( move_long_big_shift_sign(dst,cnt) );
9421   ins_pipe( ialu_reg_long );
9422 %}
9423 
9424 // Shift Right arithmetic Long by variable
9425 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9426   match(Set dst (RShiftL dst shift));
9427   effect(KILL cr);
9428   ins_cost(600);
9429   size(18);
9430   format %{ "TEST   $shift,32\n\t"
9431             "JEQ,s  small\n\t"
9432             "MOV    $dst.lo,$dst.hi\n\t"
9433             "SAR    $dst.hi,31\n"
9434     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9435             "SAR    $dst.hi,$shift" %}
9436   ins_encode( shift_right_arith_long( dst, shift ) );
9437   ins_pipe( pipe_slow );
9438 %}
9439 
9440 
9441 //----------Double Instructions------------------------------------------------
9442 // Double Math
9443 
9444 // Compare & branch
9445 
9446 // P6 version of float compare, sets condition codes in EFLAGS
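     // FUCOMIP reports an unordered result (a NaN operand) as ZF=PF=CF=1.  The fixup
     // below (JNP / MOV AH,1 / SAHF) rewrites that case to CF=1 with ZF=0, so a NaN
     // operand is seen by the consuming branch as plain "below, not equal" rather than
     // as "equal".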
9447 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9448   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9449   match(Set cr (CmpD src1 src2));
9450   effect(KILL rax);
9451   ins_cost(150);
9452   format %{ "FLD    $src1\n\t"
9453             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9454             "JNP    exit\n\t"
9455             "MOV    ah,1       // saw a NaN, set CF\n\t"
9456             "SAHF\n"
9457      "exit:\tNOP               // avoid branch to branch" %}
9458   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9459   ins_encode( Push_Reg_DPR(src1),
9460               OpcP, RegOpc(src2),
9461               cmpF_P6_fixup );
9462   ins_pipe( pipe_slow );
9463 %}
9464 
9465 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9466   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9467   match(Set cr (CmpD src1 src2));
9468   ins_cost(150);
9469   format %{ "FLD    $src1\n\t"
9470             "FUCOMIP ST,$src2  // P6 instruction" %}
9471   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9472   ins_encode( Push_Reg_DPR(src1),
9473               OpcP, RegOpc(src2));
9474   ins_pipe( pipe_slow );
9475 %}
9476 
9477 // Compare & branch
9478 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9479   predicate(UseSSE<=1);
9480   match(Set cr (CmpD src1 src2));
9481   effect(KILL rax);
9482   ins_cost(200);
9483   format %{ "FLD    $src1\n\t"
9484             "FCOMp  $src2\n\t"
9485             "FNSTSW AX\n\t"
9486             "TEST   AX,0x400\n\t"
9487             "JZ,s   flags\n\t"
9488             "MOV    AH,1\t# unordered treat as LT\n"
9489     "flags:\tSAHF" %}
9490   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9491   ins_encode( Push_Reg_DPR(src1),
9492               OpcP, RegOpc(src2),
9493               fpu_flags);
9494   ins_pipe( pipe_slow );
9495 %}
9496 
9497 // Compare vs zero into -1,0,1
9498 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9499   predicate(UseSSE<=1);
9500   match(Set dst (CmpD3 src1 zero));
9501   effect(KILL cr, KILL rax);
9502   ins_cost(280);
9503   format %{ "FTSTD  $dst,$src1" %}
9504   opcode(0xE4, 0xD9);
9505   ins_encode( Push_Reg_DPR(src1),
9506               OpcS, OpcP, PopFPU,
9507               CmpF_Result(dst));
9508   ins_pipe( pipe_slow );
9509 %}
9510 
9511 // Compare into -1,0,1
9512 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9513   predicate(UseSSE<=1);
9514   match(Set dst (CmpD3 src1 src2));
9515   effect(KILL cr, KILL rax);
9516   ins_cost(300);
9517   format %{ "FCMPD  $dst,$src1,$src2" %}
9518   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9519   ins_encode( Push_Reg_DPR(src1),
9520               OpcP, RegOpc(src2),
9521               CmpF_Result(dst));
9522   ins_pipe( pipe_slow );
9523 %}
9524 
9525 // float compare and set condition codes in EFLAGS by XMM regs
9526 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9527   predicate(UseSSE>=2);
9528   match(Set cr (CmpD src1 src2));
9529   ins_cost(145);
9530   format %{ "UCOMISD $src1,$src2\n\t"
9531             "JNP,s   exit\n\t"
9532             "PUSHF\t# saw NaN, set CF\n\t"
9533             "AND     [rsp], #0xffffff2b\n\t"
9534             "POPF\n"
9535     "exit:" %}
9536   ins_encode %{
9537     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9538     emit_cmpfp_fixup(_masm);
9539   %}
9540   ins_pipe( pipe_slow );
9541 %}
9542 
9543 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9544   predicate(UseSSE>=2);
9545   match(Set cr (CmpD src1 src2));
9546   ins_cost(100);
9547   format %{ "UCOMISD $src1,$src2" %}
9548   ins_encode %{
9549     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9550   %}
9551   ins_pipe( pipe_slow );
9552 %}
9553 
9554 // float compare and set condition codes in EFLAGS by XMM regs
9555 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9556   predicate(UseSSE>=2);
9557   match(Set cr (CmpD src1 (LoadD src2)));
9558   ins_cost(145);
9559   format %{ "UCOMISD $src1,$src2\n\t"
9560             "JNP,s   exit\n\t"
9561             "PUSHF\t# saw NaN, set CF\n\t"
9562             "AND     [rsp], #0xffffff2b\n\t"
9563             "POPF\n"
9564     "exit:" %}
9565   ins_encode %{
9566     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9567     emit_cmpfp_fixup(_masm);
9568   %}
9569   ins_pipe( pipe_slow );
9570 %}
9571 
9572 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9573   predicate(UseSSE>=2);
9574   match(Set cr (CmpD src1 (LoadD src2)));
9575   ins_cost(100);
9576   format %{ "UCOMISD $src1,$src2" %}
9577   ins_encode %{
9578     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9579   %}
9580   ins_pipe( pipe_slow );
9581 %}
9582 
9583 // Compare into -1,0,1 in XMM
9584 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9585   predicate(UseSSE>=2);
9586   match(Set dst (CmpD3 src1 src2));
9587   effect(KILL cr);
9588   ins_cost(255);
9589   format %{ "UCOMISD $src1, $src2\n\t"
9590             "MOV     $dst, #-1\n\t"
9591             "JP,s    done\n\t"
9592             "JB,s    done\n\t"
9593             "SETNE   $dst\n\t"
9594             "MOVZB   $dst, $dst\n"
9595     "done:" %}
9596   ins_encode %{
9597     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9598     emit_cmpfp3(_masm, $dst$$Register);
9599   %}
9600   ins_pipe( pipe_slow );
9601 %}
9602 
9603 // Compare into -1,0,1 in XMM and memory
9604 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9605   predicate(UseSSE>=2);
9606   match(Set dst (CmpD3 src1 (LoadD src2)));
9607   effect(KILL cr);
9608   ins_cost(275);
9609   format %{ "UCOMISD $src1, $src2\n\t"
9610             "MOV     $dst, #-1\n\t"
9611             "JP,s    done\n\t"
9612             "JB,s    done\n\t"
9613             "SETNE   $dst\n\t"
9614             "MOVZB   $dst, $dst\n"
9615     "done:" %}
9616   ins_encode %{
9617     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9618     emit_cmpfp3(_masm, $dst$$Register);
9619   %}
9620   ins_pipe( pipe_slow );
9621 %}
9622 
9623 
9624 instruct subDPR_reg(regDPR dst, regDPR src) %{
9625   predicate (UseSSE <=1);
9626   match(Set dst (SubD dst src));
9627 
9628   format %{ "FLD    $src\n\t"
9629             "DSUBp  $dst,ST" %}
9630   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9631   ins_cost(150);
9632   ins_encode( Push_Reg_DPR(src),
9633               OpcP, RegOpc(dst) );
9634   ins_pipe( fpu_reg_reg );
9635 %}
9636 
9637 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9638   predicate (UseSSE <=1);
9639   match(Set dst (RoundDouble (SubD src1 src2)));
9640   ins_cost(250);
9641 
9642   format %{ "FLD    $src2\n\t"
9643             "DSUB   ST,$src1\n\t"
9644             "FSTP_D $dst\t# D-round" %}
9645   opcode(0xD8, 0x5);
9646   ins_encode( Push_Reg_DPR(src2),
9647               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9648   ins_pipe( fpu_mem_reg_reg );
9649 %}
9650 
9651 
9652 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9653   predicate (UseSSE <=1);
9654   match(Set dst (SubD dst (LoadD src)));
9655   ins_cost(150);
9656 
9657   format %{ "FLD    $src\n\t"
9658             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9660   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9661               OpcP, RegOpc(dst) );
9662   ins_pipe( fpu_reg_mem );
9663 %}
9664 
9665 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9666   predicate (UseSSE<=1);
9667   match(Set dst (AbsD src));
9668   ins_cost(100);
9669   format %{ "FABS" %}
9670   opcode(0xE1, 0xD9);
9671   ins_encode( OpcS, OpcP );
9672   ins_pipe( fpu_reg_reg );
9673 %}
9674 
9675 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9676   predicate(UseSSE<=1);
9677   match(Set dst (NegD src));
9678   ins_cost(100);
9679   format %{ "FCHS" %}
9680   opcode(0xE0, 0xD9);
9681   ins_encode( OpcS, OpcP );
9682   ins_pipe( fpu_reg_reg );
9683 %}
9684 
9685 instruct addDPR_reg(regDPR dst, regDPR src) %{
9686   predicate(UseSSE<=1);
9687   match(Set dst (AddD dst src));
9688   format %{ "FLD    $src\n\t"
9689             "DADD   $dst,ST" %}
9690   size(4);
9691   ins_cost(150);
9692   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9693   ins_encode( Push_Reg_DPR(src),
9694               OpcP, RegOpc(dst) );
9695   ins_pipe( fpu_reg_reg );
9696 %}
9697 
9698 
9699 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9700   predicate(UseSSE<=1);
9701   match(Set dst (RoundDouble (AddD src1 src2)));
9702   ins_cost(250);
9703 
9704   format %{ "FLD    $src2\n\t"
9705             "DADD   ST,$src1\n\t"
9706             "FSTP_D $dst\t# D-round" %}
9707   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9708   ins_encode( Push_Reg_DPR(src2),
9709               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9710   ins_pipe( fpu_mem_reg_reg );
9711 %}
9712 
9713 
9714 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9715   predicate(UseSSE<=1);
9716   match(Set dst (AddD dst (LoadD src)));
9717   ins_cost(150);
9718 
9719   format %{ "FLD    $src\n\t"
9720             "DADDp  $dst,ST" %}
9721   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9722   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9723               OpcP, RegOpc(dst) );
9724   ins_pipe( fpu_reg_mem );
9725 %}
9726 
9727 // add-to-memory
9728 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9729   predicate(UseSSE<=1);
9730   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9731   ins_cost(150);
9732 
9733   format %{ "FLD_D  $dst\n\t"
9734             "DADD   ST,$src\n\t"
9735             "FST_D  $dst" %}
9736   opcode(0xDD, 0x0);
9737   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9738               Opcode(0xD8), RegOpc(src),
9739               set_instruction_start,
9740               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9741   ins_pipe( fpu_reg_mem );
9742 %}
9743 
9744 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9745   predicate(UseSSE<=1);
9746   match(Set dst (AddD dst con));
9747   ins_cost(125);
9748   format %{ "FLD1\n\t"
9749             "DADDp  $dst,ST" %}
9750   ins_encode %{
9751     __ fld1();
9752     __ faddp($dst$$reg);
9753   %}
9754   ins_pipe(fpu_reg);
9755 %}
9756 
9757 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9758   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9759   match(Set dst (AddD dst con));
9760   ins_cost(200);
9761   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9762             "DADDp  $dst,ST" %}
9763   ins_encode %{
9764     __ fld_d($constantaddress($con));
9765     __ faddp($dst$$reg);
9766   %}
9767   ins_pipe(fpu_reg_mem);
9768 %}
9769 
9770 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9771   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9772   match(Set dst (RoundDouble (AddD src con)));
9773   ins_cost(200);
9774   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9775             "DADD   ST,$src\n\t"
9776             "FSTP_D $dst\t# D-round" %}
9777   ins_encode %{
9778     __ fld_d($constantaddress($con));
9779     __ fadd($src$$reg);
9780     __ fstp_d(Address(rsp, $dst$$disp));
9781   %}
9782   ins_pipe(fpu_mem_reg_con);
9783 %}
9784 
9785 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9786   predicate(UseSSE<=1);
9787   match(Set dst (MulD dst src));
9788   format %{ "FLD    $src\n\t"
9789             "DMULp  $dst,ST" %}
9790   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9791   ins_cost(150);
9792   ins_encode( Push_Reg_DPR(src),
9793               OpcP, RegOpc(dst) );
9794   ins_pipe( fpu_reg_reg );
9795 %}
9796 
9797 // Strict FP instruction biases argument before multiply then
9798 // biases result to avoid double rounding of subnormals.
9799 //
9800 // scale arg1 by multiplying arg1 by 2^(-15360)
9801 // load arg2
9802 // multiply scaled arg1 by arg2
9803 // rescale product by 2^(15360)
9804 //
9805 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9806   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9807   match(Set dst (MulD dst src));
9808   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9809 
9810   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9811             "DMULp  $dst,ST\n\t"
9812             "FLD    $src\n\t"
9813             "DMULp  $dst,ST\n\t"
9814             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9815             "DMULp  $dst,ST\n\t" %}
9816   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9817   ins_encode( strictfp_bias1(dst),
9818               Push_Reg_DPR(src),
9819               OpcP, RegOpc(dst),
9820               strictfp_bias2(dst) );
9821   ins_pipe( fpu_reg_reg );
9822 %}
9823 
9824 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9825   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9826   match(Set dst (MulD dst con));
9827   ins_cost(200);
9828   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9829             "DMULp  $dst,ST" %}
9830   ins_encode %{
9831     __ fld_d($constantaddress($con));
9832     __ fmulp($dst$$reg);
9833   %}
9834   ins_pipe(fpu_reg_mem);
9835 %}
9836 
9837 
9838 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9839   predicate( UseSSE<=1 );
9840   match(Set dst (MulD dst (LoadD src)));
9841   ins_cost(200);
9842   format %{ "FLD_D  $src\n\t"
9843             "DMULp  $dst,ST" %}
9844   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9845   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9846               OpcP, RegOpc(dst) );
9847   ins_pipe( fpu_reg_mem );
9848 %}
9849 
9850 //
9851 // Cisc-alternate to reg-reg multiply
9852 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9853   predicate( UseSSE<=1 );
9854   match(Set dst (MulD src (LoadD mem)));
9855   ins_cost(250);
9856   format %{ "FLD_D  $mem\n\t"
9857             "DMUL   ST,$src\n\t"
9858             "FSTP_D $dst" %}
9859   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9860   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9861               OpcReg_FPR(src),
9862               Pop_Reg_DPR(dst) );
9863   ins_pipe( fpu_reg_reg_mem );
9864 %}
9865 
9866 
9867 // MACRO3 -- addDPR a mulDPR
9868 // This instruction is a '2-address' instruction in that the result goes
9869 // back to src2.  This eliminates a move from the macro; possibly the
9870 // register allocator will have to add it back (and maybe not).
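//   i.e.  src2 = src0 * src1 + src2   (the sum is written back into src2)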
9871 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9872   predicate( UseSSE<=1 );
9873   match(Set src2 (AddD (MulD src0 src1) src2));
9874   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9875             "DMUL   ST,$src1\n\t"
9876             "DADDp  $src2,ST" %}
9877   ins_cost(250);
9878   opcode(0xDD); /* LoadD DD /0 */
9879   ins_encode( Push_Reg_FPR(src0),
9880               FMul_ST_reg(src1),
9881               FAddP_reg_ST(src2) );
9882   ins_pipe( fpu_reg_reg_reg );
9883 %}
9884 
9885 
9886 // MACRO3 -- subDPR a mulDPR
9887 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9888   predicate( UseSSE<=1 );
9889   match(Set src2 (SubD (MulD src0 src1) src2));
9890   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9891             "DMUL   ST,$src1\n\t"
9892             "DSUBRp $src2,ST" %}
9893   ins_cost(250);
9894   ins_encode( Push_Reg_FPR(src0),
9895               FMul_ST_reg(src1),
9896               Opcode(0xDE), Opc_plus(0xE0,src2));
9897   ins_pipe( fpu_reg_reg_reg );
9898 %}
9899 
9900 
9901 instruct divDPR_reg(regDPR dst, regDPR src) %{
9902   predicate( UseSSE<=1 );
9903   match(Set dst (DivD dst src));
9904 
9905   format %{ "FLD    $src\n\t"
9906             "FDIVp  $dst,ST" %}
9907   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9908   ins_cost(150);
9909   ins_encode( Push_Reg_DPR(src),
9910               OpcP, RegOpc(dst) );
9911   ins_pipe( fpu_reg_reg );
9912 %}
9913 
9914 // Strict FP instruction biases argument before division then
9915 // biases result, to avoid double rounding of subnormals.
9916 //
9917 // scale dividend by multiplying dividend by 2^(-15360)
9918 // load divisor
9919 // divide scaled dividend by divisor
9920 // rescale quotient by 2^(15360)
9921 //
9922 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9927 
9928   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9929             "DMULp  $dst,ST\n\t"
9930             "FLD    $src\n\t"
9931             "FDIVp  $dst,ST\n\t"
9932             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9933             "DMULp  $dst,ST\n\t" %}
9934   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9935   ins_encode( strictfp_bias1(dst),
9936               Push_Reg_DPR(src),
9937               OpcP, RegOpc(dst),
9938               strictfp_bias2(dst) );
9939   ins_pipe( fpu_reg_reg );
9940 %}
9941 
9942 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9943   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9944   match(Set dst (RoundDouble (DivD src1 src2)));
9945 
9946   format %{ "FLD    $src1\n\t"
9947             "FDIV   ST,$src2\n\t"
9948             "FSTP_D $dst\t# D-round" %}
9949   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9950   ins_encode( Push_Reg_DPR(src1),
9951               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9952   ins_pipe( fpu_mem_reg_reg );
9953 %}
9954 
9955 
9956 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9957   predicate(UseSSE<=1);
9958   match(Set dst (ModD dst src));
9959   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9960 
9961   format %{ "DMOD   $dst,$src" %}
9962   ins_cost(250);
9963   ins_encode(Push_Reg_Mod_DPR(dst, src),
9964               emitModDPR(),
9965               Push_Result_Mod_DPR(src),
9966               Pop_Reg_DPR(dst));
9967   ins_pipe( pipe_slow );
9968 %}
9969 
9970 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9971   predicate(UseSSE>=2);
9972   match(Set dst (ModD src0 src1));
9973   effect(KILL rax, KILL cr);
9974 
9975   format %{ "SUB    ESP,8\t # DMOD\n"
9976           "\tMOVSD  [ESP+0],$src1\n"
9977           "\tFLD_D  [ESP+0]\n"
9978           "\tMOVSD  [ESP+0],$src0\n"
9979           "\tFLD_D  [ESP+0]\n"
9980      "loop:\tFPREM\n"
9981           "\tFWAIT\n"
9982           "\tFNSTSW AX\n"
9983           "\tSAHF\n"
9984           "\tJP     loop\n"
9985           "\tFSTP_D [ESP+0]\n"
9986           "\tMOVSD  $dst,[ESP+0]\n"
9987           "\tADD    ESP,8\n"
9988           "\tFSTP   ST0\t # Restore FPU Stack"
9989     %}
9990   ins_cost(250);
9991   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9992   ins_pipe( pipe_slow );
9993 %}
9994 
9995 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9996   predicate (UseSSE<=1);
9997   match(Set dst(AtanD dst src));
9998   format %{ "DATA   $dst,$src" %}
9999   opcode(0xD9, 0xF3);
10000   ins_encode( Push_Reg_DPR(src),
10001               OpcP, OpcS, RegOpc(dst) );
10002   ins_pipe( pipe_slow );
10003 %}
10004 
10005 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10006   predicate (UseSSE>=2);
10007   match(Set dst(AtanD dst src));
10008   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10009   format %{ "DATA   $dst,$src" %}
10010   opcode(0xD9, 0xF3);
10011   ins_encode( Push_SrcD(src),
10012               OpcP, OpcS, Push_ResultD(dst) );
10013   ins_pipe( pipe_slow );
10014 %}
10015 
10016 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10017   predicate (UseSSE<=1);
10018   match(Set dst (SqrtD src));
10019   format %{ "DSQRT  $dst,$src" %}
10020   opcode(0xFA, 0xD9);
10021   ins_encode( Push_Reg_DPR(src),
10022               OpcS, OpcP, Pop_Reg_DPR(dst) );
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 //-------------Float Instructions-------------------------------
10027 // Float Math
10028 
10029 // Code for float compare:
10030 //     fcompp();
10031 //     fwait(); fnstsw_ax();
10032 //     sahf();
10033 //     movl(dst, unordered_result);
10034 //     jcc(Assembler::parity, exit);
10035 //     movl(dst, less_result);
10036 //     jcc(Assembler::below, exit);
10037 //     movl(dst, equal_result);
10038 //     jcc(Assembler::equal, exit);
10039 //     movl(dst, greater_result);
10040 //   exit:
10041 
10042 // P6 version of float compare, sets condition codes in EFLAGS
10043 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10044   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10045   match(Set cr (CmpF src1 src2));
10046   effect(KILL rax);
10047   ins_cost(150);
10048   format %{ "FLD    $src1\n\t"
10049             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10050             "JNP    exit\n\t"
10051             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10052             "SAHF\n"
10053      "exit:\tNOP               // avoid branch to branch" %}
10054   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10055   ins_encode( Push_Reg_DPR(src1),
10056               OpcP, RegOpc(src2),
10057               cmpF_P6_fixup );
10058   ins_pipe( pipe_slow );
10059 %}
10060 
10061 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10062   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10063   match(Set cr (CmpF src1 src2));
10064   ins_cost(100);
10065   format %{ "FLD    $src1\n\t"
10066             "FUCOMIP ST,$src2  // P6 instruction" %}
10067   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10068   ins_encode( Push_Reg_DPR(src1),
10069               OpcP, RegOpc(src2));
10070   ins_pipe( pipe_slow );
10071 %}
10072 
10073 
10074 // Compare & branch
10075 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10076   predicate(UseSSE == 0);
10077   match(Set cr (CmpF src1 src2));
10078   effect(KILL rax);
10079   ins_cost(200);
10080   format %{ "FLD    $src1\n\t"
10081             "FCOMp  $src2\n\t"
10082             "FNSTSW AX\n\t"
10083             "TEST   AX,0x400\n\t"
10084             "JZ,s   flags\n\t"
10085             "MOV    AH,1\t# unordered treat as LT\n"
10086     "flags:\tSAHF" %}
10087   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10088   ins_encode( Push_Reg_DPR(src1),
10089               OpcP, RegOpc(src2),
10090               fpu_flags);
10091   ins_pipe( pipe_slow );
10092 %}
10093 
10094 // Compare vs zero into -1,0,1
10095 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10096   predicate(UseSSE == 0);
10097   match(Set dst (CmpF3 src1 zero));
10098   effect(KILL cr, KILL rax);
10099   ins_cost(280);
10100   format %{ "FTSTF  $dst,$src1" %}
10101   opcode(0xE4, 0xD9);
10102   ins_encode( Push_Reg_DPR(src1),
10103               OpcS, OpcP, PopFPU,
10104               CmpF_Result(dst));
10105   ins_pipe( pipe_slow );
10106 %}
10107 
10108 // Compare into -1,0,1
10109 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10110   predicate(UseSSE == 0);
10111   match(Set dst (CmpF3 src1 src2));
10112   effect(KILL cr, KILL rax);
10113   ins_cost(300);
10114   format %{ "FCMPF  $dst,$src1,$src2" %}
10115   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10116   ins_encode( Push_Reg_DPR(src1),
10117               OpcP, RegOpc(src2),
10118               CmpF_Result(dst));
10119   ins_pipe( pipe_slow );
10120 %}
10121 
10122 // float compare and set condition codes in EFLAGS by XMM regs
10123 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10124   predicate(UseSSE>=1);
10125   match(Set cr (CmpF src1 src2));
10126   ins_cost(145);
10127   format %{ "UCOMISS $src1,$src2\n\t"
10128             "JNP,s   exit\n\t"
10129             "PUSHF\t# saw NaN, set CF\n\t"
10130             "AND     [rsp], #0xffffff2b\n\t"
10131             "POPF\n"
10132     "exit:" %}
10133   ins_encode %{
10134     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10135     emit_cmpfp_fixup(_masm);
10136   %}
10137   ins_pipe( pipe_slow );
10138 %}
10139 
10140 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10141   predicate(UseSSE>=1);
10142   match(Set cr (CmpF src1 src2));
10143   ins_cost(100);
10144   format %{ "UCOMISS $src1,$src2" %}
10145   ins_encode %{
10146     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10147   %}
10148   ins_pipe( pipe_slow );
10149 %}
10150 
10151 // float compare and set condition codes in EFLAGS by XMM regs
10152 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10153   predicate(UseSSE>=1);
10154   match(Set cr (CmpF src1 (LoadF src2)));
10155   ins_cost(165);
10156   format %{ "UCOMISS $src1,$src2\n\t"
10157             "JNP,s   exit\n\t"
10158             "PUSHF\t# saw NaN, set CF\n\t"
10159             "AND     [rsp], #0xffffff2b\n\t"
10160             "POPF\n"
10161     "exit:" %}
10162   ins_encode %{
10163     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10164     emit_cmpfp_fixup(_masm);
10165   %}
10166   ins_pipe( pipe_slow );
10167 %}
10168 
10169 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10170   predicate(UseSSE>=1);
10171   match(Set cr (CmpF src1 (LoadF src2)));
10172   ins_cost(100);
10173   format %{ "UCOMISS $src1,$src2" %}
10174   ins_encode %{
10175     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10176   %}
10177   ins_pipe( pipe_slow );
10178 %}
10179 
10180 // Compare into -1,0,1 in XMM
10181 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10182   predicate(UseSSE>=1);
10183   match(Set dst (CmpF3 src1 src2));
10184   effect(KILL cr);
10185   ins_cost(255);
10186   format %{ "UCOMISS $src1, $src2\n\t"
10187             "MOV     $dst, #-1\n\t"
10188             "JP,s    done\n\t"
10189             "JB,s    done\n\t"
10190             "SETNE   $dst\n\t"
10191             "MOVZB   $dst, $dst\n"
10192     "done:" %}
10193   ins_encode %{
10194     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10195     emit_cmpfp3(_masm, $dst$$Register);
10196   %}
10197   ins_pipe( pipe_slow );
10198 %}
10199 
10200 // Compare into -1,0,1 in XMM and memory
10201 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10202   predicate(UseSSE>=1);
10203   match(Set dst (CmpF3 src1 (LoadF src2)));
10204   effect(KILL cr);
10205   ins_cost(275);
10206   format %{ "UCOMISS $src1, $src2\n\t"
10207             "MOV     $dst, #-1\n\t"
10208             "JP,s    done\n\t"
10209             "JB,s    done\n\t"
10210             "SETNE   $dst\n\t"
10211             "MOVZB   $dst, $dst\n"
10212     "done:" %}
10213   ins_encode %{
10214     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10215     emit_cmpfp3(_masm, $dst$$Register);
10216   %}
10217   ins_pipe( pipe_slow );
10218 %}
10219 
10220 // Spill to obtain 24-bit precision
10221 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10222   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10223   match(Set dst (SubF src1 src2));
10224 
10225   format %{ "FSUB   $dst,$src1 - $src2" %}
10226   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10227   ins_encode( Push_Reg_FPR(src1),
10228               OpcReg_FPR(src2),
10229               Pop_Mem_FPR(dst) );
10230   ins_pipe( fpu_mem_reg_reg );
10231 %}
10232 //
10233 // This instruction does not round to 24-bits
10234 instruct subFPR_reg(regFPR dst, regFPR src) %{
10235   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10236   match(Set dst (SubF dst src));
10237 
10238   format %{ "FSUB   $dst,$src" %}
10239   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10240   ins_encode( Push_Reg_FPR(src),
10241               OpcP, RegOpc(dst) );
10242   ins_pipe( fpu_reg_reg );
10243 %}
10244 
10245 // Spill to obtain 24-bit precision
10246 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10247   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10248   match(Set dst (AddF src1 src2));
10249 
10250   format %{ "FADD   $dst,$src1,$src2" %}
10251   opcode(0xD8, 0x0); /* D8 C0+i */
10252   ins_encode( Push_Reg_FPR(src2),
10253               OpcReg_FPR(src1),
10254               Pop_Mem_FPR(dst) );
10255   ins_pipe( fpu_mem_reg_reg );
10256 %}
10257 //
10258 // This instruction does not round to 24-bits
10259 instruct addFPR_reg(regFPR dst, regFPR src) %{
10260   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10261   match(Set dst (AddF dst src));
10262 
10263   format %{ "FLD    $src\n\t"
10264             "FADDp  $dst,ST" %}
10265   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10266   ins_encode( Push_Reg_FPR(src),
10267               OpcP, RegOpc(dst) );
10268   ins_pipe( fpu_reg_reg );
10269 %}
10270 
10271 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10272   predicate(UseSSE==0);
10273   match(Set dst (AbsF src));
10274   ins_cost(100);
10275   format %{ "FABS" %}
10276   opcode(0xE1, 0xD9);
10277   ins_encode( OpcS, OpcP );
10278   ins_pipe( fpu_reg_reg );
10279 %}
10280 
10281 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10282   predicate(UseSSE==0);
10283   match(Set dst (NegF src));
10284   ins_cost(100);
10285   format %{ "FCHS" %}
10286   opcode(0xE0, 0xD9);
10287   ins_encode( OpcS, OpcP );
10288   ins_pipe( fpu_reg_reg );
10289 %}
10290 
10291 // Cisc-alternate to addFPR_reg
10292 // Spill to obtain 24-bit precision
10293 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10294   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10295   match(Set dst (AddF src1 (LoadF src2)));
10296 
10297   format %{ "FLD    $src2\n\t"
10298             "FADD   ST,$src1\n\t"
10299             "FSTP_S $dst" %}
10300   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10301   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10302               OpcReg_FPR(src1),
10303               Pop_Mem_FPR(dst) );
10304   ins_pipe( fpu_mem_reg_mem );
10305 %}
10306 //
10307 // Cisc-alternate to addFPR_reg
10308 // This instruction does not round to 24-bits
10309 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10310   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10311   match(Set dst (AddF dst (LoadF src)));
10312 
10313   format %{ "FADD   $dst,$src" %}
10314   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10315   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10316               OpcP, RegOpc(dst) );
10317   ins_pipe( fpu_reg_mem );
10318 %}
10319 
// Following two instructions for _222_mpegaudio
10321 // Spill to obtain 24-bit precision
10322 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10323   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10324   match(Set dst (AddF src1 src2));
10325 
10326   format %{ "FADD   $dst,$src1,$src2" %}
10327   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10328   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10329               OpcReg_FPR(src2),
10330               Pop_Mem_FPR(dst) );
10331   ins_pipe( fpu_mem_reg_mem );
10332 %}
10333 
10334 // Cisc-spill variant
10335 // Spill to obtain 24-bit precision
10336 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10337   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10338   match(Set dst (AddF src1 (LoadF src2)));
10339 
10340   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10341   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10342   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10343               set_instruction_start,
10344               OpcP, RMopc_Mem(secondary,src1),
10345               Pop_Mem_FPR(dst) );
10346   ins_pipe( fpu_mem_mem_mem );
10347 %}
10348 
10349 // Spill to obtain 24-bit precision
10350 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10351   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10352   match(Set dst (AddF src1 src2));
10353 
10354   format %{ "FADD   $dst,$src1,$src2" %}
10355   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10356   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10357               set_instruction_start,
10358               OpcP, RMopc_Mem(secondary,src1),
10359               Pop_Mem_FPR(dst) );
10360   ins_pipe( fpu_mem_mem_mem );
10361 %}
10362 
10363 
10364 // Spill to obtain 24-bit precision
10365 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10366   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10367   match(Set dst (AddF src con));
10368   format %{ "FLD    $src\n\t"
10369             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10370             "FSTP_S $dst"  %}
10371   ins_encode %{
10372     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10373     __ fadd_s($constantaddress($con));
10374     __ fstp_s(Address(rsp, $dst$$disp));
10375   %}
10376   ins_pipe(fpu_mem_reg_con);
10377 %}
10378 //
10379 // This instruction does not round to 24-bits
10380 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10381   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10382   match(Set dst (AddF src con));
10383   format %{ "FLD    $src\n\t"
10384             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10385             "FSTP   $dst"  %}
10386   ins_encode %{
10387     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10388     __ fadd_s($constantaddress($con));
10389     __ fstp_d($dst$$reg);
10390   %}
10391   ins_pipe(fpu_reg_reg_con);
10392 %}
10393 
10394 // Spill to obtain 24-bit precision
10395 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10396   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10397   match(Set dst (MulF src1 src2));
10398 
10399   format %{ "FLD    $src1\n\t"
10400             "FMUL   $src2\n\t"
10401             "FSTP_S $dst"  %}
10402   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10403   ins_encode( Push_Reg_FPR(src1),
10404               OpcReg_FPR(src2),
10405               Pop_Mem_FPR(dst) );
10406   ins_pipe( fpu_mem_reg_reg );
10407 %}
10408 //
10409 // This instruction does not round to 24-bits
10410 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10411   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10412   match(Set dst (MulF src1 src2));
10413 
10414   format %{ "FLD    $src1\n\t"
10415             "FMUL   $src2\n\t"
10416             "FSTP_S $dst"  %}
10417   opcode(0xD8, 0x1); /* D8 C8+i */
10418   ins_encode( Push_Reg_FPR(src2),
10419               OpcReg_FPR(src1),
10420               Pop_Reg_FPR(dst) );
10421   ins_pipe( fpu_reg_reg_reg );
10422 %}
10423 
10424 
10425 // Spill to obtain 24-bit precision
10426 // Cisc-alternate to reg-reg multiply
10427 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10428   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10429   match(Set dst (MulF src1 (LoadF src2)));
10430 
10431   format %{ "FLD_S  $src2\n\t"
10432             "FMUL   $src1\n\t"
10433             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10435   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10436               OpcReg_FPR(src1),
10437               Pop_Mem_FPR(dst) );
10438   ins_pipe( fpu_mem_reg_mem );
10439 %}
10440 //
10441 // This instruction does not round to 24-bits
10442 // Cisc-alternate to reg-reg multiply
10443 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10444   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10445   match(Set dst (MulF src1 (LoadF src2)));
10446 
10447   format %{ "FMUL   $dst,$src1,$src2" %}
10448   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10449   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10450               OpcReg_FPR(src1),
10451               Pop_Reg_FPR(dst) );
10452   ins_pipe( fpu_reg_reg_mem );
10453 %}
10454 
10455 // Spill to obtain 24-bit precision
10456 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10457   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10458   match(Set dst (MulF src1 src2));
10459 
10460   format %{ "FMUL   $dst,$src1,$src2" %}
10461   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10462   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10463               set_instruction_start,
10464               OpcP, RMopc_Mem(secondary,src1),
10465               Pop_Mem_FPR(dst) );
10466   ins_pipe( fpu_mem_mem_mem );
10467 %}
10468 
10469 // Spill to obtain 24-bit precision
10470 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10471   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10472   match(Set dst (MulF src con));
10473 
10474   format %{ "FLD    $src\n\t"
10475             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10476             "FSTP_S $dst"  %}
10477   ins_encode %{
10478     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10479     __ fmul_s($constantaddress($con));
10480     __ fstp_s(Address(rsp, $dst$$disp));
10481   %}
10482   ins_pipe(fpu_mem_reg_con);
10483 %}
10484 //
10485 // This instruction does not round to 24-bits
10486 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10487   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10488   match(Set dst (MulF src con));
10489 
10490   format %{ "FLD    $src\n\t"
10491             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10492             "FSTP   $dst"  %}
10493   ins_encode %{
10494     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10495     __ fmul_s($constantaddress($con));
10496     __ fstp_d($dst$$reg);
10497   %}
10498   ins_pipe(fpu_reg_reg_con);
10499 %}
10500 
10501 
10502 //
10503 // MACRO1 -- subsume unshared load into mulFPR
10504 // This instruction does not round to 24-bits
10505 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set dst (MulF (LoadF mem1) src));
10508 
10509   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10510             "FMUL   ST,$src\n\t"
10511             "FSTP   $dst" %}
10512   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10513   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10514               OpcReg_FPR(src),
10515               Pop_Reg_FPR(dst) );
10516   ins_pipe( fpu_reg_reg_mem );
10517 %}
10518 //
10519 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10520 // This instruction does not round to 24-bits
10521 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10522   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10523   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10524   ins_cost(95);
10525 
10526   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10527             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10528             "FADD   ST,$src2\n\t"
10529             "FSTP   $dst" %}
10530   opcode(0xD9); /* LoadF D9 /0 */
10531   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10532               FMul_ST_reg(src1),
10533               FAdd_ST_reg(src2),
10534               Pop_Reg_FPR(dst) );
10535   ins_pipe( fpu_reg_mem_reg_reg );
10536 %}
10537 
10538 // MACRO3 -- addFPR a mulFPR
10539 // This instruction does not round to 24-bits.  It is a '2-address'
10540 // instruction in that the result goes back to src2.  This eliminates
10541 // a move from the macro; possibly the register allocator will have
10542 // to add it back (and maybe not).
10543 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10544   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10545   match(Set src2 (AddF (MulF src0 src1) src2));
10546 
10547   format %{ "FLD    $src0     ===MACRO3===\n\t"
10548             "FMUL   ST,$src1\n\t"
10549             "FADDP  $src2,ST" %}
10550   opcode(0xD9); /* LoadF D9 /0 */
10551   ins_encode( Push_Reg_FPR(src0),
10552               FMul_ST_reg(src1),
10553               FAddP_reg_ST(src2) );
10554   ins_pipe( fpu_reg_reg_reg );
10555 %}
10556 
10557 // MACRO4 -- divFPR subFPR
10558 // This instruction does not round to 24-bits
10559 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10560   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10561   match(Set dst (DivF (SubF src2 src1) src3));
10562 
10563   format %{ "FLD    $src2   ===MACRO4===\n\t"
10564             "FSUB   ST,$src1\n\t"
10565             "FDIV   ST,$src3\n\t"
10566             "FSTP  $dst" %}
10567   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10568   ins_encode( Push_Reg_FPR(src2),
10569               subFPR_divFPR_encode(src1,src3),
10570               Pop_Reg_FPR(dst) );
10571   ins_pipe( fpu_reg_reg_reg_reg );
10572 %}
10573 
10574 // Spill to obtain 24-bit precision
10575 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10576   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10577   match(Set dst (DivF src1 src2));
10578 
10579   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10581   ins_encode( Push_Reg_FPR(src1),
10582               OpcReg_FPR(src2),
10583               Pop_Mem_FPR(dst) );
10584   ins_pipe( fpu_mem_reg_reg );
10585 %}
10586 //
10587 // This instruction does not round to 24-bits
10588 instruct divFPR_reg(regFPR dst, regFPR src) %{
10589   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10590   match(Set dst (DivF dst src));
10591 
10592   format %{ "FDIV   $dst,$src" %}
10593   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10594   ins_encode( Push_Reg_FPR(src),
10595               OpcP, RegOpc(dst) );
10596   ins_pipe( fpu_reg_reg );
10597 %}
10598 
10599 
10600 // Spill to obtain 24-bit precision
10601 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10602   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10603   match(Set dst (ModF src1 src2));
10604   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10605 
10606   format %{ "FMOD   $dst,$src1,$src2" %}
10607   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10608               emitModDPR(),
10609               Push_Result_Mod_DPR(src2),
10610               Pop_Mem_FPR(dst));
10611   ins_pipe( pipe_slow );
10612 %}
10613 //
10614 // This instruction does not round to 24-bits
10615 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10616   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10617   match(Set dst (ModF dst src));
10618   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10619 
10620   format %{ "FMOD   $dst,$src" %}
10621   ins_encode(Push_Reg_Mod_DPR(dst, src),
10622               emitModDPR(),
10623               Push_Result_Mod_DPR(src),
10624               Pop_Reg_FPR(dst));
10625   ins_pipe( pipe_slow );
10626 %}
10627 
10628 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10629   predicate(UseSSE>=1);
10630   match(Set dst (ModF src0 src1));
10631   effect(KILL rax, KILL cr);
10632   format %{ "SUB    ESP,4\t # FMOD\n"
10633           "\tMOVSS  [ESP+0],$src1\n"
10634           "\tFLD_S  [ESP+0]\n"
10635           "\tMOVSS  [ESP+0],$src0\n"
10636           "\tFLD_S  [ESP+0]\n"
10637      "loop:\tFPREM\n"
10638           "\tFWAIT\n"
10639           "\tFNSTSW AX\n"
10640           "\tSAHF\n"
10641           "\tJP     loop\n"
10642           "\tFSTP_S [ESP+0]\n"
10643           "\tMOVSS  $dst,[ESP+0]\n"
10644           "\tADD    ESP,4\n"
10645           "\tFSTP   ST0\t # Restore FPU Stack"
10646     %}
10647   ins_cost(250);
10648   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10649   ins_pipe( pipe_slow );
10650 %}
10651 
10652 
10653 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10655 
10656 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10657   predicate(UseSSE==0);
10658   match(Set dst (RoundFloat src));
10659   ins_cost(125);
10660   format %{ "FST_S  $dst,$src\t# F-round" %}
10661   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10662   ins_pipe( fpu_mem_reg );
10663 %}
10664 
10665 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10666   predicate(UseSSE<=1);
10667   match(Set dst (RoundDouble src));
10668   ins_cost(125);
10669   format %{ "FST_D  $dst,$src\t# D-round" %}
10670   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10671   ins_pipe( fpu_mem_reg );
10672 %}
10673 
// Force rounding to 24-bit precision and 8-bit exponent
10675 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10676   predicate(UseSSE==0);
10677   match(Set dst (ConvD2F src));
10678   format %{ "FST_S  $dst,$src\t# F-round" %}
10679   expand %{
10680     roundFloat_mem_reg(dst,src);
10681   %}
10682 %}
10683 
// Force rounding to 24-bit precision and 8-bit exponent
10685 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10686   predicate(UseSSE==1);
10687   match(Set dst (ConvD2F src));
10688   effect( KILL cr );
10689   format %{ "SUB    ESP,4\n\t"
10690             "FST_S  [ESP],$src\t# F-round\n\t"
10691             "MOVSS  $dst,[ESP]\n\t"
10692             "ADD ESP,4" %}
10693   ins_encode %{
10694     __ subptr(rsp, 4);
10695     if ($src$$reg != FPR1L_enc) {
10696       __ fld_s($src$$reg-1);
10697       __ fstp_s(Address(rsp, 0));
10698     } else {
10699       __ fst_s(Address(rsp, 0));
10700     }
10701     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10702     __ addptr(rsp, 4);
10703   %}
10704   ins_pipe( pipe_slow );
10705 %}
10706 
10707 // Force rounding double precision to single precision
10708 instruct convD2F_reg(regF dst, regD src) %{
10709   predicate(UseSSE>=2);
10710   match(Set dst (ConvD2F src));
10711   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10712   ins_encode %{
10713     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10719   predicate(UseSSE==0);
10720   match(Set dst (ConvF2D src));
10721   format %{ "FST_S  $dst,$src\t# D-round" %}
10722   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10723   ins_pipe( fpu_reg_reg );
10724 %}
10725 
10726 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10727   predicate(UseSSE==1);
10728   match(Set dst (ConvF2D src));
10729   format %{ "FST_D  $dst,$src\t# D-round" %}
10730   expand %{
10731     roundDouble_mem_reg(dst,src);
10732   %}
10733 %}
10734 
10735 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10736   predicate(UseSSE==1);
10737   match(Set dst (ConvF2D src));
10738   effect( KILL cr );
10739   format %{ "SUB    ESP,4\n\t"
10740             "MOVSS  [ESP] $src\n\t"
10741             "FLD_S  [ESP]\n\t"
10742             "ADD    ESP,4\n\t"
10743             "FSTP   $dst\t# D-round" %}
10744   ins_encode %{
10745     __ subptr(rsp, 4);
10746     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10747     __ fld_s(Address(rsp, 0));
10748     __ addptr(rsp, 4);
10749     __ fstp_d($dst$$reg);
10750   %}
10751   ins_pipe( pipe_slow );
10752 %}
10753 
10754 instruct convF2D_reg(regD dst, regF src) %{
10755   predicate(UseSSE>=2);
10756   match(Set dst (ConvF2D src));
10757   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10758   ins_encode %{
10759     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10760   %}
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10765 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10766   predicate(UseSSE<=1);
10767   match(Set dst (ConvD2I src));
10768   effect( KILL tmp, KILL cr );
10769   format %{ "FLD    $src\t# Convert double to int \n\t"
10770             "FLDCW  trunc mode\n\t"
10771             "SUB    ESP,4\n\t"
10772             "FISTp  [ESP + #0]\n\t"
10773             "FLDCW  std/24-bit mode\n\t"
10774             "POP    EAX\n\t"
10775             "CMP    EAX,0x80000000\n\t"
10776             "JNE,s  fast\n\t"
10777             "FLD_D  $src\n\t"
10778             "CALL   d2i_wrapper\n"
10779       "fast:" %}
10780   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10781   ins_pipe( pipe_slow );
10782 %}
10783 
10784 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10785 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10786   predicate(UseSSE>=2);
10787   match(Set dst (ConvD2I src));
10788   effect( KILL tmp, KILL cr );
10789   format %{ "CVTTSD2SI $dst, $src\n\t"
10790             "CMP    $dst,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "SUB    ESP, 8\n\t"
10793             "MOVSD  [ESP], $src\n\t"
10794             "FLD_D  [ESP]\n\t"
10795             "ADD    ESP, 8\n\t"
10796             "CALL   d2i_wrapper\n"
10797       "fast:" %}
10798   ins_encode %{
10799     Label fast;
10800     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10801     __ cmpl($dst$$Register, 0x80000000);
10802     __ jccb(Assembler::notEqual, fast);
10803     __ subptr(rsp, 8);
10804     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10805     __ fld_d(Address(rsp, 0));
10806     __ addptr(rsp, 8);
10807     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10808     __ bind(fast);
10809   %}
10810   ins_pipe( pipe_slow );
10811 %}
10812 
10813 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10814   predicate(UseSSE<=1);
10815   match(Set dst (ConvD2L src));
10816   effect( KILL cr );
10817   format %{ "FLD    $src\t# Convert double to long\n\t"
10818             "FLDCW  trunc mode\n\t"
10819             "SUB    ESP,8\n\t"
10820             "FISTp  [ESP + #0]\n\t"
10821             "FLDCW  std/24-bit mode\n\t"
10822             "POP    EAX\n\t"
10823             "POP    EDX\n\t"
10824             "CMP    EDX,0x80000000\n\t"
10825             "JNE,s  fast\n\t"
10826             "TEST   EAX,EAX\n\t"
10827             "JNE,s  fast\n\t"
10828             "FLD    $src\n\t"
10829             "CALL   d2l_wrapper\n"
10830       "fast:" %}
10831   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10832   ins_pipe( pipe_slow );
10833 %}
10834 
10835 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10836 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10837   predicate (UseSSE>=2);
10838   match(Set dst (ConvD2L src));
10839   effect( KILL cr );
10840   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10841             "MOVSD  [ESP],$src\n\t"
10842             "FLD_D  [ESP]\n\t"
10843             "FLDCW  trunc mode\n\t"
10844             "FISTp  [ESP + #0]\n\t"
10845             "FLDCW  std/24-bit mode\n\t"
10846             "POP    EAX\n\t"
10847             "POP    EDX\n\t"
10848             "CMP    EDX,0x80000000\n\t"
10849             "JNE,s  fast\n\t"
10850             "TEST   EAX,EAX\n\t"
10851             "JNE,s  fast\n\t"
10852             "SUB    ESP,8\n\t"
10853             "MOVSD  [ESP],$src\n\t"
10854             "FLD_D  [ESP]\n\t"
10855             "ADD    ESP,8\n\t"
10856             "CALL   d2l_wrapper\n"
10857       "fast:" %}
10858   ins_encode %{
10859     Label fast;
10860     __ subptr(rsp, 8);
10861     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10862     __ fld_d(Address(rsp, 0));
10863     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10864     __ fistp_d(Address(rsp, 0));
10865     // Restore the rounding mode, mask the exception
10866     if (Compile::current()->in_24_bit_fp_mode()) {
10867       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10868     } else {
10869       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10870     }
10871     // Load the converted long, adjust CPU stack
10872     __ pop(rax);
10873     __ pop(rdx);
10874     __ cmpl(rdx, 0x80000000);
10875     __ jccb(Assembler::notEqual, fast);
10876     __ testl(rax, rax);
10877     __ jccb(Assembler::notEqual, fast);
10878     __ subptr(rsp, 8);
10879     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10880     __ fld_d(Address(rsp, 0));
10881     __ addptr(rsp, 8);
10882     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10883     __ bind(fast);
10884   %}
10885   ins_pipe( pipe_slow );
10886 %}
10887 
// Convert a float or double to an int.  Java semantics require careful
// handling of the corner cases.  So we set the rounding mode to
// 'truncate' (round toward zero), store the value down as an int, and
// reset the rounding mode to 'nearest'.  The hardware stores a sentinel
// value (0x80000000) if we would overflow or converted a NaN; we check
// for this and take the slow path if needed.
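// For reference, a sketch of the Java narrowing-conversion result the slow
// path has to deliver (JLS 5.1.3; not the wrapper's actual code):
//
//   NaN                        -> 0
//   too large to fit in an int -> 0x7FFFFFFF  (Integer.MAX_VALUE)
//   too small to fit in an int -> 0x80000000  (Integer.MIN_VALUE)
//   everything else            -> truncated toward zero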
10894 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10895   predicate(UseSSE==0);
10896   match(Set dst (ConvF2I src));
10897   effect( KILL tmp, KILL cr );
10898   format %{ "FLD    $src\t# Convert float to int \n\t"
10899             "FLDCW  trunc mode\n\t"
10900             "SUB    ESP,4\n\t"
10901             "FISTp  [ESP + #0]\n\t"
10902             "FLDCW  std/24-bit mode\n\t"
10903             "POP    EAX\n\t"
10904             "CMP    EAX,0x80000000\n\t"
10905             "JNE,s  fast\n\t"
10906             "FLD    $src\n\t"
10907             "CALL   d2i_wrapper\n"
10908       "fast:" %}
10909   // DPR2I_encoding works for FPR2I
10910   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10911   ins_pipe( pipe_slow );
10912 %}
10913 
10914 // Convert a float in xmm to an int reg.
10915 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10916   predicate(UseSSE>=1);
10917   match(Set dst (ConvF2I src));
10918   effect( KILL tmp, KILL cr );
10919   format %{ "CVTTSS2SI $dst, $src\n\t"
10920             "CMP    $dst,0x80000000\n\t"
10921             "JNE,s  fast\n\t"
10922             "SUB    ESP, 4\n\t"
10923             "MOVSS  [ESP], $src\n\t"
10924             "FLD    [ESP]\n\t"
10925             "ADD    ESP, 4\n\t"
10926             "CALL   d2i_wrapper\n"
10927       "fast:" %}
10928   ins_encode %{
10929     Label fast;
10930     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10931     __ cmpl($dst$$Register, 0x80000000);
10932     __ jccb(Assembler::notEqual, fast);
10933     __ subptr(rsp, 4);
10934     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10935     __ fld_s(Address(rsp, 0));
10936     __ addptr(rsp, 4);
10937     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10938     __ bind(fast);
10939   %}
10940   ins_pipe( pipe_slow );
10941 %}
10942 
10943 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10944   predicate(UseSSE==0);
10945   match(Set dst (ConvF2L src));
10946   effect( KILL cr );
10947   format %{ "FLD    $src\t# Convert float to long\n\t"
10948             "FLDCW  trunc mode\n\t"
10949             "SUB    ESP,8\n\t"
10950             "FISTp  [ESP + #0]\n\t"
10951             "FLDCW  std/24-bit mode\n\t"
10952             "POP    EAX\n\t"
10953             "POP    EDX\n\t"
10954             "CMP    EDX,0x80000000\n\t"
10955             "JNE,s  fast\n\t"
10956             "TEST   EAX,EAX\n\t"
10957             "JNE,s  fast\n\t"
10958             "FLD    $src\n\t"
10959             "CALL   d2l_wrapper\n"
10960       "fast:" %}
10961   // DPR2L_encoding works for FPR2L
10962   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10963   ins_pipe( pipe_slow );
10964 %}
10965 
10966 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10967 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10968   predicate (UseSSE>=1);
10969   match(Set dst (ConvF2L src));
10970   effect( KILL cr );
10971   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10972             "MOVSS  [ESP],$src\n\t"
10973             "FLD_S  [ESP]\n\t"
10974             "FLDCW  trunc mode\n\t"
10975             "FISTp  [ESP + #0]\n\t"
10976             "FLDCW  std/24-bit mode\n\t"
10977             "POP    EAX\n\t"
10978             "POP    EDX\n\t"
10979             "CMP    EDX,0x80000000\n\t"
10980             "JNE,s  fast\n\t"
10981             "TEST   EAX,EAX\n\t"
10982             "JNE,s  fast\n\t"
10983             "SUB    ESP,4\t# Convert float to long\n\t"
10984             "MOVSS  [ESP],$src\n\t"
10985             "FLD_S  [ESP]\n\t"
10986             "ADD    ESP,4\n\t"
10987             "CALL   d2l_wrapper\n"
10988       "fast:" %}
10989   ins_encode %{
10990     Label fast;
10991     __ subptr(rsp, 8);
10992     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10993     __ fld_s(Address(rsp, 0));
10994     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10995     __ fistp_d(Address(rsp, 0));
10996     // Restore the rounding mode, mask the exception
10997     if (Compile::current()->in_24_bit_fp_mode()) {
10998       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10999     } else {
11000       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11001     }
11002     // Load the converted long, adjust CPU stack
11003     __ pop(rax);
11004     __ pop(rdx);
11005     __ cmpl(rdx, 0x80000000);
11006     __ jccb(Assembler::notEqual, fast);
11007     __ testl(rax, rax);
11008     __ jccb(Assembler::notEqual, fast);
11009     __ subptr(rsp, 4);
11010     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11011     __ fld_s(Address(rsp, 0));
11012     __ addptr(rsp, 4);
11013     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11014     __ bind(fast);
11015   %}
11016   ins_pipe( pipe_slow );
11017 %}
11018 
11019 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11020   predicate( UseSSE<=1 );
11021   match(Set dst (ConvI2D src));
11022   format %{ "FILD   $src\n\t"
11023             "FSTP   $dst" %}
11024   opcode(0xDB, 0x0);  /* DB /0 */
11025   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11026   ins_pipe( fpu_reg_mem );
11027 %}
11028 
11029 instruct convI2D_reg(regD dst, rRegI src) %{
11030   predicate( UseSSE>=2 && !UseXmmI2D );
11031   match(Set dst (ConvI2D src));
11032   format %{ "CVTSI2SD $dst,$src" %}
11033   ins_encode %{
11034     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11035   %}
11036   ins_pipe( pipe_slow );
11037 %}
11038 
11039 instruct convI2D_mem(regD dst, memory mem) %{
11040   predicate( UseSSE>=2 );
11041   match(Set dst (ConvI2D (LoadI mem)));
11042   format %{ "CVTSI2SD $dst,$mem" %}
11043   ins_encode %{
11044     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11045   %}
11046   ins_pipe( pipe_slow );
11047 %}
11048 
11049 instruct convXI2D_reg(regD dst, rRegI src)
11050 %{
11051   predicate( UseSSE>=2 && UseXmmI2D );
11052   match(Set dst (ConvI2D src));
11053 
11054   format %{ "MOVD  $dst,$src\n\t"
11055             "CVTDQ2PD $dst,$dst\t# i2d" %}
11056   ins_encode %{
11057     __ movdl($dst$$XMMRegister, $src$$Register);
11058     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11059   %}
11060   ins_pipe(pipe_slow); // XXX
11061 %}
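// Note: UseXmmI2D (and UseXmmI2F below) selects the MOVD + CVTDQ2PD/CVTDQ2PS
// form instead of CVTSI2SD/CVTSI2SS; the assumption is that the scalar converts
// merge into the destination XMM register and can create a partial-register
// dependency on some processors, which the packed form avoids.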
11062 
11063 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11064   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11065   match(Set dst (ConvI2D (LoadI mem)));
11066   format %{ "FILD   $mem\n\t"
11067             "FSTP   $dst" %}
11068   opcode(0xDB);      /* DB /0 */
11069   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11070               Pop_Reg_DPR(dst));
11071   ins_pipe( fpu_reg_mem );
11072 %}
11073 
11074 // Convert a byte to a float; no rounding step needed.
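// (Every value in 0..255 is exactly representable in a 24-bit significand, so
// the result is exact even in 24-bit precision mode.)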
11075 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11076   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11077   match(Set dst (ConvI2F src));
11078   format %{ "FILD   $src\n\t"
11079             "FSTP   $dst" %}
11080 
11081   opcode(0xDB, 0x0);  /* DB /0 */
11082   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11083   ins_pipe( fpu_reg_mem );
11084 %}
11085 
11086 // In 24-bit mode, force exponent rounding by storing back out
11087 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11088   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11089   match(Set dst (ConvI2F src));
11090   ins_cost(200);
11091   format %{ "FILD   $src\n\t"
11092             "FSTP_S $dst" %}
11093   opcode(0xDB, 0x0);  /* DB /0 */
11094   ins_encode( Push_Mem_I(src),
11095               Pop_Mem_FPR(dst));
11096   ins_pipe( fpu_mem_mem );
11097 %}
11098 
11099 // In 24-bit mode, force exponent rounding by storing back out
11100 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11101   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11102   match(Set dst (ConvI2F (LoadI mem)));
11103   ins_cost(200);
11104   format %{ "FILD   $mem\n\t"
11105             "FSTP_S $dst" %}
11106   opcode(0xDB);  /* DB /0 */
11107   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11108               Pop_Mem_FPR(dst));
11109   ins_pipe( fpu_mem_mem );
11110 %}
11111 
11112 // This instruction does not round to 24-bits
11113 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11114   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11115   match(Set dst (ConvI2F src));
11116   format %{ "FILD   $src\n\t"
11117             "FSTP   $dst" %}
11118   opcode(0xDB, 0x0);  /* DB /0 */
11119   ins_encode( Push_Mem_I(src),
11120               Pop_Reg_FPR(dst));
11121   ins_pipe( fpu_reg_mem );
11122 %}
11123 
11124 // This instruction does not round to 24-bits
11125 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11126   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11127   match(Set dst (ConvI2F (LoadI mem)));
11128   format %{ "FILD   $mem\n\t"
11129             "FSTP   $dst" %}
11130   opcode(0xDB);      /* DB /0 */
11131   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11132               Pop_Reg_FPR(dst));
11133   ins_pipe( fpu_reg_mem );
11134 %}
11135 
11136 // Convert an int to a float in xmm; no rounding step needed.
11137 instruct convI2F_reg(regF dst, rRegI src) %{
11138   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11139   match(Set dst (ConvI2F src));
11140   format %{ "CVTSI2SS $dst, $src" %}
11141   ins_encode %{
11142     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11143   %}
11144   ins_pipe( pipe_slow );
11145 %}
11146 
instruct convXI2F_reg(regF dst, rRegI src)
11148 %{
11149   predicate( UseSSE>=2 && UseXmmI2F );
11150   match(Set dst (ConvI2F src));
11151 
11152   format %{ "MOVD  $dst,$src\n\t"
11153             "CVTDQ2PS $dst,$dst\t# i2f" %}
11154   ins_encode %{
11155     __ movdl($dst$$XMMRegister, $src$$Register);
11156     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11157   %}
11158   ins_pipe(pipe_slow); // XXX
11159 %}
11160 
11161 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11162   match(Set dst (ConvI2L src));
11163   effect(KILL cr);
11164   ins_cost(375);
11165   format %{ "MOV    $dst.lo,$src\n\t"
11166             "MOV    $dst.hi,$src\n\t"
11167             "SAR    $dst.hi,31" %}
11168   ins_encode(convert_int_long(dst,src));
11169   ins_pipe( ialu_reg_reg_long );
11170 %}
11171 
11172 // Zero-extend convert int to long
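// This matches patterns such as the Java expression "x & 0xFFFFFFFFL" for an
// int x: the AndL with the immL_32bits mask discards the sign-extension, so a
// move of the low word plus an XOR of the high word is sufficient.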
11173 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11174   match(Set dst (AndL (ConvI2L src) mask) );
11175   effect( KILL flags );
11176   ins_cost(250);
11177   format %{ "MOV    $dst.lo,$src\n\t"
11178             "XOR    $dst.hi,$dst.hi" %}
11179   opcode(0x33); // XOR
11180   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11181   ins_pipe( ialu_reg_reg_long );
11182 %}
11183 
11184 // Zero-extend long
11185 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11186   match(Set dst (AndL src mask) );
11187   effect( KILL flags );
11188   ins_cost(250);
11189   format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
11191   opcode(0x33); // XOR
11192   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11193   ins_pipe( ialu_reg_reg_long );
11194 %}
11195 
11196 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11197   predicate (UseSSE<=1);
11198   match(Set dst (ConvL2D src));
11199   effect( KILL cr );
11200   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11201             "PUSH   $src.lo\n\t"
11202             "FILD   ST,[ESP + #0]\n\t"
11203             "ADD    ESP,8\n\t"
11204             "FSTP_D $dst\t# D-round" %}
11205   opcode(0xDF, 0x5);  /* DF /5 */
11206   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11207   ins_pipe( pipe_slow );
11208 %}
11209 
11210 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11211   predicate (UseSSE>=2);
11212   match(Set dst (ConvL2D src));
11213   effect( KILL cr );
11214   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11215             "PUSH   $src.lo\n\t"
11216             "FILD_D [ESP]\n\t"
11217             "FSTP_D [ESP]\n\t"
11218             "MOVSD  $dst,[ESP]\n\t"
11219             "ADD    ESP,8" %}
11220   opcode(0xDF, 0x5);  /* DF /5 */
11221   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11222   ins_pipe( pipe_slow );
11223 %}
11224 
11225 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11226   predicate (UseSSE>=1);
11227   match(Set dst (ConvL2F src));
11228   effect( KILL cr );
11229   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11230             "PUSH   $src.lo\n\t"
11231             "FILD_D [ESP]\n\t"
11232             "FSTP_S [ESP]\n\t"
11233             "MOVSS  $dst,[ESP]\n\t"
11234             "ADD    ESP,8" %}
11235   opcode(0xDF, 0x5);  /* DF /5 */
11236   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11237   ins_pipe( pipe_slow );
11238 %}
11239 
11240 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11241   match(Set dst (ConvL2F src));
11242   effect( KILL cr );
11243   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11244             "PUSH   $src.lo\n\t"
11245             "FILD   ST,[ESP + #0]\n\t"
11246             "ADD    ESP,8\n\t"
11247             "FSTP_S $dst\t# F-round" %}
11248   opcode(0xDF, 0x5);  /* DF /5 */
11249   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11250   ins_pipe( pipe_slow );
11251 %}
11252 
11253 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11254   match(Set dst (ConvL2I src));
11255   effect( DEF dst, USE src );
11256   format %{ "MOV    $dst,$src.lo" %}
11257   ins_encode(enc_CopyL_Lo(dst,src));
11258   ins_pipe( ialu_reg_reg );
11259 %}
11260 
11261 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11262   match(Set dst (MoveF2I src));
11263   effect( DEF dst, USE src );
11264   ins_cost(100);
11265   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11266   ins_encode %{
11267     __ movl($dst$$Register, Address(rsp, $src$$disp));
11268   %}
11269   ins_pipe( ialu_reg_mem );
11270 %}
11271 
11272 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11273   predicate(UseSSE==0);
11274   match(Set dst (MoveF2I src));
11275   effect( DEF dst, USE src );
11276 
11277   ins_cost(125);
11278   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11279   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11280   ins_pipe( fpu_mem_reg );
11281 %}
11282 
11283 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11284   predicate(UseSSE>=1);
11285   match(Set dst (MoveF2I src));
11286   effect( DEF dst, USE src );
11287 
11288   ins_cost(95);
11289   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11290   ins_encode %{
11291     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11292   %}
11293   ins_pipe( pipe_slow );
11294 %}
11295 
11296 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11297   predicate(UseSSE>=2);
11298   match(Set dst (MoveF2I src));
11299   effect( DEF dst, USE src );
11300   ins_cost(85);
11301   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11302   ins_encode %{
11303     __ movdl($dst$$Register, $src$$XMMRegister);
11304   %}
11305   ins_pipe( pipe_slow );
11306 %}
11307 
11308 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11309   match(Set dst (MoveI2F src));
11310   effect( DEF dst, USE src );
11311 
11312   ins_cost(100);
11313   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11314   ins_encode %{
11315     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11316   %}
11317   ins_pipe( ialu_mem_reg );
11318 %}
11319 
11320 
11321 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11322   predicate(UseSSE==0);
11323   match(Set dst (MoveI2F src));
11324   effect(DEF dst, USE src);
11325 
11326   ins_cost(125);
11327   format %{ "FLD_S  $src\n\t"
11328             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11329   opcode(0xD9);               /* D9 /0, FLD m32real */
11330   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11331               Pop_Reg_FPR(dst) );
11332   ins_pipe( fpu_reg_mem );
11333 %}
11334 
11335 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11336   predicate(UseSSE>=1);
11337   match(Set dst (MoveI2F src));
11338   effect( DEF dst, USE src );
11339 
11340   ins_cost(95);
11341   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11342   ins_encode %{
11343     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11344   %}
11345   ins_pipe( pipe_slow );
11346 %}
11347 
11348 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11349   predicate(UseSSE>=2);
11350   match(Set dst (MoveI2F src));
11351   effect( DEF dst, USE src );
11352 
11353   ins_cost(85);
11354   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11355   ins_encode %{
11356     __ movdl($dst$$XMMRegister, $src$$Register);
11357   %}
11358   ins_pipe( pipe_slow );
11359 %}
11360 
11361 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11362   match(Set dst (MoveD2L src));
11363   effect(DEF dst, USE src);
11364 
11365   ins_cost(250);
11366   format %{ "MOV    $dst.lo,$src\n\t"
11367             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11368   opcode(0x8B, 0x8B);
11369   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11370   ins_pipe( ialu_mem_long_reg );
11371 %}
11372 
11373 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11374   predicate(UseSSE<=1);
11375   match(Set dst (MoveD2L src));
11376   effect(DEF dst, USE src);
11377 
11378   ins_cost(125);
11379   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11380   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11381   ins_pipe( fpu_mem_reg );
11382 %}
11383 
11384 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11385   predicate(UseSSE>=2);
11386   match(Set dst (MoveD2L src));
11387   effect(DEF dst, USE src);
11388   ins_cost(95);
11389   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11390   ins_encode %{
11391     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11392   %}
11393   ins_pipe( pipe_slow );
11394 %}
11395 
11396 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11397   predicate(UseSSE>=2);
11398   match(Set dst (MoveD2L src));
11399   effect(DEF dst, USE src, TEMP tmp);
11400   ins_cost(85);
11401   format %{ "MOVD   $dst.lo,$src\n\t"
11402             "PSHUFLW $tmp,$src,0x4E\n\t"
11403             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11404   ins_encode %{
11405     __ movdl($dst$$Register, $src$$XMMRegister);
11406     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11407     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11408   %}
11409   ins_pipe( pipe_slow );
11410 %}
11411 
11412 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11413   match(Set dst (MoveL2D src));
11414   effect(DEF dst, USE src);
11415 
11416   ins_cost(200);
11417   format %{ "MOV    $dst,$src.lo\n\t"
11418             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11419   opcode(0x89, 0x89);
11420   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11421   ins_pipe( ialu_mem_long_reg );
11422 %}
11423 
11424 
11425 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11426   predicate(UseSSE<=1);
11427   match(Set dst (MoveL2D src));
11428   effect(DEF dst, USE src);
11429   ins_cost(125);
11430 
11431   format %{ "FLD_D  $src\n\t"
11432             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11433   opcode(0xDD);               /* DD /0, FLD m64real */
11434   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11435               Pop_Reg_DPR(dst) );
11436   ins_pipe( fpu_reg_mem );
11437 %}
11438 
11439 
11440 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11441   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11442   match(Set dst (MoveL2D src));
11443   effect(DEF dst, USE src);
11444 
11445   ins_cost(95);
11446   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11447   ins_encode %{
11448     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11449   %}
11450   ins_pipe( pipe_slow );
11451 %}
11452 
11453 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11454   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11455   match(Set dst (MoveL2D src));
11456   effect(DEF dst, USE src);
11457 
11458   ins_cost(95);
11459   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11460   ins_encode %{
11461     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11462   %}
11463   ins_pipe( pipe_slow );
11464 %}
11465 
11466 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11467   predicate(UseSSE>=2);
11468   match(Set dst (MoveL2D src));
11469   effect(TEMP dst, USE src, TEMP tmp);
11470   ins_cost(85);
11471   format %{ "MOVD   $dst,$src.lo\n\t"
11472             "MOVD   $tmp,$src.hi\n\t"
11473             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11474   ins_encode %{
11475     __ movdl($dst$$XMMRegister, $src$$Register);
11476     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11477     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11478   %}
11479   ins_pipe( pipe_slow );
11480 %}
11481 
11482 
11483 // =======================================================================
11484 // fast clearing of an array
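// The actual code is emitted by MacroAssembler::clear_mem(); the final boolean
// argument selects the large-array variant (compare rep_stos below with
// rep_stos_large).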
11485 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11486   predicate(!((ClearArrayNode*)n)->is_large());
11487   match(Set dummy (ClearArray cnt base));
11488   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11489 
11490   format %{ $$template
11491     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11492     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11493     $$emit$$"JG     LARGE\n\t"
11494     $$emit$$"SHL    ECX, 1\n\t"
11495     $$emit$$"DEC    ECX\n\t"
11496     $$emit$$"JS     DONE\t# Zero length\n\t"
11497     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11498     $$emit$$"DEC    ECX\n\t"
11499     $$emit$$"JGE    LOOP\n\t"
11500     $$emit$$"JMP    DONE\n\t"
11501     $$emit$$"# LARGE:\n\t"
11502     if (UseFastStosb) {
11503        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11504        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11505     } else {
11506        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11507        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11508     }
11509     $$emit$$"# DONE"
11510   %}
11511   ins_encode %{
11512     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11513   %}
11514   ins_pipe( pipe_slow );
11515 %}
11516 
11517 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11518   predicate(((ClearArrayNode*)n)->is_large());
11519   match(Set dummy (ClearArray cnt base));
11520   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11521   format %{ $$template
11522     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11523     if (UseFastStosb) {
11524        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11525        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11526     } else {
11527        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11528        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11529     }
11530     $$emit$$"# DONE"
11531   %}
11532   ins_encode %{
11533     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11534   %}
11535   ins_pipe( pipe_slow );
11536 %}
11537 
11538 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11539                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11540   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11541   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11542   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11543 
11544   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11545   ins_encode %{
11546     __ string_compare($str1$$Register, $str2$$Register,
11547                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11548                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11549   %}
11550   ins_pipe( pipe_slow );
11551 %}
11552 
11553 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11554                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11555   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11556   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11557   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11558 
11559   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11560   ins_encode %{
11561     __ string_compare($str1$$Register, $str2$$Register,
11562                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11563                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11564   %}
11565   ins_pipe( pipe_slow );
11566 %}
11567 
11568 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11569                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11570   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11571   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11572   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11573 
11574   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11575   ins_encode %{
11576     __ string_compare($str1$$Register, $str2$$Register,
11577                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11578                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11579   %}
11580   ins_pipe( pipe_slow );
11581 %}
11582 
11583 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11584                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11585   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11586   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11587   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11588 
11589   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11590   ins_encode %{
11591     __ string_compare($str2$$Register, $str1$$Register,
11592                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11593                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11594   %}
11595   ins_pipe( pipe_slow );
11596 %}
11597 
11598 // fast string equals
11599 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11600                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11601   match(Set result (StrEquals (Binary str1 str2) cnt));
11602   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11603 
11604   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11605   ins_encode %{
11606     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11607                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11608                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11609   %}
11610 
11611   ins_pipe( pipe_slow );
11612 %}
11613 
11614 // fast search of substring with known size.
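// When the constant substring fills at least one 16-byte vector (16 byte
// elements here, 8 char elements in the UU/UL variants below), string_indexofC8
// is used and the substring never goes through the stack; shorter constants use
// the general string_indexof path instead.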
11615 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11616                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11617   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11618   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11619   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11620 
11621   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11622   ins_encode %{
11623     int icnt2 = (int)$int_cnt2$$constant;
11624     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements,
      // which don't need to be loaded through the stack.
11627       __ string_indexofC8($str1$$Register, $str2$$Register,
11628                           $cnt1$$Register, $cnt2$$Register,
11629                           icnt2, $result$$Register,
11630                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11631     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11633       __ string_indexof($str1$$Register, $str2$$Register,
11634                         $cnt1$$Register, $cnt2$$Register,
11635                         icnt2, $result$$Register,
11636                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11637     }
11638   %}
11639   ins_pipe( pipe_slow );
11640 %}
11641 
11642 // fast search of substring with known size.
11643 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11644                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11645   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11646   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11647   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11648 
11649   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11650   ins_encode %{
11651     int icnt2 = (int)$int_cnt2$$constant;
11652     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11655       __ string_indexofC8($str1$$Register, $str2$$Register,
11656                           $cnt1$$Register, $cnt2$$Register,
11657                           icnt2, $result$$Register,
11658                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11659     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11661       __ string_indexof($str1$$Register, $str2$$Register,
11662                         $cnt1$$Register, $cnt2$$Register,
11663                         icnt2, $result$$Register,
11664                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11665     }
11666   %}
11667   ins_pipe( pipe_slow );
11668 %}
11669 
11670 // fast search of substring with known size.
11671 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11672                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11673   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11674   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11675   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11676 
11677   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11678   ins_encode %{
11679     int icnt2 = (int)$int_cnt2$$constant;
11680     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11683       __ string_indexofC8($str1$$Register, $str2$$Register,
11684                           $cnt1$$Register, $cnt2$$Register,
11685                           icnt2, $result$$Register,
11686                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11687     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11689       __ string_indexof($str1$$Register, $str2$$Register,
11690                         $cnt1$$Register, $cnt2$$Register,
11691                         icnt2, $result$$Register,
11692                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11693     }
11694   %}
11695   ins_pipe( pipe_slow );
11696 %}
11697 
11698 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11699                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11700   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11701   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11702   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11703 
11704   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11705   ins_encode %{
11706     __ string_indexof($str1$$Register, $str2$$Register,
11707                       $cnt1$$Register, $cnt2$$Register,
11708                       (-1), $result$$Register,
11709                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11710   %}
11711   ins_pipe( pipe_slow );
11712 %}
11713 
11714 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11715                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11716   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11717   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11718   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11719 
11720   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11721   ins_encode %{
11722     __ string_indexof($str1$$Register, $str2$$Register,
11723                       $cnt1$$Register, $cnt2$$Register,
11724                       (-1), $result$$Register,
11725                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11726   %}
11727   ins_pipe( pipe_slow );
11728 %}
11729 
11730 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11731                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11732   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11733   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11734   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11735 
11736   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11737   ins_encode %{
11738     __ string_indexof($str1$$Register, $str2$$Register,
11739                       $cnt1$$Register, $cnt2$$Register,
11740                       (-1), $result$$Register,
11741                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11742   %}
11743   ins_pipe( pipe_slow );
11744 %}
11745 
11746 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11747                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11748   predicate(UseSSE42Intrinsics);
11749   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11750   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11751   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11752   ins_encode %{
11753     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11754                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11755   %}
11756   ins_pipe( pipe_slow );
11757 %}
11758 
11759 // fast array equals
11760 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11761                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11762 %{
11763   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11764   match(Set result (AryEq ary1 ary2));
11765   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11766   //ins_cost(300);
11767 
11768   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11769   ins_encode %{
11770     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11771                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11772                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11773   %}
11774   ins_pipe( pipe_slow );
11775 %}
11776 
11777 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11778                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11779 %{
11780   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11781   match(Set result (AryEq ary1 ary2));
11782   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11783   //ins_cost(300);
11784 
11785   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11786   ins_encode %{
11787     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11788                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11789                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11790   %}
11791   ins_pipe( pipe_slow );
11792 %}
11793 
11794 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11795                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11796 %{
11797   match(Set result (HasNegatives ary1 len));
11798   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11799 
11800   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11801   ins_encode %{
11802     __ has_negatives($ary1$$Register, $len$$Register,
11803                      $result$$Register, $tmp3$$Register,
11804                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11805   %}
11806   ins_pipe( pipe_slow );
11807 %}
11808 
11809 // fast char[] to byte[] compression
11810 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11811                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11812   match(Set result (StrCompressedCopy src (Binary dst len)));
11813   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11814 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11816   ins_encode %{
11817     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11818                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11819                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11820   %}
11821   ins_pipe( pipe_slow );
11822 %}
11823 
11824 // fast byte[] to char[] inflation
11825 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11826                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11827   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11828   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11829 
11830   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11831   ins_encode %{
11832     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11833                           $tmp1$$XMMRegister, $tmp2$$Register);
11834   %}
11835   ins_pipe( pipe_slow );
11836 %}
11837 
11838 // encode char[] to byte[] in ISO_8859_1
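// Note: $result is expected to be the number of characters successfully
// encoded; encoding stops at the first char that does not fit in a single
// ISO-8859-1 byte.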
11839 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11840                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11841                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11842   match(Set result (EncodeISOArray src (Binary dst len)));
11843   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11844 
11845   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11846   ins_encode %{
11847     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11848                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11849                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11850   %}
11851   ins_pipe( pipe_slow );
11852 %}
11853 
11854 
11855 //----------Control Flow Instructions------------------------------------------
11856 // Signed compare Instructions
11857 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11858   match(Set cr (CmpI op1 op2));
11859   effect( DEF cr, USE op1, USE op2 );
11860   format %{ "CMP    $op1,$op2" %}
11861   opcode(0x3B);  /* Opcode 3B /r */
11862   ins_encode( OpcP, RegReg( op1, op2) );
11863   ins_pipe( ialu_cr_reg_reg );
11864 %}
11865 
11866 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11867   match(Set cr (CmpI op1 op2));
11868   effect( DEF cr, USE op1 );
11869   format %{ "CMP    $op1,$op2" %}
11870   opcode(0x81,0x07);  /* Opcode 81 /7 */
11871   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11872   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11873   ins_pipe( ialu_cr_reg_imm );
11874 %}
11875 
11876 // Cisc-spilled version of cmpI_eReg
11877 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11878   match(Set cr (CmpI op1 (LoadI op2)));
11879 
11880   format %{ "CMP    $op1,$op2" %}
11881   ins_cost(500);
11882   opcode(0x3B);  /* Opcode 3B /r */
11883   ins_encode( OpcP, RegMem( op1, op2) );
11884   ins_pipe( ialu_cr_reg_mem );
11885 %}
11886 
11887 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11888   match(Set cr (CmpI src zero));
11889   effect( DEF cr, USE src );
11890 
11891   format %{ "TEST   $src,$src" %}
11892   opcode(0x85);
11893   ins_encode( OpcP, RegReg( src, src ) );
11894   ins_pipe( ialu_cr_reg_imm );
11895 %}
11896 
11897 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11898   match(Set cr (CmpI (AndI src con) zero));
11899 
11900   format %{ "TEST   $src,$con" %}
11901   opcode(0xF7,0x00);
11902   ins_encode( OpcP, RegOpc(src), Con32(con) );
11903   ins_pipe( ialu_cr_reg_imm );
11904 %}
11905 
11906 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11907   match(Set cr (CmpI (AndI src mem) zero));
11908 
11909   format %{ "TEST   $src,$mem" %}
11910   opcode(0x85);
11911   ins_encode( OpcP, RegMem( src, mem ) );
11912   ins_pipe( ialu_cr_reg_mem );
11913 %}
11914 
11915 // Unsigned compare Instructions; really, same as signed except they
11916 // produce an eFlagsRegU instead of eFlagsReg.
11917 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11918   match(Set cr (CmpU op1 op2));
11919 
11920   format %{ "CMPu   $op1,$op2" %}
11921   opcode(0x3B);  /* Opcode 3B /r */
11922   ins_encode( OpcP, RegReg( op1, op2) );
11923   ins_pipe( ialu_cr_reg_reg );
11924 %}
11925 
11926 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11927   match(Set cr (CmpU op1 op2));
11928 
11929   format %{ "CMPu   $op1,$op2" %}
11930   opcode(0x81,0x07);  /* Opcode 81 /7 */
11931   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11932   ins_pipe( ialu_cr_reg_imm );
11933 %}
11934 
11935 // // Cisc-spilled version of cmpU_eReg
11936 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11937   match(Set cr (CmpU op1 (LoadI op2)));
11938 
11939   format %{ "CMPu   $op1,$op2" %}
11940   ins_cost(500);
11941   opcode(0x3B);  /* Opcode 3B /r */
11942   ins_encode( OpcP, RegMem( op1, op2) );
11943   ins_pipe( ialu_cr_reg_mem );
11944 %}
11945 
11946 // // Cisc-spilled version of cmpU_eReg
11947 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11948 //  match(Set cr (CmpU (LoadI op1) op2));
11949 //
11950 //  format %{ "CMPu   $op1,$op2" %}
11951 //  ins_cost(500);
11952 //  opcode(0x39);  /* Opcode 39 /r */
11953 //  ins_encode( OpcP, RegMem( op1, op2) );
11954 //%}
11955 
11956 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11957   match(Set cr (CmpU src zero));
11958 
11959   format %{ "TESTu  $src,$src" %}
11960   opcode(0x85);
11961   ins_encode( OpcP, RegReg( src, src ) );
11962   ins_pipe( ialu_cr_reg_imm );
11963 %}
11964 
11965 // Unsigned pointer compare Instructions
11966 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11967   match(Set cr (CmpP op1 op2));
11968 
11969   format %{ "CMPu   $op1,$op2" %}
11970   opcode(0x3B);  /* Opcode 3B /r */
11971   ins_encode( OpcP, RegReg( op1, op2) );
11972   ins_pipe( ialu_cr_reg_reg );
11973 %}
11974 
11975 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11976   match(Set cr (CmpP op1 op2));
11977 
11978   format %{ "CMPu   $op1,$op2" %}
11979   opcode(0x81,0x07);  /* Opcode 81 /7 */
11980   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11981   ins_pipe( ialu_cr_reg_imm );
11982 %}
11983 
11984 // // Cisc-spilled version of cmpP_eReg
11985 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11986   match(Set cr (CmpP op1 (LoadP op2)));
11987 
11988   format %{ "CMPu   $op1,$op2" %}
11989   ins_cost(500);
11990   opcode(0x3B);  /* Opcode 3B /r */
11991   ins_encode( OpcP, RegMem( op1, op2) );
11992   ins_pipe( ialu_cr_reg_mem );
11993 %}
11994 
11995 // // Cisc-spilled version of cmpP_eReg
11996 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11997 //  match(Set cr (CmpP (LoadP op1) op2));
11998 //
11999 //  format %{ "CMPu   $op1,$op2" %}
12000 //  ins_cost(500);
12001 //  opcode(0x39);  /* Opcode 39 /r */
12002 //  ins_encode( OpcP, RegMem( op1, op2) );
12003 //%}
12004 
12005 // Compare raw pointer (used in out-of-heap check).
12006 // Only works because non-oop pointers must be raw pointers
12007 // and raw pointers have no anti-dependencies.
12008 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12009   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12010   match(Set cr (CmpP op1 (LoadP op2)));
12011 
12012   format %{ "CMPu   $op1,$op2" %}
12013   opcode(0x3B);  /* Opcode 3B /r */
12014   ins_encode( OpcP, RegMem( op1, op2) );
12015   ins_pipe( ialu_cr_reg_mem );
12016 %}
12017 
12018 //
12019 // This will generate a signed flags result. This should be ok
12020 // since any compare to a zero should be eq/neq.
12021 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12022   match(Set cr (CmpP src zero));
12023 
12024   format %{ "TEST   $src,$src" %}
12025   opcode(0x85);
12026   ins_encode( OpcP, RegReg( src, src ) );
12027   ins_pipe( ialu_cr_reg_imm );
12028 %}
12029 
12030 // Cisc-spilled version of testP_reg
12031 // This will generate a signed flags result. This should be ok
12032 // since any compare to a zero should be eq/neq.
12033 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12034   match(Set cr (CmpP (LoadP op) zero));
12035 
12036   format %{ "TEST   $op,0xFFFFFFFF" %}
12037   ins_cost(500);
12038   opcode(0xF7);               /* Opcode F7 /0 */
12039   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12040   ins_pipe( ialu_cr_reg_imm );
12041 %}
12042 
12043 // Yanked all unsigned pointer compare operations.
12044 // Pointer compares are done with CmpP which is already unsigned.
12045 
12046 //----------Max and Min--------------------------------------------------------
12047 // Min Instructions
12048 ////
12049 //   *** Min and Max using the conditional move are slower than the
12050 //   *** branch version on a Pentium III.
12051 // // Conditional move for min
12052 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12053 //  effect( USE_DEF op2, USE op1, USE cr );
12054 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12055 //  opcode(0x4C,0x0F);
12056 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12057 //  ins_pipe( pipe_cmov_reg );
12058 //%}
12059 //
12060 //// Min Register with Register (P6 version)
12061 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12062 //  predicate(VM_Version::supports_cmov() );
12063 //  match(Set op2 (MinI op1 op2));
12064 //  ins_cost(200);
12065 //  expand %{
12066 //    eFlagsReg cr;
12067 //    compI_eReg(cr,op1,op2);
12068 //    cmovI_reg_lt(op2,op1,cr);
12069 //  %}
12070 //%}
12071 
12072 // Min Register with Register (generic version)
12073 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12074   match(Set dst (MinI dst src));
12075   effect(KILL flags);
12076   ins_cost(300);
12077 
12078   format %{ "MIN    $dst,$src" %}
12079   opcode(0xCC);
12080   ins_encode( min_enc(dst,src) );
12081   ins_pipe( pipe_slow );
12082 %}
12083 
12084 // Max Register with Register
12085 //   *** Min and Max using the conditional move are slower than the
12086 //   *** branch version on a Pentium III.
12087 // // Conditional move for max
12088 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12089 //  effect( USE_DEF op2, USE op1, USE cr );
12090 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12091 //  opcode(0x4F,0x0F);
12092 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12093 //  ins_pipe( pipe_cmov_reg );
12094 //%}
12095 //
12096 // // Max Register with Register (P6 version)
12097 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12098 //  predicate(VM_Version::supports_cmov() );
12099 //  match(Set op2 (MaxI op1 op2));
12100 //  ins_cost(200);
12101 //  expand %{
12102 //    eFlagsReg cr;
12103 //    compI_eReg(cr,op1,op2);
12104 //    cmovI_reg_gt(op2,op1,cr);
12105 //  %}
12106 //%}
12107 
12108 // Max Register with Register (generic version)
12109 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12110   match(Set dst (MaxI dst src));
12111   effect(KILL flags);
12112   ins_cost(300);
12113 
12114   format %{ "MAX    $dst,$src" %}
12115   opcode(0xCC);
12116   ins_encode( max_enc(dst,src) );
12117   ins_pipe( pipe_slow );
12118 %}
12119 
12120 // ============================================================================
12121 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
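// For example, with $init = 0, $limit = 10 and $stride = 3 the loop visits
// 0, 3, 6, 9, so the exact final iterator value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12.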
12124 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12125   match(Set limit (LoopLimit (Binary init limit) stride));
12126   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12127   ins_cost(300);
12128 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12130   ins_encode %{
12131     int strd = (int)$stride$$constant;
12132     assert(strd != 1 && strd != -1, "sanity");
12133     int m1 = (strd > 0) ? 1 : -1;
12134     // Convert limit to long (EAX:EDX)
12135     __ cdql();
12136     // Convert init to long (init:tmp)
12137     __ movl($tmp$$Register, $init$$Register);
12138     __ sarl($tmp$$Register, 31);
12139     // $limit - $init
12140     __ subl($limit$$Register, $init$$Register);
12141     __ sbbl($limit_hi$$Register, $tmp$$Register);
12142     // + ($stride - 1)
12143     if (strd > 0) {
12144       __ addl($limit$$Register, (strd - 1));
12145       __ adcl($limit_hi$$Register, 0);
12146       __ movl($tmp$$Register, strd);
12147     } else {
12148       __ addl($limit$$Register, (strd + 1));
12149       __ adcl($limit_hi$$Register, -1);
12150       __ lneg($limit_hi$$Register, $limit$$Register);
12151       __ movl($tmp$$Register, -strd);
12152     }
    // signed division: (EAX:EDX) / pos_stride
12154     __ idivl($tmp$$Register);
12155     if (strd < 0) {
12156       // restore sign
12157       __ negl($tmp$$Register);
12158     }
12159     // (EAX) * stride
12160     __ mull($tmp$$Register);
12161     // + init (ignore upper bits)
12162     __ addl($limit$$Register, $init$$Register);
12163   %}
12164   ins_pipe( pipe_slow );
12165 %}
12166 
12167 // ============================================================================
12168 // Branch Instructions
12169 // Jump Table
12170 instruct jumpXtnd(rRegI switch_val) %{
12171   match(Jump switch_val);
12172   ins_cost(350);
12173   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12174   ins_encode %{
12175     // Jump to Address(table_base + switch_reg)
12176     Address index(noreg, $switch_val$$Register, Address::times_1);
12177     __ jump(ArrayAddress($constantaddress, index));
12178   %}
12179   ins_pipe(pipe_jmp);
12180 %}
12181 
12182 // Jump Direct - Label defines a relative address from JMP+1
12183 instruct jmpDir(label labl) %{
12184   match(Goto);
12185   effect(USE labl);
12186 
12187   ins_cost(300);
12188   format %{ "JMP    $labl" %}
12189   size(5);
12190   ins_encode %{
12191     Label* L = $labl$$label;
12192     __ jmp(*L, false); // Always long jump
12193   %}
12194   ins_pipe( pipe_jmp );
12195 %}
12196 
12197 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12198 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12199   match(If cop cr);
12200   effect(USE labl);
12201 
12202   ins_cost(300);
12203   format %{ "J$cop    $labl" %}
12204   size(6);
12205   ins_encode %{
12206     Label* L = $labl$$label;
12207     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12208   %}
12209   ins_pipe( pipe_jcc );
12210 %}
12211 
12212 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12213 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12214   predicate(!n->has_vector_mask_set());
12215   match(CountedLoopEnd cop cr);
12216   effect(USE labl);
12217 
12218   ins_cost(300);
12219   format %{ "J$cop    $labl\t# Loop end" %}
12220   size(6);
12221   ins_encode %{
12222     Label* L = $labl$$label;
12223     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12224   %}
12225   ins_pipe( pipe_jcc );
12226 %}
12227 
12228 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12229 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12230   predicate(!n->has_vector_mask_set());
12231   match(CountedLoopEnd cop cmp);
12232   effect(USE labl);
12233 
12234   ins_cost(300);
12235   format %{ "J$cop,u  $labl\t# Loop end" %}
12236   size(6);
12237   ins_encode %{
12238     Label* L = $labl$$label;
12239     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12240   %}
12241   ins_pipe( pipe_jcc );
12242 %}
12243 
12244 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12245   predicate(!n->has_vector_mask_set());
12246   match(CountedLoopEnd cop cmp);
12247   effect(USE labl);
12248 
12249   ins_cost(200);
12250   format %{ "J$cop,u  $labl\t# Loop end" %}
12251   size(6);
12252   ins_encode %{
12253     Label* L = $labl$$label;
12254     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12255   %}
12256   ins_pipe( pipe_jcc );
12257 %}
12258 
12259 // mask version
12260 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12261 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12262   predicate(n->has_vector_mask_set());
12263   match(CountedLoopEnd cop cr);
12264   effect(USE labl);
12265 
12266   ins_cost(400);
12267   format %{ "J$cop    $labl\t# Loop end\n\t"
12268             "restorevectmask \t# vector mask restore for loops" %}
12269   size(10);
12270   ins_encode %{
12271     Label* L = $labl$$label;
12272     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12273     __ restorevectmask();
12274   %}
12275   ins_pipe( pipe_jcc );
12276 %}
12277 
12278 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12279 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12280   predicate(n->has_vector_mask_set());
12281   match(CountedLoopEnd cop cmp);
12282   effect(USE labl);
12283 
12284   ins_cost(400);
12285   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12286             "restorevectmask \t# vector mask restore for loops" %}
12287   size(10);
12288   ins_encode %{
12289     Label* L = $labl$$label;
12290     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12291     __ restorevectmask();
12292   %}
12293   ins_pipe( pipe_jcc );
12294 %}
12295 
12296 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12297   predicate(n->has_vector_mask_set());
12298   match(CountedLoopEnd cop cmp);
12299   effect(USE labl);
12300 
12301   ins_cost(300);
12302   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12303             "restorevectmask \t# vector mask restore for loops" %}
12304   size(10);
12305   ins_encode %{
12306     Label* L = $labl$$label;
12307     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12308     __ restorevectmask();
12309   %}
12310   ins_pipe( pipe_jcc );
12311 %}
12312 
12313 // Jump Direct Conditional - using unsigned comparison
12314 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12315   match(If cop cmp);
12316   effect(USE labl);
12317 
12318   ins_cost(300);
12319   format %{ "J$cop,u  $labl" %}
12320   size(6);
12321   ins_encode %{
12322     Label* L = $labl$$label;
12323     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12324   %}
12325   ins_pipe(pipe_jcc);
12326 %}
12327 
12328 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12329   match(If cop cmp);
12330   effect(USE labl);
12331 
12332   ins_cost(200);
12333   format %{ "J$cop,u  $labl" %}
12334   size(6);
12335   ins_encode %{
12336     Label* L = $labl$$label;
12337     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12338   %}
12339   ins_pipe(pipe_jcc);
12340 %}
12341 
12342 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12343   match(If cop cmp);
12344   effect(USE labl);
12345 
12346   ins_cost(200);
12347   format %{ $$template
12348     if ($cop$$cmpcode == Assembler::notEqual) {
12349       $$emit$$"JP,u   $labl\n\t"
12350       $$emit$$"J$cop,u   $labl"
12351     } else {
12352       $$emit$$"JP,u   done\n\t"
12353       $$emit$$"J$cop,u   $labl\n\t"
12354       $$emit$$"done:"
12355     }
12356   %}
12357   ins_encode %{
12358     Label* l = $labl$$label;
12359     if ($cop$$cmpcode == Assembler::notEqual) {
12360       __ jcc(Assembler::parity, *l, false);
12361       __ jcc(Assembler::notEqual, *l, false);
12362     } else if ($cop$$cmpcode == Assembler::equal) {
12363       Label done;
12364       __ jccb(Assembler::parity, done);
12365       __ jcc(Assembler::equal, *l, false);
12366       __ bind(done);
12367     } else {
12368        ShouldNotReachHere();
12369     }
12370   %}
12371   ins_pipe(pipe_jcc);
12372 %}
12373 
12374 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The encoding
// ALSO sets flags.
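// A rough sketch of the scan (hypothetical C-style accessors, for illustration
// only):
//   Array<Klass*>* s = sub->secondary_supers();
//   for (int i = 0; i < s->length(); i++) {
//     if (s->at(i) == super) {
//       sub->set_secondary_super_cache(super);   // hit: cache updated, Z set
//       return 0;
//     }
//   }
//   return 1;                                    // miss: NZ set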
12379 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12380   match(Set result (PartialSubtypeCheck sub super));
12381   effect( KILL rcx, KILL cr );
12382 
12383   ins_cost(1100);  // slightly larger than the next version
12384   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12385             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12386             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12387             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12388             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12389             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12391      "miss:\t" %}
12392 
12393   opcode(0x1); // Force a XOR of EDI
12394   ins_encode( enc_PartialSubtypeCheck() );
12395   ins_pipe( pipe_slow );
12396 %}
12397 
12398 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12399   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12400   effect( KILL rcx, KILL result );
12401 
12402   ins_cost(1000);
12403   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12404             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12405             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12406             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12407             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12408             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12409      "miss:\t" %}
12410 
12411   opcode(0x0);  // No need to XOR EDI
12412   ins_encode( enc_PartialSubtypeCheck() );
12413   ins_pipe( pipe_slow );
12414 %}
12415 
12416 // ============================================================================
12417 // Branch Instructions -- short offset versions
12418 //
12419 // These instructions are used to replace jumps of a long offset (the default
12420 // match) with jumps of a shorter offset.  These instructions are all tagged
12421 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12422 // match rules in general matching.  Instead, the ADLC generates a conversion
12423 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used with the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
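// For example, the long "JMP rel32" form above is 5 bytes and the long
// "Jcc rel32" form is 6 bytes, while the short rel8 forms below are only
// 2 bytes each and can reach targets within roughly -128..+127 bytes of the
// next instruction.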
12427 
12428 // Jump Direct - Label defines a relative address from JMP+1
12429 instruct jmpDir_short(label labl) %{
12430   match(Goto);
12431   effect(USE labl);
12432 
12433   ins_cost(300);
12434   format %{ "JMP,s  $labl" %}
12435   size(2);
12436   ins_encode %{
12437     Label* L = $labl$$label;
12438     __ jmpb(*L);
12439   %}
12440   ins_pipe( pipe_jmp );
12441   ins_short_branch(1);
12442 %}
12443 
12444 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12445 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12446   match(If cop cr);
12447   effect(USE labl);
12448 
12449   ins_cost(300);
12450   format %{ "J$cop,s  $labl" %}
12451   size(2);
12452   ins_encode %{
12453     Label* L = $labl$$label;
12454     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12455   %}
12456   ins_pipe( pipe_jcc );
12457   ins_short_branch(1);
12458 %}
12459 
12460 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12461 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12462   match(CountedLoopEnd cop cr);
12463   effect(USE labl);
12464 
12465   ins_cost(300);
12466   format %{ "J$cop,s  $labl\t# Loop end" %}
12467   size(2);
12468   ins_encode %{
12469     Label* L = $labl$$label;
12470     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12471   %}
12472   ins_pipe( pipe_jcc );
12473   ins_short_branch(1);
12474 %}
12475 
12476 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12477 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12478   match(CountedLoopEnd cop cmp);
12479   effect(USE labl);
12480 
12481   ins_cost(300);
12482   format %{ "J$cop,us $labl\t# Loop end" %}
12483   size(2);
12484   ins_encode %{
12485     Label* L = $labl$$label;
12486     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12487   %}
12488   ins_pipe( pipe_jcc );
12489   ins_short_branch(1);
12490 %}
12491 
12492 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12493   match(CountedLoopEnd cop cmp);
12494   effect(USE labl);
12495 
12496   ins_cost(300);
12497   format %{ "J$cop,us $labl\t# Loop end" %}
12498   size(2);
12499   ins_encode %{
12500     Label* L = $labl$$label;
12501     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12502   %}
12503   ins_pipe( pipe_jcc );
12504   ins_short_branch(1);
12505 %}
12506 
12507 // Jump Direct Conditional - using unsigned comparison
12508 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12509   match(If cop cmp);
12510   effect(USE labl);
12511 
12512   ins_cost(300);
12513   format %{ "J$cop,us $labl" %}
12514   size(2);
12515   ins_encode %{
12516     Label* L = $labl$$label;
12517     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12518   %}
12519   ins_pipe( pipe_jcc );
12520   ins_short_branch(1);
12521 %}
12522 
12523 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12524   match(If cop cmp);
12525   effect(USE labl);
12526 
12527   ins_cost(300);
12528   format %{ "J$cop,us $labl" %}
12529   size(2);
12530   ins_encode %{
12531     Label* L = $labl$$label;
12532     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12533   %}
12534   ins_pipe( pipe_jcc );
12535   ins_short_branch(1);
12536 %}
12537 
12538 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12539   match(If cop cmp);
12540   effect(USE labl);
12541 
12542   ins_cost(300);
12543   format %{ $$template
12544     if ($cop$$cmpcode == Assembler::notEqual) {
12545       $$emit$$"JP,u,s   $labl\n\t"
12546       $$emit$$"J$cop,u,s   $labl"
12547     } else {
12548       $$emit$$"JP,u,s   done\n\t"
12549       $$emit$$"J$cop,u,s  $labl\n\t"
12550       $$emit$$"done:"
12551     }
12552   %}
12553   size(4);
12554   ins_encode %{
12555     Label* l = $labl$$label;
12556     if ($cop$$cmpcode == Assembler::notEqual) {
12557       __ jccb(Assembler::parity, *l);
12558       __ jccb(Assembler::notEqual, *l);
12559     } else if ($cop$$cmpcode == Assembler::equal) {
12560       Label done;
12561       __ jccb(Assembler::parity, done);
12562       __ jccb(Assembler::equal, *l);
12563       __ bind(done);
12564     } else {
12565        ShouldNotReachHere();
12566     }
12567   %}
12568   ins_pipe(pipe_jcc);
12569   ins_short_branch(1);
12570 %}
12571 
12572 // ============================================================================
12573 // Long Compare
12574 //
12575 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12576 // is tricky.  The flavor of compare used depends on whether we are testing
12577 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12578 // The GE test is the negated LT test.  The LE test can be had by commuting
12579 // the operands (yielding a GE test) and then negating; negate again for the
12580 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12581 // NE test is negated from that.
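//
// As a rough illustration (C-like pseudo-code, not the generated code) of the
// recipes used by the instructs below, for a long held in two 32-bit halves
// {hi, lo}:
//
//   x <  0  (LT) :  (int)x.hi < 0                          // just the sign bit
//   x == 0  (EQ) :  (x.hi | x.lo) == 0                     // OR the halves
//   x <  y  (LT) :  CMP x.lo,y.lo ; MOV tmp,x.hi ; SBB tmp,y.hi
//                   // the borrow chain leaves LT/GE in the flags
//   x <= y  (LE) :  the same sequence with x and y swapped, then branch on
//                   // the commuted (GT/LE) test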
12582 
12583 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12584 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12585 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12586 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12587 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12588 // foo match ends up with the wrong leaf.  One fix is to not match both
12589 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12590 // both forms beat the trinary form of long-compare and both are very useful
12591 // on Intel which has so few registers.
12592 
12593 // Manifest a CmpL result in an integer register.  Very painful.
12594 // This is the test to avoid.
12595 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12596   match(Set dst (CmpL3 src1 src2));
12597   effect( KILL flags );
12598   ins_cost(1000);
12599   format %{ "XOR    $dst,$dst\n\t"
12600             "CMP    $src1.hi,$src2.hi\n\t"
12601             "JLT,s  m_one\n\t"
12602             "JGT,s  p_one\n\t"
12603             "CMP    $src1.lo,$src2.lo\n\t"
12604             "JB,s   m_one\n\t"
12605             "JEQ,s  done\n"
12606     "p_one:\tINC    $dst\n\t"
12607             "JMP,s  done\n"
12608     "m_one:\tDEC    $dst\n"
12609      "done:" %}
12610   ins_encode %{
12611     Label p_one, m_one, done;
12612     __ xorptr($dst$$Register, $dst$$Register);
12613     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12614     __ jccb(Assembler::less,    m_one);
12615     __ jccb(Assembler::greater, p_one);
12616     __ cmpl($src1$$Register, $src2$$Register);
12617     __ jccb(Assembler::below,   m_one);
12618     __ jccb(Assembler::equal,   done);
12619     __ bind(p_one);
12620     __ incrementl($dst$$Register);
12621     __ jmpb(done);
12622     __ bind(m_one);
12623     __ decrementl($dst$$Register);
12624     __ bind(done);
12625   %}
12626   ins_pipe( pipe_slow );
12627 %}
12628 
12629 //======
12630 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12631 // compares.  Can be used for LE or GT compares by reversing arguments.
12632 // NOT GOOD FOR EQ/NE tests.
12633 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12634   match( Set flags (CmpL src zero ));
12635   ins_cost(100);
12636   format %{ "TEST   $src.hi,$src.hi" %}
12637   opcode(0x85);
12638   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12639   ins_pipe( ialu_cr_reg_reg );
12640 %}
12641 
12642 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12643 // compares.  Can be used for LE or GT compares by reversing arguments.
12644 // NOT GOOD FOR EQ/NE tests.
12645 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12646   match( Set flags (CmpL src1 src2 ));
12647   effect( TEMP tmp );
12648   ins_cost(300);
12649   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12650             "MOV    $tmp,$src1.hi\n\t"
12651             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12652   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12653   ins_pipe( ialu_cr_reg_reg );
12654 %}
12655 
// Long compares reg < zero/reg OR reg >= zero/reg.
12657 // Just a wrapper for a normal branch, plus the predicate test.
12658 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12659   match(If cmp flags);
12660   effect(USE labl);
12661   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12662   expand %{
12663     jmpCon(cmp,flags,labl);    // JLT or JGE...
12664   %}
12665 %}
12666 
12667 //======
12668 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12669 // compares.  Can be used for LE or GT compares by reversing arguments.
12670 // NOT GOOD FOR EQ/NE tests.
12671 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12672   match(Set flags (CmpUL src zero));
12673   ins_cost(100);
12674   format %{ "TEST   $src.hi,$src.hi" %}
12675   opcode(0x85);
12676   ins_encode(OpcP, RegReg_Hi2(src, src));
12677   ins_pipe(ialu_cr_reg_reg);
12678 %}
12679 
12680 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12681 // compares.  Can be used for LE or GT compares by reversing arguments.
12682 // NOT GOOD FOR EQ/NE tests.
12683 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12684   match(Set flags (CmpUL src1 src2));
12685   effect(TEMP tmp);
12686   ins_cost(300);
12687   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12688             "MOV    $tmp,$src1.hi\n\t"
12689             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12690   ins_encode(long_cmp_flags2(src1, src2, tmp));
12691   ins_pipe(ialu_cr_reg_reg);
12692 %}
12693 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12695 // Just a wrapper for a normal branch, plus the predicate test.
12696 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12697   match(If cmp flags);
12698   effect(USE labl);
12699   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12700   expand %{
12701     jmpCon(cmp, flags, labl);    // JLT or JGE...
12702   %}
12703 %}
12704 
12705 // Compare 2 longs and CMOVE longs.
12706 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12707   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12708   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12709   ins_cost(400);
12710   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12711             "CMOV$cmp $dst.hi,$src.hi" %}
12712   opcode(0x0F,0x40);
12713   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12714   ins_pipe( pipe_cmov_reg_long );
12715 %}
12716 
12717 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12718   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12719   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12720   ins_cost(500);
12721   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12722             "CMOV$cmp $dst.hi,$src.hi" %}
12723   opcode(0x0F,0x40);
12724   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12725   ins_pipe( pipe_cmov_reg_long );
12726 %}
12727 
12728 // Compare 2 longs and CMOVE ints.
12729 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12730   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12731   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12732   ins_cost(200);
12733   format %{ "CMOV$cmp $dst,$src" %}
12734   opcode(0x0F,0x40);
12735   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12736   ins_pipe( pipe_cmov_reg );
12737 %}
12738 
12739 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12740   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12741   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12742   ins_cost(250);
12743   format %{ "CMOV$cmp $dst,$src" %}
12744   opcode(0x0F,0x40);
12745   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12746   ins_pipe( pipe_cmov_mem );
12747 %}
12748 
// Compare 2 longs and CMOVE ptrs.
12750 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12751   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12752   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12753   ins_cost(200);
12754   format %{ "CMOV$cmp $dst,$src" %}
12755   opcode(0x0F,0x40);
12756   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12757   ins_pipe( pipe_cmov_reg );
12758 %}
12759 
12760 // Compare 2 longs and CMOVE doubles
12761 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12762   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12763   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12764   ins_cost(200);
12765   expand %{
12766     fcmovDPR_regS(cmp,flags,dst,src);
12767   %}
12768 %}
12769 
12770 // Compare 2 longs and CMOVE doubles
12771 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12772   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12773   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12774   ins_cost(200);
12775   expand %{
12776     fcmovD_regS(cmp,flags,dst,src);
12777   %}
12778 %}
12779 
12780 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12782   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12783   ins_cost(200);
12784   expand %{
12785     fcmovFPR_regS(cmp,flags,dst,src);
12786   %}
12787 %}
12788 
12789 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12790   predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12791   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12792   ins_cost(200);
12793   expand %{
12794     fcmovF_regS(cmp,flags,dst,src);
12795   %}
12796 %}
12797 
12798 //======
12799 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12800 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12801   match( Set flags (CmpL src zero ));
12802   effect(TEMP tmp);
12803   ins_cost(200);
12804   format %{ "MOV    $tmp,$src.lo\n\t"
12805             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12806   ins_encode( long_cmp_flags0( src, tmp ) );
12807   ins_pipe( ialu_reg_reg_long );
12808 %}
12809 
12810 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12811 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12812   match( Set flags (CmpL src1 src2 ));
12813   ins_cost(200+300);
12814   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12815             "JNE,s  skip\n\t"
12816             "CMP    $src1.hi,$src2.hi\n\t"
12817      "skip:\t" %}
12818   ins_encode( long_cmp_flags1( src1, src2 ) );
12819   ins_pipe( ialu_cr_reg_reg );
12820 %}
12821 
12822 // Long compare reg == zero/reg OR reg != zero/reg
12823 // Just a wrapper for a normal branch, plus the predicate test.
12824 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12825   match(If cmp flags);
12826   effect(USE labl);
12827   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12828   expand %{
12829     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12830   %}
12831 %}
12832 
12833 //======
12834 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12835 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12836   match(Set flags (CmpUL src zero));
12837   effect(TEMP tmp);
12838   ins_cost(200);
12839   format %{ "MOV    $tmp,$src.lo\n\t"
12840             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12841   ins_encode(long_cmp_flags0(src, tmp));
12842   ins_pipe(ialu_reg_reg_long);
12843 %}
12844 
12845 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12846 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12847   match(Set flags (CmpUL src1 src2));
12848   ins_cost(200+300);
12849   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12850             "JNE,s  skip\n\t"
12851             "CMP    $src1.hi,$src2.hi\n\t"
12852      "skip:\t" %}
12853   ins_encode(long_cmp_flags1(src1, src2));
12854   ins_pipe(ialu_cr_reg_reg);
12855 %}
12856 
12857 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12858 // Just a wrapper for a normal branch, plus the predicate test.
12859 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12860   match(If cmp flags);
12861   effect(USE labl);
12862   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12863   expand %{
12864     jmpCon(cmp, flags, labl);    // JEQ or JNE...
12865   %}
12866 %}
12867 
12868 // Compare 2 longs and CMOVE longs.
12869 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12870   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12871   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12872   ins_cost(400);
12873   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12874             "CMOV$cmp $dst.hi,$src.hi" %}
12875   opcode(0x0F,0x40);
12876   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12877   ins_pipe( pipe_cmov_reg_long );
12878 %}
12879 
12880 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12881   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12882   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12883   ins_cost(500);
12884   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12885             "CMOV$cmp $dst.hi,$src.hi" %}
12886   opcode(0x0F,0x40);
12887   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12888   ins_pipe( pipe_cmov_reg_long );
12889 %}
12890 
12891 // Compare 2 longs and CMOVE ints.
12892 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12893   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12894   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12895   ins_cost(200);
12896   format %{ "CMOV$cmp $dst,$src" %}
12897   opcode(0x0F,0x40);
12898   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12899   ins_pipe( pipe_cmov_reg );
12900 %}
12901 
12902 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12903   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12904   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12905   ins_cost(250);
12906   format %{ "CMOV$cmp $dst,$src" %}
12907   opcode(0x0F,0x40);
12908   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12909   ins_pipe( pipe_cmov_mem );
12910 %}
12911 
// Compare 2 longs and CMOVE ptrs.
12913 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12914   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12915   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12916   ins_cost(200);
12917   format %{ "CMOV$cmp $dst,$src" %}
12918   opcode(0x0F,0x40);
12919   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12920   ins_pipe( pipe_cmov_reg );
12921 %}
12922 
12923 // Compare 2 longs and CMOVE doubles
12924 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12925   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12926   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12927   ins_cost(200);
12928   expand %{
12929     fcmovDPR_regS(cmp,flags,dst,src);
12930   %}
12931 %}
12932 
12933 // Compare 2 longs and CMOVE doubles
12934 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12935   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12936   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12937   ins_cost(200);
12938   expand %{
12939     fcmovD_regS(cmp,flags,dst,src);
12940   %}
12941 %}
12942 
12943 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12945   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12946   ins_cost(200);
12947   expand %{
12948     fcmovFPR_regS(cmp,flags,dst,src);
12949   %}
12950 %}
12951 
12952 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12953   predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12954   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12955   ins_cost(200);
12956   expand %{
12957     fcmovF_regS(cmp,flags,dst,src);
12958   %}
12959 %}
12960 
12961 //======
12962 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12963 // Same as cmpL_reg_flags_LEGT except must negate src
12964 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12965   match( Set flags (CmpL src zero ));
12966   effect( TEMP tmp );
12967   ins_cost(300);
12968   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12969             "CMP    $tmp,$src.lo\n\t"
12970             "SBB    $tmp,$src.hi\n\t" %}
12971   ins_encode( long_cmp_flags3(src, tmp) );
12972   ins_pipe( ialu_reg_reg_long );
12973 %}
12974 
12975 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12976 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12977 // requires a commuted test to get the same result.
12978 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12979   match( Set flags (CmpL src1 src2 ));
12980   effect( TEMP tmp );
12981   ins_cost(300);
12982   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12983             "MOV    $tmp,$src2.hi\n\t"
12984             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12985   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12986   ins_pipe( ialu_cr_reg_reg );
12987 %}
12988 
// Long compares reg < zero/reg OR reg >= zero/reg.
12990 // Just a wrapper for a normal branch, plus the predicate test
12991 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12992   match(If cmp flags);
12993   effect(USE labl);
12994   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12995   ins_cost(300);
12996   expand %{
12997     jmpCon(cmp,flags,labl);    // JGT or JLE...
12998   %}
12999 %}
13000 
13001 //======
13002 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13003 // Same as cmpUL_reg_flags_LEGT except must negate src
13004 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13005   match(Set flags (CmpUL src zero));
13006   effect(TEMP tmp);
13007   ins_cost(300);
13008   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13009             "CMP    $tmp,$src.lo\n\t"
13010             "SBB    $tmp,$src.hi\n\t" %}
13011   ins_encode(long_cmp_flags3(src, tmp));
13012   ins_pipe(ialu_reg_reg_long);
13013 %}
13014 
13015 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13016 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13017 // requires a commuted test to get the same result.
13018 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13019   match(Set flags (CmpUL src1 src2));
13020   effect(TEMP tmp);
13021   ins_cost(300);
13022   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13023             "MOV    $tmp,$src2.hi\n\t"
13024             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13025   ins_encode(long_cmp_flags2( src2, src1, tmp));
13026   ins_pipe(ialu_cr_reg_reg);
13027 %}
13028 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13030 // Just a wrapper for a normal branch, plus the predicate test
13031 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13032   match(If cmp flags);
13033   effect(USE labl);
13034   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13035   ins_cost(300);
13036   expand %{
13037     jmpCon(cmp, flags, labl);    // JGT or JLE...
13038   %}
13039 %}
13040 
13041 // Compare 2 longs and CMOVE longs.
13042 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13043   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13044   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13045   ins_cost(400);
13046   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13047             "CMOV$cmp $dst.hi,$src.hi" %}
13048   opcode(0x0F,0x40);
13049   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13050   ins_pipe( pipe_cmov_reg_long );
13051 %}
13052 
13053 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13054   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13055   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13056   ins_cost(500);
13057   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13058             "CMOV$cmp $dst.hi,$src.hi+4" %}
13059   opcode(0x0F,0x40);
13060   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13061   ins_pipe( pipe_cmov_reg_long );
13062 %}
13063 
13064 // Compare 2 longs and CMOVE ints.
13065 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13066   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13067   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13068   ins_cost(200);
13069   format %{ "CMOV$cmp $dst,$src" %}
13070   opcode(0x0F,0x40);
13071   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13072   ins_pipe( pipe_cmov_reg );
13073 %}
13074 
13075 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13076   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13077   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13078   ins_cost(250);
13079   format %{ "CMOV$cmp $dst,$src" %}
13080   opcode(0x0F,0x40);
13081   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13082   ins_pipe( pipe_cmov_mem );
13083 %}
13084 
13085 // Compare 2 longs and CMOVE ptrs.
13086 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13087   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13088   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13089   ins_cost(200);
13090   format %{ "CMOV$cmp $dst,$src" %}
13091   opcode(0x0F,0x40);
13092   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13093   ins_pipe( pipe_cmov_reg );
13094 %}
13095 
13096 // Compare 2 longs and CMOVE doubles
13097 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13098   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13099   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13100   ins_cost(200);
13101   expand %{
13102     fcmovDPR_regS(cmp,flags,dst,src);
13103   %}
13104 %}
13105 
13106 // Compare 2 longs and CMOVE doubles
13107 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13108   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13109   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13110   ins_cost(200);
13111   expand %{
13112     fcmovD_regS(cmp,flags,dst,src);
13113   %}
13114 %}
13115 
13116 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13118   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13119   ins_cost(200);
13120   expand %{
13121     fcmovFPR_regS(cmp,flags,dst,src);
13122   %}
13123 %}
13124 
13125 
13126 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13127   predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13128   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13129   ins_cost(200);
13130   expand %{
13131     fcmovF_regS(cmp,flags,dst,src);
13132   %}
13133 %}
13134 
13135 
13136 // ============================================================================
13137 // Procedure Call/Return Instructions
13138 // Call Java Static Instruction
13139 // Note: If this code changes, the corresponding ret_addr_offset() and
13140 //       compute_padding() functions will have to be adjusted.
13141 instruct CallStaticJavaDirect(method meth) %{
13142   match(CallStaticJava);
13143   effect(USE meth);
13144 
13145   ins_cost(300);
13146   format %{ "CALL,static " %}
13147   opcode(0xE8); /* E8 cd */
13148   ins_encode( pre_call_resets,
13149               Java_Static_Call( meth ),
13150               call_epilog,
13151               post_call_FPU );
13152   ins_pipe( pipe_slow );
13153   ins_alignment(4);
13154 %}
13155 
13156 // Call Java Dynamic Instruction
13157 // Note: If this code changes, the corresponding ret_addr_offset() and
13158 //       compute_padding() functions will have to be adjusted.
13159 instruct CallDynamicJavaDirect(method meth) %{
13160   match(CallDynamicJava);
13161   effect(USE meth);
13162 
13163   ins_cost(300);
13164   format %{ "MOV    EAX,(oop)-1\n\t"
13165             "CALL,dynamic" %}
13166   opcode(0xE8); /* E8 cd */
13167   ins_encode( pre_call_resets,
13168               Java_Dynamic_Call( meth ),
13169               call_epilog,
13170               post_call_FPU );
13171   ins_pipe( pipe_slow );
13172   ins_alignment(4);
13173 %}
13174 
13175 // Call Runtime Instruction
13176 instruct CallRuntimeDirect(method meth) %{
13177   match(CallRuntime );
13178   effect(USE meth);
13179 
13180   ins_cost(300);
13181   format %{ "CALL,runtime " %}
13182   opcode(0xE8); /* E8 cd */
13183   // Use FFREEs to clear entries in float stack
13184   ins_encode( pre_call_resets,
13185               FFree_Float_Stack_All,
13186               Java_To_Runtime( meth ),
13187               post_call_FPU );
13188   ins_pipe( pipe_slow );
13189 %}
13190 
13191 // Call runtime without safepoint
13192 instruct CallLeafDirect(method meth) %{
13193   match(CallLeaf);
13194   effect(USE meth);
13195 
13196   ins_cost(300);
13197   format %{ "CALL_LEAF,runtime " %}
13198   opcode(0xE8); /* E8 cd */
13199   ins_encode( pre_call_resets,
13200               FFree_Float_Stack_All,
13201               Java_To_Runtime( meth ),
13202               Verify_FPU_For_Leaf, post_call_FPU );
13203   ins_pipe( pipe_slow );
13204 %}
13205 
13206 instruct CallLeafNoFPDirect(method meth) %{
13207   match(CallLeafNoFP);
13208   effect(USE meth);
13209 
13210   ins_cost(300);
13211   format %{ "CALL_LEAF_NOFP,runtime " %}
13212   opcode(0xE8); /* E8 cd */
13213   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13214   ins_pipe( pipe_slow );
13215 %}
13216 
13217 
13218 // Return Instruction
13219 // Remove the return address & jump to it.
13220 instruct Ret() %{
13221   match(Return);
13222   format %{ "RET" %}
13223   opcode(0xC3);
13224   ins_encode(OpcP);
13225   ins_pipe( pipe_jmp );
13226 %}
13227 
13228 // Tail Call; Jump from runtime stub to Java code.
13229 // Also known as an 'interprocedural jump'.
13230 // Target of jump will eventually return to caller.
13231 // TailJump below removes the return address.
13232 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13233   match(TailCall jump_target method_oop );
13234   ins_cost(300);
13235   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13236   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13237   ins_encode( OpcP, RegOpc(jump_target) );
13238   ins_pipe( pipe_jmp );
13239 %}
13240 
13241 
13242 // Tail Jump; remove the return address; jump to target.
13243 // TailCall above leaves the return address around.
13244 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13245   match( TailJump jump_target ex_oop );
13246   ins_cost(300);
13247   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13248             "JMP    $jump_target " %}
13249   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13250   ins_encode( enc_pop_rdx,
13251               OpcP, RegOpc(jump_target) );
13252   ins_pipe( pipe_jmp );
13253 %}
13254 
13255 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
13257 // just prior to jumping to this handler.  No code emitted.
13258 instruct CreateException( eAXRegP ex_oop )
13259 %{
13260   match(Set ex_oop (CreateEx));
13261 
13262   size(0);
13263   // use the following format syntax
13264   format %{ "# exception oop is in EAX; no code emitted" %}
13265   ins_encode();
13266   ins_pipe( empty );
13267 %}
13268 
13269 
13270 // Rethrow exception:
13271 // The exception oop will come in the first argument position.
13272 // Then JUMP (not call) to the rethrow stub code.
13273 instruct RethrowException()
13274 %{
13275   match(Rethrow);
13276 
13277   // use the following format syntax
13278   format %{ "JMP    rethrow_stub" %}
13279   ins_encode(enc_rethrow);
13280   ins_pipe( pipe_jmp );
13281 %}
13282 
13283 // inlined locking and unlocking
13284 
13285 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13286   predicate(Compile::current()->use_rtm());
13287   match(Set cr (FastLock object box));
13288   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13289   ins_cost(300);
13290   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13291   ins_encode %{
13292     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13293                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13294                  _counters, _rtm_counters, _stack_rtm_counters,
13295                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13296                  true, ra_->C->profile_rtm());
13297   %}
13298   ins_pipe(pipe_slow);
13299 %}
13300 
13301 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13302   predicate(!Compile::current()->use_rtm());
13303   match(Set cr (FastLock object box));
13304   effect(TEMP tmp, TEMP scr, USE_KILL box);
13305   ins_cost(300);
13306   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13307   ins_encode %{
13308     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13309                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13310   %}
13311   ins_pipe(pipe_slow);
13312 %}
13313 
13314 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13315   match(Set cr (FastUnlock object box));
13316   effect(TEMP tmp, USE_KILL box);
13317   ins_cost(300);
13318   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13319   ins_encode %{
13320     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13321   %}
13322   ins_pipe(pipe_slow);
13323 %}
13324 
13325 
13326 
13327 // ============================================================================
13328 // Safepoint Instruction
13329 instruct safePoint_poll(eFlagsReg cr) %{
13330   predicate(SafepointMechanism::uses_global_page_poll());
13331   match(SafePoint);
13332   effect(KILL cr);
13333 
13334   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13335   // On SPARC that might be acceptable as we can generate the address with
13336   // just a sethi, saving an or.  By polling at offset 0 we can end up
13337   // putting additional pressure on the index-0 in the D$.  Because of
13338   // alignment (just like the situation at hand) the lower indices tend
13339   // to see more traffic.  It'd be better to change the polling address
13340   // to offset 0 of the last $line in the polling page.
13341 
13342   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13343   ins_cost(125);
  size(6);
13345   ins_encode( Safepoint_Poll() );
13346   ins_pipe( ialu_reg_mem );
13347 %}
13348 
13349 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13350   predicate(SafepointMechanism::uses_thread_local_poll());
13351   match(SafePoint poll);
13352   effect(KILL cr, USE poll);
13353 
13354   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13355   ins_cost(125);
13356   // EBP would need size(3)
13357   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13358   ins_encode %{
13359     __ relocate(relocInfo::poll_type);
13360     address pre_pc = __ pc();
13361     __ testl(rax, Address($poll$$Register, 0));
13362     address post_pc = __ pc();
13363     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13364   %}
13365   ins_pipe(ialu_reg_mem);
13366 %}
13367 
13368 
13369 // ============================================================================
13370 // This name is KNOWN by the ADLC and cannot be changed.
13371 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13372 // for this guy.
13373 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13374   match(Set dst (ThreadLocal));
13375   effect(DEF dst, KILL cr);
13376 
13377   format %{ "MOV    $dst, Thread::current()" %}
13378   ins_encode %{
13379     Register dstReg = as_Register($dst$$reg);
13380     __ get_thread(dstReg);
13381   %}
13382   ins_pipe( ialu_reg_fat );
13383 %}
13384 
13385 
13386 
13387 //----------PEEPHOLE RULES-----------------------------------------------------
13388 // These must follow all instruction definitions as they use the names
13389 // defined in the instructions definitions.
13390 //
13391 // peepmatch ( root_instr_name [preceding_instruction]* );
13392 //
13393 // peepconstraint %{
13394 // (instruction_number.operand_name relational_op instruction_number.operand_name
13395 //  [, ...] );
13396 // // instruction numbers are zero-based using left to right order in peepmatch
13397 //
13398 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13399 // // provide an instruction_number.operand_name for each operand that appears
13400 // // in the replacement instruction's match rule
13401 //
13402 // ---------VM FLAGS---------------------------------------------------------
13403 //
13404 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13405 //
13406 // Each peephole rule is given an identifying number starting with zero and
13407 // increasing by one in the order seen by the parser.  An individual peephole
13408 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13409 // on the command-line.
13410 //
13411 // ---------CURRENT LIMITATIONS----------------------------------------------
13412 //
13413 // Only match adjacent instructions in same basic block
13414 // Only equality constraints
13415 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13416 // Only one replacement instruction
13417 //
13418 // ---------EXAMPLE----------------------------------------------------------
13419 //
13420 // // pertinent parts of existing instructions in architecture description
13421 // instruct movI(rRegI dst, rRegI src) %{
13422 //   match(Set dst (CopyI src));
13423 // %}
13424 //
13425 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13426 //   match(Set dst (AddI dst src));
13427 //   effect(KILL cr);
13428 // %}
13429 //
13430 // // Change (inc mov) to lea
13431 // peephole %{
//   // increment preceded by register-register move
13433 //   peepmatch ( incI_eReg movI );
13434 //   // require that the destination register of the increment
13435 //   // match the destination register of the move
13436 //   peepconstraint ( 0.dst == 1.dst );
13437 //   // construct a replacement instruction that sets
13438 //   // the destination to ( move's source register + one )
13439 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13440 // %}
13441 //
13442 // Implementation no longer uses movX instructions since
13443 // machine-independent system no longer uses CopyX nodes.
13444 //
13445 // peephole %{
13446 //   peepmatch ( incI_eReg movI );
13447 //   peepconstraint ( 0.dst == 1.dst );
13448 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13449 // %}
13450 //
13451 // peephole %{
13452 //   peepmatch ( decI_eReg movI );
13453 //   peepconstraint ( 0.dst == 1.dst );
13454 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13455 // %}
13456 //
13457 // peephole %{
13458 //   peepmatch ( addI_eReg_imm movI );
13459 //   peepconstraint ( 0.dst == 1.dst );
13460 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13461 // %}
13462 //
13463 // peephole %{
13464 //   peepmatch ( addP_eReg_imm movP );
13465 //   peepconstraint ( 0.dst == 1.dst );
13466 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13467 // %}
13468 
13469 // // Change load of spilled value to only a spill
13470 // instruct storeI(memory mem, rRegI src) %{
13471 //   match(Set mem (StoreI mem src));
13472 // %}
13473 //
13474 // instruct loadI(rRegI dst, memory mem) %{
13475 //   match(Set dst (LoadI mem));
13476 // %}
13477 //
13478 peephole %{
13479   peepmatch ( loadI storeI );
13480   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13481   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13482 %}
13483 
13484 //----------SMARTSPILL RULES---------------------------------------------------
13485 // These must follow all instruction definitions as they use the names
13486 // defined in the instructions definitions.