1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
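// For the general registers defined below these are the standard IA-32
// encodings: EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7.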
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
  64 // SOE was then turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Here's the trick: FPR1 is really st(0), except in the midst of emitting
  82 // assembly for a machnode.  During emission the FPU stack is pushed, making
  83 // FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
  84 // have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
  85 // numbering weirdness forces the instruction encoding to play games with the
  86 // register encode to correct for the 0/1 issue.  See
  87 // MachSpillCopyNode::implementation, where it does flt->flt moves, for an
  88 // example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used but supply alignment for the XMM regs.
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between the register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and not EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description.
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
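// The long pairs used by this port (see the allocation-class comment above) are
// EDX:EAX, EBX:ECX, and EDI:EBP; in each pair the high register's encoding is
// the low register's encoding plus 2, which is all this macro relies on.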
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have only 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for the 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
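// The pool is one 128-bit slot larger than the four masks it holds, so that
// double_quadword() can round each &fp_signmask_pool[i*2] (i = 1..4) down to a
// 16-byte boundary and still stay inside the array; the rounded addresses are
// exactly 16 bytes apart, so the masks never overlap.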
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
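// The fldcw and vzeroupper bytes counted above are emitted immediately before
// the call itself, which is why the ret_addr_offset() and compute_padding()
// routines below add pre_call_resets_size() to their byte counts.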
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // With the global page it does not (x86 has absolute addressing); with thread-local polling the poll address is passed in.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned so that
 328 // it does not span a cache line and can therefore be safely patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned so that
 336 // it does not span a cache line and can therefore be safely patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
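// emit_rm() packs a ModR/M-style byte: f1 is the 2-bit mod (or SIB scale) field,
// f2 the 3-bit reg (or SIB index) field, and f3 the 3-bit r/m (or SIB base)
// field.  The same helper is reused below wherever a SIB byte is needed.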
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // 8-bit displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // 32-bit displacement
 412   }
 413 }
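// Note: ESP as a base register always requires a SIB byte, which is what the
// second emit_rm() call in each branch above supplies (scale 0, no index, base ESP).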
 414 
 415 // encode_RegMem: emit the ModR/M (and, if needed, SIB and displacement) bytes for a register plus a [base + index*scale + displace] memory operand.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // No index register (index encoding 0x4), no scale, and base is not ESP: use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
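// Produce the three-way FP compare result in 'dst' from the flags set by a
// preceding (u)comiss/(u)comisd: -1 for unordered (NaN) or 'below', 0 for
// equal, +1 for above.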
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because code that uses the
 628     // constant table might be emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for the return addr and EBP.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for the return addr and EBP.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
 789     // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
 790     //                          as it maps more cases to a single-byte displacement.
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
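  // EBX_num and EDX_num below are not used as registers; they are chosen only
  // because Matcher::_regEncode[] maps them to 3 and 2, the ModR/M extensions
  // /3 (FSTP, store & pop) and /2 (FST, store without pop) of the 0xD9/0xDD
  // x87 store opcodes emitted via impl_helper().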
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popl    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
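// MachSpillCopyNode::implementation serves three callers (see below): with a
// CodeBuffer it emits the copy, with do_size set it only returns the size in
// bytes, and with neither it prints the assembly-like form to 'st'.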
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
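       // LEA reg,[ESP+disp] encodes as opcode + ModRM + SIB + displacement:
       // 7 bytes with a 32-bit displacement, 4 bytes with an 8-bit one.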
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
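       // CMP EAX,[ECX+4] (3 bytes) + JNE rel32 (6 bytes) + 2 or 3 NOPs = 11 or 12.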
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
1393 // This flag is UltraSparc-specific; here, true just means we have fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to the address of the branch.
1404   // On x86 a branch displacement is calculated relative to the address
1405   // of the next instruction.
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
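
     // Illustrative example: a 2-byte short JCC whose target lies 100 bytes past
     // the branch address has offset = 100 - br_size = 98, which fits in the
     // signed 8-bit range, so a short branch can be used.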
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
1416   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Needs 2 CMOV's for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
1510     // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
1523 // Are floats converted to doubles when stored to the stack during deoptimization?
1524 // On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
1533 // arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546   // Use the hardware integer DIV instruction when
1547   // it is faster than code which uses a multiply.
1548   // Only when the constant divisor fits into 32 bits
1549   // (min_jint is excluded because negating it does not
1550   // yield a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
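
     // For example (illustrative): a constant divisor of 7 qualifies on CPUs with
     // fast IDIV, while min_jint or any divisor that does not fit in 32 bits
     // falls back to the multiply-based code.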
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value are known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
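
     // For example (illustrative): (AndL x (ConL 0xFF)) and ConL 42 both report
     // true, while ConL 0x100000000 (or anything whose high word is not provably
     // zero) reports false.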
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
1603 // interface keyword.  There are currently four supported interfaces:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
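     //
     // As a schematic sketch only (the concrete rules appear later in this file;
     // names and operands here are illustrative), an instruct entry ties these
     // pieces together roughly like this:
     //
     //   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
     //     match(Set dst (AddI dst src));
     //     opcode(0x03);                          // primary opcode
     //     ins_encode( OpcP, RegReg(dst,src) );   // enc_classes defined below
     //   %}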
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the Intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In the future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : rax,: dividend                         min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: rax,: quotient  (= rax, idiv reg)       min_int
1670     //         rdx: remainder (= rax, irem reg)       0
1671     //
1672     //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         rdx,edx
1677     //  83 F9 FF             cmp         rcx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
1707   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);
         }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap rbx, and rcx before and after the
2095   //       cmpxchg8 instruction because the instruction uses
2096   //       rcx as the high order word of the new value to store but
2097   //       our register encoding uses rbx,.
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  rbx,ecx
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     emit_opcode(cbuf,0xF0);
2105     // CMPXCHG8 [Eptr]
2106     emit_opcode(cbuf,0x0F);
2107     emit_opcode(cbuf,0xC7);
2108     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109     // XCHG  rbx,ecx
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit mode
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148     int res_encoding = $res$$reg;
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     int reg_encoding = $ereg$$reg;
2168     int base  = $mem$$base;
2169     int index = $mem$$index;
2170     int scale = $mem$$scale;
2171     int displace = $mem$$disp;
2172     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178     int base  = $mem$$base;
2179     int index = $mem$$index;
2180     int scale = $mem$$scale;
2181     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187     int r1, r2;
2188     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190     emit_opcode(cbuf,0x0F);
2191     emit_opcode(cbuf,$tertiary);
2192     emit_rm(cbuf, 0x3, r1, r2);
2193     emit_d8(cbuf,$cnt$$constant);
2194     emit_d8(cbuf,$primary);
2195     emit_rm(cbuf, 0x3, $secondary, r1);
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200     emit_opcode( cbuf, 0x8B ); // Move
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_d8(cbuf,$primary);
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     int r1, r2;
2214     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216 
2217     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218     emit_rm(cbuf, 0x3, r1, r2);
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_opcode(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, r1);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_opcode(cbuf,0x33);  // XOR r2,r2
2225     emit_rm(cbuf, 0x3, r2, r2);
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     emit_opcode(cbuf,$opcode$$constant);
2232     int reg_encoding = $rm_reg$$reg;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp + $disp_for_half$$constant;
2237     relocInfo::relocType disp_reloc = relocInfo::none;
2238     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     int rm_byte_opcode = $rm_opcode$$constant;
2248     int base     = $mem$$base;
2249     int index    = $mem$$index;
2250     int scale    = $mem$$scale;
2251     int displace = $mem$$disp;
2252     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     int rm_byte_opcode = $rm_opcode$$constant;
2258     int base     = $mem$$base;
2259     int index    = $mem$$index;
2260     int scale    = $mem$$scale;
2261     int displace = $mem$$disp;
2262     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // if dst < src, jump around the move (dst already holds the min)
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // if dst > src, jump around the move (dst already holds the max)
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2368     // SHL    $dst.lo,$shift"
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392     // SHR    $dst.hi,$shift"
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2417     // SAR    $dst.hi,$shift"
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
2652     // jp  ::loop
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
2709 // less_result     =  1;
2710 // greater_result  = -1;
2711 // equal_result    = 0;
2712 // nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
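
  // Sketch of the idea above (comments only): sign-extend a 32-bit int into
  // a lo/hi register pair by copying it and arithmetic-shifting the copy.
  //   int32_t lo = src;
  //   int32_t hi = src >> 31;   // arithmetic shift replicates the sign bit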
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
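
  // Sketch (comments only): the two pushes lay the long out little-endian at
  // [ESP], FILD loads those 8 bytes as one signed 64-bit integer onto the x87
  // stack, and the ADD pops them again -- roughly st0 = (double)(int64_t)src.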
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
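
  // Sketch of the idea above (comments only, hypothetical names): with the
  // other factor already in EAX, the widening IMUL leaves the 64-bit product
  // in EDX:EAX, so an arithmetic shift of EDX by (cnt-32) gives the result
  // of a shift by cnt in the 32..63 range:
  //   int64_t prod = (int64_t)src1 * (int32_t)src2_lo;   // IMUL -> EDX:EAX
  //   int32_t dst  = (int32_t)(prod >> cnt);             // SAR EDX, cnt-32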
2800 
2801   // this version doesn't have add sp, 8
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2849     // ADD    EDX,ESI
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
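
  // Sketch of the schoolbook multiply above (comments only): with x = dst
  // and y = src as hi:lo pairs, the product truncated to 64 bits is
  //   uint32_t lo = x_lo * y_lo;                                 // low half of MUL
  //   uint32_t hi = (uint32_t)(((uint64_t)x_lo * y_lo) >> 32)    // high half of MUL
  //               + x_hi * y_lo + x_lo * y_hi;                   // the two IMUL terms
  // (the x_hi * y_hi term only affects bits above 63 and is dropped).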
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2867     // ADD    EDX,ESI
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
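
  // Sketch (comments only): OR-ing the two halves into a scratch register
  // sets ZF exactly when the whole 64-bit value is zero:
  //   flags = set_by(src_lo | src_hi);   // ZF <=> src == 0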
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
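
  // Sketch of the trick above (comments only): the low-word CMP produces the
  // borrow that the SBB of the high words consumes, so the sign/overflow
  // flags afterwards are those of the full 64-bit subtraction src1 - src2,
  // which is what the signed LT/GE tests need (equality uses other encodings):
  //   borrow = (uint32_t)src1_lo < (uint32_t)src2_lo;
  //   flags  = set_by(src1_hi - src2_hi - borrow);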
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
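
  // Sketch of the two's-complement negation above (comments only): NEG of
  // the low word sets carry iff it was non-zero, and the SBB folds that
  // borrow into the already-negated high word:
  //   hi = -hi;
  //   lo = -lo;
  //   hi -= (lo != 0);   // borrow from the low word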
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
2978   // Convert a double to an int.  Java semantics require we do complex
2979   // manglelations in the corner cases.  So we set the rounding mode to
2980   // 'zero', store the darned double down as an int, and reset the
2981   // rounding mode to 'nearest'.  The hardware throws an exception which
2982   // patches up the correct value directly to the stack.
2983   enc_class DPR2I_encoding( regDPR src ) %{
2984     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985     // exceptions here, so that a NAN or other corner-case value will
2986     // throw an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
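
  // For reference (comments only): FISTP under round-to-zero yields the
  // sentinel 0x80000000 for NaN and out-of-range inputs, which is exactly
  // what the compare above tests for; the slow-path stub then supplies the
  // Java-mandated answer, which behaves like:
  //   int d2i(double d) {
  //     if (d != d)              return 0;                   // NaN
  //     if (d >=  2147483648.0)  return 0x7fffffff;          // Integer.MAX_VALUE
  //     if (d <  -2147483648.0)  return (int)0x80000000;     // Integer.MIN_VALUE
  //     return (int)d;                                       // truncate toward zero
  //   }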
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3030     // Allocate two words
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
3046     // Load the converted long; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050     emit_d8    (cbuf,0xFA);       // rdx
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3076     // FADDP  ST,src2  /* D8 C0+i */
3077     emit_opcode(cbuf, 0xD8);
3078     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3079     //could use FADDP  src2,fpST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top)
3090     // FSUB   ST,$src1
3091     emit_opcode(cbuf, 0xD8);
3092     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094     // FDIV
3095     emit_opcode(cbuf, 0xD8);
3096     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
3105     // FMUL  ST,src2  /* D8 C8+i */
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top)
3113     // FADD   ST,$src  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x05;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);
3143     int rm_byte_opcode = 0x07;
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150   %}
3151 
3152   // Safepoint Poll.  This polls the safepoint page, and causes an
3153   // exception if it is not readable. Unfortunately, it kills the condition code
3154   // in the process
3155 // We currently use TESTL [spp],EDI
3156   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160     emit_opcode(cbuf,0x85);
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163   %}
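
  // For illustration: the bytes above decode to roughly
  //   TEST dword ptr [polling_page], EDI
  // When a safepoint is requested the VM protects the polling page, this
  // read faults, and the signal handler turns the fault into a safepoint stop.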
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
3210 //         are owned by the CALLEE.  Holes should not be necessary in the
3211 //         incoming area, as the Java calling convention is completely under
3212 //         the control of the AD file.  Doubles can be sorted and packed to
3213 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
3225   // These three registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted Java to compiled Java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for
3250   // return address and one for rbp (must save rbp)
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262   // Otherwise, it is above the locks and verification slot and alignment word
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
3275     // No difference between incoming/outgoing, just pass false
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating
3281   // arguments either in registers or in stack slots.  Passed an array
3282   // of ideal registers called "sig" and a "length" count.  Stack-slot
3283   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284   // arguments for a CALLEE.  Incoming stack arguments are
3285   // automatically biased by the preserve_stack_slots field above.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307   // Location of return values
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                 // non-matching short branch variant of some
3330                                 // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
3513 // Long Immediate minus-one
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127.
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Long Immediate: 32-bit signed value
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 // Double Immediate zero
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero
3596 operand immD0() %{
3597   // Do additional (and counter-intuitive) test against NaN to work around VC++
3598   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599   // compare equal to -0.0.
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
3659 // Constants for sign-extension shifts
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));
3773   match(RegI);
3774   match(eCXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));
3785   match(RegI);
3786   match(eBXRegI);
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));
3797   match(RegI);
3798   match(eAXRegI);
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808 // //
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // On Windows 95, EBP is not safe to use for implicit null tests.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));
3873   match(RegP);
3874   match(eCXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));
3885   match(RegP);
3886   match(eBXRegP);
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));
3937   match(RegL);
3938   match(eADXRegL);
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990   predicate(false);
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // FPU Double register operands
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operands
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operands
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // Float register operands
4100 operand vlRegF() %{
4101    constraint(ALLOC_IN_RC(float_reg_vl));
4102    match(RegF);
4103 
4104    format %{ %}
4105    interface(REG_INTER);
4106 %}
4107 
4108 // XMM Double register operands
4109 operand regD() %{
4110   predicate( UseSSE>=2 );
4111   constraint(ALLOC_IN_RC(double_reg_legacy));
4112   match(RegD);
4113   format %{ %}
4114   interface(REG_INTER);
4115 %}
4116 
4117 // Double register operands
4118 operand vlRegD() %{
4119    constraint(ALLOC_IN_RC(double_reg_vl));
4120    match(RegD);
4121 
4122    format %{ %}
4123    interface(REG_INTER);
4124 %}
4125 
4126 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4127 // runtime code generation via reg_class_dynamic.
4128 operand vecS() %{
4129   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4130   match(VecS);
4131 
4132   format %{ %}
4133   interface(REG_INTER);
4134 %}
4135 
4136 operand legVecS() %{
4137   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4138   match(VecS);
4139 
4140   format %{ %}
4141   interface(REG_INTER);
4142 %}
4143 
4144 operand vecD() %{
4145   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4146   match(VecD);
4147 
4148   format %{ %}
4149   interface(REG_INTER);
4150 %}
4151 
4152 operand legVecD() %{
4153   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4154   match(VecD);
4155 
4156   format %{ %}
4157   interface(REG_INTER);
4158 %}
4159 
4160 operand vecX() %{
4161   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4162   match(VecX);
4163 
4164   format %{ %}
4165   interface(REG_INTER);
4166 %}
4167 
4168 operand legVecX() %{
4169   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4170   match(VecX);
4171 
4172   format %{ %}
4173   interface(REG_INTER);
4174 %}
4175 
4176 operand vecY() %{
4177   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4178   match(VecY);
4179 
4180   format %{ %}
4181   interface(REG_INTER);
4182 %}
4183 
4184 operand legVecY() %{
4185   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186   match(VecY);
4187 
4188   format %{ %}
4189   interface(REG_INTER);
4190 %}
4191 
4192 //----------Memory Operands----------------------------------------------------
4193 // Direct Memory Operand
4194 operand direct(immP addr) %{
4195   match(addr);
4196 
4197   format %{ "[$addr]" %}
4198   interface(MEMORY_INTER) %{
4199     base(0xFFFFFFFF);
4200     index(0x4);
4201     scale(0x0);
4202     disp($addr);
4203   %}
4204 %}
4205 
4206 // Indirect Memory Operand
4207 operand indirect(eRegP reg) %{
4208   constraint(ALLOC_IN_RC(int_reg));
4209   match(reg);
4210 
4211   format %{ "[$reg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index(0x4);
4215     scale(0x0);
4216     disp(0x0);
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Short Offset Operand
4221 operand indOffset8(eRegP reg, immI8 off) %{
4222   match(AddP reg off);
4223 
4224   format %{ "[$reg + $off]" %}
4225   interface(MEMORY_INTER) %{
4226     base($reg);
4227     index(0x4);
4228     scale(0x0);
4229     disp($off);
4230   %}
4231 %}
4232 
4233 // Indirect Memory Plus Long Offset Operand
4234 operand indOffset32(eRegP reg, immI off) %{
4235   match(AddP reg off);
4236 
4237   format %{ "[$reg + $off]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index(0x4);
4241     scale(0x0);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 // Indirect Memory Plus Long Offset Operand
4247 operand indOffset32X(rRegI reg, immP off) %{
4248   match(AddP off reg);
4249 
4250   format %{ "[$reg + $off]" %}
4251   interface(MEMORY_INTER) %{
4252     base($reg);
4253     index(0x4);
4254     scale(0x0);
4255     disp($off);
4256   %}
4257 %}
4258 
4259 // Indirect Memory Plus Index Register Plus Offset Operand
4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261   match(AddP (AddP reg ireg) off);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $off + $ireg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($ireg);
4268     scale(0x0);
4269     disp($off);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Index Register Plus Offset Operand
4274 operand indIndex(eRegP reg, rRegI ireg) %{
4275   match(AddP reg ireg);
4276 
4277   op_cost(10);
4278   format %{"[$reg + $ireg]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index($ireg);
4282     scale(0x0);
4283     disp(0x0);
4284   %}
4285 %}
4286 
4287 // // -------------------------------------------------------------------------
4288 // // 486 architecture doesn't support "scale * index + offset" without a base
4289 // // -------------------------------------------------------------------------
4290 // // Scaled Memory Operands
4291 // // Indirect Memory Times Scale Plus Offset Operand
4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293 //   match(AddP off (LShiftI ireg scale));
4294 //
4295 //   op_cost(10);
4296 //   format %{"[$off + $ireg << $scale]" %}
4297 //   interface(MEMORY_INTER) %{
4298 //     base(0x4);
4299 //     index($ireg);
4300 //     scale($scale);
4301 //     disp($off);
4302 //   %}
4303 // %}
4304 
4305 // Indirect Memory Times Scale Plus Index Register
4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307   match(AddP reg (LShiftI ireg scale));
4308 
4309   op_cost(10);
4310   format %{"[$reg + $ireg << $scale]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index($ireg);
4314     scale($scale);
4315     disp(0x0);
4316   %}
4317 %}
4318 
4319 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321   match(AddP (AddP reg (LShiftI ireg scale)) off);
4322 
4323   op_cost(10);
4324   format %{"[$reg + $off + $ireg << $scale]" %}
4325   interface(MEMORY_INTER) %{
4326     base($reg);
4327     index($ireg);
4328     scale($scale);
4329     disp($off);
4330   %}
4331 %}
4332 
4333 //----------Load Long Memory Operands------------------------------------------
4334 // The load-long idiom will use its address expression again after loading
4335 // the first word of the long.  If the load-long destination overlaps with
4336 // registers used in the addressing expression, the 2nd half will be loaded
4337 // from a clobbered address.  Fix this by requiring that load-long use
4338 // address registers that do not overlap with the load-long target.
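// For illustration (hypothetical register choice): if the long were loaded
// into a pair that included its own base register, e.g.
//   mov ESI, [ESI+8]     // low word  -- base clobbered here
//   mov EDX, [ESI+12]    // high word -- now read from the wrong address
// the second load would use a stale base; the operands below avoid that.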
4339 
4340 // load-long support
4341 operand load_long_RegP() %{
4342   constraint(ALLOC_IN_RC(esi_reg));
4343   match(RegP);
4344   match(eSIRegP);
4345   op_cost(100);
4346   format %{  %}
4347   interface(REG_INTER);
4348 %}
4349 
4350 // Indirect Memory Operand Long
4351 operand load_long_indirect(load_long_RegP reg) %{
4352   constraint(ALLOC_IN_RC(esi_reg));
4353   match(reg);
4354 
4355   format %{ "[$reg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);
4358     index(0x4);
4359     scale(0x0);
4360     disp(0x0);
4361   %}
4362 %}
4363 
4364 // Indirect Memory Plus Long Offset Operand
4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366   match(AddP reg off);
4367 
4368   format %{ "[$reg + $off]" %}
4369   interface(MEMORY_INTER) %{
4370     base($reg);
4371     index(0x4);
4372     scale(0x0);
4373     disp($off);
4374   %}
4375 %}
4376 
4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4378 
4379  
4380 opclass legRegF (regF);
4381 opclass legRegD (regD);
4382 
4383 
4384 
4385 //----------Special Memory Operands--------------------------------------------
4386 // Stack Slot Operand - This operand is used for loading and storing temporary
4387 //                      values on the stack where a match requires a value to
4388 //                      flow through memory.
4389 operand stackSlotP(sRegP reg) %{
4390   constraint(ALLOC_IN_RC(stack_slots));
4391   // No match rule because this operand is only generated in matching
4392   format %{ "[$reg]" %}
4393   interface(MEMORY_INTER) %{
4394     base(0x4);   // ESP
4395     index(0x4);  // No Index
4396     scale(0x0);  // No Scale
4397     disp($reg);  // Stack Offset
4398   %}
4399 %}
4400 
4401 operand stackSlotI(sRegI reg) %{
4402   constraint(ALLOC_IN_RC(stack_slots));
4403   // No match rule because this operand is only generated in matching
4404   format %{ "[$reg]" %}
4405   interface(MEMORY_INTER) %{
4406     base(0x4);   // ESP
4407     index(0x4);  // No Index
4408     scale(0x0);  // No Scale
4409     disp($reg);  // Stack Offset
4410   %}
4411 %}
4412 
4413 operand stackSlotF(sRegF reg) %{
4414   constraint(ALLOC_IN_RC(stack_slots));
4415   // No match rule because this operand is only generated in matching
4416   format %{ "[$reg]" %}
4417   interface(MEMORY_INTER) %{
4418     base(0x4);   // ESP
4419     index(0x4);  // No Index
4420     scale(0x0);  // No Scale
4421     disp($reg);  // Stack Offset
4422   %}
4423 %}
4424 
4425 operand stackSlotD(sRegD reg) %{
4426   constraint(ALLOC_IN_RC(stack_slots));
4427   // No match rule because this operand is only generated in matching
4428   format %{ "[$reg]" %}
4429   interface(MEMORY_INTER) %{
4430     base(0x4);   // ESP
4431     index(0x4);  // No Index
4432     scale(0x0);  // No Scale
4433     disp($reg);  // Stack Offset
4434   %}
4435 %}
4436 
4437 operand stackSlotL(sRegL reg) %{
4438   constraint(ALLOC_IN_RC(stack_slots));
4439   // No match rule because this operand is only generated in matching
4440   format %{ "[$reg]" %}
4441   interface(MEMORY_INTER) %{
4442     base(0x4);   // ESP
4443     index(0x4);  // No Index
4444     scale(0x0);  // No Scale
4445     disp($reg);  // Stack Offset
4446   %}
4447 %}
4448 
4449 //----------Memory Operands - Win95 Implicit Null Variants----------------
4450 // Indirect Memory Operand
4451 operand indirect_win95_safe(eRegP_no_EBP reg)
4452 %{
4453   constraint(ALLOC_IN_RC(int_reg));
4454   match(reg);
4455 
4456   op_cost(100);
4457   format %{ "[$reg]" %}
4458   interface(MEMORY_INTER) %{
4459     base($reg);
4460     index(0x4);
4461     scale(0x0);
4462     disp(0x0);
4463   %}
4464 %}
4465 
4466 // Indirect Memory Plus Short Offset Operand
4467 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4468 %{
4469   match(AddP reg off);
4470 
4471   op_cost(100);
4472   format %{ "[$reg + $off]" %}
4473   interface(MEMORY_INTER) %{
4474     base($reg);
4475     index(0x4);
4476     scale(0x0);
4477     disp($off);
4478   %}
4479 %}
4480 
4481 // Indirect Memory Plus Long Offset Operand
4482 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4483 %{
4484   match(AddP reg off);
4485 
4486   op_cost(100);
4487   format %{ "[$reg + $off]" %}
4488   interface(MEMORY_INTER) %{
4489     base($reg);
4490     index(0x4);
4491     scale(0x0);
4492     disp($off);
4493   %}
4494 %}
4495 
4496 // Indirect Memory Plus Index Register Plus Offset Operand
4497 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4498 %{
4499   match(AddP (AddP reg ireg) off);
4500 
4501   op_cost(100);
4502   format %{"[$reg + $off + $ireg]" %}
4503   interface(MEMORY_INTER) %{
4504     base($reg);
4505     index($ireg);
4506     scale(0x0);
4507     disp($off);
4508   %}
4509 %}
4510 
4511 // Indirect Memory Times Scale Plus Index Register
4512 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4513 %{
4514   match(AddP reg (LShiftI ireg scale));
4515 
4516   op_cost(100);
4517   format %{"[$reg + $ireg << $scale]" %}
4518   interface(MEMORY_INTER) %{
4519     base($reg);
4520     index($ireg);
4521     scale($scale);
4522     disp(0x0);
4523   %}
4524 %}
4525 
4526 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4527 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4528 %{
4529   match(AddP (AddP reg (LShiftI ireg scale)) off);
4530 
4531   op_cost(100);
4532   format %{"[$reg + $off + $ireg << $scale]" %}
4533   interface(MEMORY_INTER) %{
4534     base($reg);
4535     index($ireg);
4536     scale($scale);
4537     disp($off);
4538   %}
4539 %}
4540 
4541 //----------Conditional Branch Operands----------------------------------------
4542 // Comparison Op  - This is the operation of the comparison, and is limited to
4543 //                  the following set of codes:
4544 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4545 //
4546 // Other attributes of the comparison, such as unsignedness, are specified
4547 // by the comparison instruction that sets a condition code flags register.
4548 // That result is represented by a flags operand whose subtype is appropriate
4549 // to the unsignedness (etc.) of the comparison.
4550 //
4551 // Later, the instruction which matches both the Comparison Op (a Bool) and
4552 // the flags (produced by the Cmp) specifies the coding of the comparison op
4553 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4554 
4555 // Comparison Code
4556 operand cmpOp() %{
4557   match(Bool);
4558 
4559   format %{ "" %}
4560   interface(COND_INTER) %{
4561     equal(0x4, "e");
4562     not_equal(0x5, "ne");
4563     less(0xC, "l");
4564     greater_equal(0xD, "ge");
4565     less_equal(0xE, "le");
4566     greater(0xF, "g");
4567     overflow(0x0, "o");
4568     no_overflow(0x1, "no");
4569   %}
4570 %}
4571 
4572 // Comparison Code, unsigned compare.  Used by FP also, with
4573 // C2 (unordered) turned into GT or LT already.  The other bits
4574 // C0 and C3 are turned into Carry & Zero flags.
4575 operand cmpOpU() %{
4576   match(Bool);
4577 
4578   format %{ "" %}
4579   interface(COND_INTER) %{
4580     equal(0x4, "e");
4581     not_equal(0x5, "ne");
4582     less(0x2, "b");
4583     greater_equal(0x3, "nb");
4584     less_equal(0x6, "be");
4585     greater(0x7, "nbe");
4586     overflow(0x0, "o");
4587     no_overflow(0x1, "no");
4588   %}
4589 %}
4590 
4591 // Floating comparisons that don't require any fixup for the unordered case
4592 operand cmpOpUCF() %{
4593   match(Bool);
4594   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4595             n->as_Bool()->_test._test == BoolTest::ge ||
4596             n->as_Bool()->_test._test == BoolTest::le ||
4597             n->as_Bool()->_test._test == BoolTest::gt);
4598   format %{ "" %}
4599   interface(COND_INTER) %{
4600     equal(0x4, "e");
4601     not_equal(0x5, "ne");
4602     less(0x2, "b");
4603     greater_equal(0x3, "nb");
4604     less_equal(0x6, "be");
4605     greater(0x7, "nbe");
4606     overflow(0x0, "o");
4607     no_overflow(0x1, "no");
4608   %}
4609 %}
4610 
4611 
4612 // Floating comparisons that can be fixed up with extra conditional jumps
4613 operand cmpOpUCF2() %{
4614   match(Bool);
4615   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4616             n->as_Bool()->_test._test == BoolTest::eq);
4617   format %{ "" %}
4618   interface(COND_INTER) %{
4619     equal(0x4, "e");
4620     not_equal(0x5, "ne");
4621     less(0x2, "b");
4622     greater_equal(0x3, "nb");
4623     less_equal(0x6, "be");
4624     greater(0x7, "nbe");
4625     overflow(0x0, "o");
4626     no_overflow(0x1, "no");
4627   %}
4628 %}
4629 
4630 // Comparison Code for FP conditional move
4631 operand cmpOp_fcmov() %{
4632   match(Bool);
4633 
4634   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4635             n->as_Bool()->_test._test != BoolTest::no_overflow);
4636   format %{ "" %}
4637   interface(COND_INTER) %{
4638     equal        (0x0C8);
4639     not_equal    (0x1C8);
4640     less         (0x0C0);
4641     greater_equal(0x1C0);
4642     less_equal   (0x0D0);
4643     greater      (0x1D0);
4644     overflow(0x0, "o"); // not really supported by the instruction
4645     no_overflow(0x1, "no"); // not really supported by the instruction
4646   %}
4647 %}
4648 
4649 // Comparison Code used in long compares
4650 operand cmpOp_commute() %{
4651   match(Bool);
4652 
4653   format %{ "" %}
4654   interface(COND_INTER) %{
4655     equal(0x4, "e");
4656     not_equal(0x5, "ne");
4657     less(0xF, "g");
4658     greater_equal(0xE, "le");
4659     less_equal(0xD, "ge");
4660     greater(0xC, "l");
4661     overflow(0x0, "o");
4662     no_overflow(0x1, "no");
4663   %}
4664 %}
4665 
4666 // Comparison Code used in unsigned long compares
4667 operand cmpOpU_commute() %{
4668   match(Bool);
4669 
4670   format %{ "" %}
4671   interface(COND_INTER) %{
4672     equal(0x4, "e");
4673     not_equal(0x5, "ne");
4674     less(0x7, "nbe");
4675     greater_equal(0x6, "be");
4676     less_equal(0x3, "nb");
4677     greater(0x2, "b");
4678     overflow(0x0, "o");
4679     no_overflow(0x1, "no");
4680   %}
4681 %}
4682 
4683 //----------OPERAND CLASSES----------------------------------------------------
4684 // Operand Classes are groups of operands that are used to simplify
4685 // instruction definitions by not requiring the AD writer to specify separate
4686 // instructions for every form of operand when the instruction accepts
4687 // multiple operand types with the same basic encoding and format.  The classic
4688 // case of this is memory operands.
4689 
4690 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4691                indIndex, indIndexScale, indIndexScaleOffset);
4692 
4693 // Long memory operations are encoded in 2 instructions and a +4 offset.
4694 // This means some kind of offset is always required and you cannot use
4695 // an oop as the offset (done when working on static globals).
4696 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4697                     indIndex, indIndexScale, indIndexScaleOffset);
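
// For illustration only -- a sketch of how one definition written against the
// 'memory' class covers every addressing form listed above (the real loadI and
// loadL instructions appear in the load section below):
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));   // matches indirect, indOffset8, indIndexScaleOffset, ...
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe( ialu_reg_mem );
//   %}
//
// A long_memory operand is used the same way where the encoding must issue two
// 32-bit accesses, at $disp and $disp+4 (see loadL below).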
4698 
4699 
4700 //----------PIPELINE-----------------------------------------------------------
4701 // Rules which define the behavior of the target architecture's pipeline.
4702 pipeline %{
4703 
4704 //----------ATTRIBUTES---------------------------------------------------------
4705 attributes %{
4706   variable_size_instructions;        // Variable-sized instructions
4707   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4708   instruction_unit_size = 1;         // An instruction is 1 byte long
4709   instruction_fetch_unit_size = 16;  // The processor fetches one line
4710   instruction_fetch_units = 1;       // of 16 bytes
4711 
4712   // List of nop instructions
4713   nops( MachNop );
4714 %}
4715 
4716 //----------RESOURCES----------------------------------------------------------
4717 // Resources are the functional units available to the machine
4718 
4719 // Generic P2/P3 pipeline
4720 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4721 // 3 instructions decoded per cycle.
4722 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4723 // 2 ALU op, only ALU0 handles mul/div instructions.
4724 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4725            MS0, MS1, MEM = MS0 | MS1,
4726            BR, FPU,
4727            ALU0, ALU1, ALU = ALU0 | ALU1 );
4728 
4729 //----------PIPELINE DESCRIPTION-----------------------------------------------
4730 // Pipeline Description specifies the stages in the machine's pipeline
4731 
4732 // Generic P2/P3 pipeline
4733 pipe_desc(S0, S1, S2, S3, S4, S5);
4734 
4735 //----------PIPELINE CLASSES---------------------------------------------------
4736 // Pipeline Classes describe the stages in which input and output are
4737 // referenced by the hardware pipeline.
4738 
4739 // Naming convention: ialu or fpu
4740 // Then: _reg
4741 // Then: _reg if there is a 2nd register
4742 // Then: _long if it's a pair of instructions implementing a long
4743 // Then: _fat if it requires the big decoder
4744 //   Or: _mem if it requires the big decoder and a memory unit.
4745 
4746 // Integer ALU reg operation
4747 pipe_class ialu_reg(rRegI dst) %{
4748     single_instruction;
4749     dst    : S4(write);
4750     dst    : S3(read);
4751     DECODE : S0;        // any decoder
4752     ALU    : S3;        // any alu
4753 %}
4754 
4755 // Long ALU reg operation
4756 pipe_class ialu_reg_long(eRegL dst) %{
4757     instruction_count(2);
4758     dst    : S4(write);
4759     dst    : S3(read);
4760     DECODE : S0(2);     // any 2 decoders
4761     ALU    : S3(2);     // both alus
4762 %}
4763 
4764 // Integer ALU reg operation using big decoder
4765 pipe_class ialu_reg_fat(rRegI dst) %{
4766     single_instruction;
4767     dst    : S4(write);
4768     dst    : S3(read);
4769     D0     : S0;        // big decoder only
4770     ALU    : S3;        // any alu
4771 %}
4772 
4773 // Long ALU reg operation using big decoder
4774 pipe_class ialu_reg_long_fat(eRegL dst) %{
4775     instruction_count(2);
4776     dst    : S4(write);
4777     dst    : S3(read);
4778     D0     : S0(2);     // big decoder only; twice
4779     ALU    : S3(2);     // any 2 alus
4780 %}
4781 
4782 // Integer ALU reg-reg operation
4783 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4784     single_instruction;
4785     dst    : S4(write);
4786     src    : S3(read);
4787     DECODE : S0;        // any decoder
4788     ALU    : S3;        // any alu
4789 %}
4790 
4791 // Long ALU reg-reg operation
4792 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4793     instruction_count(2);
4794     dst    : S4(write);
4795     src    : S3(read);
4796     DECODE : S0(2);     // any 2 decoders
4797     ALU    : S3(2);     // both alus
4798 %}
4799 
4800 // Integer ALU reg-reg operation
4801 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4802     single_instruction;
4803     dst    : S4(write);
4804     src    : S3(read);
4805     D0     : S0;        // big decoder only
4806     ALU    : S3;        // any alu
4807 %}
4808 
4809 // Long ALU reg-reg operation
4810 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4811     instruction_count(2);
4812     dst    : S4(write);
4813     src    : S3(read);
4814     D0     : S0(2);     // big decoder only; twice
4815     ALU    : S3(2);     // both alus
4816 %}
4817 
4818 // Integer ALU reg-mem operation
4819 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4820     single_instruction;
4821     dst    : S5(write);
4822     mem    : S3(read);
4823     D0     : S0;        // big decoder only
4824     ALU    : S4;        // any alu
4825     MEM    : S3;        // any mem
4826 %}
4827 
4828 // Long ALU reg-mem operation
4829 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4830     instruction_count(2);
4831     dst    : S5(write);
4832     mem    : S3(read);
4833     D0     : S0(2);     // big decoder only; twice
4834     ALU    : S4(2);     // any 2 alus
4835     MEM    : S3(2);     // both mems
4836 %}
4837 
4838 // Integer mem operation (prefetch)
4839 pipe_class ialu_mem(memory mem)
4840 %{
4841     single_instruction;
4842     mem    : S3(read);
4843     D0     : S0;        // big decoder only
4844     MEM    : S3;        // any mem
4845 %}
4846 
4847 // Integer Store to Memory
4848 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4849     single_instruction;
4850     mem    : S3(read);
4851     src    : S5(read);
4852     D0     : S0;        // big decoder only
4853     ALU    : S4;        // any alu
4854     MEM    : S3;
4855 %}
4856 
4857 // Long Store to Memory
4858 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4859     instruction_count(2);
4860     mem    : S3(read);
4861     src    : S5(read);
4862     D0     : S0(2);     // big decoder only; twice
4863     ALU    : S4(2);     // any 2 alus
4864     MEM    : S3(2);     // Both mems
4865 %}
4866 
4867 // Integer Store to Memory
4868 pipe_class ialu_mem_imm(memory mem) %{
4869     single_instruction;
4870     mem    : S3(read);
4871     D0     : S0;        // big decoder only
4872     ALU    : S4;        // any alu
4873     MEM    : S3;
4874 %}
4875 
4876 // Integer ALU0 reg-reg operation
4877 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4878     single_instruction;
4879     dst    : S4(write);
4880     src    : S3(read);
4881     D0     : S0;        // Big decoder only
4882     ALU0   : S3;        // only alu0
4883 %}
4884 
4885 // Integer ALU0 reg-mem operation
4886 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4887     single_instruction;
4888     dst    : S5(write);
4889     mem    : S3(read);
4890     D0     : S0;        // big decoder only
4891     ALU0   : S4;        // ALU0 only
4892     MEM    : S3;        // any mem
4893 %}
4894 
4895 // Integer ALU reg-reg operation
4896 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4897     single_instruction;
4898     cr     : S4(write);
4899     src1   : S3(read);
4900     src2   : S3(read);
4901     DECODE : S0;        // any decoder
4902     ALU    : S3;        // any alu
4903 %}
4904 
4905 // Integer ALU reg-imm operation
4906 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4907     single_instruction;
4908     cr     : S4(write);
4909     src1   : S3(read);
4910     DECODE : S0;        // any decoder
4911     ALU    : S3;        // any alu
4912 %}
4913 
4914 // Integer ALU reg-mem operation
4915 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4916     single_instruction;
4917     cr     : S4(write);
4918     src1   : S3(read);
4919     src2   : S3(read);
4920     D0     : S0;        // big decoder only
4921     ALU    : S4;        // any alu
4922     MEM    : S3;
4923 %}
4924 
4925 // Conditional move reg-reg
4926 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4927     instruction_count(4);
4928     y      : S4(read);
4929     q      : S3(read);
4930     p      : S3(read);
4931     DECODE : S0(4);     // any decoder
4932 %}
4933 
4934 // Conditional move reg-reg
4935 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4936     single_instruction;
4937     dst    : S4(write);
4938     src    : S3(read);
4939     cr     : S3(read);
4940     DECODE : S0;        // any decoder
4941 %}
4942 
4943 // Conditional move reg-mem
4944 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4945     single_instruction;
4946     dst    : S4(write);
4947     src    : S3(read);
4948     cr     : S3(read);
4949     DECODE : S0;        // any decoder
4950     MEM    : S3;
4951 %}
4952 
4953 // Conditional move reg-reg long
4954 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4955     single_instruction;
4956     dst    : S4(write);
4957     src    : S3(read);
4958     cr     : S3(read);
4959     DECODE : S0(2);     // any 2 decoders
4960 %}
4961 
4962 // Conditional move double reg-reg
4963 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4964     single_instruction;
4965     dst    : S4(write);
4966     src    : S3(read);
4967     cr     : S3(read);
4968     DECODE : S0;        // any decoder
4969 %}
4970 
4971 // Float reg-reg operation
4972 pipe_class fpu_reg(regDPR dst) %{
4973     instruction_count(2);
4974     dst    : S3(read);
4975     DECODE : S0(2);     // any 2 decoders
4976     FPU    : S3;
4977 %}
4978 
4979 // Float reg-reg operation
4980 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4981     instruction_count(2);
4982     dst    : S4(write);
4983     src    : S3(read);
4984     DECODE : S0(2);     // any 2 decoders
4985     FPU    : S3;
4986 %}
4987 
4988 // Float reg-reg operation
4989 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4990     instruction_count(3);
4991     dst    : S4(write);
4992     src1   : S3(read);
4993     src2   : S3(read);
4994     DECODE : S0(3);     // any 3 decoders
4995     FPU    : S3(2);
4996 %}
4997 
4998 // Float reg-reg operation
4999 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5000     instruction_count(4);
5001     dst    : S4(write);
5002     src1   : S3(read);
5003     src2   : S3(read);
5004     src3   : S3(read);
5005     DECODE : S0(4);     // any 4 decoders
5006     FPU    : S3(2);
5007 %}
5008 
5009 // Float reg-reg operation
5010 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5011     instruction_count(4);
5012     dst    : S4(write);
5013     src1   : S3(read);
5014     src2   : S3(read);
5015     src3   : S3(read);
5016     DECODE : S1(3);     // any 3 decoders
5017     D0     : S0;        // Big decoder only
5018     FPU    : S3(2);
5019     MEM    : S3;
5020 %}
5021 
5022 // Float reg-mem operation
5023 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5024     instruction_count(2);
5025     dst    : S5(write);
5026     mem    : S3(read);
5027     D0     : S0;        // big decoder only
5028     DECODE : S1;        // any decoder for FPU POP
5029     FPU    : S4;
5030     MEM    : S3;        // any mem
5031 %}
5032 
5033 // Float reg-mem operation
5034 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5035     instruction_count(3);
5036     dst    : S5(write);
5037     src1   : S3(read);
5038     mem    : S3(read);
5039     D0     : S0;        // big decoder only
5040     DECODE : S1(2);     // any 2 decoders for FPU POP
5041     FPU    : S4;
5042     MEM    : S3;        // any mem
5043 %}
5044 
5045 // Float mem-reg operation
5046 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5047     instruction_count(2);
5048     src    : S5(read);
5049     mem    : S3(read);
5050     DECODE : S0;        // any decoder for FPU PUSH
5051     D0     : S1;        // big decoder only
5052     FPU    : S4;
5053     MEM    : S3;        // any mem
5054 %}
5055 
5056 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5057     instruction_count(3);
5058     src1   : S3(read);
5059     src2   : S3(read);
5060     mem    : S3(read);
5061     DECODE : S0(2);     // any 2 decoders for FPU PUSH
5062     D0     : S1;        // big decoder only
5063     FPU    : S4;
5064     MEM    : S3;        // any mem
5065 %}
5066 
5067 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5068     instruction_count(3);
5069     src1   : S3(read);
5070     src2   : S3(read);
5071     mem    : S4(read);
5072     DECODE : S0;        // any decoder for FPU PUSH
5073     D0     : S0(2);     // big decoder only
5074     FPU    : S4;
5075     MEM    : S3(2);     // any mem
5076 %}
5077 
5078 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5079     instruction_count(2);
5080     src1   : S3(read);
5081     dst    : S4(read);
5082     D0     : S0(2);     // big decoder only
5083     MEM    : S3(2);     // any mem
5084 %}
5085 
5086 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5087     instruction_count(3);
5088     src1   : S3(read);
5089     src2   : S3(read);
5090     dst    : S4(read);
5091     D0     : S0(3);     // big decoder only
5092     FPU    : S4;
5093     MEM    : S3(3);     // any mem
5094 %}
5095 
5096 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5097     instruction_count(3);
5098     src1   : S4(read);
5099     mem    : S4(read);
5100     DECODE : S0;        // any decoder for FPU PUSH
5101     D0     : S0(2);     // big decoder only
5102     FPU    : S4;
5103     MEM    : S3(2);     // any mem
5104 %}
5105 
5106 // Float load constant
5107 pipe_class fpu_reg_con(regDPR dst) %{
5108     instruction_count(2);
5109     dst    : S5(write);
5110     D0     : S0;        // big decoder only for the load
5111     DECODE : S1;        // any decoder for FPU POP
5112     FPU    : S4;
5113     MEM    : S3;        // any mem
5114 %}
5115 
5116 // Float load constant
5117 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5118     instruction_count(3);
5119     dst    : S5(write);
5120     src    : S3(read);
5121     D0     : S0;        // big decoder only for the load
5122     DECODE : S1(2);     // any decoder for FPU POP
5123     FPU    : S4;
5124     MEM    : S3;        // any mem
5125 %}
5126 
5127 // Unconditional branch
5128 pipe_class pipe_jmp( label labl ) %{
5129     single_instruction;
5130     BR   : S3;
5131 %}
5132 
5133 // Conditional branch
5134 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5135     single_instruction;
5136     cr    : S1(read);
5137     BR    : S3;
5138 %}
5139 
5140 // Allocation idiom
5141 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5142     instruction_count(1); force_serialization;
5143     fixed_latency(6);
5144     heap_ptr : S3(read);
5145     DECODE   : S0(3);
5146     D0       : S2;
5147     MEM      : S3;
5148     ALU      : S3(2);
5149     dst      : S5(write);
5150     BR       : S5;
5151 %}
5152 
5153 // Generic big/slow expanded idiom
5154 pipe_class pipe_slow(  ) %{
5155     instruction_count(10); multiple_bundles; force_serialization;
5156     fixed_latency(100);
5157     D0  : S0(2);
5158     MEM : S3(2);
5159 %}
5160 
5161 // The real do-nothing guy
5162 pipe_class empty( ) %{
5163     instruction_count(0);
5164 %}
5165 
5166 // Define the class for the Nop node
5167 define %{
5168    MachNop = empty;
5169 %}
5170 
5171 %}
5172 
5173 //----------INSTRUCTIONS-------------------------------------------------------
5174 //
5175 // match      -- States which machine-independent subtree may be replaced
5176 //               by this instruction.
5177 // ins_cost   -- The estimated cost of this instruction is used by instruction
5178 //               selection to identify a minimum cost tree of machine
5179 //               instructions that matches a tree of machine-independent
5180 //               instructions.
5181 // format     -- A string providing the disassembly for this instruction.
5182 //               The value of an instruction's operand may be inserted
5183 //               by referring to it with a '$' prefix.
5184 // opcode     -- Three instruction opcodes may be provided.  These are referred
5185 //               to within an encode class as $primary, $secondary, and $tertiary
5186 //               respectively.  The primary opcode is commonly used to
5187 //               indicate the type of machine instruction, while secondary
5188 //               and tertiary are often used for prefix options or addressing
5189 //               modes.
5190 // ins_encode -- A list of encode classes with parameters. The encode class
5191 //               name must have been defined in an 'enc_class' specification
5192 //               in the encode section of the architecture description.
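//
// For illustration only -- a sketch of a minimal instruction using the $primary
// opcode (the real load/store definitions follow below):
//
//   instruct loadRange(rRegI dst, memory mem) %{
//     match(Set dst (LoadRange mem));
//     format %{ "MOV    $dst,$mem" %}
//     opcode(0x8B);                        // $primary: MOV r32,r/m32
//     ins_encode( OpcP, RegMem(dst,mem) ); // emit $primary, then the ModRM byte(s)
//     ins_pipe( ialu_reg_mem );
//   %}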
5193 
5194 //----------BSWAP-Instruction--------------------------------------------------
5195 instruct bytes_reverse_int(rRegI dst) %{
5196   match(Set dst (ReverseBytesI dst));
5197 
5198   format %{ "BSWAP  $dst" %}
5199   opcode(0x0F, 0xC8);
5200   ins_encode( OpcP, OpcSReg(dst) );
5201   ins_pipe( ialu_reg );
5202 %}
5203 
5204 instruct bytes_reverse_long(eRegL dst) %{
5205   match(Set dst (ReverseBytesL dst));
5206 
5207   format %{ "BSWAP  $dst.lo\n\t"
5208             "BSWAP  $dst.hi\n\t"
5209             "XCHG   $dst.lo $dst.hi" %}
5210 
5211   ins_cost(125);
5212   ins_encode( bswap_long_bytes(dst) );
5213   ins_pipe( ialu_reg_reg);
5214 %}
5215 
5216 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5217   match(Set dst (ReverseBytesUS dst));
5218   effect(KILL cr);
5219 
5220   format %{ "BSWAP  $dst\n\t"
5221             "SHR    $dst,16\n\t" %}
5222   ins_encode %{
5223     __ bswapl($dst$$Register);
5224     __ shrl($dst$$Register, 16);
5225   %}
5226   ins_pipe( ialu_reg );
5227 %}
5228 
5229 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5230   match(Set dst (ReverseBytesS dst));
5231   effect(KILL cr);
5232 
5233   format %{ "BSWAP  $dst\n\t"
5234             "SAR    $dst,16\n\t" %}
5235   ins_encode %{
5236     __ bswapl($dst$$Register);
5237     __ sarl($dst$$Register, 16);
5238   %}
5239   ins_pipe( ialu_reg );
5240 %}
5241 
5242 
5243 //---------- Zeros Count Instructions ------------------------------------------
5244 
5245 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5246   predicate(UseCountLeadingZerosInstruction);
5247   match(Set dst (CountLeadingZerosI src));
5248   effect(KILL cr);
5249 
5250   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5251   ins_encode %{
5252     __ lzcntl($dst$$Register, $src$$Register);
5253   %}
5254   ins_pipe(ialu_reg);
5255 %}
5256 
5257 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5258   predicate(!UseCountLeadingZerosInstruction);
5259   match(Set dst (CountLeadingZerosI src));
5260   effect(KILL cr);
5261 
5262   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5263             "JNZ    skip\n\t"
5264             "MOV    $dst, -1\n"
5265       "skip:\n\t"
5266             "NEG    $dst\n\t"
5267             "ADD    $dst, 31" %}
5268   ins_encode %{
5269     Register Rdst = $dst$$Register;
5270     Register Rsrc = $src$$Register;
5271     Label skip;
5272     __ bsrl(Rdst, Rsrc);
5273     __ jccb(Assembler::notZero, skip);
5274     __ movl(Rdst, -1);
5275     __ bind(skip);
5276     __ negl(Rdst);
5277     __ addl(Rdst, BitsPerInt - 1);
5278   %}
5279   ins_pipe(ialu_reg);
5280 %}
5281 
5282 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5283   predicate(UseCountLeadingZerosInstruction);
5284   match(Set dst (CountLeadingZerosL src));
5285   effect(TEMP dst, KILL cr);
5286 
5287   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5288             "JNC    done\n\t"
5289             "LZCNT  $dst, $src.lo\n\t"
5290             "ADD    $dst, 32\n"
5291       "done:" %}
5292   ins_encode %{
5293     Register Rdst = $dst$$Register;
5294     Register Rsrc = $src$$Register;
5295     Label done;
5296     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5297     __ jccb(Assembler::carryClear, done);
5298     __ lzcntl(Rdst, Rsrc);
5299     __ addl(Rdst, BitsPerInt);
5300     __ bind(done);
5301   %}
5302   ins_pipe(ialu_reg);
5303 %}
5304 
5305 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5306   predicate(!UseCountLeadingZerosInstruction);
5307   match(Set dst (CountLeadingZerosL src));
5308   effect(TEMP dst, KILL cr);
5309 
5310   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5311             "JZ     msw_is_zero\n\t"
5312             "ADD    $dst, 32\n\t"
5313             "JMP    not_zero\n"
5314       "msw_is_zero:\n\t"
5315             "BSR    $dst, $src.lo\n\t"
5316             "JNZ    not_zero\n\t"
5317             "MOV    $dst, -1\n"
5318       "not_zero:\n\t"
5319             "NEG    $dst\n\t"
5320             "ADD    $dst, 63\n" %}
5321   ins_encode %{
5322     Register Rdst = $dst$$Register;
5323     Register Rsrc = $src$$Register;
5324     Label msw_is_zero;
5325     Label not_zero;
5326     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5327     __ jccb(Assembler::zero, msw_is_zero);
5328     __ addl(Rdst, BitsPerInt);
5329     __ jmpb(not_zero);
5330     __ bind(msw_is_zero);
5331     __ bsrl(Rdst, Rsrc);
5332     __ jccb(Assembler::notZero, not_zero);
5333     __ movl(Rdst, -1);
5334     __ bind(not_zero);
5335     __ negl(Rdst);
5336     __ addl(Rdst, BitsPerLong - 1);
5337   %}
5338   ins_pipe(ialu_reg);
5339 %}
5340 
5341 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5342   predicate(UseCountTrailingZerosInstruction);
5343   match(Set dst (CountTrailingZerosI src));
5344   effect(KILL cr);
5345 
5346   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5347   ins_encode %{
5348     __ tzcntl($dst$$Register, $src$$Register);
5349   %}
5350   ins_pipe(ialu_reg);
5351 %}
5352 
5353 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5354   predicate(!UseCountTrailingZerosInstruction);
5355   match(Set dst (CountTrailingZerosI src));
5356   effect(KILL cr);
5357 
5358   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5359             "JNZ    done\n\t"
5360             "MOV    $dst, 32\n"
5361       "done:" %}
5362   ins_encode %{
5363     Register Rdst = $dst$$Register;
5364     Label done;
5365     __ bsfl(Rdst, $src$$Register);
5366     __ jccb(Assembler::notZero, done);
5367     __ movl(Rdst, BitsPerInt);
5368     __ bind(done);
5369   %}
5370   ins_pipe(ialu_reg);
5371 %}
5372 
5373 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5374   predicate(UseCountTrailingZerosInstruction);
5375   match(Set dst (CountTrailingZerosL src));
5376   effect(TEMP dst, KILL cr);
5377 
5378   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5379             "JNC    done\n\t"
5380             "TZCNT  $dst, $src.hi\n\t"
5381             "ADD    $dst, 32\n"
5382       "done:" %}
5383   ins_encode %{
5384     Register Rdst = $dst$$Register;
5385     Register Rsrc = $src$$Register;
5386     Label done;
5387     __ tzcntl(Rdst, Rsrc);
5388     __ jccb(Assembler::carryClear, done);
5389     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5390     __ addl(Rdst, BitsPerInt);
5391     __ bind(done);
5392   %}
5393   ins_pipe(ialu_reg);
5394 %}
5395 
5396 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5397   predicate(!UseCountTrailingZerosInstruction);
5398   match(Set dst (CountTrailingZerosL src));
5399   effect(TEMP dst, KILL cr);
5400 
5401   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5402             "JNZ    done\n\t"
5403             "BSF    $dst, $src.hi\n\t"
5404             "JNZ    msw_not_zero\n\t"
5405             "MOV    $dst, 32\n"
5406       "msw_not_zero:\n\t"
5407             "ADD    $dst, 32\n"
5408       "done:" %}
5409   ins_encode %{
5410     Register Rdst = $dst$$Register;
5411     Register Rsrc = $src$$Register;
5412     Label msw_not_zero;
5413     Label done;
5414     __ bsfl(Rdst, Rsrc);
5415     __ jccb(Assembler::notZero, done);
5416     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5417     __ jccb(Assembler::notZero, msw_not_zero);
5418     __ movl(Rdst, BitsPerInt);
5419     __ bind(msw_not_zero);
5420     __ addl(Rdst, BitsPerInt);
5421     __ bind(done);
5422   %}
5423   ins_pipe(ialu_reg);
5424 %}
5425 
5426 
5427 //---------- Population Count Instructions -------------------------------------
5428 
5429 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5430   predicate(UsePopCountInstruction);
5431   match(Set dst (PopCountI src));
5432   effect(KILL cr);
5433 
5434   format %{ "POPCNT $dst, $src" %}
5435   ins_encode %{
5436     __ popcntl($dst$$Register, $src$$Register);
5437   %}
5438   ins_pipe(ialu_reg);
5439 %}
5440 
5441 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5442   predicate(UsePopCountInstruction);
5443   match(Set dst (PopCountI (LoadI mem)));
5444   effect(KILL cr);
5445 
5446   format %{ "POPCNT $dst, $mem" %}
5447   ins_encode %{
5448     __ popcntl($dst$$Register, $mem$$Address);
5449   %}
5450   ins_pipe(ialu_reg);
5451 %}
5452 
5453 // Note: Long.bitCount(long) returns an int.
5454 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5455   predicate(UsePopCountInstruction);
5456   match(Set dst (PopCountL src));
5457   effect(KILL cr, TEMP tmp, TEMP dst);
5458 
5459   format %{ "POPCNT $dst, $src.lo\n\t"
5460             "POPCNT $tmp, $src.hi\n\t"
5461             "ADD    $dst, $tmp" %}
5462   ins_encode %{
5463     __ popcntl($dst$$Register, $src$$Register);
5464     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5465     __ addl($dst$$Register, $tmp$$Register);
5466   %}
5467   ins_pipe(ialu_reg);
5468 %}
5469 
5470 // Note: Long.bitCount(long) returns an int.
5471 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5472   predicate(UsePopCountInstruction);
5473   match(Set dst (PopCountL (LoadL mem)));
5474   effect(KILL cr, TEMP tmp, TEMP dst);
5475 
5476   format %{ "POPCNT $dst, $mem\n\t"
5477             "POPCNT $tmp, $mem+4\n\t"
5478             "ADD    $dst, $tmp" %}
5479   ins_encode %{
5480     //__ popcntl($dst$$Register, $mem$$Address$$first);
5481     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5482     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5483     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5484     __ addl($dst$$Register, $tmp$$Register);
5485   %}
5486   ins_pipe(ialu_reg);
5487 %}
5488 
5489 
5490 //----------Load/Store/Move Instructions---------------------------------------
5491 //----------Load Instructions--------------------------------------------------
5492 // Load Byte (8bit signed)
5493 instruct loadB(xRegI dst, memory mem) %{
5494   match(Set dst (LoadB mem));
5495 
5496   ins_cost(125);
5497   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5498 
5499   ins_encode %{
5500     __ movsbl($dst$$Register, $mem$$Address);
5501   %}
5502 
5503   ins_pipe(ialu_reg_mem);
5504 %}
5505 
5506 // Load Byte (8bit signed) into Long Register
5507 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5508   match(Set dst (ConvI2L (LoadB mem)));
5509   effect(KILL cr);
5510 
5511   ins_cost(375);
5512   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5513             "MOV    $dst.hi,$dst.lo\n\t"
5514             "SAR    $dst.hi,7" %}
5515 
5516   ins_encode %{
5517     __ movsbl($dst$$Register, $mem$$Address);
5518     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5519     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5520   %}
5521 
5522   ins_pipe(ialu_reg_mem);
5523 %}
5524 
5525 // Load Unsigned Byte (8bit UNsigned)
5526 instruct loadUB(xRegI dst, memory mem) %{
5527   match(Set dst (LoadUB mem));
5528 
5529   ins_cost(125);
5530   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5531 
5532   ins_encode %{
5533     __ movzbl($dst$$Register, $mem$$Address);
5534   %}
5535 
5536   ins_pipe(ialu_reg_mem);
5537 %}
5538 
5539 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5540 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5541   match(Set dst (ConvI2L (LoadUB mem)));
5542   effect(KILL cr);
5543 
5544   ins_cost(250);
5545   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5546             "XOR    $dst.hi,$dst.hi" %}
5547 
5548   ins_encode %{
5549     Register Rdst = $dst$$Register;
5550     __ movzbl(Rdst, $mem$$Address);
5551     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5552   %}
5553 
5554   ins_pipe(ialu_reg_mem);
5555 %}
5556 
5557 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5558 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5559   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5560   effect(KILL cr);
5561 
5562   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5563             "XOR    $dst.hi,$dst.hi\n\t"
5564             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5565   ins_encode %{
5566     Register Rdst = $dst$$Register;
5567     __ movzbl(Rdst, $mem$$Address);
5568     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5569     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5570   %}
5571   ins_pipe(ialu_reg_mem);
5572 %}
5573 
5574 // Load Short (16bit signed)
5575 instruct loadS(rRegI dst, memory mem) %{
5576   match(Set dst (LoadS mem));
5577 
5578   ins_cost(125);
5579   format %{ "MOVSX  $dst,$mem\t# short" %}
5580 
5581   ins_encode %{
5582     __ movswl($dst$$Register, $mem$$Address);
5583   %}
5584 
5585   ins_pipe(ialu_reg_mem);
5586 %}
5587 
5588 // Load Short (16 bit signed) to Byte (8 bit signed)
5589 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5590   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5591 
5592   ins_cost(125);
5593   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5594   ins_encode %{
5595     __ movsbl($dst$$Register, $mem$$Address);
5596   %}
5597   ins_pipe(ialu_reg_mem);
5598 %}
5599 
5600 // Load Short (16bit signed) into Long Register
5601 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5602   match(Set dst (ConvI2L (LoadS mem)));
5603   effect(KILL cr);
5604 
5605   ins_cost(375);
5606   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5607             "MOV    $dst.hi,$dst.lo\n\t"
5608             "SAR    $dst.hi,15" %}
5609 
5610   ins_encode %{
5611     __ movswl($dst$$Register, $mem$$Address);
5612     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5613     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5614   %}
5615 
5616   ins_pipe(ialu_reg_mem);
5617 %}
5618 
5619 // Load Unsigned Short/Char (16bit unsigned)
5620 instruct loadUS(rRegI dst, memory mem) %{
5621   match(Set dst (LoadUS mem));
5622 
5623   ins_cost(125);
5624   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5625 
5626   ins_encode %{
5627     __ movzwl($dst$$Register, $mem$$Address);
5628   %}
5629 
5630   ins_pipe(ialu_reg_mem);
5631 %}
5632 
5633 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5634 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5635   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5636 
5637   ins_cost(125);
5638   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5639   ins_encode %{
5640     __ movsbl($dst$$Register, $mem$$Address);
5641   %}
5642   ins_pipe(ialu_reg_mem);
5643 %}
5644 
5645 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5646 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5647   match(Set dst (ConvI2L (LoadUS mem)));
5648   effect(KILL cr);
5649 
5650   ins_cost(250);
5651   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5652             "XOR    $dst.hi,$dst.hi" %}
5653 
5654   ins_encode %{
5655     __ movzwl($dst$$Register, $mem$$Address);
5656     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5657   %}
5658 
5659   ins_pipe(ialu_reg_mem);
5660 %}
5661 
5662 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5663 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5664   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5665   effect(KILL cr);
5666 
5667   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5668             "XOR    $dst.hi,$dst.hi" %}
5669   ins_encode %{
5670     Register Rdst = $dst$$Register;
5671     __ movzbl(Rdst, $mem$$Address);
5672     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5673   %}
5674   ins_pipe(ialu_reg_mem);
5675 %}
5676 
5677 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5678 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5679   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5680   effect(KILL cr);
5681 
5682   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5683             "XOR    $dst.hi,$dst.hi\n\t"
5684             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5685   ins_encode %{
5686     Register Rdst = $dst$$Register;
5687     __ movzwl(Rdst, $mem$$Address);
5688     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5689     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5690   %}
5691   ins_pipe(ialu_reg_mem);
5692 %}
5693 
5694 // Load Integer
5695 instruct loadI(rRegI dst, memory mem) %{
5696   match(Set dst (LoadI mem));
5697 
5698   ins_cost(125);
5699   format %{ "MOV    $dst,$mem\t# int" %}
5700 
5701   ins_encode %{
5702     __ movl($dst$$Register, $mem$$Address);
5703   %}
5704 
5705   ins_pipe(ialu_reg_mem);
5706 %}
5707 
5708 // Load Integer (32 bit signed) to Byte (8 bit signed)
5709 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5710   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5711 
5712   ins_cost(125);
5713   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5714   ins_encode %{
5715     __ movsbl($dst$$Register, $mem$$Address);
5716   %}
5717   ins_pipe(ialu_reg_mem);
5718 %}
5719 
5720 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5721 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5722   match(Set dst (AndI (LoadI mem) mask));
5723 
5724   ins_cost(125);
5725   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5726   ins_encode %{
5727     __ movzbl($dst$$Register, $mem$$Address);
5728   %}
5729   ins_pipe(ialu_reg_mem);
5730 %}
5731 
5732 // Load Integer (32 bit signed) to Short (16 bit signed)
5733 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5734   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5735 
5736   ins_cost(125);
5737   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5738   ins_encode %{
5739     __ movswl($dst$$Register, $mem$$Address);
5740   %}
5741   ins_pipe(ialu_reg_mem);
5742 %}
5743 
5744 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5745 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5746   match(Set dst (AndI (LoadI mem) mask));
5747 
5748   ins_cost(125);
5749   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5750   ins_encode %{
5751     __ movzwl($dst$$Register, $mem$$Address);
5752   %}
5753   ins_pipe(ialu_reg_mem);
5754 %}
5755 
5756 // Load Integer into Long Register
5757 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5758   match(Set dst (ConvI2L (LoadI mem)));
5759   effect(KILL cr);
5760 
5761   ins_cost(375);
5762   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5763             "MOV    $dst.hi,$dst.lo\n\t"
5764             "SAR    $dst.hi,31" %}
5765 
5766   ins_encode %{
5767     __ movl($dst$$Register, $mem$$Address);
5768     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5769     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5770   %}
5771 
5772   ins_pipe(ialu_reg_mem);
5773 %}
5774 
5775 // Load Integer with mask 0xFF into Long Register
5776 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5777   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5778   effect(KILL cr);
5779 
5780   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5781             "XOR    $dst.hi,$dst.hi" %}
5782   ins_encode %{
5783     Register Rdst = $dst$$Register;
5784     __ movzbl(Rdst, $mem$$Address);
5785     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5786   %}
5787   ins_pipe(ialu_reg_mem);
5788 %}
5789 
5790 // Load Integer with mask 0xFFFF into Long Register
5791 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5792   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5793   effect(KILL cr);
5794 
5795   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5796             "XOR    $dst.hi,$dst.hi" %}
5797   ins_encode %{
5798     Register Rdst = $dst$$Register;
5799     __ movzwl(Rdst, $mem$$Address);
5800     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5801   %}
5802   ins_pipe(ialu_reg_mem);
5803 %}
5804 
5805 // Load Integer with 31-bit mask into Long Register
5806 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5807   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5808   effect(KILL cr);
5809 
5810   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5811             "XOR    $dst.hi,$dst.hi\n\t"
5812             "AND    $dst.lo,$mask" %}
5813   ins_encode %{
5814     Register Rdst = $dst$$Register;
5815     __ movl(Rdst, $mem$$Address);
5816     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5817     __ andl(Rdst, $mask$$constant);
5818   %}
5819   ins_pipe(ialu_reg_mem);
5820 %}
5821 
5822 // Load Unsigned Integer into Long Register
5823 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5824   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5825   effect(KILL cr);
5826 
5827   ins_cost(250);
5828   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5829             "XOR    $dst.hi,$dst.hi" %}
5830 
5831   ins_encode %{
5832     __ movl($dst$$Register, $mem$$Address);
5833     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5834   %}
5835 
5836   ins_pipe(ialu_reg_mem);
5837 %}
5838 
5839 // Load Long.  Cannot clobber address while loading, so restrict address
5840 // register to ESI
5841 instruct loadL(eRegL dst, load_long_memory mem) %{
5842   predicate(!((LoadLNode*)n)->require_atomic_access());
5843   match(Set dst (LoadL mem));
5844 
5845   ins_cost(250);
5846   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5847             "MOV    $dst.hi,$mem+4" %}
5848 
5849   ins_encode %{
5850     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5851     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5852     __ movl($dst$$Register, Amemlo);
5853     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5854   %}
5855 
5856   ins_pipe(ialu_reg_long_mem);
5857 %}
5858 
5859 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5860 // then store it down to the stack and reload on the int
5861 // side.
5862 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5863   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5864   match(Set dst (LoadL mem));
5865 
5866   ins_cost(200);
5867   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5868             "FISTp  $dst" %}
5869   ins_encode(enc_loadL_volatile(mem,dst));
5870   ins_pipe( fpu_reg_mem );
5871 %}
5872 
5873 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5874   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5875   match(Set dst (LoadL mem));
5876   effect(TEMP tmp);
5877   ins_cost(180);
5878   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5879             "MOVSD  $dst,$tmp" %}
5880   ins_encode %{
5881     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5882     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5883   %}
5884   ins_pipe( pipe_slow );
5885 %}
5886 
5887 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5888   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5889   match(Set dst (LoadL mem));
5890   effect(TEMP tmp);
5891   ins_cost(160);
5892   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5893             "MOVD   $dst.lo,$tmp\n\t"
5894             "PSRLQ  $tmp,32\n\t"
5895             "MOVD   $dst.hi,$tmp" %}
5896   ins_encode %{
5897     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5898     __ movdl($dst$$Register, $tmp$$XMMRegister);
5899     __ psrlq($tmp$$XMMRegister, 32);
5900     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5901   %}
5902   ins_pipe( pipe_slow );
5903 %}
5904 
5905 // Load Range
5906 instruct loadRange(rRegI dst, memory mem) %{
5907   match(Set dst (LoadRange mem));
5908 
5909   ins_cost(125);
5910   format %{ "MOV    $dst,$mem" %}
5911   opcode(0x8B);
5912   ins_encode( OpcP, RegMem(dst,mem));
5913   ins_pipe( ialu_reg_mem );
5914 %}
5915 
5916 
5917 // Load Pointer
5918 instruct loadP(eRegP dst, memory mem) %{
5919   match(Set dst (LoadP mem));
5920 
5921   ins_cost(125);
5922   format %{ "MOV    $dst,$mem" %}
5923   opcode(0x8B);
5924   ins_encode( OpcP, RegMem(dst,mem));
5925   ins_pipe( ialu_reg_mem );
5926 %}
5927 
5928 // Load Klass Pointer
5929 instruct loadKlass(eRegP dst, memory mem) %{
5930   match(Set dst (LoadKlass mem));
5931 
5932   ins_cost(125);
5933   format %{ "MOV    $dst,$mem" %}
5934   opcode(0x8B);
5935   ins_encode( OpcP, RegMem(dst,mem));
5936   ins_pipe( ialu_reg_mem );
5937 %}
5938 
5939 // Load Double
5940 instruct loadDPR(regDPR dst, memory mem) %{
5941   predicate(UseSSE<=1);
5942   match(Set dst (LoadD mem));
5943 
5944   ins_cost(150);
5945   format %{ "FLD_D  ST,$mem\n\t"
5946             "FSTP   $dst" %}
5947   opcode(0xDD);               /* DD /0 */
5948   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5949               Pop_Reg_DPR(dst) );
5950   ins_pipe( fpu_reg_mem );
5951 %}
5952 
5953 // Load Double to XMM
5954 instruct loadD(regD dst, memory mem) %{
5955   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5956   match(Set dst (LoadD mem));
5957   ins_cost(145);
5958   format %{ "MOVSD  $dst,$mem" %}
5959   ins_encode %{
5960     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5961   %}
5962   ins_pipe( pipe_slow );
5963 %}
5964 
5965 instruct loadD_partial(regD dst, memory mem) %{
5966   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5967   match(Set dst (LoadD mem));
5968   ins_cost(145);
5969   format %{ "MOVLPD $dst,$mem" %}
5970   ins_encode %{
5971     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5972   %}
5973   ins_pipe( pipe_slow );
5974 %}
5975 
5976 // Load to XMM register (single-precision floating point)
5977 // MOVSS instruction
5978 instruct loadF(regF dst, memory mem) %{
5979   predicate(UseSSE>=1);
5980   match(Set dst (LoadF mem));
5981   ins_cost(145);
5982   format %{ "MOVSS  $dst,$mem" %}
5983   ins_encode %{
5984     __ movflt ($dst$$XMMRegister, $mem$$Address);
5985   %}
5986   ins_pipe( pipe_slow );
5987 %}
5988 
5989 // Load Float
5990 instruct loadFPR(regFPR dst, memory mem) %{
5991   predicate(UseSSE==0);
5992   match(Set dst (LoadF mem));
5993 
5994   ins_cost(150);
5995   format %{ "FLD_S  ST,$mem\n\t"
5996             "FSTP   $dst" %}
5997   opcode(0xD9);               /* D9 /0 */
5998   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5999               Pop_Reg_FPR(dst) );
6000   ins_pipe( fpu_reg_mem );
6001 %}
6002 
6003 // Load Effective Address
6004 instruct leaP8(eRegP dst, indOffset8 mem) %{
6005   match(Set dst mem);
6006 
6007   ins_cost(110);
6008   format %{ "LEA    $dst,$mem" %}
6009   opcode(0x8D);
6010   ins_encode( OpcP, RegMem(dst,mem));
6011   ins_pipe( ialu_reg_reg_fat );
6012 %}
6013 
6014 instruct leaP32(eRegP dst, indOffset32 mem) %{
6015   match(Set dst mem);
6016 
6017   ins_cost(110);
6018   format %{ "LEA    $dst,$mem" %}
6019   opcode(0x8D);
6020   ins_encode( OpcP, RegMem(dst,mem));
6021   ins_pipe( ialu_reg_reg_fat );
6022 %}
6023 
6024 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6025   match(Set dst mem);
6026 
6027   ins_cost(110);
6028   format %{ "LEA    $dst,$mem" %}
6029   opcode(0x8D);
6030   ins_encode( OpcP, RegMem(dst,mem));
6031   ins_pipe( ialu_reg_reg_fat );
6032 %}
6033 
6034 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6035   match(Set dst mem);
6036 
6037   ins_cost(110);
6038   format %{ "LEA    $dst,$mem" %}
6039   opcode(0x8D);
6040   ins_encode( OpcP, RegMem(dst,mem));
6041   ins_pipe( ialu_reg_reg_fat );
6042 %}
6043 
6044 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6045   match(Set dst mem);
6046 
6047   ins_cost(110);
6048   format %{ "LEA    $dst,$mem" %}
6049   opcode(0x8D);
6050   ins_encode( OpcP, RegMem(dst,mem));
6051   ins_pipe( ialu_reg_reg_fat );
6052 %}
6053 
6054 // Load Constant
6055 instruct loadConI(rRegI dst, immI src) %{
6056   match(Set dst src);
6057 
6058   format %{ "MOV    $dst,$src" %}
6059   ins_encode( LdImmI(dst, src) );
6060   ins_pipe( ialu_reg_fat );
6061 %}
6062 
6063 // Load Constant zero
6064 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6065   match(Set dst src);
6066   effect(KILL cr);
6067 
6068   ins_cost(50);
6069   format %{ "XOR    $dst,$dst" %}
6070   opcode(0x33);  /* XOR r32,r/m32 */
6071   ins_encode( OpcP, RegReg( dst, dst ) );
6072   ins_pipe( ialu_reg );
6073 %}
6074 
6075 instruct loadConP(eRegP dst, immP src) %{
6076   match(Set dst src);
6077 
6078   format %{ "MOV    $dst,$src" %}
6079   opcode(0xB8);  /* + rd */
6080   ins_encode( LdImmP(dst, src) );
6081   ins_pipe( ialu_reg_fat );
6082 %}
6083 
6084 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6085   match(Set dst src);
6086   effect(KILL cr);
6087   ins_cost(200);
6088   format %{ "MOV    $dst.lo,$src.lo\n\t"
6089             "MOV    $dst.hi,$src.hi" %}
6090   opcode(0xB8);
6091   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6092   ins_pipe( ialu_reg_long_fat );
6093 %}
6094 
6095 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6096   match(Set dst src);
6097   effect(KILL cr);
6098   ins_cost(150);
6099   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6100             "XOR    $dst.hi,$dst.hi" %}
6101   opcode(0x33,0x33);
6102   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6103   ins_pipe( ialu_reg_long );
6104 %}
6105 
6106 // The instruction usage is guarded by predicate in operand immFPR().
6107 instruct loadConFPR(regFPR dst, immFPR con) %{
6108   match(Set dst con);
6109   ins_cost(125);
6110   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6111             "FSTP   $dst" %}
6112   ins_encode %{
6113     __ fld_s($constantaddress($con));
6114     __ fstp_d($dst$$reg);
6115   %}
6116   ins_pipe(fpu_reg_con);
6117 %}
6118 
6119 // The instruction usage is guarded by predicate in operand immFPR0().
6120 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6121   match(Set dst con);
6122   ins_cost(125);
6123   format %{ "FLDZ   ST\n\t"
6124             "FSTP   $dst" %}
6125   ins_encode %{
6126     __ fldz();
6127     __ fstp_d($dst$$reg);
6128   %}
6129   ins_pipe(fpu_reg_con);
6130 %}
6131 
6132 // The instruction usage is guarded by predicate in operand immFPR1().
6133 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6134   match(Set dst con);
6135   ins_cost(125);
6136   format %{ "FLD1   ST\n\t"
6137             "FSTP   $dst" %}
6138   ins_encode %{
6139     __ fld1();
6140     __ fstp_d($dst$$reg);
6141   %}
6142   ins_pipe(fpu_reg_con);
6143 %}
6144 
6145 // The instruction usage is guarded by predicate in operand immF().
6146 instruct loadConF(regF dst, immF con) %{
6147   match(Set dst con);
6148   ins_cost(125);
6149   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6150   ins_encode %{
6151     __ movflt($dst$$XMMRegister, $constantaddress($con));
6152   %}
6153   ins_pipe(pipe_slow);
6154 %}
6155 
6156 // The instruction usage is guarded by predicate in operand immF0().
6157 instruct loadConF0(regF dst, immF0 src) %{
6158   match(Set dst src);
6159   ins_cost(100);
6160   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6161   ins_encode %{
6162     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6163   %}
6164   ins_pipe(pipe_slow);
6165 %}
6166 
6167 // The instruction usage is guarded by predicate in operand immDPR().
6168 instruct loadConDPR(regDPR dst, immDPR con) %{
6169   match(Set dst con);
6170   ins_cost(125);
6171 
6172   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6173             "FSTP   $dst" %}
6174   ins_encode %{
6175     __ fld_d($constantaddress($con));
6176     __ fstp_d($dst$$reg);
6177   %}
6178   ins_pipe(fpu_reg_con);
6179 %}
6180 
6181 // The instruction usage is guarded by predicate in operand immDPR0().
6182 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6183   match(Set dst con);
6184   ins_cost(125);
6185 
6186   format %{ "FLDZ   ST\n\t"
6187             "FSTP   $dst" %}
6188   ins_encode %{
6189     __ fldz();
6190     __ fstp_d($dst$$reg);
6191   %}
6192   ins_pipe(fpu_reg_con);
6193 %}
6194 
6195 // The instruction usage is guarded by predicate in operand immDPR1().
6196 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6197   match(Set dst con);
6198   ins_cost(125);
6199 
6200   format %{ "FLD1   ST\n\t"
6201             "FSTP   $dst" %}
6202   ins_encode %{
6203     __ fld1();
6204     __ fstp_d($dst$$reg);
6205   %}
6206   ins_pipe(fpu_reg_con);
6207 %}
6208 
6209 // The instruction usage is guarded by predicate in operand immD().
6210 instruct loadConD(regD dst, immD con) %{
6211   match(Set dst con);
6212   ins_cost(125);
6213   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6214   ins_encode %{
6215     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6216   %}
6217   ins_pipe(pipe_slow);
6218 %}
6219 
6220 // The instruction usage is guarded by predicate in operand immD0().
6221 instruct loadConD0(regD dst, immD0 src) %{
6222   match(Set dst src);
6223   ins_cost(100);
6224   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6225   ins_encode %{
6226     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6227   %}
6228   ins_pipe( pipe_slow );
6229 %}
6230 
6231 // Load Stack Slot
6232 instruct loadSSI(rRegI dst, stackSlotI src) %{
6233   match(Set dst src);
6234   ins_cost(125);
6235 
6236   format %{ "MOV    $dst,$src" %}
6237   opcode(0x8B);
6238   ins_encode( OpcP, RegMem(dst,src));
6239   ins_pipe( ialu_reg_mem );
6240 %}
6241 
6242 instruct loadSSL(eRegL dst, stackSlotL src) %{
6243   match(Set dst src);
6244 
6245   ins_cost(200);
6246   format %{ "MOV    $dst,$src.lo\n\t"
6247             "MOV    $dst+4,$src.hi" %}
6248   opcode(0x8B, 0x8B);
6249   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6250   ins_pipe( ialu_mem_long_reg );
6251 %}
6252 
6253 // Load Stack Slot
6254 instruct loadSSP(eRegP dst, stackSlotP src) %{
6255   match(Set dst src);
6256   ins_cost(125);
6257 
6258   format %{ "MOV    $dst,$src" %}
6259   opcode(0x8B);
6260   ins_encode( OpcP, RegMem(dst,src));
6261   ins_pipe( ialu_reg_mem );
6262 %}
6263 
6264 // Load Stack Slot
6265 instruct loadSSF(regFPR dst, stackSlotF src) %{
6266   match(Set dst src);
6267   ins_cost(125);
6268 
6269   format %{ "FLD_S  $src\n\t"
6270             "FSTP   $dst" %}
6271   opcode(0xD9);               /* D9 /0, FLD m32real */
6272   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6273               Pop_Reg_FPR(dst) );
6274   ins_pipe( fpu_reg_mem );
6275 %}
6276 
6277 // Load Stack Slot
6278 instruct loadSSD(regDPR dst, stackSlotD src) %{
6279   match(Set dst src);
6280   ins_cost(125);
6281 
6282   format %{ "FLD_D  $src\n\t"
6283             "FSTP   $dst" %}
6284   opcode(0xDD);               /* DD /0, FLD m64real */
6285   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6286               Pop_Reg_DPR(dst) );
6287   ins_pipe( fpu_reg_mem );
6288 %}
6289 
6290 // Prefetch instructions for allocation.
6291 // Must be safe to execute with invalid address (cannot fault).
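     // The flavor below is selected by AllocatePrefetchInstr: 0 emits
     // PREFETCHNTA, 1 PREFETCHT0, 2 PREFETCHT2 and 3 PREFETCHW; without SSE
     // (and unless PREFETCHW was requested) the node degenerates to an empty
     // encoding.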
6292 
6293 instruct prefetchAlloc0( memory mem ) %{
6294   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6295   match(PrefetchAllocation mem);
6296   ins_cost(0);
6297   size(0);
6298   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6299   ins_encode();
6300   ins_pipe(empty);
6301 %}
6302 
6303 instruct prefetchAlloc( memory mem ) %{
6304   predicate(AllocatePrefetchInstr==3);
6305   match( PrefetchAllocation mem );
6306   ins_cost(100);
6307 
6308   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6309   ins_encode %{
6310     __ prefetchw($mem$$Address);
6311   %}
6312   ins_pipe(ialu_mem);
6313 %}
6314 
6315 instruct prefetchAllocNTA( memory mem ) %{
6316   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6317   match(PrefetchAllocation mem);
6318   ins_cost(100);
6319 
6320   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6321   ins_encode %{
6322     __ prefetchnta($mem$$Address);
6323   %}
6324   ins_pipe(ialu_mem);
6325 %}
6326 
6327 instruct prefetchAllocT0( memory mem ) %{
6328   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6329   match(PrefetchAllocation mem);
6330   ins_cost(100);
6331 
6332   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6333   ins_encode %{
6334     __ prefetcht0($mem$$Address);
6335   %}
6336   ins_pipe(ialu_mem);
6337 %}
6338 
6339 instruct prefetchAllocT2( memory mem ) %{
6340   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6341   match(PrefetchAllocation mem);
6342   ins_cost(100);
6343 
6344   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6345   ins_encode %{
6346     __ prefetcht2($mem$$Address);
6347   %}
6348   ins_pipe(ialu_mem);
6349 %}
6350 
6351 //----------Store Instructions-------------------------------------------------
6352 
6353 // Store Byte
6354 instruct storeB(memory mem, xRegI src) %{
6355   match(Set mem (StoreB mem src));
6356 
6357   ins_cost(125);
6358   format %{ "MOV8   $mem,$src" %}
6359   opcode(0x88);
6360   ins_encode( OpcP, RegMem( src, mem ) );
6361   ins_pipe( ialu_mem_reg );
6362 %}
6363 
6364 // Store Char/Short
6365 instruct storeC(memory mem, rRegI src) %{
6366   match(Set mem (StoreC mem src));
6367 
6368   ins_cost(125);
6369   format %{ "MOV16  $mem,$src" %}
6370   opcode(0x89, 0x66);
6371   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6372   ins_pipe( ialu_mem_reg );
6373 %}
6374 
6375 // Store Integer
6376 instruct storeI(memory mem, rRegI src) %{
6377   match(Set mem (StoreI mem src));
6378 
6379   ins_cost(125);
6380   format %{ "MOV    $mem,$src" %}
6381   opcode(0x89);
6382   ins_encode( OpcP, RegMem( src, mem ) );
6383   ins_pipe( ialu_mem_reg );
6384 %}
6385 
6386 // Store Long
6387 instruct storeL(long_memory mem, eRegL src) %{
6388   predicate(!((StoreLNode*)n)->require_atomic_access());
6389   match(Set mem (StoreL mem src));
6390 
6391   ins_cost(200);
6392   format %{ "MOV    $mem,$src.lo\n\t"
6393             "MOV    $mem+4,$src.hi" %}
6394   opcode(0x89, 0x89);
6395   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6396   ins_pipe( ialu_mem_long_reg );
6397 %}
6398 
6399 // Store Long to Integer
6400 instruct storeL2I(memory mem, eRegL src) %{
6401   match(Set mem (StoreI mem (ConvL2I src)));
6402 
6403   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6404   ins_encode %{
6405     __ movl($mem$$Address, $src$$Register);
6406   %}
6407   ins_pipe(ialu_mem_reg);
6408 %}
6409 
6410 // Volatile Store Long.  Must be atomic, so move it into
6411 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6412 // target address before the store (for null-ptr checks)
6413 // so the memory operand is used twice in the encoding.
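     // (This relies on FILD/FISTP moving the whole aligned 64-bit value in a
     // single memory access, which is what supplies the atomicity when SSE2,
     // and hence the MOVSD forms below, is not available.)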
6414 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6415   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6416   match(Set mem (StoreL mem src));
6417   effect( KILL cr );
6418   ins_cost(400);
6419   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6420             "FILD   $src\n\t"
6421             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6422   opcode(0x3B);
6423   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6424   ins_pipe( fpu_reg_mem );
6425 %}
6426 
6427 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6428   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6429   match(Set mem (StoreL mem src));
6430   effect( TEMP tmp, KILL cr );
6431   ins_cost(380);
6432   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6433             "MOVSD  $tmp,$src\n\t"
6434             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6435   ins_encode %{
6436     __ cmpl(rax, $mem$$Address);
6437     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6438     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6439   %}
6440   ins_pipe( pipe_slow );
6441 %}
6442 
6443 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6444   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6445   match(Set mem (StoreL mem src));
6446   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6447   ins_cost(360);
6448   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6449             "MOVD   $tmp,$src.lo\n\t"
6450             "MOVD   $tmp2,$src.hi\n\t"
6451             "PUNPCKLDQ $tmp,$tmp2\n\t"
6452             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6453   ins_encode %{
6454     __ cmpl(rax, $mem$$Address);
6455     __ movdl($tmp$$XMMRegister, $src$$Register);
6456     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6457     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6458     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6459   %}
6460   ins_pipe( pipe_slow );
6461 %}
6462 
6463 // Store Pointer; for storing unknown oops and raw pointers
6464 instruct storeP(memory mem, anyRegP src) %{
6465   match(Set mem (StoreP mem src));
6466 
6467   ins_cost(125);
6468   format %{ "MOV    $mem,$src" %}
6469   opcode(0x89);
6470   ins_encode( OpcP, RegMem( src, mem ) );
6471   ins_pipe( ialu_mem_reg );
6472 %}
6473 
6474 // Store Integer Immediate
6475 instruct storeImmI(memory mem, immI src) %{
6476   match(Set mem (StoreI mem src));
6477 
6478   ins_cost(150);
6479   format %{ "MOV    $mem,$src" %}
6480   opcode(0xC7);               /* C7 /0 */
6481   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6482   ins_pipe( ialu_mem_imm );
6483 %}
6484 
6485 // Store Short/Char Immediate
6486 instruct storeImmI16(memory mem, immI16 src) %{
6487   predicate(UseStoreImmI16);
6488   match(Set mem (StoreC mem src));
6489 
6490   ins_cost(150);
6491   format %{ "MOV16  $mem,$src" %}
6492   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6493   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6494   ins_pipe( ialu_mem_imm );
6495 %}
6496 
6497 // Store Pointer Immediate; null pointers or constant oops that do not
6498 // need card-mark barriers.
6499 instruct storeImmP(memory mem, immP src) %{
6500   match(Set mem (StoreP mem src));
6501 
6502   ins_cost(150);
6503   format %{ "MOV    $mem,$src" %}
6504   opcode(0xC7);               /* C7 /0 */
6505   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6506   ins_pipe( ialu_mem_imm );
6507 %}
6508 
6509 // Store Byte Immediate
6510 instruct storeImmB(memory mem, immI8 src) %{
6511   match(Set mem (StoreB mem src));
6512 
6513   ins_cost(150);
6514   format %{ "MOV8   $mem,$src" %}
6515   opcode(0xC6);               /* C6 /0 */
6516   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6517   ins_pipe( ialu_mem_imm );
6518 %}
6519 
6520 // Store CMS card-mark Immediate
6521 instruct storeImmCM(memory mem, immI8 src) %{
6522   match(Set mem (StoreCM mem src));
6523 
6524   ins_cost(150);
6525   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6526   opcode(0xC6);               /* C6 /0 */
6527   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6528   ins_pipe( ialu_mem_imm );
6529 %}
6530 
6531 // Store Double
6532 instruct storeDPR( memory mem, regDPR1 src) %{
6533   predicate(UseSSE<=1);
6534   match(Set mem (StoreD mem src));
6535 
6536   ins_cost(100);
6537   format %{ "FST_D  $mem,$src" %}
6538   opcode(0xDD);       /* DD /2 */
6539   ins_encode( enc_FPR_store(mem,src) );
6540   ins_pipe( fpu_mem_reg );
6541 %}
6542 
6543 // Store double does rounding on x86
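     // The x87 register may hold an 80-bit extended-precision value; FST_D to
     // memory narrows it to a 64-bit IEEE double, which is exactly the
     // rounding the RoundDouble node asks for.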
6544 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6545   predicate(UseSSE<=1);
6546   match(Set mem (StoreD mem (RoundDouble src)));
6547 
6548   ins_cost(100);
6549   format %{ "FST_D  $mem,$src\t# round" %}
6550   opcode(0xDD);       /* DD /2 */
6551   ins_encode( enc_FPR_store(mem,src) );
6552   ins_pipe( fpu_mem_reg );
6553 %}
6554 
6555 // Store XMM register to memory (double-precision floating point)
6556 // MOVSD instruction
6557 instruct storeD(memory mem, regD src) %{
6558   predicate(UseSSE>=2);
6559   match(Set mem (StoreD mem src));
6560   ins_cost(95);
6561   format %{ "MOVSD  $mem,$src" %}
6562   ins_encode %{
6563     __ movdbl($mem$$Address, $src$$XMMRegister);
6564   %}
6565   ins_pipe( pipe_slow );
6566 %}
6567 
6568 // Load Double
6569 instruct MoveD2VL(vlRegD dst, regD src) %{
6570   match(Set dst src);
6571   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6572   ins_encode %{
6573     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6574   %}
6575   ins_pipe( fpu_reg_reg );
6576 %}
6577 
6578 // Load Double
6579 instruct MoveVL2D(regD dst, vlRegD src) %{
6580   match(Set dst src);
6581   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6582   ins_encode %{
6583     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6584   %}
6585   ins_pipe( fpu_reg_reg );
6586 %}
6587 
6588 // Store XMM register to memory (single-precision floating point)
6589 // MOVSS instruction
6590 instruct storeF(memory mem, regF src) %{
6591   predicate(UseSSE>=1);
6592   match(Set mem (StoreF mem src));
6593   ins_cost(95);
6594   format %{ "MOVSS  $mem,$src" %}
6595   ins_encode %{
6596     __ movflt($mem$$Address, $src$$XMMRegister);
6597   %}
6598   ins_pipe( pipe_slow );
6599 %}
6600 
6601 // Load Float
6602 instruct MoveF2VL(vlRegF dst, regF src) %{
6603   match(Set dst src);
6604   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6605   ins_encode %{
6606     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6607   %}
6608   ins_pipe( fpu_reg_reg );
6609 %}
6610 
6611 // Load Float
6612 instruct MoveVL2F(regF dst, vlRegF src) %{
6613   match(Set dst src);
6614   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6615   ins_encode %{
6616     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6617   %}
6618   ins_pipe( fpu_reg_reg );
6619 %}
6620 
6621 // Store Float
6622 instruct storeFPR( memory mem, regFPR1 src) %{
6623   predicate(UseSSE==0);
6624   match(Set mem (StoreF mem src));
6625 
6626   ins_cost(100);
6627   format %{ "FST_S  $mem,$src" %}
6628   opcode(0xD9);       /* D9 /2 */
6629   ins_encode( enc_FPR_store(mem,src) );
6630   ins_pipe( fpu_mem_reg );
6631 %}
6632 
6633 // Store Float does rounding on x86
6634 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6635   predicate(UseSSE==0);
6636   match(Set mem (StoreF mem (RoundFloat src)));
6637 
6638   ins_cost(100);
6639   format %{ "FST_S  $mem,$src\t# round" %}
6640   opcode(0xD9);       /* D9 /2 */
6641   ins_encode( enc_FPR_store(mem,src) );
6642   ins_pipe( fpu_mem_reg );
6643 %}
6644 
6645 // Store Float does rounding on x86
6646 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6647   predicate(UseSSE<=1);
6648   match(Set mem (StoreF mem (ConvD2F src)));
6649 
6650   ins_cost(100);
6651   format %{ "FST_S  $mem,$src\t# D-round" %}
6652   opcode(0xD9);       /* D9 /2 */
6653   ins_encode( enc_FPR_store(mem,src) );
6654   ins_pipe( fpu_mem_reg );
6655 %}
6656 
6657 // Store immediate Float value (it is faster than store from FPU register)
6658 // The instruction usage is guarded by predicate in operand immFPR().
6659 instruct storeFPR_imm( memory mem, immFPR src) %{
6660   match(Set mem (StoreF mem src));
6661 
6662   ins_cost(50);
6663   format %{ "MOV    $mem,$src\t# store float" %}
6664   opcode(0xC7);               /* C7 /0 */
6665   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6666   ins_pipe( ialu_mem_imm );
6667 %}
6668 
6669 // Store immediate Float value (it is faster than store from XMM register)
6670 // The instruction usage is guarded by predicate in operand immF().
6671 instruct storeF_imm( memory mem, immF src) %{
6672   match(Set mem (StoreF mem src));
6673 
6674   ins_cost(50);
6675   format %{ "MOV    $mem,$src\t# store float" %}
6676   opcode(0xC7);               /* C7 /0 */
6677   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6678   ins_pipe( ialu_mem_imm );
6679 %}
6680 
6681 // Store Integer to stack slot
6682 instruct storeSSI(stackSlotI dst, rRegI src) %{
6683   match(Set dst src);
6684 
6685   ins_cost(100);
6686   format %{ "MOV    $dst,$src" %}
6687   opcode(0x89);
6688   ins_encode( OpcPRegSS( dst, src ) );
6689   ins_pipe( ialu_mem_reg );
6690 %}
6691 
6692 // Store Pointer to stack slot
6693 instruct storeSSP(stackSlotP dst, eRegP src) %{
6694   match(Set dst src);
6695 
6696   ins_cost(100);
6697   format %{ "MOV    $dst,$src" %}
6698   opcode(0x89);
6699   ins_encode( OpcPRegSS( dst, src ) );
6700   ins_pipe( ialu_mem_reg );
6701 %}
6702 
6703 // Store Long to stack slot
6704 instruct storeSSL(stackSlotL dst, eRegL src) %{
6705   match(Set dst src);
6706 
6707   ins_cost(200);
6708   format %{ "MOV    $dst,$src.lo\n\t"
6709             "MOV    $dst+4,$src.hi" %}
6710   opcode(0x89, 0x89);
6711   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6712   ins_pipe( ialu_mem_long_reg );
6713 %}
6714 
6715 //----------MemBar Instructions-----------------------------------------------
6716 // Memory barrier flavors
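     // IA32 has a TSO memory model: the only reordering the hardware performs
     // is letting a later load pass an earlier store.  Acquire, release and
     // storestore barriers therefore need no code and get empty encodings
     // below; only the StoreLoad case (MemBarVolatile) emits a real
     // serializing instruction.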
6717 
6718 instruct membar_acquire() %{
6719   match(MemBarAcquire);
6720   match(LoadFence);
6721   ins_cost(400);
6722 
6723   size(0);
6724   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6725   ins_encode();
6726   ins_pipe(empty);
6727 %}
6728 
6729 instruct membar_acquire_lock() %{
6730   match(MemBarAcquireLock);
6731   ins_cost(0);
6732 
6733   size(0);
6734   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6735   ins_encode( );
6736   ins_pipe(empty);
6737 %}
6738 
6739 instruct membar_release() %{
6740   match(MemBarRelease);
6741   match(StoreFence);
6742   ins_cost(400);
6743 
6744   size(0);
6745   format %{ "MEMBAR-release ! (empty encoding)" %}
6746   ins_encode( );
6747   ins_pipe(empty);
6748 %}
6749 
6750 instruct membar_release_lock() %{
6751   match(MemBarReleaseLock);
6752   ins_cost(0);
6753 
6754   size(0);
6755   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6756   ins_encode( );
6757   ins_pipe(empty);
6758 %}
6759 
6760 instruct membar_volatile(eFlagsReg cr) %{
6761   match(MemBarVolatile);
6762   effect(KILL cr);
6763   ins_cost(400);
6764 
6765   format %{
6766     $$template
6767     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6768   %}
6769   ins_encode %{
6770     __ membar(Assembler::StoreLoad);
6771   %}
6772   ins_pipe(pipe_slow);
6773 %}
6774 
6775 instruct unnecessary_membar_volatile() %{
6776   match(MemBarVolatile);
6777   predicate(Matcher::post_store_load_barrier(n));
6778   ins_cost(0);
6779 
6780   size(0);
6781   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6782   ins_encode( );
6783   ins_pipe(empty);
6784 %}
6785 
6786 instruct membar_storestore() %{
6787   match(MemBarStoreStore);
6788   ins_cost(0);
6789 
6790   size(0);
6791   format %{ "MEMBAR-storestore (empty encoding)" %}
6792   ins_encode( );
6793   ins_pipe(empty);
6794 %}
6795 
6796 //----------Move Instructions--------------------------------------------------
6797 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6798   match(Set dst (CastX2P src));
6799   format %{ "# X2P  $dst, $src" %}
6800   ins_encode( /*empty encoding*/ );
6801   ins_cost(0);
6802   ins_pipe(empty);
6803 %}
6804 
6805 instruct castP2X(rRegI dst, eRegP src ) %{
6806   match(Set dst (CastP2X src));
6807   ins_cost(50);
6808   format %{ "MOV    $dst, $src\t# CastP2X" %}
6809   ins_encode( enc_Copy( dst, src) );
6810   ins_pipe( ialu_reg_reg );
6811 %}
6812 
6813 //----------Conditional Move---------------------------------------------------
6814 // Conditional move
6815 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6816   predicate(!VM_Version::supports_cmov() );
6817   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6818   ins_cost(200);
6819   format %{ "J$cop,us skip\t# signed cmove\n\t"
6820             "MOV    $dst,$src\n"
6821       "skip:" %}
6822   ins_encode %{
6823     Label Lskip;
6824     // Invert sense of branch from sense of CMOV
6825     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6826     __ movl($dst$$Register, $src$$Register);
6827     __ bind(Lskip);
6828   %}
6829   ins_pipe( pipe_cmov_reg );
6830 %}
6831 
6832 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6833   predicate(!VM_Version::supports_cmov() );
6834   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6835   ins_cost(200);
6836   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6837             "MOV    $dst,$src\n"
6838       "skip:" %}
6839   ins_encode %{
6840     Label Lskip;
6841     // Invert sense of branch from sense of CMOV
6842     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6843     __ movl($dst$$Register, $src$$Register);
6844     __ bind(Lskip);
6845   %}
6846   ins_pipe( pipe_cmov_reg );
6847 %}
6848 
6849 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6850   predicate(VM_Version::supports_cmov() );
6851   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6852   ins_cost(200);
6853   format %{ "CMOV$cop $dst,$src" %}
6854   opcode(0x0F,0x40);
6855   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6856   ins_pipe( pipe_cmov_reg );
6857 %}
6858 
6859 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6860   predicate(VM_Version::supports_cmov() );
6861   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   format %{ "CMOV$cop $dst,$src" %}
6864   opcode(0x0F,0x40);
6865   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6866   ins_pipe( pipe_cmov_reg );
6867 %}
6868 
6869 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6870   predicate(VM_Version::supports_cmov() );
6871   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6872   ins_cost(200);
6873   expand %{
6874     cmovI_regU(cop, cr, dst, src);
6875   %}
6876 %}
6877 
6878 // Conditional move
6879 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6880   predicate(VM_Version::supports_cmov() );
6881   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6882   ins_cost(250);
6883   format %{ "CMOV$cop $dst,$src" %}
6884   opcode(0x0F,0x40);
6885   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6886   ins_pipe( pipe_cmov_mem );
6887 %}
6888 
6889 // Conditional move
6890 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6891   predicate(VM_Version::supports_cmov() );
6892   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6893   ins_cost(250);
6894   format %{ "CMOV$cop $dst,$src" %}
6895   opcode(0x0F,0x40);
6896   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6897   ins_pipe( pipe_cmov_mem );
6898 %}
6899 
6900 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6901   predicate(VM_Version::supports_cmov() );
6902   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6903   ins_cost(250);
6904   expand %{
6905     cmovI_memU(cop, cr, dst, src);
6906   %}
6907 %}
6908 
6909 // Conditional move
6910 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6911   predicate(VM_Version::supports_cmov() );
6912   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6913   ins_cost(200);
6914   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6915   opcode(0x0F,0x40);
6916   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6917   ins_pipe( pipe_cmov_reg );
6918 %}
6919 
6920 // Conditional move (non-P6 version)
6921 // Note: a CMoveP is generated for stubs and native wrappers
6922 //        regardless of whether we are on a P6, so we
6923 //        emulate a cmov here
6924 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6925   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6926   ins_cost(300);
6927   format %{ "Jn$cop   skip\n\t"
6928           "MOV    $dst,$src\t# pointer\n"
6929       "skip:" %}
6930   opcode(0x8b);
6931   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6932   ins_pipe( pipe_cmov_reg );
6933 %}
6934 
6935 // Conditional move
6936 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6937   predicate(VM_Version::supports_cmov() );
6938   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6939   ins_cost(200);
6940   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6941   opcode(0x0F,0x40);
6942   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6943   ins_pipe( pipe_cmov_reg );
6944 %}
6945 
6946 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6947   predicate(VM_Version::supports_cmov() );
6948   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6949   ins_cost(200);
6950   expand %{
6951     cmovP_regU(cop, cr, dst, src);
6952   %}
6953 %}
6954 
6955 // DISABLED: Requires the ADLC to emit a bottom_type call that
6956 // correctly meets the two pointer arguments; one is an incoming
6957 // register but the other is a memory operand.  ALSO appears to
6958 // be buggy with implicit null checks.
6959 //
6960 //// Conditional move
6961 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6962 //  predicate(VM_Version::supports_cmov() );
6963 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6964 //  ins_cost(250);
6965 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6966 //  opcode(0x0F,0x40);
6967 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6968 //  ins_pipe( pipe_cmov_mem );
6969 //%}
6970 //
6971 //// Conditional move
6972 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6973 //  predicate(VM_Version::supports_cmov() );
6974 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6975 //  ins_cost(250);
6976 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6977 //  opcode(0x0F,0x40);
6978 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6979 //  ins_pipe( pipe_cmov_mem );
6980 //%}
6981 
6982 // Conditional move
6983 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6984   predicate(UseSSE<=1);
6985   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6986   ins_cost(200);
6987   format %{ "FCMOV$cop $dst,$src\t# double" %}
6988   opcode(0xDA);
6989   ins_encode( enc_cmov_dpr(cop,src) );
6990   ins_pipe( pipe_cmovDPR_reg );
6991 %}
6992 
6993 // Conditional move
6994 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6995   predicate(UseSSE==0);
6996   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6997   ins_cost(200);
6998   format %{ "FCMOV$cop $dst,$src\t# float" %}
6999   opcode(0xDA);
7000   ins_encode( enc_cmov_dpr(cop,src) );
7001   ins_pipe( pipe_cmovDPR_reg );
7002 %}
7003 
7004 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
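     // FCMOVcc only exists for conditions computed from CF/ZF/PF (B, E, BE, U
     // and their negations), so a signed compare is handled with the short
     // branch around a plain move used below.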
7005 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7006   predicate(UseSSE<=1);
7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7008   ins_cost(200);
7009   format %{ "Jn$cop   skip\n\t"
7010             "MOV    $dst,$src\t# double\n"
7011       "skip:" %}
7012   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7013   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7014   ins_pipe( pipe_cmovDPR_reg );
7015 %}
7016 
7017 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7018 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7019   predicate(UseSSE==0);
7020   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7021   ins_cost(200);
7022   format %{ "Jn$cop    skip\n\t"
7023             "MOV    $dst,$src\t# float\n"
7024       "skip:" %}
7025   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7026   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7027   ins_pipe( pipe_cmovDPR_reg );
7028 %}
7029 
7030 // No CMOV exists for XMM registers, so with SSE the float CMove is emulated with a short branch around the move.
7031 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7032   predicate (UseSSE>=1);
7033   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7034   ins_cost(200);
7035   format %{ "Jn$cop   skip\n\t"
7036             "MOVSS  $dst,$src\t# float\n"
7037       "skip:" %}
7038   ins_encode %{
7039     Label skip;
7040     // Invert sense of branch from sense of CMOV
7041     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7042     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7043     __ bind(skip);
7044   %}
7045   ins_pipe( pipe_slow );
7046 %}
7047 
7048 // No CMOV exists for XMM registers, so with SSE2 the double CMove is emulated with a short branch around the move.
7049 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7050   predicate (UseSSE>=2);
7051   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7052   ins_cost(200);
7053   format %{ "Jn$cop   skip\n\t"
7054             "MOVSD  $dst,$src\t# double\n"
7055       "skip:" %}
7056   ins_encode %{
7057     Label skip;
7058     // Invert sense of branch from sense of CMOV
7059     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7060     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7061     __ bind(skip);
7062   %}
7063   ins_pipe( pipe_slow );
7064 %}
7065 
7066 // unsigned version
7067 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7068   predicate (UseSSE>=1);
7069   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7070   ins_cost(200);
7071   format %{ "Jn$cop   skip\n\t"
7072             "MOVSS  $dst,$src\t# float\n"
7073       "skip:" %}
7074   ins_encode %{
7075     Label skip;
7076     // Invert sense of branch from sense of CMOV
7077     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7078     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7079     __ bind(skip);
7080   %}
7081   ins_pipe( pipe_slow );
7082 %}
7083 
7084 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7085   predicate (UseSSE>=1);
7086   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7087   ins_cost(200);
7088   expand %{
7089     fcmovF_regU(cop, cr, dst, src);
7090   %}
7091 %}
7092 
7093 // unsigned version
7094 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7095   predicate (UseSSE>=2);
7096   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7097   ins_cost(200);
7098   format %{ "Jn$cop   skip\n\t"
7099             "MOVSD  $dst,$src\t# double\n"
7100       "skip:" %}
7101   ins_encode %{
7102     Label skip;
7103     // Invert sense of branch from sense of CMOV
7104     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7105     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7106     __ bind(skip);
7107   %}
7108   ins_pipe( pipe_slow );
7109 %}
7110 
7111 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7112   predicate (UseSSE>=2);
7113   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7114   ins_cost(200);
7115   expand %{
7116     fcmovD_regU(cop, cr, dst, src);
7117   %}
7118 %}
7119 
7120 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7121   predicate(VM_Version::supports_cmov() );
7122   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7123   ins_cost(200);
7124   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7125             "CMOV$cop $dst.hi,$src.hi" %}
7126   opcode(0x0F,0x40);
7127   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7128   ins_pipe( pipe_cmov_reg_long );
7129 %}
7130 
7131 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7132   predicate(VM_Version::supports_cmov() );
7133   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7134   ins_cost(200);
7135   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7136             "CMOV$cop $dst.hi,$src.hi" %}
7137   opcode(0x0F,0x40);
7138   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7139   ins_pipe( pipe_cmov_reg_long );
7140 %}
7141 
7142 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7143   predicate(VM_Version::supports_cmov() );
7144   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7145   ins_cost(200);
7146   expand %{
7147     cmovL_regU(cop, cr, dst, src);
7148   %}
7149 %}
7150 
7151 //----------Arithmetic Instructions--------------------------------------------
7152 //----------Addition Instructions----------------------------------------------
7153 
7154 // Integer Addition Instructions
7155 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7156   match(Set dst (AddI dst src));
7157   effect(KILL cr);
7158 
7159   size(2);
7160   format %{ "ADD    $dst,$src" %}
7161   opcode(0x03);
7162   ins_encode( OpcP, RegReg( dst, src) );
7163   ins_pipe( ialu_reg_reg );
7164 %}
7165 
7166 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7167   match(Set dst (AddI dst src));
7168   effect(KILL cr);
7169 
7170   format %{ "ADD    $dst,$src" %}
7171   opcode(0x81, 0x00); /* /0 id */
7172   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7173   ins_pipe( ialu_reg );
7174 %}
7175 
7176 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7177   predicate(UseIncDec);
7178   match(Set dst (AddI dst src));
7179   effect(KILL cr);
7180 
7181   size(1);
7182   format %{ "INC    $dst" %}
7183   opcode(0x40); /*  */
7184   ins_encode( Opc_plus( primary, dst ) );
7185   ins_pipe( ialu_reg );
7186 %}
7187 
7188 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7189   match(Set dst (AddI src0 src1));
7190   ins_cost(110);
7191 
7192   format %{ "LEA    $dst,[$src0 + $src1]" %}
7193   opcode(0x8D); /* 0x8D /r */
7194   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7195   ins_pipe( ialu_reg_reg );
7196 %}
7197 
7198 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7199   match(Set dst (AddP src0 src1));
7200   ins_cost(110);
7201 
7202   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7203   opcode(0x8D); /* 0x8D /r */
7204   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7205   ins_pipe( ialu_reg_reg );
7206 %}
7207 
7208 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7209   predicate(UseIncDec);
7210   match(Set dst (AddI dst src));
7211   effect(KILL cr);
7212 
7213   size(1);
7214   format %{ "DEC    $dst" %}
7215   opcode(0x48); /*  */
7216   ins_encode( Opc_plus( primary, dst ) );
7217   ins_pipe( ialu_reg );
7218 %}
7219 
7220 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7221   match(Set dst (AddP dst src));
7222   effect(KILL cr);
7223 
7224   size(2);
7225   format %{ "ADD    $dst,$src" %}
7226   opcode(0x03);
7227   ins_encode( OpcP, RegReg( dst, src) );
7228   ins_pipe( ialu_reg_reg );
7229 %}
7230 
7231 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7232   match(Set dst (AddP dst src));
7233   effect(KILL cr);
7234 
7235   format %{ "ADD    $dst,$src" %}
7236   opcode(0x81,0x00); /* Opcode 81 /0 id */
7237   // ins_encode( RegImm( dst, src) );
7238   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7239   ins_pipe( ialu_reg );
7240 %}
7241 
7242 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7243   match(Set dst (AddI dst (LoadI src)));
7244   effect(KILL cr);
7245 
7246   ins_cost(125);
7247   format %{ "ADD    $dst,$src" %}
7248   opcode(0x03);
7249   ins_encode( OpcP, RegMem( dst, src) );
7250   ins_pipe( ialu_reg_mem );
7251 %}
7252 
7253 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7254   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7255   effect(KILL cr);
7256 
7257   ins_cost(150);
7258   format %{ "ADD    $dst,$src" %}
7259   opcode(0x01);  /* Opcode 01 /r */
7260   ins_encode( OpcP, RegMem( src, dst ) );
7261   ins_pipe( ialu_mem_reg );
7262 %}
7263 
7264 // Add Memory with Immediate
7265 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7266   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7267   effect(KILL cr);
7268 
7269   ins_cost(125);
7270   format %{ "ADD    $dst,$src" %}
7271   opcode(0x81);               /* Opcode 81 /0 id */
7272   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7273   ins_pipe( ialu_mem_imm );
7274 %}
7275 
7276 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7277   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7278   effect(KILL cr);
7279 
7280   ins_cost(125);
7281   format %{ "INC    $dst" %}
7282   opcode(0xFF);               /* Opcode FF /0 */
7283   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7284   ins_pipe( ialu_mem_imm );
7285 %}
7286 
7287 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7288   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7289   effect(KILL cr);
7290 
7291   ins_cost(125);
7292   format %{ "DEC    $dst" %}
7293   opcode(0xFF);               /* Opcode FF /1 */
7294   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7295   ins_pipe( ialu_mem_imm );
7296 %}
7297 
7298 
7299 instruct checkCastPP( eRegP dst ) %{
7300   match(Set dst (CheckCastPP dst));
7301 
7302   size(0);
7303   format %{ "#checkcastPP of $dst" %}
7304   ins_encode( /*empty encoding*/ );
7305   ins_pipe( empty );
7306 %}
7307 
7308 instruct castPP( eRegP dst ) %{
7309   match(Set dst (CastPP dst));
7310   format %{ "#castPP of $dst" %}
7311   ins_encode( /*empty encoding*/ );
7312   ins_pipe( empty );
7313 %}
7314 
7315 instruct castII( rRegI dst ) %{
7316   match(Set dst (CastII dst));
7317   format %{ "#castII of $dst" %}
7318   ins_encode( /*empty encoding*/ );
7319   ins_cost(0);
7320   ins_pipe( empty );
7321 %}
7322 
7323 
7324 // Load-locked - same as a regular pointer load when used with compare-swap
7325 instruct loadPLocked(eRegP dst, memory mem) %{
7326   match(Set dst (LoadPLocked mem));
7327 
7328   ins_cost(125);
7329   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7330   opcode(0x8B);
7331   ins_encode( OpcP, RegMem(dst,mem));
7332   ins_pipe( ialu_reg_mem );
7333 %}
7334 
7335 // Conditional-store of the updated heap-top.
7336 // Used during allocation of the shared heap.
7337 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7338 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7339   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7340   // EAX is killed if there is contention, but then it's also unused.
7341   // In the common case of no contention, EAX holds the new oop address.
7342   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7343   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7344   ins_pipe( pipe_cmpxchg );
7345 %}
7346 
7347 // Conditional-store of an int value.
7348 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7349 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7350   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7351   effect(KILL oldval);
7352   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7353   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7354   ins_pipe( pipe_cmpxchg );
7355 %}
7356 
7357 // Conditional-store of a long value.
7358 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7359 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7360   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7361   effect(KILL oldval);
7362   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7363             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7364             "XCHG   EBX,ECX"
7365   %}
7366   ins_encode %{
7367     // Note: we need to swap rbx and rcx before and after the
7368     //       cmpxchg8 instruction because the instruction uses
7369     //       rcx as the high order word of the new value to store but
7370     //       our register encoding uses rbx.
7371     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7372     __ lock();
7373     __ cmpxchg8($mem$$Address);
7374     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7375   %}
7376   ins_pipe( pipe_cmpxchg );
7377 %}
7378 
7379 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7380 
7381 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7382   predicate(VM_Version::supports_cx8());
7383   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7384   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7385   effect(KILL cr, KILL oldval);
7386   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7387             "MOV    $res,0\n\t"
7388             "JNE,s  fail\n\t"
7389             "MOV    $res,1\n"
7390           "fail:" %}
7391   ins_encode( enc_cmpxchg8(mem_ptr),
7392               enc_flags_ne_to_boolean(res) );
7393   ins_pipe( pipe_cmpxchg );
7394 %}
7395 
7396 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7397   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7398   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7399   effect(KILL cr, KILL oldval);
7400   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7401             "MOV    $res,0\n\t"
7402             "JNE,s  fail\n\t"
7403             "MOV    $res,1\n"
7404           "fail:" %}
7405   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7406   ins_pipe( pipe_cmpxchg );
7407 %}
7408 
7409 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7410   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7411   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7412   effect(KILL cr, KILL oldval);
7413   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7414             "MOV    $res,0\n\t"
7415             "JNE,s  fail\n\t"
7416             "MOV    $res,1\n"
7417           "fail:" %}
7418   ins_encode( enc_cmpxchgb(mem_ptr),
7419               enc_flags_ne_to_boolean(res) );
7420   ins_pipe( pipe_cmpxchg );
7421 %}
7422 
7423 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7424   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7425   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7426   effect(KILL cr, KILL oldval);
7427   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7428             "MOV    $res,0\n\t"
7429             "JNE,s  fail\n\t"
7430             "MOV    $res,1\n"
7431           "fail:" %}
7432   ins_encode( enc_cmpxchgw(mem_ptr),
7433               enc_flags_ne_to_boolean(res) );
7434   ins_pipe( pipe_cmpxchg );
7435 %}
7436 
7437 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7438   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7439   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7440   effect(KILL cr, KILL oldval);
7441   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7442             "MOV    $res,0\n\t"
7443             "JNE,s  fail\n\t"
7444             "MOV    $res,1\n"
7445           "fail:" %}
7446   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7447   ins_pipe( pipe_cmpxchg );
7448 %}
7449 
7450 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7451   predicate(VM_Version::supports_cx8());
7452   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7453   effect(KILL cr);
7454   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7455   ins_encode( enc_cmpxchg8(mem_ptr) );
7456   ins_pipe( pipe_cmpxchg );
7457 %}
7458 
7459 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7460   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7461   effect(KILL cr);
7462   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7463   ins_encode( enc_cmpxchg(mem_ptr) );
7464   ins_pipe( pipe_cmpxchg );
7465 %}
7466 
7467 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7468   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7469   effect(KILL cr);
7470   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7471   ins_encode( enc_cmpxchgb(mem_ptr) );
7472   ins_pipe( pipe_cmpxchg );
7473 %}
7474 
7475 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7476   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7477   effect(KILL cr);
7478   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7479   ins_encode( enc_cmpxchgw(mem_ptr) );
7480   ins_pipe( pipe_cmpxchg );
7481 %}
7482 
7483 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7484   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7485   effect(KILL cr);
7486   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7487   ins_encode( enc_cmpxchg(mem_ptr) );
7488   ins_pipe( pipe_cmpxchg );
7489 %}
7490 
7491 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7492   predicate(n->as_LoadStore()->result_not_used());
7493   match(Set dummy (GetAndAddB mem add));
7494   effect(KILL cr);
7495   format %{ "ADDB  [$mem],$add" %}
7496   ins_encode %{
7497     __ lock();
7498     __ addb($mem$$Address, $add$$constant);
7499   %}
7500   ins_pipe( pipe_cmpxchg );
7501 %}
7502 
7503 // Important to match to xRegI: only 8-bit regs.
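     // (Only EAX, EBX, ECX and EDX have byte subregisters on IA32, so the
     // byte form of XADD must be given one of those; xRegI captures exactly
     // that set.)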
7504 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7505   match(Set newval (GetAndAddB mem newval));
7506   effect(KILL cr);
7507   format %{ "XADDB  [$mem],$newval" %}
7508   ins_encode %{
7509     __ lock();
7510     __ xaddb($mem$$Address, $newval$$Register);
7511   %}
7512   ins_pipe( pipe_cmpxchg );
7513 %}
7514 
7515 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7516   predicate(n->as_LoadStore()->result_not_used());
7517   match(Set dummy (GetAndAddS mem add));
7518   effect(KILL cr);
7519   format %{ "ADDS  [$mem],$add" %}
7520   ins_encode %{
7521     __ lock();
7522     __ addw($mem$$Address, $add$$constant);
7523   %}
7524   ins_pipe( pipe_cmpxchg );
7525 %}
7526 
7527 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7528   match(Set newval (GetAndAddS mem newval));
7529   effect(KILL cr);
7530   format %{ "XADDS  [$mem],$newval" %}
7531   ins_encode %{
7532     __ lock();
7533     __ xaddw($mem$$Address, $newval$$Register);
7534   %}
7535   ins_pipe( pipe_cmpxchg );
7536 %}
7537 
7538 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7539   predicate(n->as_LoadStore()->result_not_used());
7540   match(Set dummy (GetAndAddI mem add));
7541   effect(KILL cr);
7542   format %{ "ADDL  [$mem],$add" %}
7543   ins_encode %{
7544     __ lock();
7545     __ addl($mem$$Address, $add$$constant);
7546   %}
7547   ins_pipe( pipe_cmpxchg );
7548 %}
7549 
7550 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7551   match(Set newval (GetAndAddI mem newval));
7552   effect(KILL cr);
7553   format %{ "XADDL  [$mem],$newval" %}
7554   ins_encode %{
7555     __ lock();
7556     __ xaddl($mem$$Address, $newval$$Register);
7557   %}
7558   ins_pipe( pipe_cmpxchg );
7559 %}
7560 
7561 // Important to match to xRegI: only 8-bit regs.
7562 instruct xchgB( memory mem, xRegI newval) %{
7563   match(Set newval (GetAndSetB mem newval));
7564   format %{ "XCHGB  $newval,[$mem]" %}
7565   ins_encode %{
7566     __ xchgb($newval$$Register, $mem$$Address);
7567   %}
7568   ins_pipe( pipe_cmpxchg );
7569 %}
7570 
7571 instruct xchgS( memory mem, rRegI newval) %{
7572   match(Set newval (GetAndSetS mem newval));
7573   format %{ "XCHGW  $newval,[$mem]" %}
7574   ins_encode %{
7575     __ xchgw($newval$$Register, $mem$$Address);
7576   %}
7577   ins_pipe( pipe_cmpxchg );
7578 %}
7579 
7580 instruct xchgI( memory mem, rRegI newval) %{
7581   match(Set newval (GetAndSetI mem newval));
7582   format %{ "XCHGL  $newval,[$mem]" %}
7583   ins_encode %{
7584     __ xchgl($newval$$Register, $mem$$Address);
7585   %}
7586   ins_pipe( pipe_cmpxchg );
7587 %}
7588 
7589 instruct xchgP( memory mem, pRegP newval) %{
7590   match(Set newval (GetAndSetP mem newval));
7591   format %{ "XCHGL  $newval,[$mem]" %}
7592   ins_encode %{
7593     __ xchgl($newval$$Register, $mem$$Address);
7594   %}
7595   ins_pipe( pipe_cmpxchg );
7596 %}
7597 
7598 //----------Subtraction Instructions-------------------------------------------
7599 
7600 // Integer Subtraction Instructions
7601 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7602   match(Set dst (SubI dst src));
7603   effect(KILL cr);
7604 
7605   size(2);
7606   format %{ "SUB    $dst,$src" %}
7607   opcode(0x2B);
7608   ins_encode( OpcP, RegReg( dst, src) );
7609   ins_pipe( ialu_reg_reg );
7610 %}
7611 
7612 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7613   match(Set dst (SubI dst src));
7614   effect(KILL cr);
7615 
7616   format %{ "SUB    $dst,$src" %}
7617   opcode(0x81,0x05);  /* Opcode 81 /5 */
7618   // ins_encode( RegImm( dst, src) );
7619   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7620   ins_pipe( ialu_reg );
7621 %}
7622 
7623 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7624   match(Set dst (SubI dst (LoadI src)));
7625   effect(KILL cr);
7626 
7627   ins_cost(125);
7628   format %{ "SUB    $dst,$src" %}
7629   opcode(0x2B);
7630   ins_encode( OpcP, RegMem( dst, src) );
7631   ins_pipe( ialu_reg_mem );
7632 %}
7633 
7634 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7635   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7636   effect(KILL cr);
7637 
7638   ins_cost(150);
7639   format %{ "SUB    $dst,$src" %}
7640   opcode(0x29);  /* Opcode 29 /r */
7641   ins_encode( OpcP, RegMem( src, dst ) );
7642   ins_pipe( ialu_mem_reg );
7643 %}
7644 
7645 // Subtract from a pointer
7646 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7647   match(Set dst (AddP dst (SubI zero src)));
7648   effect(KILL cr);
7649 
7650   size(2);
7651   format %{ "SUB    $dst,$src" %}
7652   opcode(0x2B);
7653   ins_encode( OpcP, RegReg( dst, src) );
7654   ins_pipe( ialu_reg_reg );
7655 %}
7656 
7657 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7658   match(Set dst (SubI zero dst));
7659   effect(KILL cr);
7660 
7661   size(2);
7662   format %{ "NEG    $dst" %}
7663   opcode(0xF7,0x03);  // Opcode F7 /3
7664   ins_encode( OpcP, RegOpc( dst ) );
7665   ins_pipe( ialu_reg );
7666 %}
7667 
7668 //----------Multiplication/Division Instructions-------------------------------
7669 // Integer Multiplication Instructions
7670 // Multiply Register
7671 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7672   match(Set dst (MulI dst src));
7673   effect(KILL cr);
7674 
7675   size(3);
7676   ins_cost(300);
7677   format %{ "IMUL   $dst,$src" %}
7678   opcode(0xAF, 0x0F);
7679   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7680   ins_pipe( ialu_reg_reg_alu0 );
7681 %}
7682 
7683 // Multiply 32-bit Immediate
7684 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7685   match(Set dst (MulI src imm));
7686   effect(KILL cr);
7687 
7688   ins_cost(300);
7689   format %{ "IMUL   $dst,$src,$imm" %}
7690   opcode(0x69);  /* 69 /r id */
7691   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7692   ins_pipe( ialu_reg_reg_alu0 );
7693 %}
7694 
7695 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7696   match(Set dst src);
7697   effect(KILL cr);
7698 
7699   // Note that this is artificially increased to make it more expensive than loadConL
7700   ins_cost(250);
7701   format %{ "MOV    EAX,$src\t// low word only" %}
7702   opcode(0xB8);
7703   ins_encode( LdImmL_Lo(dst, src) );
7704   ins_pipe( ialu_reg_fat );
7705 %}
7706 
7707 // Multiply by 32-bit Immediate, taking the shifted high order results
7708 //  (special case for shift by 32)
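     // In source terms this is (int)(((long)x * con) >> 32); such shapes are
     // typically produced when division by a constant is strength-reduced to
     // a multiply by a precomputed reciprocal.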
7709 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7710   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7711   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7712              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7713              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7714   effect(USE src1, KILL cr);
7715 
7716   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7717   ins_cost(0*100 + 1*400 - 150);
7718   format %{ "IMUL   EDX:EAX,$src1" %}
7719   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7720   ins_pipe( pipe_slow );
7721 %}
7722 
7723 // Multiply by 32-bit Immediate, taking the shifted high order results
7724 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7725   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7726   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7727              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7728              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7729   effect(USE src1, KILL cr);
7730 
7731   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7732   ins_cost(1*100 + 1*400 - 150);
7733   format %{ "IMUL   EDX:EAX,$src1\n\t"
7734             "SAR    EDX,$cnt-32" %}
7735   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7736   ins_pipe( pipe_slow );
7737 %}
7738 
7739 // Multiply Memory 32-bit Immediate
7740 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7741   match(Set dst (MulI (LoadI src) imm));
7742   effect(KILL cr);
7743 
7744   ins_cost(300);
7745   format %{ "IMUL   $dst,$src,$imm" %}
7746   opcode(0x69);  /* 69 /r id */
7747   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7748   ins_pipe( ialu_reg_mem_alu0 );
7749 %}
7750 
7751 // Multiply Memory
7752 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7753   match(Set dst (MulI dst (LoadI src)));
7754   effect(KILL cr);
7755 
7756   ins_cost(350);
7757   format %{ "IMUL   $dst,$src" %}
7758   opcode(0xAF, 0x0F);
7759   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7760   ins_pipe( ialu_reg_mem_alu0 );
7761 %}
7762 
7763 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7764 %{
7765   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7766   effect(KILL cr, KILL src2);
7767 
7768   expand %{ mulI_eReg(dst, src1, cr);
7769            mulI_eReg(src2, src3, cr);
7770            addI_eReg(dst, src2, cr); %}
7771 %}
7772 
7773 // Multiply Register Int to Long
7774 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7775   // Basic Idea: long = (long)int * (long)int
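       // The one-operand IMUL widens EAX * src1 into the 64-bit product
       // EDX:EAX, so no explicit sign extension of the operands is needed.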
7776   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7777   effect(DEF dst, USE src, USE src1, KILL flags);
7778 
7779   ins_cost(300);
7780   format %{ "IMUL   $dst,$src1" %}
7781 
7782   ins_encode( long_int_multiply( dst, src1 ) );
7783   ins_pipe( ialu_reg_reg_alu0 );
7784 %}
7785 
7786 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7787   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7788   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7789   effect(KILL flags);
7790 
7791   ins_cost(300);
7792   format %{ "MUL    $dst,$src1" %}
7793 
7794   ins_encode( long_uint_multiply(dst, src1) );
7795   ins_pipe( ialu_reg_reg_alu0 );
7796 %}
7797 
7798 // Multiply Register Long
7799 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7800   match(Set dst (MulL dst src));
7801   effect(KILL cr, TEMP tmp);
7802   ins_cost(4*100+3*400);
7803 // Basic idea: lo(result) = lo(x_lo * y_lo)
7804 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
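     // Writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo, the x_hi*y_hi
     // term and the upper halves of the cross products land at bit 64 or
     // above, so only the three terms listed above survive in a 64-bit result.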
7805   format %{ "MOV    $tmp,$src.lo\n\t"
7806             "IMUL   $tmp,EDX\n\t"
7807             "MOV    EDX,$src.hi\n\t"
7808             "IMUL   EDX,EAX\n\t"
7809             "ADD    $tmp,EDX\n\t"
7810             "MUL    EDX:EAX,$src.lo\n\t"
7811             "ADD    EDX,$tmp" %}
7812   ins_encode( long_multiply( dst, src, tmp ) );
7813   ins_pipe( pipe_slow );
7814 %}
7815 
7816 // Multiply Register Long where the left operand's high 32 bits are zero
7817 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7818   predicate(is_operand_hi32_zero(n->in(1)));
7819   match(Set dst (MulL dst src));
7820   effect(KILL cr, TEMP tmp);
7821   ins_cost(2*100+2*400);
7822 // Basic idea: lo(result) = lo(x_lo * y_lo)
7823 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7824   format %{ "MOV    $tmp,$src.hi\n\t"
7825             "IMUL   $tmp,EAX\n\t"
7826             "MUL    EDX:EAX,$src.lo\n\t"
7827             "ADD    EDX,$tmp" %}
7828   ins_encode %{
7829     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7830     __ imull($tmp$$Register, rax);
7831     __ mull($src$$Register);
7832     __ addl(rdx, $tmp$$Register);
7833   %}
7834   ins_pipe( pipe_slow );
7835 %}
7836 
7837 // Multiply Register Long where the right operand's high 32 bits are zero
7838 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7839   predicate(is_operand_hi32_zero(n->in(2)));
7840   match(Set dst (MulL dst src));
7841   effect(KILL cr, TEMP tmp);
7842   ins_cost(2*100+2*400);
7843 // Basic idea: lo(result) = lo(x_lo * y_lo)
7844 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7845   format %{ "MOV    $tmp,$src.lo\n\t"
7846             "IMUL   $tmp,EDX\n\t"
7847             "MUL    EDX:EAX,$src.lo\n\t"
7848             "ADD    EDX,$tmp" %}
7849   ins_encode %{
7850     __ movl($tmp$$Register, $src$$Register);
7851     __ imull($tmp$$Register, rdx);
7852     __ mull($src$$Register);
7853     __ addl(rdx, $tmp$$Register);
7854   %}
7855   ins_pipe( pipe_slow );
7856 %}
7857 
7858 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7859 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7860   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7861   match(Set dst (MulL dst src));
7862   effect(KILL cr);
7863   ins_cost(1*400);
7864 // Basic idea: lo(result) = lo(x_lo * y_lo)
7865 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7866   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7867   ins_encode %{
7868     __ mull($src$$Register);
7869   %}
7870   ins_pipe( pipe_slow );
7871 %}
7872 
7873 // Multiply Register Long by small constant
7874 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7875   match(Set dst (MulL dst src));
7876   effect(KILL cr, TEMP tmp);
7877   ins_cost(2*100+2*400);
7878   size(12);
7879 // Basic idea: lo(result) = lo(src * EAX)
7880 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7881   format %{ "IMUL   $tmp,EDX,$src\n\t"
7882             "MOV    EDX,$src\n\t"
7883             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7884             "ADD    EDX,$tmp" %}
7885   ins_encode( long_multiply_con( dst, src, tmp ) );
7886   ins_pipe( pipe_slow );
7887 %}
7888 
7889 // Integer DIV with Register
7890 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7891   match(Set rax (DivI rax div));
7892   effect(KILL rdx, KILL cr);
7893   size(26);
7894   ins_cost(30*100+10*100);
7895   format %{ "CMP    EAX,0x80000000\n\t"
7896             "JNE,s  normal\n\t"
7897             "XOR    EDX,EDX\n\t"
7898             "CMP    ECX,-1\n\t"
7899             "JE,s   done\n"
7900     "normal: CDQ\n\t"
7901             "IDIV   $div\n\t"
7902     "done:"        %}
7903   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7904   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7905   ins_pipe( ialu_reg_reg_alu0 );
7906 %}
7907 
7908 // Divide Register Long
7909 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7910   match(Set dst (DivL src1 src2));
7911   effect( KILL cr, KILL cx, KILL bx );
7912   ins_cost(10000);
7913   format %{ "PUSH   $src1.hi\n\t"
7914             "PUSH   $src1.lo\n\t"
7915             "PUSH   $src2.hi\n\t"
7916             "PUSH   $src2.lo\n\t"
7917             "CALL   SharedRuntime::ldiv\n\t"
7918             "ADD    ESP,16" %}
7919   ins_encode( long_div(src1,src2) );
7920   ins_pipe( pipe_slow );
7921 %}
7922 
7923 // Integer DIVMOD with Register, both quotient and mod results
7924 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7925   match(DivModI rax div);
7926   effect(KILL cr);
7927   size(26);
7928   ins_cost(30*100+10*100);
7929   format %{ "CMP    EAX,0x80000000\n\t"
7930             "JNE,s  normal\n\t"
7931             "XOR    EDX,EDX\n\t"
7932             "CMP    ECX,-1\n\t"
7933             "JE,s   done\n"
7934     "normal: CDQ\n\t"
7935             "IDIV   $div\n\t"
7936     "done:"        %}
7937   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7938   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7939   ins_pipe( pipe_slow );
7940 %}
7941 
7942 // Integer MOD with Register
7943 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7944   match(Set rdx (ModI rax div));
7945   effect(KILL rax, KILL cr);
7946 
7947   size(26);
7948   ins_cost(300);
7949   format %{ "CDQ\n\t"
7950             "IDIV   $div" %}
7951   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7952   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7953   ins_pipe( ialu_reg_reg_alu0 );
7954 %}
7955 
7956 // Remainder Register Long
7957 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7958   match(Set dst (ModL src1 src2));
7959   effect( KILL cr, KILL cx, KILL bx );
7960   ins_cost(10000);
7961   format %{ "PUSH   $src1.hi\n\t"
7962             "PUSH   $src1.lo\n\t"
7963             "PUSH   $src2.hi\n\t"
7964             "PUSH   $src2.lo\n\t"
7965             "CALL   SharedRuntime::lrem\n\t"
7966             "ADD    ESP,16" %}
7967   ins_encode( long_mod(src1,src2) );
7968   ins_pipe( pipe_slow );
7969 %}
7970 
7971 // Divide Register Long (no special case since divisor != -1)
7972 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7973   match(Set dst (DivL dst imm));
7974   effect( TEMP tmp, TEMP tmp2, KILL cr );
7975   ins_cost(1000);
7976   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7977             "XOR    $tmp2,$tmp2\n\t"
7978             "CMP    $tmp,EDX\n\t"
7979             "JA,s   fast\n\t"
7980             "MOV    $tmp2,EAX\n\t"
7981             "MOV    EAX,EDX\n\t"
7982             "MOV    EDX,0\n\t"
7983             "JLE,s  pos\n\t"
7984             "LNEG   EAX : $tmp2\n\t"
7985             "DIV    $tmp # unsigned division\n\t"
7986             "XCHG   EAX,$tmp2\n\t"
7987             "DIV    $tmp\n\t"
7988             "LNEG   $tmp2 : EAX\n\t"
7989             "JMP,s  done\n"
7990     "pos:\n\t"
7991             "DIV    $tmp\n\t"
7992             "XCHG   EAX,$tmp2\n"
7993     "fast:\n\t"
7994             "DIV    $tmp\n"
7995     "done:\n\t"
7996             "MOV    EDX,$tmp2\n\t"
7997             "NEG    EDX:EAX # if $imm < 0" %}
7998   ins_encode %{
7999     int con = (int)$imm$$constant;
8000     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8001     int pcon = (con > 0) ? con : -con;
8002     Label Lfast, Lpos, Ldone;
8003 
8004     __ movl($tmp$$Register, pcon);
8005     __ xorl($tmp2$$Register,$tmp2$$Register);
8006     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
8008 
8009     __ movl($tmp2$$Register, $dst$$Register); // save
8010     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8011     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8012     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8013 
8014     // Negative dividend.
8015     // convert value to positive to use unsigned division
8016     __ lneg($dst$$Register, $tmp2$$Register);
8017     __ divl($tmp$$Register);
8018     __ xchgl($dst$$Register, $tmp2$$Register);
8019     __ divl($tmp$$Register);
8020     // revert result back to negative
8021     __ lneg($tmp2$$Register, $dst$$Register);
8022     __ jmpb(Ldone);
8023 
8024     __ bind(Lpos);
8025     __ divl($tmp$$Register); // Use unsigned division
8026     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit high result
8028 
8029     __ bind(Lfast);
8030     // fast path: src is positive
8031     __ divl($tmp$$Register); // Use unsigned division
8032 
8033     __ bind(Ldone);
8034     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8035     if (con < 0) {
8036       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8037     }
8038   %}
8039   ins_pipe( pipe_slow );
8040 %}
8041 
// Remainder Register Long (remainder fits into 32 bits)
8043 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8044   match(Set dst (ModL dst imm));
8045   effect( TEMP tmp, TEMP tmp2, KILL cr );
8046   ins_cost(1000);
8047   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8048             "CMP    $tmp,EDX\n\t"
8049             "JA,s   fast\n\t"
8050             "MOV    $tmp2,EAX\n\t"
8051             "MOV    EAX,EDX\n\t"
8052             "MOV    EDX,0\n\t"
8053             "JLE,s  pos\n\t"
8054             "LNEG   EAX : $tmp2\n\t"
8055             "DIV    $tmp # unsigned division\n\t"
8056             "MOV    EAX,$tmp2\n\t"
8057             "DIV    $tmp\n\t"
8058             "NEG    EDX\n\t"
8059             "JMP,s  done\n"
8060     "pos:\n\t"
8061             "DIV    $tmp\n\t"
8062             "MOV    EAX,$tmp2\n"
8063     "fast:\n\t"
8064             "DIV    $tmp\n"
8065     "done:\n\t"
8066             "MOV    EAX,EDX\n\t"
8067             "SAR    EDX,31\n\t" %}
8068   ins_encode %{
8069     int con = (int)$imm$$constant;
8070     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8071     int pcon = (con > 0) ? con : -con;
8072     Label  Lfast, Lpos, Ldone;
8073 
8074     __ movl($tmp$$Register, pcon);
8075     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
8077 
8078     __ movl($tmp2$$Register, $dst$$Register); // save
8079     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8080     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8081     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8082 
8083     // Negative dividend.
8084     // convert value to positive to use unsigned division
8085     __ lneg($dst$$Register, $tmp2$$Register);
8086     __ divl($tmp$$Register);
8087     __ movl($dst$$Register, $tmp2$$Register);
8088     __ divl($tmp$$Register);
8089     // revert remainder back to negative
8090     __ negl(HIGH_FROM_LOW($dst$$Register));
8091     __ jmpb(Ldone);
8092 
8093     __ bind(Lpos);
8094     __ divl($tmp$$Register);
8095     __ movl($dst$$Register, $tmp2$$Register);
8096 
8097     __ bind(Lfast);
8098     // fast path: src is positive
8099     __ divl($tmp$$Register);
8100 
8101     __ bind(Ldone);
8102     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8103     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8104 
8105   %}
8106   ins_pipe( pipe_slow );
8107 %}
8108 
8109 // Integer Shift Instructions
8110 // Shift Left by one
8111 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8112   match(Set dst (LShiftI dst shift));
8113   effect(KILL cr);
8114 
8115   size(2);
8116   format %{ "SHL    $dst,$shift" %}
8117   opcode(0xD1, 0x4);  /* D1 /4 */
8118   ins_encode( OpcP, RegOpc( dst ) );
8119   ins_pipe( ialu_reg );
8120 %}
8121 
8122 // Shift Left by 8-bit immediate
8123 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8124   match(Set dst (LShiftI dst shift));
8125   effect(KILL cr);
8126 
8127   size(3);
8128   format %{ "SHL    $dst,$shift" %}
8129   opcode(0xC1, 0x4);  /* C1 /4 ib */
8130   ins_encode( RegOpcImm( dst, shift) );
8131   ins_pipe( ialu_reg );
8132 %}
8133 
8134 // Shift Left by variable
8135 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8136   match(Set dst (LShiftI dst shift));
8137   effect(KILL cr);
8138 
8139   size(2);
8140   format %{ "SHL    $dst,$shift" %}
8141   opcode(0xD3, 0x4);  /* D3 /4 */
8142   ins_encode( OpcP, RegOpc( dst ) );
8143   ins_pipe( ialu_reg_reg );
8144 %}
8145 
8146 // Arithmetic shift right by one
8147 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8148   match(Set dst (RShiftI dst shift));
8149   effect(KILL cr);
8150 
8151   size(2);
8152   format %{ "SAR    $dst,$shift" %}
8153   opcode(0xD1, 0x7);  /* D1 /7 */
8154   ins_encode( OpcP, RegOpc( dst ) );
8155   ins_pipe( ialu_reg );
8156 %}
8157 
8158 // Arithmetic shift right by one
8159 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8160   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8161   effect(KILL cr);
8162   format %{ "SAR    $dst,$shift" %}
8163   opcode(0xD1, 0x7);  /* D1 /7 */
8164   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8165   ins_pipe( ialu_mem_imm );
8166 %}
8167 
8168 // Arithmetic Shift Right by 8-bit immediate
8169 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8170   match(Set dst (RShiftI dst shift));
8171   effect(KILL cr);
8172 
8173   size(3);
8174   format %{ "SAR    $dst,$shift" %}
8175   opcode(0xC1, 0x7);  /* C1 /7 ib */
8176   ins_encode( RegOpcImm( dst, shift ) );
8177   ins_pipe( ialu_mem_imm );
8178 %}
8179 
8180 // Arithmetic Shift Right by 8-bit immediate
8181 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8182   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8183   effect(KILL cr);
8184 
8185   format %{ "SAR    $dst,$shift" %}
8186   opcode(0xC1, 0x7);  /* C1 /7 ib */
8187   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8188   ins_pipe( ialu_mem_imm );
8189 %}
8190 
8191 // Arithmetic Shift Right by variable
8192 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8193   match(Set dst (RShiftI dst shift));
8194   effect(KILL cr);
8195 
8196   size(2);
8197   format %{ "SAR    $dst,$shift" %}
8198   opcode(0xD3, 0x7);  /* D3 /7 */
8199   ins_encode( OpcP, RegOpc( dst ) );
8200   ins_pipe( ialu_reg_reg );
8201 %}
8202 
8203 // Logical shift right by one
8204 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8205   match(Set dst (URShiftI dst shift));
8206   effect(KILL cr);
8207 
8208   size(2);
8209   format %{ "SHR    $dst,$shift" %}
8210   opcode(0xD1, 0x5);  /* D1 /5 */
8211   ins_encode( OpcP, RegOpc( dst ) );
8212   ins_pipe( ialu_reg );
8213 %}
8214 
8215 // Logical Shift Right by 8-bit immediate
8216 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8217   match(Set dst (URShiftI dst shift));
8218   effect(KILL cr);
8219 
8220   size(3);
8221   format %{ "SHR    $dst,$shift" %}
8222   opcode(0xC1, 0x5);  /* C1 /5 ib */
8223   ins_encode( RegOpcImm( dst, shift) );
8224   ins_pipe( ialu_reg );
8225 %}
8226 
8227 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
8230 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8231   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8232 
8233   size(3);
8234   format %{ "MOVSX  $dst,$src :8" %}
8235   ins_encode %{
8236     __ movsbl($dst$$Register, $src$$Register);
8237   %}
8238   ins_pipe(ialu_reg_reg);
8239 %}
8240 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
8243 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8244   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8245 
8246   size(3);
8247   format %{ "MOVSX  $dst,$src :16" %}
8248   ins_encode %{
8249     __ movswl($dst$$Register, $src$$Register);
8250   %}
8251   ins_pipe(ialu_reg_reg);
8252 %}
8253 
8254 
8255 // Logical Shift Right by variable
8256 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8257   match(Set dst (URShiftI dst shift));
8258   effect(KILL cr);
8259 
8260   size(2);
8261   format %{ "SHR    $dst,$shift" %}
8262   opcode(0xD3, 0x5);  /* D3 /5 */
8263   ins_encode( OpcP, RegOpc( dst ) );
8264   ins_pipe( ialu_reg_reg );
8265 %}
8266 
8267 
8268 //----------Logical Instructions-----------------------------------------------
8269 //----------Integer Logical Instructions---------------------------------------
8270 // And Instructions
8271 // And Register with Register
8272 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8273   match(Set dst (AndI dst src));
8274   effect(KILL cr);
8275 
8276   size(2);
8277   format %{ "AND    $dst,$src" %}
8278   opcode(0x23);
8279   ins_encode( OpcP, RegReg( dst, src) );
8280   ins_pipe( ialu_reg_reg );
8281 %}
8282 
8283 // And Register with Immediate
8284 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8285   match(Set dst (AndI dst src));
8286   effect(KILL cr);
8287 
8288   format %{ "AND    $dst,$src" %}
8289   opcode(0x81,0x04);  /* Opcode 81 /4 */
8290   // ins_encode( RegImm( dst, src) );
8291   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8292   ins_pipe( ialu_reg );
8293 %}
8294 
8295 // And Register with Memory
8296 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8297   match(Set dst (AndI dst (LoadI src)));
8298   effect(KILL cr);
8299 
8300   ins_cost(125);
8301   format %{ "AND    $dst,$src" %}
8302   opcode(0x23);
8303   ins_encode( OpcP, RegMem( dst, src) );
8304   ins_pipe( ialu_reg_mem );
8305 %}
8306 
8307 // And Memory with Register
8308 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8309   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8310   effect(KILL cr);
8311 
8312   ins_cost(150);
8313   format %{ "AND    $dst,$src" %}
8314   opcode(0x21);  /* Opcode 21 /r */
8315   ins_encode( OpcP, RegMem( src, dst ) );
8316   ins_pipe( ialu_mem_reg );
8317 %}
8318 
8319 // And Memory with Immediate
8320 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8321   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8322   effect(KILL cr);
8323 
8324   ins_cost(125);
8325   format %{ "AND    $dst,$src" %}
8326   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8327   // ins_encode( MemImm( dst, src) );
8328   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8329   ins_pipe( ialu_mem_imm );
8330 %}
8331 
8332 // BMI1 instructions
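// Hedged summary of the bit-trick identities the BMI1 patterns below recognize,
// shown as C expressions for illustration:
//   ANDN   dst = ~src1 & src2       matches (AndI (XorI src1 -1) src2)
//   BLSI   dst = src & -src         lowest set bit; (AndI (SubI 0 src) src)
//   BLSMSK dst = src ^ (src - 1)    mask through the lowest set bit
//   BLSR   dst = src & (src - 1)    clear the lowest set bit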
8333 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8334   match(Set dst (AndI (XorI src1 minus_1) src2));
8335   predicate(UseBMI1Instructions);
8336   effect(KILL cr);
8337 
8338   format %{ "ANDNL  $dst, $src1, $src2" %}
8339 
8340   ins_encode %{
8341     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8342   %}
8343   ins_pipe(ialu_reg);
8344 %}
8345 
8346 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8347   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8348   predicate(UseBMI1Instructions);
8349   effect(KILL cr);
8350 
8351   ins_cost(125);
8352   format %{ "ANDNL  $dst, $src1, $src2" %}
8353 
8354   ins_encode %{
8355     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8356   %}
8357   ins_pipe(ialu_reg_mem);
8358 %}
8359 
8360 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8361   match(Set dst (AndI (SubI imm_zero src) src));
8362   predicate(UseBMI1Instructions);
8363   effect(KILL cr);
8364 
8365   format %{ "BLSIL  $dst, $src" %}
8366 
8367   ins_encode %{
8368     __ blsil($dst$$Register, $src$$Register);
8369   %}
8370   ins_pipe(ialu_reg);
8371 %}
8372 
8373 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8374   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8375   predicate(UseBMI1Instructions);
8376   effect(KILL cr);
8377 
8378   ins_cost(125);
8379   format %{ "BLSIL  $dst, $src" %}
8380 
8381   ins_encode %{
8382     __ blsil($dst$$Register, $src$$Address);
8383   %}
8384   ins_pipe(ialu_reg_mem);
8385 %}
8386 
8387 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8388 %{
8389   match(Set dst (XorI (AddI src minus_1) src));
8390   predicate(UseBMI1Instructions);
8391   effect(KILL cr);
8392 
8393   format %{ "BLSMSKL $dst, $src" %}
8394 
8395   ins_encode %{
8396     __ blsmskl($dst$$Register, $src$$Register);
8397   %}
8398 
8399   ins_pipe(ialu_reg);
8400 %}
8401 
8402 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8403 %{
8404   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8405   predicate(UseBMI1Instructions);
8406   effect(KILL cr);
8407 
8408   ins_cost(125);
8409   format %{ "BLSMSKL $dst, $src" %}
8410 
8411   ins_encode %{
8412     __ blsmskl($dst$$Register, $src$$Address);
8413   %}
8414 
8415   ins_pipe(ialu_reg_mem);
8416 %}
8417 
8418 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8419 %{
8420   match(Set dst (AndI (AddI src minus_1) src) );
8421   predicate(UseBMI1Instructions);
8422   effect(KILL cr);
8423 
8424   format %{ "BLSRL  $dst, $src" %}
8425 
8426   ins_encode %{
8427     __ blsrl($dst$$Register, $src$$Register);
8428   %}
8429 
8430   ins_pipe(ialu_reg);
8431 %}
8432 
8433 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8434 %{
8435   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8436   predicate(UseBMI1Instructions);
8437   effect(KILL cr);
8438 
8439   ins_cost(125);
8440   format %{ "BLSRL  $dst, $src" %}
8441 
8442   ins_encode %{
8443     __ blsrl($dst$$Register, $src$$Address);
8444   %}
8445 
8446   ins_pipe(ialu_reg_mem);
8447 %}
8448 
8449 // Or Instructions
8450 // Or Register with Register
8451 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8452   match(Set dst (OrI dst src));
8453   effect(KILL cr);
8454 
8455   size(2);
8456   format %{ "OR     $dst,$src" %}
8457   opcode(0x0B);
8458   ins_encode( OpcP, RegReg( dst, src) );
8459   ins_pipe( ialu_reg_reg );
8460 %}
8461 
8462 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8463   match(Set dst (OrI dst (CastP2X src)));
8464   effect(KILL cr);
8465 
8466   size(2);
8467   format %{ "OR     $dst,$src" %}
8468   opcode(0x0B);
8469   ins_encode( OpcP, RegReg( dst, src) );
8470   ins_pipe( ialu_reg_reg );
8471 %}
8472 
8473 
8474 // Or Register with Immediate
8475 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8476   match(Set dst (OrI dst src));
8477   effect(KILL cr);
8478 
8479   format %{ "OR     $dst,$src" %}
8480   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8481   // ins_encode( RegImm( dst, src) );
8482   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8483   ins_pipe( ialu_reg );
8484 %}
8485 
8486 // Or Register with Memory
8487 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8488   match(Set dst (OrI dst (LoadI src)));
8489   effect(KILL cr);
8490 
8491   ins_cost(125);
8492   format %{ "OR     $dst,$src" %}
8493   opcode(0x0B);
8494   ins_encode( OpcP, RegMem( dst, src) );
8495   ins_pipe( ialu_reg_mem );
8496 %}
8497 
8498 // Or Memory with Register
8499 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8500   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8501   effect(KILL cr);
8502 
8503   ins_cost(150);
8504   format %{ "OR     $dst,$src" %}
8505   opcode(0x09);  /* Opcode 09 /r */
8506   ins_encode( OpcP, RegMem( src, dst ) );
8507   ins_pipe( ialu_mem_reg );
8508 %}
8509 
8510 // Or Memory with Immediate
8511 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8512   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8513   effect(KILL cr);
8514 
8515   ins_cost(125);
8516   format %{ "OR     $dst,$src" %}
8517   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8518   // ins_encode( MemImm( dst, src) );
8519   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8520   ins_pipe( ialu_mem_imm );
8521 %}
8522 
8523 // ROL/ROR
8524 // ROL expand
8525 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8526   effect(USE_DEF dst, USE shift, KILL cr);
8527 
8528   format %{ "ROL    $dst, $shift" %}
8529   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8530   ins_encode( OpcP, RegOpc( dst ));
8531   ins_pipe( ialu_reg );
8532 %}
8533 
8534 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8535   effect(USE_DEF dst, USE shift, KILL cr);
8536 
8537   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8539   ins_encode( RegOpcImm(dst, shift) );
8540   ins_pipe(ialu_reg);
8541 %}
8542 
8543 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8544   effect(USE_DEF dst, USE shift, KILL cr);
8545 
8546   format %{ "ROL    $dst, $shift" %}
8547   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8548   ins_encode(OpcP, RegOpc(dst));
8549   ins_pipe( ialu_reg_reg );
8550 %}
8551 // end of ROL expand
8552 
8553 // ROL 32bit by one once
8554 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8555   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8556 
8557   expand %{
8558     rolI_eReg_imm1(dst, lshift, cr);
8559   %}
8560 %}
8561 
8562 // ROL 32bit var by imm8 once
8563 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8564   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8565   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8566 
8567   expand %{
8568     rolI_eReg_imm8(dst, lshift, cr);
8569   %}
8570 %}
8571 
8572 // ROL 32bit var by var once
8573 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8574   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8575 
8576   expand %{
8577     rolI_eReg_CL(dst, shift, cr);
8578   %}
8579 %}
8580 
8581 // ROL 32bit var by var once
8582 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8583   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8584 
8585   expand %{
8586     rolI_eReg_CL(dst, shift, cr);
8587   %}
8588 %}
8589 
8590 // ROR expand
8591 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8592   effect(USE_DEF dst, USE shift, KILL cr);
8593 
8594   format %{ "ROR    $dst, $shift" %}
8595   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8596   ins_encode( OpcP, RegOpc( dst ) );
8597   ins_pipe( ialu_reg );
8598 %}
8599 
8600 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8601   effect (USE_DEF dst, USE shift, KILL cr);
8602 
8603   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8605   ins_encode( RegOpcImm(dst, shift) );
8606   ins_pipe( ialu_reg );
8607 %}
8608 
8609 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8610   effect(USE_DEF dst, USE shift, KILL cr);
8611 
8612   format %{ "ROR    $dst, $shift" %}
8613   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8614   ins_encode(OpcP, RegOpc(dst));
8615   ins_pipe( ialu_reg_reg );
8616 %}
8617 // end of ROR expand
8618 
// ROR 32bit by one once
8620 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8621   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8622 
8623   expand %{
8624     rorI_eReg_imm1(dst, rshift, cr);
8625   %}
8626 %}
8627 
8628 // ROR 32bit by immI8 once
8629 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8630   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8631   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8632 
8633   expand %{
8634     rorI_eReg_imm8(dst, rshift, cr);
8635   %}
8636 %}
8637 
8638 // ROR 32bit var by var once
8639 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8640   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8641 
8642   expand %{
8643     rorI_eReg_CL(dst, shift, cr);
8644   %}
8645 %}
8646 
8647 // ROR 32bit var by var once
8648 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8649   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8650 
8651   expand %{
8652     rorI_eReg_CL(dst, shift, cr);
8653   %}
8654 %}
8655 
8656 // Xor Instructions
8657 // Xor Register with Register
8658 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8659   match(Set dst (XorI dst src));
8660   effect(KILL cr);
8661 
8662   size(2);
8663   format %{ "XOR    $dst,$src" %}
8664   opcode(0x33);
8665   ins_encode( OpcP, RegReg( dst, src) );
8666   ins_pipe( ialu_reg_reg );
8667 %}
8668 
8669 // Xor Register with Immediate -1
8670 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8671   match(Set dst (XorI dst imm));
8672 
8673   size(2);
8674   format %{ "NOT    $dst" %}
8675   ins_encode %{
8676      __ notl($dst$$Register);
8677   %}
8678   ins_pipe( ialu_reg );
8679 %}
8680 
8681 // Xor Register with Immediate
8682 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8683   match(Set dst (XorI dst src));
8684   effect(KILL cr);
8685 
8686   format %{ "XOR    $dst,$src" %}
8687   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8688   // ins_encode( RegImm( dst, src) );
8689   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8690   ins_pipe( ialu_reg );
8691 %}
8692 
8693 // Xor Register with Memory
8694 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8695   match(Set dst (XorI dst (LoadI src)));
8696   effect(KILL cr);
8697 
8698   ins_cost(125);
8699   format %{ "XOR    $dst,$src" %}
8700   opcode(0x33);
8701   ins_encode( OpcP, RegMem(dst, src) );
8702   ins_pipe( ialu_reg_mem );
8703 %}
8704 
8705 // Xor Memory with Register
8706 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8707   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8708   effect(KILL cr);
8709 
8710   ins_cost(150);
8711   format %{ "XOR    $dst,$src" %}
8712   opcode(0x31);  /* Opcode 31 /r */
8713   ins_encode( OpcP, RegMem( src, dst ) );
8714   ins_pipe( ialu_mem_reg );
8715 %}
8716 
8717 // Xor Memory with Immediate
8718 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8719   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8720   effect(KILL cr);
8721 
8722   ins_cost(125);
8723   format %{ "XOR    $dst,$src" %}
8724   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8725   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8726   ins_pipe( ialu_mem_imm );
8727 %}
8728 
8729 //----------Convert Int to Boolean---------------------------------------------
8730 
8731 instruct movI_nocopy(rRegI dst, rRegI src) %{
8732   effect( DEF dst, USE src );
8733   format %{ "MOV    $dst,$src" %}
8734   ins_encode( enc_Copy( dst, src) );
8735   ins_pipe( ialu_reg_reg );
8736 %}
8737 
8738 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8739   effect( USE_DEF dst, USE src, KILL cr );
8740 
8741   size(4);
8742   format %{ "NEG    $dst\n\t"
8743             "ADC    $dst,$src" %}
8744   ins_encode( neg_reg(dst),
8745               OpcRegReg(0x13,dst,src) );
8746   ins_pipe( ialu_reg_reg_long );
8747 %}
8748 
8749 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8750   match(Set dst (Conv2B src));
8751 
8752   expand %{
8753     movI_nocopy(dst,src);
8754     ci2b(dst,src,cr);
8755   %}
8756 %}
8757 
8758 instruct movP_nocopy(rRegI dst, eRegP src) %{
8759   effect( DEF dst, USE src );
8760   format %{ "MOV    $dst,$src" %}
8761   ins_encode( enc_Copy( dst, src) );
8762   ins_pipe( ialu_reg_reg );
8763 %}
8764 
8765 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8766   effect( USE_DEF dst, USE src, KILL cr );
8767   format %{ "NEG    $dst\n\t"
8768             "ADC    $dst,$src" %}
8769   ins_encode( neg_reg(dst),
8770               OpcRegReg(0x13,dst,src) );
8771   ins_pipe( ialu_reg_reg_long );
8772 %}
8773 
8774 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8775   match(Set dst (Conv2B src));
8776 
8777   expand %{
8778     movP_nocopy(dst,src);
8779     cp2b(dst,src,cr);
8780   %}
8781 %}
8782 
8783 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8784   match(Set dst (CmpLTMask p q));
8785   effect(KILL cr);
8786   ins_cost(400);
8787 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8789   format %{ "XOR    $dst,$dst\n\t"
8790             "CMP    $p,$q\n\t"
8791             "SETlt  $dst\n\t"
8792             "NEG    $dst" %}
8793   ins_encode %{
8794     Register Rp = $p$$Register;
8795     Register Rq = $q$$Register;
8796     Register Rd = $dst$$Register;
8797     Label done;
8798     __ xorl(Rd, Rd);
8799     __ cmpl(Rp, Rq);
8800     __ setb(Assembler::less, Rd);
8801     __ negl(Rd);
8802   %}
8803 
8804   ins_pipe(pipe_slow);
8805 %}
8806 
8807 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8808   match(Set dst (CmpLTMask dst zero));
8809   effect(DEF dst, KILL cr);
8810   ins_cost(100);
8811 
8812   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8813   ins_encode %{
8814   __ sarl($dst$$Register, 31);
8815   %}
8816   ins_pipe(ialu_reg);
8817 %}
8818 
8819 /* better to save a register than avoid a branch */
8820 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8821   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8822   effect(KILL cr);
8823   ins_cost(400);
8824   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8825             "JGE    done\n\t"
8826             "ADD    $p,$y\n"
8827             "done:  " %}
8828   ins_encode %{
8829     Register Rp = $p$$Register;
8830     Register Rq = $q$$Register;
8831     Register Ry = $y$$Register;
8832     Label done;
8833     __ subl(Rp, Rq);
8834     __ jccb(Assembler::greaterEqual, done);
8835     __ addl(Rp, Ry);
8836     __ bind(done);
8837   %}
8838 
8839   ins_pipe(pipe_cmplt);
8840 %}
8841 
8842 /* better to save a register than avoid a branch */
8843 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8844   match(Set y (AndI (CmpLTMask p q) y));
8845   effect(KILL cr);
8846 
8847   ins_cost(300);
8848 
8849   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8850             "JLT      done\n\t"
8851             "XORL     $y, $y\n"
8852             "done:  " %}
8853   ins_encode %{
8854     Register Rp = $p$$Register;
8855     Register Rq = $q$$Register;
8856     Register Ry = $y$$Register;
8857     Label done;
8858     __ cmpl(Rp, Rq);
8859     __ jccb(Assembler::less, done);
8860     __ xorl(Ry, Ry);
8861     __ bind(done);
8862   %}
8863 
8864   ins_pipe(pipe_cmplt);
8865 %}
8866 
8867 /* If I enable this, I encourage spilling in the inner loop of compress.
8868 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8869   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8870 */
8871 //----------Overflow Math Instructions-----------------------------------------
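// Hedged note: these nodes produce only the flags; a subsequent branch consumes
// the overflow condition. They are typically generated for the java.lang.Math
// exact-arithmetic intrinsics, e.g. Math.addExact(a, b), where the ADD/IMUL/CMP
// sets OF and the following branch takes the slow path on overflow.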
8872 
8873 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8874 %{
8875   match(Set cr (OverflowAddI op1 op2));
8876   effect(DEF cr, USE_KILL op1, USE op2);
8877 
8878   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8879 
8880   ins_encode %{
8881     __ addl($op1$$Register, $op2$$Register);
8882   %}
8883   ins_pipe(ialu_reg_reg);
8884 %}
8885 
8886 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8887 %{
8888   match(Set cr (OverflowAddI op1 op2));
8889   effect(DEF cr, USE_KILL op1, USE op2);
8890 
8891   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8892 
8893   ins_encode %{
8894     __ addl($op1$$Register, $op2$$constant);
8895   %}
8896   ins_pipe(ialu_reg_reg);
8897 %}
8898 
8899 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8900 %{
8901   match(Set cr (OverflowSubI op1 op2));
8902 
8903   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8904   ins_encode %{
8905     __ cmpl($op1$$Register, $op2$$Register);
8906   %}
8907   ins_pipe(ialu_reg_reg);
8908 %}
8909 
8910 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8911 %{
8912   match(Set cr (OverflowSubI op1 op2));
8913 
8914   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8915   ins_encode %{
8916     __ cmpl($op1$$Register, $op2$$constant);
8917   %}
8918   ins_pipe(ialu_reg_reg);
8919 %}
8920 
8921 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8922 %{
8923   match(Set cr (OverflowSubI zero op2));
8924   effect(DEF cr, USE_KILL op2);
8925 
8926   format %{ "NEG    $op2\t# overflow check int" %}
8927   ins_encode %{
8928     __ negl($op2$$Register);
8929   %}
8930   ins_pipe(ialu_reg_reg);
8931 %}
8932 
8933 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8934 %{
8935   match(Set cr (OverflowMulI op1 op2));
8936   effect(DEF cr, USE_KILL op1, USE op2);
8937 
8938   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8939   ins_encode %{
8940     __ imull($op1$$Register, $op2$$Register);
8941   %}
8942   ins_pipe(ialu_reg_reg_alu0);
8943 %}
8944 
8945 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8946 %{
8947   match(Set cr (OverflowMulI op1 op2));
8948   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8949 
8950   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8951   ins_encode %{
8952     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8953   %}
8954   ins_pipe(ialu_reg_reg_alu0);
8955 %}
8956 
8957 //----------Long Instructions------------------------------------------------
8958 // Add Long Register with Register
8959 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8960   match(Set dst (AddL dst src));
8961   effect(KILL cr);
8962   ins_cost(200);
8963   format %{ "ADD    $dst.lo,$src.lo\n\t"
8964             "ADC    $dst.hi,$src.hi" %}
8965   opcode(0x03, 0x13);
8966   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8967   ins_pipe( ialu_reg_reg_long );
8968 %}
8969 
8970 // Add Long Register with Immediate
8971 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8972   match(Set dst (AddL dst src));
8973   effect(KILL cr);
8974   format %{ "ADD    $dst.lo,$src.lo\n\t"
8975             "ADC    $dst.hi,$src.hi" %}
8976   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8977   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8978   ins_pipe( ialu_reg_long );
8979 %}
8980 
8981 // Add Long Register with Memory
8982 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8983   match(Set dst (AddL dst (LoadL mem)));
8984   effect(KILL cr);
8985   ins_cost(125);
8986   format %{ "ADD    $dst.lo,$mem\n\t"
8987             "ADC    $dst.hi,$mem+4" %}
8988   opcode(0x03, 0x13);
8989   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8990   ins_pipe( ialu_reg_long_mem );
8991 %}
8992 
8993 // Subtract Long Register with Register.
8994 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8995   match(Set dst (SubL dst src));
8996   effect(KILL cr);
8997   ins_cost(200);
8998   format %{ "SUB    $dst.lo,$src.lo\n\t"
8999             "SBB    $dst.hi,$src.hi" %}
9000   opcode(0x2B, 0x1B);
9001   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9002   ins_pipe( ialu_reg_reg_long );
9003 %}
9004 
9005 // Subtract Long Register with Immediate
9006 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9007   match(Set dst (SubL dst src));
9008   effect(KILL cr);
9009   format %{ "SUB    $dst.lo,$src.lo\n\t"
9010             "SBB    $dst.hi,$src.hi" %}
9011   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9012   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9013   ins_pipe( ialu_reg_long );
9014 %}
9015 
9016 // Subtract Long Register with Memory
9017 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9018   match(Set dst (SubL dst (LoadL mem)));
9019   effect(KILL cr);
9020   ins_cost(125);
9021   format %{ "SUB    $dst.lo,$mem\n\t"
9022             "SBB    $dst.hi,$mem+4" %}
9023   opcode(0x2B, 0x1B);
9024   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9025   ins_pipe( ialu_reg_long_mem );
9026 %}
9027 
9028 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9029   match(Set dst (SubL zero dst));
9030   effect(KILL cr);
9031   ins_cost(300);
9032   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9033   ins_encode( neg_long(dst) );
9034   ins_pipe( ialu_reg_reg_long );
9035 %}
9036 
9037 // And Long Register with Register
9038 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9039   match(Set dst (AndL dst src));
9040   effect(KILL cr);
9041   format %{ "AND    $dst.lo,$src.lo\n\t"
9042             "AND    $dst.hi,$src.hi" %}
9043   opcode(0x23,0x23);
9044   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9045   ins_pipe( ialu_reg_reg_long );
9046 %}
9047 
9048 // And Long Register with Immediate
9049 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9050   match(Set dst (AndL dst src));
9051   effect(KILL cr);
9052   format %{ "AND    $dst.lo,$src.lo\n\t"
9053             "AND    $dst.hi,$src.hi" %}
9054   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9055   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9056   ins_pipe( ialu_reg_long );
9057 %}
9058 
9059 // And Long Register with Memory
9060 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9061   match(Set dst (AndL dst (LoadL mem)));
9062   effect(KILL cr);
9063   ins_cost(125);
9064   format %{ "AND    $dst.lo,$mem\n\t"
9065             "AND    $dst.hi,$mem+4" %}
9066   opcode(0x23, 0x23);
9067   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9068   ins_pipe( ialu_reg_long_mem );
9069 %}
9070 
9071 // BMI1 instructions
9072 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9073   match(Set dst (AndL (XorL src1 minus_1) src2));
9074   predicate(UseBMI1Instructions);
9075   effect(KILL cr, TEMP dst);
9076 
9077   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9078             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9079          %}
9080 
9081   ins_encode %{
9082     Register Rdst = $dst$$Register;
9083     Register Rsrc1 = $src1$$Register;
9084     Register Rsrc2 = $src2$$Register;
9085     __ andnl(Rdst, Rsrc1, Rsrc2);
9086     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9087   %}
9088   ins_pipe(ialu_reg_reg_long);
9089 %}
9090 
9091 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9092   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9093   predicate(UseBMI1Instructions);
9094   effect(KILL cr, TEMP dst);
9095 
9096   ins_cost(125);
9097   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9098             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9099          %}
9100 
9101   ins_encode %{
9102     Register Rdst = $dst$$Register;
9103     Register Rsrc1 = $src1$$Register;
9104     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9105 
9106     __ andnl(Rdst, Rsrc1, $src2$$Address);
9107     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9108   %}
9109   ins_pipe(ialu_reg_mem);
9110 %}
9111 
9112 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9113   match(Set dst (AndL (SubL imm_zero src) src));
9114   predicate(UseBMI1Instructions);
9115   effect(KILL cr, TEMP dst);
9116 
9117   format %{ "MOVL   $dst.hi, 0\n\t"
9118             "BLSIL  $dst.lo, $src.lo\n\t"
9119             "JNZ    done\n\t"
9120             "BLSIL  $dst.hi, $src.hi\n"
9121             "done:"
9122          %}
9123 
9124   ins_encode %{
9125     Label done;
9126     Register Rdst = $dst$$Register;
9127     Register Rsrc = $src$$Register;
9128     __ movl(HIGH_FROM_LOW(Rdst), 0);
9129     __ blsil(Rdst, Rsrc);
9130     __ jccb(Assembler::notZero, done);
9131     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9132     __ bind(done);
9133   %}
9134   ins_pipe(ialu_reg);
9135 %}
9136 
9137 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9138   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9139   predicate(UseBMI1Instructions);
9140   effect(KILL cr, TEMP dst);
9141 
9142   ins_cost(125);
9143   format %{ "MOVL   $dst.hi, 0\n\t"
9144             "BLSIL  $dst.lo, $src\n\t"
9145             "JNZ    done\n\t"
9146             "BLSIL  $dst.hi, $src+4\n"
9147             "done:"
9148          %}
9149 
9150   ins_encode %{
9151     Label done;
9152     Register Rdst = $dst$$Register;
9153     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9154 
9155     __ movl(HIGH_FROM_LOW(Rdst), 0);
9156     __ blsil(Rdst, $src$$Address);
9157     __ jccb(Assembler::notZero, done);
9158     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9159     __ bind(done);
9160   %}
9161   ins_pipe(ialu_reg_mem);
9162 %}
9163 
9164 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9165 %{
9166   match(Set dst (XorL (AddL src minus_1) src));
9167   predicate(UseBMI1Instructions);
9168   effect(KILL cr, TEMP dst);
9169 
9170   format %{ "MOVL    $dst.hi, 0\n\t"
9171             "BLSMSKL $dst.lo, $src.lo\n\t"
9172             "JNC     done\n\t"
9173             "BLSMSKL $dst.hi, $src.hi\n"
9174             "done:"
9175          %}
9176 
9177   ins_encode %{
9178     Label done;
9179     Register Rdst = $dst$$Register;
9180     Register Rsrc = $src$$Register;
9181     __ movl(HIGH_FROM_LOW(Rdst), 0);
9182     __ blsmskl(Rdst, Rsrc);
9183     __ jccb(Assembler::carryClear, done);
9184     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9185     __ bind(done);
9186   %}
9187 
9188   ins_pipe(ialu_reg);
9189 %}
9190 
9191 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9192 %{
9193   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9194   predicate(UseBMI1Instructions);
9195   effect(KILL cr, TEMP dst);
9196 
9197   ins_cost(125);
9198   format %{ "MOVL    $dst.hi, 0\n\t"
9199             "BLSMSKL $dst.lo, $src\n\t"
9200             "JNC     done\n\t"
9201             "BLSMSKL $dst.hi, $src+4\n"
9202             "done:"
9203          %}
9204 
9205   ins_encode %{
9206     Label done;
9207     Register Rdst = $dst$$Register;
9208     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9209 
9210     __ movl(HIGH_FROM_LOW(Rdst), 0);
9211     __ blsmskl(Rdst, $src$$Address);
9212     __ jccb(Assembler::carryClear, done);
9213     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9214     __ bind(done);
9215   %}
9216 
9217   ins_pipe(ialu_reg_mem);
9218 %}
9219 
9220 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9221 %{
9222   match(Set dst (AndL (AddL src minus_1) src) );
9223   predicate(UseBMI1Instructions);
9224   effect(KILL cr, TEMP dst);
9225 
9226   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9227             "BLSRL  $dst.lo, $src.lo\n\t"
9228             "JNC    done\n\t"
9229             "BLSRL  $dst.hi, $src.hi\n"
9230             "done:"
9231   %}
9232 
9233   ins_encode %{
9234     Label done;
9235     Register Rdst = $dst$$Register;
9236     Register Rsrc = $src$$Register;
9237     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9238     __ blsrl(Rdst, Rsrc);
9239     __ jccb(Assembler::carryClear, done);
9240     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9241     __ bind(done);
9242   %}
9243 
9244   ins_pipe(ialu_reg);
9245 %}
9246 
9247 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9248 %{
9249   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9250   predicate(UseBMI1Instructions);
9251   effect(KILL cr, TEMP dst);
9252 
9253   ins_cost(125);
9254   format %{ "MOVL   $dst.hi, $src+4\n\t"
9255             "BLSRL  $dst.lo, $src\n\t"
9256             "JNC    done\n\t"
9257             "BLSRL  $dst.hi, $src+4\n"
9258             "done:"
9259   %}
9260 
9261   ins_encode %{
9262     Label done;
9263     Register Rdst = $dst$$Register;
9264     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9265     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9266     __ blsrl(Rdst, $src$$Address);
9267     __ jccb(Assembler::carryClear, done);
9268     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9269     __ bind(done);
9270   %}
9271 
9272   ins_pipe(ialu_reg_mem);
9273 %}
9274 
9275 // Or Long Register with Register
9276 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9277   match(Set dst (OrL dst src));
9278   effect(KILL cr);
9279   format %{ "OR     $dst.lo,$src.lo\n\t"
9280             "OR     $dst.hi,$src.hi" %}
9281   opcode(0x0B,0x0B);
9282   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9283   ins_pipe( ialu_reg_reg_long );
9284 %}
9285 
9286 // Or Long Register with Immediate
9287 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9288   match(Set dst (OrL dst src));
9289   effect(KILL cr);
9290   format %{ "OR     $dst.lo,$src.lo\n\t"
9291             "OR     $dst.hi,$src.hi" %}
9292   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9293   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9294   ins_pipe( ialu_reg_long );
9295 %}
9296 
9297 // Or Long Register with Memory
9298 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9299   match(Set dst (OrL dst (LoadL mem)));
9300   effect(KILL cr);
9301   ins_cost(125);
9302   format %{ "OR     $dst.lo,$mem\n\t"
9303             "OR     $dst.hi,$mem+4" %}
9304   opcode(0x0B,0x0B);
9305   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9306   ins_pipe( ialu_reg_long_mem );
9307 %}
9308 
9309 // Xor Long Register with Register
9310 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9311   match(Set dst (XorL dst src));
9312   effect(KILL cr);
9313   format %{ "XOR    $dst.lo,$src.lo\n\t"
9314             "XOR    $dst.hi,$src.hi" %}
9315   opcode(0x33,0x33);
9316   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9317   ins_pipe( ialu_reg_reg_long );
9318 %}
9319 
9320 // Xor Long Register with Immediate -1
9321 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9322   match(Set dst (XorL dst imm));
9323   format %{ "NOT    $dst.lo\n\t"
9324             "NOT    $dst.hi" %}
9325   ins_encode %{
9326      __ notl($dst$$Register);
9327      __ notl(HIGH_FROM_LOW($dst$$Register));
9328   %}
9329   ins_pipe( ialu_reg_long );
9330 %}
9331 
9332 // Xor Long Register with Immediate
9333 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9334   match(Set dst (XorL dst src));
9335   effect(KILL cr);
9336   format %{ "XOR    $dst.lo,$src.lo\n\t"
9337             "XOR    $dst.hi,$src.hi" %}
9338   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9339   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9340   ins_pipe( ialu_reg_long );
9341 %}
9342 
9343 // Xor Long Register with Memory
9344 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9345   match(Set dst (XorL dst (LoadL mem)));
9346   effect(KILL cr);
9347   ins_cost(125);
9348   format %{ "XOR    $dst.lo,$mem\n\t"
9349             "XOR    $dst.hi,$mem+4" %}
9350   opcode(0x33,0x33);
9351   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9352   ins_pipe( ialu_reg_long_mem );
9353 %}
9354 
9355 // Shift Left Long by 1
9356 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9357   predicate(UseNewLongLShift);
9358   match(Set dst (LShiftL dst cnt));
9359   effect(KILL cr);
9360   ins_cost(100);
9361   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9362             "ADC    $dst.hi,$dst.hi" %}
9363   ins_encode %{
9364     __ addl($dst$$Register,$dst$$Register);
9365     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9366   %}
9367   ins_pipe( ialu_reg_long );
9368 %}
9369 
9370 // Shift Left Long by 2
9371 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9372   predicate(UseNewLongLShift);
9373   match(Set dst (LShiftL dst cnt));
9374   effect(KILL cr);
9375   ins_cost(100);
9376   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9377             "ADC    $dst.hi,$dst.hi\n\t"
9378             "ADD    $dst.lo,$dst.lo\n\t"
9379             "ADC    $dst.hi,$dst.hi" %}
9380   ins_encode %{
9381     __ addl($dst$$Register,$dst$$Register);
9382     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9383     __ addl($dst$$Register,$dst$$Register);
9384     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9385   %}
9386   ins_pipe( ialu_reg_long );
9387 %}
9388 
9389 // Shift Left Long by 3
9390 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9391   predicate(UseNewLongLShift);
9392   match(Set dst (LShiftL dst cnt));
9393   effect(KILL cr);
9394   ins_cost(100);
9395   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9396             "ADC    $dst.hi,$dst.hi\n\t"
9397             "ADD    $dst.lo,$dst.lo\n\t"
9398             "ADC    $dst.hi,$dst.hi\n\t"
9399             "ADD    $dst.lo,$dst.lo\n\t"
9400             "ADC    $dst.hi,$dst.hi" %}
9401   ins_encode %{
9402     __ addl($dst$$Register,$dst$$Register);
9403     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9404     __ addl($dst$$Register,$dst$$Register);
9405     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9406     __ addl($dst$$Register,$dst$$Register);
9407     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9408   %}
9409   ins_pipe( ialu_reg_long );
9410 %}
9411 
9412 // Shift Left Long by 1-31
9413 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9414   match(Set dst (LShiftL dst cnt));
9415   effect(KILL cr);
9416   ins_cost(200);
9417   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9418             "SHL    $dst.lo,$cnt" %}
9419   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9420   ins_encode( move_long_small_shift(dst,cnt) );
9421   ins_pipe( ialu_reg_long );
9422 %}
9423 
9424 // Shift Left Long by 32-63
9425 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9426   match(Set dst (LShiftL dst cnt));
9427   effect(KILL cr);
9428   ins_cost(300);
9429   format %{ "MOV    $dst.hi,$dst.lo\n"
9430           "\tSHL    $dst.hi,$cnt-32\n"
9431           "\tXOR    $dst.lo,$dst.lo" %}
9432   opcode(0xC1, 0x4);  /* C1 /4 ib */
9433   ins_encode( move_long_big_shift_clr(dst,cnt) );
9434   ins_pipe( ialu_reg_long );
9435 %}
9436 
9437 // Shift Left Long by variable
9438 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9439   match(Set dst (LShiftL dst shift));
9440   effect(KILL cr);
9441   ins_cost(500+200);
9442   size(17);
9443   format %{ "TEST   $shift,32\n\t"
9444             "JEQ,s  small\n\t"
9445             "MOV    $dst.hi,$dst.lo\n\t"
9446             "XOR    $dst.lo,$dst.lo\n"
9447     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9448             "SHL    $dst.lo,$shift" %}
9449   ins_encode( shift_left_long( dst, shift ) );
9450   ins_pipe( pipe_slow );
9451 %}
9452 
9453 // Shift Right Long by 1-31
9454 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9455   match(Set dst (URShiftL dst cnt));
9456   effect(KILL cr);
9457   ins_cost(200);
9458   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9459             "SHR    $dst.hi,$cnt" %}
9460   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9461   ins_encode( move_long_small_shift(dst,cnt) );
9462   ins_pipe( ialu_reg_long );
9463 %}
9464 
9465 // Shift Right Long by 32-63
9466 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9467   match(Set dst (URShiftL dst cnt));
9468   effect(KILL cr);
9469   ins_cost(300);
9470   format %{ "MOV    $dst.lo,$dst.hi\n"
9471           "\tSHR    $dst.lo,$cnt-32\n"
9472           "\tXOR    $dst.hi,$dst.hi" %}
9473   opcode(0xC1, 0x5);  /* C1 /5 ib */
9474   ins_encode( move_long_big_shift_clr(dst,cnt) );
9475   ins_pipe( ialu_reg_long );
9476 %}
9477 
9478 // Shift Right Long by variable
9479 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9480   match(Set dst (URShiftL dst shift));
9481   effect(KILL cr);
9482   ins_cost(600);
9483   size(17);
9484   format %{ "TEST   $shift,32\n\t"
9485             "JEQ,s  small\n\t"
9486             "MOV    $dst.lo,$dst.hi\n\t"
9487             "XOR    $dst.hi,$dst.hi\n"
9488     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9489             "SHR    $dst.hi,$shift" %}
9490   ins_encode( shift_right_long( dst, shift ) );
9491   ins_pipe( pipe_slow );
9492 %}
9493 
// Shift Right arithmetic Long by 1-31
9495 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9496   match(Set dst (RShiftL dst cnt));
9497   effect(KILL cr);
9498   ins_cost(200);
9499   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9500             "SAR    $dst.hi,$cnt" %}
9501   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9502   ins_encode( move_long_small_shift(dst,cnt) );
9503   ins_pipe( ialu_reg_long );
9504 %}
9505 
// Shift Right arithmetic Long by 32-63
9507 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9508   match(Set dst (RShiftL dst cnt));
9509   effect(KILL cr);
9510   ins_cost(300);
9511   format %{ "MOV    $dst.lo,$dst.hi\n"
9512           "\tSAR    $dst.lo,$cnt-32\n"
9513           "\tSAR    $dst.hi,31" %}
9514   opcode(0xC1, 0x7);  /* C1 /7 ib */
9515   ins_encode( move_long_big_shift_sign(dst,cnt) );
9516   ins_pipe( ialu_reg_long );
9517 %}
9518 
9519 // Shift Right arithmetic Long by variable
9520 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9521   match(Set dst (RShiftL dst shift));
9522   effect(KILL cr);
9523   ins_cost(600);
9524   size(18);
9525   format %{ "TEST   $shift,32\n\t"
9526             "JEQ,s  small\n\t"
9527             "MOV    $dst.lo,$dst.hi\n\t"
9528             "SAR    $dst.hi,31\n"
9529     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9530             "SAR    $dst.hi,$shift" %}
9531   ins_encode( shift_right_arith_long( dst, shift ) );
9532   ins_pipe( pipe_slow );
9533 %}
9534 
9535 
9536 //----------Double Instructions------------------------------------------------
9537 // Double Math
9538 
9539 // Compare & branch
9540 
// P6 version of double compare, sets condition codes in EFLAGS
9542 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9543   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9544   match(Set cr (CmpD src1 src2));
9545   effect(KILL rax);
9546   ins_cost(150);
9547   format %{ "FLD    $src1\n\t"
9548             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9549             "JNP    exit\n\t"
9550             "MOV    ah,1       // saw a NaN, set CF\n\t"
9551             "SAHF\n"
9552      "exit:\tNOP               // avoid branch to branch" %}
9553   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9554   ins_encode( Push_Reg_DPR(src1),
9555               OpcP, RegOpc(src2),
9556               cmpF_P6_fixup );
9557   ins_pipe( pipe_slow );
9558 %}
9559 
9560 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9561   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9562   match(Set cr (CmpD src1 src2));
9563   ins_cost(150);
9564   format %{ "FLD    $src1\n\t"
9565             "FUCOMIP ST,$src2  // P6 instruction" %}
9566   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9567   ins_encode( Push_Reg_DPR(src1),
9568               OpcP, RegOpc(src2));
9569   ins_pipe( pipe_slow );
9570 %}
9571 
9572 // Compare & branch
9573 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9574   predicate(UseSSE<=1);
9575   match(Set cr (CmpD src1 src2));
9576   effect(KILL rax);
9577   ins_cost(200);
9578   format %{ "FLD    $src1\n\t"
9579             "FCOMp  $src2\n\t"
9580             "FNSTSW AX\n\t"
9581             "TEST   AX,0x400\n\t"
9582             "JZ,s   flags\n\t"
9583             "MOV    AH,1\t# unordered treat as LT\n"
9584     "flags:\tSAHF" %}
9585   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9586   ins_encode( Push_Reg_DPR(src1),
9587               OpcP, RegOpc(src2),
9588               fpu_flags);
9589   ins_pipe( pipe_slow );
9590 %}
9591 
9592 // Compare vs zero into -1,0,1
9593 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9594   predicate(UseSSE<=1);
9595   match(Set dst (CmpD3 src1 zero));
9596   effect(KILL cr, KILL rax);
9597   ins_cost(280);
9598   format %{ "FTSTD  $dst,$src1" %}
9599   opcode(0xE4, 0xD9);
9600   ins_encode( Push_Reg_DPR(src1),
9601               OpcS, OpcP, PopFPU,
9602               CmpF_Result(dst));
9603   ins_pipe( pipe_slow );
9604 %}
9605 
9606 // Compare into -1,0,1
9607 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9608   predicate(UseSSE<=1);
9609   match(Set dst (CmpD3 src1 src2));
9610   effect(KILL cr, KILL rax);
9611   ins_cost(300);
9612   format %{ "FCMPD  $dst,$src1,$src2" %}
9613   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9614   ins_encode( Push_Reg_DPR(src1),
9615               OpcP, RegOpc(src2),
9616               CmpF_Result(dst));
9617   ins_pipe( pipe_slow );
9618 %}
9619 
9620 // double compare and set condition codes in EFLAGS by XMM regs
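// UCOMISD reports an unordered (NaN) compare as ZF=PF=CF=1.  The fixup shown
// in the format below (emit_cmpfp_fixup) is reached only on the parity branch:
// it ANDs the saved EFLAGS with 0xffffff2b, clearing ZF, PF, SF and AF while
// keeping CF, so a NaN operand ends up looking like an ordinary "less than".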
9621 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9622   predicate(UseSSE>=2);
9623   match(Set cr (CmpD src1 src2));
9624   ins_cost(145);
9625   format %{ "UCOMISD $src1,$src2\n\t"
9626             "JNP,s   exit\n\t"
9627             "PUSHF\t# saw NaN, set CF\n\t"
9628             "AND     [rsp], #0xffffff2b\n\t"
9629             "POPF\n"
9630     "exit:" %}
9631   ins_encode %{
9632     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9633     emit_cmpfp_fixup(_masm);
9634   %}
9635   ins_pipe( pipe_slow );
9636 %}
9637 
9638 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9639   predicate(UseSSE>=2);
9640   match(Set cr (CmpD src1 src2));
9641   ins_cost(100);
9642   format %{ "UCOMISD $src1,$src2" %}
9643   ins_encode %{
9644     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9645   %}
9646   ins_pipe( pipe_slow );
9647 %}
9648 
9649 // double compare (register with memory) and set condition codes in EFLAGS by XMM regs
9650 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9651   predicate(UseSSE>=2);
9652   match(Set cr (CmpD src1 (LoadD src2)));
9653   ins_cost(145);
9654   format %{ "UCOMISD $src1,$src2\n\t"
9655             "JNP,s   exit\n\t"
9656             "PUSHF\t# saw NaN, set CF\n\t"
9657             "AND     [rsp], #0xffffff2b\n\t"
9658             "POPF\n"
9659     "exit:" %}
9660   ins_encode %{
9661     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9662     emit_cmpfp_fixup(_masm);
9663   %}
9664   ins_pipe( pipe_slow );
9665 %}
9666 
9667 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9668   predicate(UseSSE>=2);
9669   match(Set cr (CmpD src1 (LoadD src2)));
9670   ins_cost(100);
9671   format %{ "UCOMISD $src1,$src2" %}
9672   ins_encode %{
9673     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9674   %}
9675   ins_pipe( pipe_slow );
9676 %}
9677 
9678 // Compare into -1,0,1 in XMM
9679 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9680   predicate(UseSSE>=2);
9681   match(Set dst (CmpD3 src1 src2));
9682   effect(KILL cr);
9683   ins_cost(255);
9684   format %{ "UCOMISD $src1, $src2\n\t"
9685             "MOV     $dst, #-1\n\t"
9686             "JP,s    done\n\t"
9687             "JB,s    done\n\t"
9688             "SETNE   $dst\n\t"
9689             "MOVZB   $dst, $dst\n"
9690     "done:" %}
9691   ins_encode %{
9692     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9693     emit_cmpfp3(_masm, $dst$$Register);
9694   %}
9695   ins_pipe( pipe_slow );
9696 %}
9697 
9698 // Compare into -1,0,1 in XMM and memory
9699 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9700   predicate(UseSSE>=2);
9701   match(Set dst (CmpD3 src1 (LoadD src2)));
9702   effect(KILL cr);
9703   ins_cost(275);
9704   format %{ "UCOMISD $src1, $src2\n\t"
9705             "MOV     $dst, #-1\n\t"
9706             "JP,s    done\n\t"
9707             "JB,s    done\n\t"
9708             "SETNE   $dst\n\t"
9709             "MOVZB   $dst, $dst\n"
9710     "done:" %}
9711   ins_encode %{
9712     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9713     emit_cmpfp3(_masm, $dst$$Register);
9714   %}
9715   ins_pipe( pipe_slow );
9716 %}
9717 
9718 
9719 instruct subDPR_reg(regDPR dst, regDPR src) %{
9720   predicate (UseSSE <=1);
9721   match(Set dst (SubD dst src));
9722 
9723   format %{ "FLD    $src\n\t"
9724             "DSUBp  $dst,ST" %}
9725   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9726   ins_cost(150);
9727   ins_encode( Push_Reg_DPR(src),
9728               OpcP, RegOpc(dst) );
9729   ins_pipe( fpu_reg_reg );
9730 %}
9731 
9732 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9733   predicate (UseSSE <=1);
9734   match(Set dst (RoundDouble (SubD src1 src2)));
9735   ins_cost(250);
9736 
9737   format %{ "FLD    $src2\n\t"
9738             "DSUB   ST,$src1\n\t"
9739             "FSTP_D $dst\t# D-round" %}
9740   opcode(0xD8, 0x5);
9741   ins_encode( Push_Reg_DPR(src2),
9742               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9743   ins_pipe( fpu_mem_reg_reg );
9744 %}
9745 
9746 
9747 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9748   predicate (UseSSE <=1);
9749   match(Set dst (SubD dst (LoadD src)));
9750   ins_cost(150);
9751 
9752   format %{ "FLD    $src\n\t"
9753             "DSUBp  $dst,ST" %}
9754   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9755   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9756               OpcP, RegOpc(dst) );
9757   ins_pipe( fpu_reg_mem );
9758 %}
9759 
9760 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9761   predicate (UseSSE<=1);
9762   match(Set dst (AbsD src));
9763   ins_cost(100);
9764   format %{ "FABS" %}
9765   opcode(0xE1, 0xD9);
9766   ins_encode( OpcS, OpcP );
9767   ins_pipe( fpu_reg_reg );
9768 %}
9769 
9770 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9771   predicate(UseSSE<=1);
9772   match(Set dst (NegD src));
9773   ins_cost(100);
9774   format %{ "FCHS" %}
9775   opcode(0xE0, 0xD9);
9776   ins_encode( OpcS, OpcP );
9777   ins_pipe( fpu_reg_reg );
9778 %}
9779 
9780 instruct addDPR_reg(regDPR dst, regDPR src) %{
9781   predicate(UseSSE<=1);
9782   match(Set dst (AddD dst src));
9783   format %{ "FLD    $src\n\t"
9784             "DADD   $dst,ST" %}
9785   size(4);
9786   ins_cost(150);
9787   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9788   ins_encode( Push_Reg_DPR(src),
9789               OpcP, RegOpc(dst) );
9790   ins_pipe( fpu_reg_reg );
9791 %}
9792 
9793 
9794 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9795   predicate(UseSSE<=1);
9796   match(Set dst (RoundDouble (AddD src1 src2)));
9797   ins_cost(250);
9798 
9799   format %{ "FLD    $src2\n\t"
9800             "DADD   ST,$src1\n\t"
9801             "FSTP_D $dst\t# D-round" %}
9802   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9803   ins_encode( Push_Reg_DPR(src2),
9804               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9805   ins_pipe( fpu_mem_reg_reg );
9806 %}
9807 
9808 
9809 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9810   predicate(UseSSE<=1);
9811   match(Set dst (AddD dst (LoadD src)));
9812   ins_cost(150);
9813 
9814   format %{ "FLD    $src\n\t"
9815             "DADDp  $dst,ST" %}
9816   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9817   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9818               OpcP, RegOpc(dst) );
9819   ins_pipe( fpu_reg_mem );
9820 %}
9821 
9822 // add-to-memory
9823 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9824   predicate(UseSSE<=1);
9825   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9826   ins_cost(150);
9827 
9828   format %{ "FLD_D  $dst\n\t"
9829             "DADD   ST,$src\n\t"
9830             "FST_D  $dst" %}
9831   opcode(0xDD, 0x0);
9832   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9833               Opcode(0xD8), RegOpc(src),
9834               set_instruction_start,
9835               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9836   ins_pipe( fpu_reg_mem );
9837 %}
9838 
9839 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9840   predicate(UseSSE<=1);
9841   match(Set dst (AddD dst con));
9842   ins_cost(125);
9843   format %{ "FLD1\n\t"
9844             "DADDp  $dst,ST" %}
9845   ins_encode %{
9846     __ fld1();
9847     __ faddp($dst$$reg);
9848   %}
9849   ins_pipe(fpu_reg);
9850 %}
9851 
9852 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9853   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9854   match(Set dst (AddD dst con));
9855   ins_cost(200);
9856   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9857             "DADDp  $dst,ST" %}
9858   ins_encode %{
9859     __ fld_d($constantaddress($con));
9860     __ faddp($dst$$reg);
9861   %}
9862   ins_pipe(fpu_reg_mem);
9863 %}
9864 
9865 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9866   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9867   match(Set dst (RoundDouble (AddD src con)));
9868   ins_cost(200);
9869   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9870             "DADD   ST,$src\n\t"
9871             "FSTP_D $dst\t# D-round" %}
9872   ins_encode %{
9873     __ fld_d($constantaddress($con));
9874     __ fadd($src$$reg);
9875     __ fstp_d(Address(rsp, $dst$$disp));
9876   %}
9877   ins_pipe(fpu_mem_reg_con);
9878 %}
9879 
9880 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9881   predicate(UseSSE<=1);
9882   match(Set dst (MulD dst src));
9883   format %{ "FLD    $src\n\t"
9884             "DMULp  $dst,ST" %}
9885   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9886   ins_cost(150);
9887   ins_encode( Push_Reg_DPR(src),
9888               OpcP, RegOpc(dst) );
9889   ins_pipe( fpu_reg_reg );
9890 %}
9891 
9892 // Strict FP instruction biases argument before multiply then
9893 // biases result to avoid double rounding of subnormals.
9894 //
9895 // scale arg1 by multiplying arg1 by 2^(-15360)
9896 // load arg2
9897 // multiply scaled arg1 by arg2
9898 // rescale product by 2^(15360)
9899 //
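// The bias constant comes from the difference between the 80-bit extended
// exponent range and the IEEE double range: 16383 - 1023 = 15360.  Scaling one
// operand by 2^-15360 makes a product that would be a double subnormal fall
// into the extended-precision subnormal range as well, so it is rounded once
// with the correctly reduced precision; the final multiply by 2^+15360 is an
// exact power-of-two rescale and introduces no second rounding.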
9900 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9901   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9902   match(Set dst (MulD dst src));
9903   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9904 
9905   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9906             "DMULp  $dst,ST\n\t"
9907             "FLD    $src\n\t"
9908             "DMULp  $dst,ST\n\t"
9909             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9910             "DMULp  $dst,ST\n\t" %}
9911   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9912   ins_encode( strictfp_bias1(dst),
9913               Push_Reg_DPR(src),
9914               OpcP, RegOpc(dst),
9915               strictfp_bias2(dst) );
9916   ins_pipe( fpu_reg_reg );
9917 %}
9918 
9919 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9920   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9921   match(Set dst (MulD dst con));
9922   ins_cost(200);
9923   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9924             "DMULp  $dst,ST" %}
9925   ins_encode %{
9926     __ fld_d($constantaddress($con));
9927     __ fmulp($dst$$reg);
9928   %}
9929   ins_pipe(fpu_reg_mem);
9930 %}
9931 
9932 
9933 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9934   predicate( UseSSE<=1 );
9935   match(Set dst (MulD dst (LoadD src)));
9936   ins_cost(200);
9937   format %{ "FLD_D  $src\n\t"
9938             "DMULp  $dst,ST" %}
9939   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9940   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9941               OpcP, RegOpc(dst) );
9942   ins_pipe( fpu_reg_mem );
9943 %}
9944 
9945 //
9946 // Cisc-alternate to reg-reg multiply
9947 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9948   predicate( UseSSE<=1 );
9949   match(Set dst (MulD src (LoadD mem)));
9950   ins_cost(250);
9951   format %{ "FLD_D  $mem\n\t"
9952             "DMUL   ST,$src\n\t"
9953             "FSTP_D $dst" %}
9954   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9955   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9956               OpcReg_FPR(src),
9957               Pop_Reg_DPR(dst) );
9958   ins_pipe( fpu_reg_reg_mem );
9959 %}
9960 
9961 
9962 // MACRO3 -- addDPR a mulDPR
9963 // This instruction is a '2-address' instruction in that the result goes
9964 // back to src2.  This eliminates a move from the macro; possibly the
9965 // register allocator will have to add it back (and maybe not).
9966 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9967   predicate( UseSSE<=1 );
9968   match(Set src2 (AddD (MulD src0 src1) src2));
9969   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9970             "DMUL   ST,$src1\n\t"
9971             "DADDp  $src2,ST" %}
9972   ins_cost(250);
9973   opcode(0xDD); /* LoadD DD /0 */
9974   ins_encode( Push_Reg_FPR(src0),
9975               FMul_ST_reg(src1),
9976               FAddP_reg_ST(src2) );
9977   ins_pipe( fpu_reg_reg_reg );
9978 %}
9979 
9980 
9981 // MACRO3 -- subDPR a mulDPR
9982 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9983   predicate( UseSSE<=1 );
9984   match(Set src2 (SubD (MulD src0 src1) src2));
9985   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9986             "DMUL   ST,$src1\n\t"
9987             "DSUBRp $src2,ST" %}
9988   ins_cost(250);
9989   ins_encode( Push_Reg_FPR(src0),
9990               FMul_ST_reg(src1),
9991               Opcode(0xDE), Opc_plus(0xE0,src2));
9992   ins_pipe( fpu_reg_reg_reg );
9993 %}
9994 
9995 
9996 instruct divDPR_reg(regDPR dst, regDPR src) %{
9997   predicate( UseSSE<=1 );
9998   match(Set dst (DivD dst src));
9999 
10000   format %{ "FLD    $src\n\t"
10001             "FDIVp  $dst,ST" %}
10002   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10003   ins_cost(150);
10004   ins_encode( Push_Reg_DPR(src),
10005               OpcP, RegOpc(dst) );
10006   ins_pipe( fpu_reg_reg );
10007 %}
10008 
10009 // Strict FP instruction biases argument before division then
10010 // biases result, to avoid double rounding of subnormals.
10011 //
10012 // scale dividend by multiplying dividend by 2^(-15360)
10013 // load divisor
10014 // divide scaled dividend by divisor
10015 // rescale quotient by 2^(15360)
10016 //
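// Same bias trick as the strict multiply above (15360 = 16383 - 1023): scaling
// the dividend down lets a quotient that would be a double subnormal round as
// an extended-precision subnormal, and the power-of-two rescale of the result
// is exact.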
10017 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10018   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10019   match(Set dst (DivD dst src));
10020   ins_cost(1);   // Select this instruction for all strict FP double divides
10022 
10023   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10024             "DMULp  $dst,ST\n\t"
10025             "FLD    $src\n\t"
10026             "FDIVp  $dst,ST\n\t"
10027             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10028             "DMULp  $dst,ST\n\t" %}
10029   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10030   ins_encode( strictfp_bias1(dst),
10031               Push_Reg_DPR(src),
10032               OpcP, RegOpc(dst),
10033               strictfp_bias2(dst) );
10034   ins_pipe( fpu_reg_reg );
10035 %}
10036 
10037 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10038   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10039   match(Set dst (RoundDouble (DivD src1 src2)));
10040 
10041   format %{ "FLD    $src1\n\t"
10042             "FDIV   ST,$src2\n\t"
10043             "FSTP_D $dst\t# D-round" %}
10044   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10045   ins_encode( Push_Reg_DPR(src1),
10046               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10047   ins_pipe( fpu_mem_reg_reg );
10048 %}
10049 
10050 
10051 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10052   predicate(UseSSE<=1);
10053   match(Set dst (ModD dst src));
10054   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10055 
10056   format %{ "DMOD   $dst,$src" %}
10057   ins_cost(250);
10058   ins_encode(Push_Reg_Mod_DPR(dst, src),
10059               emitModDPR(),
10060               Push_Result_Mod_DPR(src),
10061               Pop_Reg_DPR(dst));
10062   ins_pipe( pipe_slow );
10063 %}
10064 
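// The FPREM loop in the format below computes the truncated (fmod-style)
// remainder that Java's '%' requires.  FPREM performs a partial reduction and
// sets the C2 status bit while the reduction is incomplete; FNSTSW AX / SAHF
// copy C2 into the parity flag, so "JP loop" repeats until the remainder is
// final.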
10065 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10066   predicate(UseSSE>=2);
10067   match(Set dst (ModD src0 src1));
10068   effect(KILL rax, KILL cr);
10069 
10070   format %{ "SUB    ESP,8\t # DMOD\n"
10071           "\tMOVSD  [ESP+0],$src1\n"
10072           "\tFLD_D  [ESP+0]\n"
10073           "\tMOVSD  [ESP+0],$src0\n"
10074           "\tFLD_D  [ESP+0]\n"
10075      "loop:\tFPREM\n"
10076           "\tFWAIT\n"
10077           "\tFNSTSW AX\n"
10078           "\tSAHF\n"
10079           "\tJP     loop\n"
10080           "\tFSTP_D [ESP+0]\n"
10081           "\tMOVSD  $dst,[ESP+0]\n"
10082           "\tADD    ESP,8\n"
10083           "\tFSTP   ST0\t # Restore FPU Stack"
10084     %}
10085   ins_cost(250);
10086   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10087   ins_pipe( pipe_slow );
10088 %}
10089 
10090 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10091   predicate (UseSSE<=1);
10092   match(Set dst(AtanD dst src));
10093   format %{ "DATA   $dst,$src" %}
10094   opcode(0xD9, 0xF3);
10095   ins_encode( Push_Reg_DPR(src),
10096               OpcP, OpcS, RegOpc(dst) );
10097   ins_pipe( pipe_slow );
10098 %}
10099 
10100 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10101   predicate (UseSSE>=2);
10102   match(Set dst(AtanD dst src));
10103   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10104   format %{ "DATA   $dst,$src" %}
10105   opcode(0xD9, 0xF3);
10106   ins_encode( Push_SrcD(src),
10107               OpcP, OpcS, Push_ResultD(dst) );
10108   ins_pipe( pipe_slow );
10109 %}
10110 
10111 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10112   predicate (UseSSE<=1);
10113   match(Set dst (SqrtD src));
10114   format %{ "DSQRT  $dst,$src" %}
10115   opcode(0xFA, 0xD9);
10116   ins_encode( Push_Reg_DPR(src),
10117               OpcS, OpcP, Pop_Reg_DPR(dst) );
10118   ins_pipe( pipe_slow );
10119 %}
10120 
10121 //-------------Float Instructions-------------------------------
10122 // Float Math
10123 
10124 // Code for float compare:
10125 //     fcompp();
10126 //     fwait(); fnstsw_ax();
10127 //     sahf();
10128 //     movl(dst, unordered_result);
10129 //     jcc(Assembler::parity, exit);
10130 //     movl(dst, less_result);
10131 //     jcc(Assembler::below, exit);
10132 //     movl(dst, equal_result);
10133 //     jcc(Assembler::equal, exit);
10134 //     movl(dst, greater_result);
10135 //   exit:
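// An unordered (NaN) compare sets the parity flag (after SAHF on the FPU
// path), so the unordered_result loaded first survives the early parity exit;
// the ordered outcomes below/equal/greater overwrite it in turn.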
10136 
10137 // P6 version of float compare, sets condition codes in EFLAGS
10138 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10139   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10140   match(Set cr (CmpF src1 src2));
10141   effect(KILL rax);
10142   ins_cost(150);
10143   format %{ "FLD    $src1\n\t"
10144             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10145             "JNP    exit\n\t"
10146             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10147             "SAHF\n"
10148      "exit:\tNOP               // avoid branch to branch" %}
10149   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10150   ins_encode( Push_Reg_DPR(src1),
10151               OpcP, RegOpc(src2),
10152               cmpF_P6_fixup );
10153   ins_pipe( pipe_slow );
10154 %}
10155 
10156 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10157   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10158   match(Set cr (CmpF src1 src2));
10159   ins_cost(100);
10160   format %{ "FLD    $src1\n\t"
10161             "FUCOMIP ST,$src2  // P6 instruction" %}
10162   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10163   ins_encode( Push_Reg_DPR(src1),
10164               OpcP, RegOpc(src2));
10165   ins_pipe( pipe_slow );
10166 %}
10167 
10168 
10169 // Compare & branch
10170 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10171   predicate(UseSSE == 0);
10172   match(Set cr (CmpF src1 src2));
10173   effect(KILL rax);
10174   ins_cost(200);
10175   format %{ "FLD    $src1\n\t"
10176             "FCOMp  $src2\n\t"
10177             "FNSTSW AX\n\t"
10178             "TEST   AX,0x400\n\t"
10179             "JZ,s   flags\n\t"
10180             "MOV    AH,1\t# unordered treat as LT\n"
10181     "flags:\tSAHF" %}
10182   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10183   ins_encode( Push_Reg_DPR(src1),
10184               OpcP, RegOpc(src2),
10185               fpu_flags);
10186   ins_pipe( pipe_slow );
10187 %}
10188 
10189 // Compare vs zero into -1,0,1
10190 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10191   predicate(UseSSE == 0);
10192   match(Set dst (CmpF3 src1 zero));
10193   effect(KILL cr, KILL rax);
10194   ins_cost(280);
10195   format %{ "FTSTF  $dst,$src1" %}
10196   opcode(0xE4, 0xD9);
10197   ins_encode( Push_Reg_DPR(src1),
10198               OpcS, OpcP, PopFPU,
10199               CmpF_Result(dst));
10200   ins_pipe( pipe_slow );
10201 %}
10202 
10203 // Compare into -1,0,1
10204 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10205   predicate(UseSSE == 0);
10206   match(Set dst (CmpF3 src1 src2));
10207   effect(KILL cr, KILL rax);
10208   ins_cost(300);
10209   format %{ "FCMPF  $dst,$src1,$src2" %}
10210   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10211   ins_encode( Push_Reg_DPR(src1),
10212               OpcP, RegOpc(src2),
10213               CmpF_Result(dst));
10214   ins_pipe( pipe_slow );
10215 %}
10216 
10217 // float compare and set condition codes in EFLAGS by XMM regs
10218 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10219   predicate(UseSSE>=1);
10220   match(Set cr (CmpF src1 src2));
10221   ins_cost(145);
10222   format %{ "UCOMISS $src1,$src2\n\t"
10223             "JNP,s   exit\n\t"
10224             "PUSHF\t# saw NaN, set CF\n\t"
10225             "AND     [rsp], #0xffffff2b\n\t"
10226             "POPF\n"
10227     "exit:" %}
10228   ins_encode %{
10229     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10230     emit_cmpfp_fixup(_masm);
10231   %}
10232   ins_pipe( pipe_slow );
10233 %}
10234 
10235 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10236   predicate(UseSSE>=1);
10237   match(Set cr (CmpF src1 src2));
10238   ins_cost(100);
10239   format %{ "UCOMISS $src1,$src2" %}
10240   ins_encode %{
10241     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10242   %}
10243   ins_pipe( pipe_slow );
10244 %}
10245 
10246 // float compare and set condition codes in EFLAGS by XMM regs
10247 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10248   predicate(UseSSE>=1);
10249   match(Set cr (CmpF src1 (LoadF src2)));
10250   ins_cost(165);
10251   format %{ "UCOMISS $src1,$src2\n\t"
10252             "JNP,s   exit\n\t"
10253             "PUSHF\t# saw NaN, set CF\n\t"
10254             "AND     [rsp], #0xffffff2b\n\t"
10255             "POPF\n"
10256     "exit:" %}
10257   ins_encode %{
10258     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10259     emit_cmpfp_fixup(_masm);
10260   %}
10261   ins_pipe( pipe_slow );
10262 %}
10263 
10264 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10265   predicate(UseSSE>=1);
10266   match(Set cr (CmpF src1 (LoadF src2)));
10267   ins_cost(100);
10268   format %{ "UCOMISS $src1,$src2" %}
10269   ins_encode %{
10270     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10271   %}
10272   ins_pipe( pipe_slow );
10273 %}
10274 
10275 // Compare into -1,0,1 in XMM
10276 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10277   predicate(UseSSE>=1);
10278   match(Set dst (CmpF3 src1 src2));
10279   effect(KILL cr);
10280   ins_cost(255);
10281   format %{ "UCOMISS $src1, $src2\n\t"
10282             "MOV     $dst, #-1\n\t"
10283             "JP,s    done\n\t"
10284             "JB,s    done\n\t"
10285             "SETNE   $dst\n\t"
10286             "MOVZB   $dst, $dst\n"
10287     "done:" %}
10288   ins_encode %{
10289     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10290     emit_cmpfp3(_masm, $dst$$Register);
10291   %}
10292   ins_pipe( pipe_slow );
10293 %}
10294 
10295 // Compare into -1,0,1 in XMM and memory
10296 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10297   predicate(UseSSE>=1);
10298   match(Set dst (CmpF3 src1 (LoadF src2)));
10299   effect(KILL cr);
10300   ins_cost(275);
10301   format %{ "UCOMISS $src1, $src2\n\t"
10302             "MOV     $dst, #-1\n\t"
10303             "JP,s    done\n\t"
10304             "JB,s    done\n\t"
10305             "SETNE   $dst\n\t"
10306             "MOVZB   $dst, $dst\n"
10307     "done:" %}
10308   ins_encode %{
10309     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10310     emit_cmpfp3(_masm, $dst$$Register);
10311   %}
10312   ins_pipe( pipe_slow );
10313 %}
10314 
10315 // Spill to obtain 24-bit precision
10316 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10317   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10318   match(Set dst (SubF src1 src2));
10319 
10320   format %{ "FSUB   $dst,$src1 - $src2" %}
10321   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10322   ins_encode( Push_Reg_FPR(src1),
10323               OpcReg_FPR(src2),
10324               Pop_Mem_FPR(dst) );
10325   ins_pipe( fpu_mem_reg_reg );
10326 %}
10327 //
10328 // This instruction does not round to 24-bits
10329 instruct subFPR_reg(regFPR dst, regFPR src) %{
10330   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10331   match(Set dst (SubF dst src));
10332 
10333   format %{ "FSUB   $dst,$src" %}
10334   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10335   ins_encode( Push_Reg_FPR(src),
10336               OpcP, RegOpc(dst) );
10337   ins_pipe( fpu_reg_reg );
10338 %}
10339 
10340 // Spill to obtain 24-bit precision
10341 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10342   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10343   match(Set dst (AddF src1 src2));
10344 
10345   format %{ "FADD   $dst,$src1,$src2" %}
10346   opcode(0xD8, 0x0); /* D8 C0+i */
10347   ins_encode( Push_Reg_FPR(src2),
10348               OpcReg_FPR(src1),
10349               Pop_Mem_FPR(dst) );
10350   ins_pipe( fpu_mem_reg_reg );
10351 %}
10352 //
10353 // This instruction does not round to 24-bits
10354 instruct addFPR_reg(regFPR dst, regFPR src) %{
10355   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10356   match(Set dst (AddF dst src));
10357 
10358   format %{ "FLD    $src\n\t"
10359             "FADDp  $dst,ST" %}
10360   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10361   ins_encode( Push_Reg_FPR(src),
10362               OpcP, RegOpc(dst) );
10363   ins_pipe( fpu_reg_reg );
10364 %}
10365 
10366 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10367   predicate(UseSSE==0);
10368   match(Set dst (AbsF src));
10369   ins_cost(100);
10370   format %{ "FABS" %}
10371   opcode(0xE1, 0xD9);
10372   ins_encode( OpcS, OpcP );
10373   ins_pipe( fpu_reg_reg );
10374 %}
10375 
10376 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10377   predicate(UseSSE==0);
10378   match(Set dst (NegF src));
10379   ins_cost(100);
10380   format %{ "FCHS" %}
10381   opcode(0xE0, 0xD9);
10382   ins_encode( OpcS, OpcP );
10383   ins_pipe( fpu_reg_reg );
10384 %}
10385 
10386 // Cisc-alternate to addFPR_reg
10387 // Spill to obtain 24-bit precision
10388 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10389   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10390   match(Set dst (AddF src1 (LoadF src2)));
10391 
10392   format %{ "FLD    $src2\n\t"
10393             "FADD   ST,$src1\n\t"
10394             "FSTP_S $dst" %}
10395   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10396   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10397               OpcReg_FPR(src1),
10398               Pop_Mem_FPR(dst) );
10399   ins_pipe( fpu_mem_reg_mem );
10400 %}
10401 //
10402 // Cisc-alternate to addFPR_reg
10403 // This instruction does not round to 24-bits
10404 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10405   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10406   match(Set dst (AddF dst (LoadF src)));
10407 
10408   format %{ "FADD   $dst,$src" %}
10409   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10410   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10411               OpcP, RegOpc(dst) );
10412   ins_pipe( fpu_reg_mem );
10413 %}
10414 
10415 // Following two instructions for _222_mpegaudio
10416 // Spill to obtain 24-bit precision
10417 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10418   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10419   match(Set dst (AddF src1 src2));
10420 
10421   format %{ "FADD   $dst,$src1,$src2" %}
10422   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10423   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10424               OpcReg_FPR(src2),
10425               Pop_Mem_FPR(dst) );
10426   ins_pipe( fpu_mem_reg_mem );
10427 %}
10428 
10429 // Cisc-spill variant
10430 // Spill to obtain 24-bit precision
10431 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10432   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10433   match(Set dst (AddF src1 (LoadF src2)));
10434 
10435   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10436   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               set_instruction_start,
10439               OpcP, RMopc_Mem(secondary,src1),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_mem_mem );
10442 %}
10443 
10444 // Spill to obtain 24-bit precision
10445 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10446   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10447   match(Set dst (AddF src1 src2));
10448 
10449   format %{ "FADD   $dst,$src1,$src2" %}
10450   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10451   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10452               set_instruction_start,
10453               OpcP, RMopc_Mem(secondary,src1),
10454               Pop_Mem_FPR(dst) );
10455   ins_pipe( fpu_mem_mem_mem );
10456 %}
10457 
10458 
10459 // Spill to obtain 24-bit precision
10460 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10461   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10462   match(Set dst (AddF src con));
10463   format %{ "FLD    $src\n\t"
10464             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10465             "FSTP_S $dst"  %}
10466   ins_encode %{
10467     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10468     __ fadd_s($constantaddress($con));
10469     __ fstp_s(Address(rsp, $dst$$disp));
10470   %}
10471   ins_pipe(fpu_mem_reg_con);
10472 %}
10473 //
10474 // This instruction does not round to 24-bits
10475 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10476   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10477   match(Set dst (AddF src con));
10478   format %{ "FLD    $src\n\t"
10479             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10480             "FSTP   $dst"  %}
10481   ins_encode %{
10482     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10483     __ fadd_s($constantaddress($con));
10484     __ fstp_d($dst$$reg);
10485   %}
10486   ins_pipe(fpu_reg_reg_con);
10487 %}
10488 
10489 // Spill to obtain 24-bit precision
10490 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10491   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10492   match(Set dst (MulF src1 src2));
10493 
10494   format %{ "FLD    $src1\n\t"
10495             "FMUL   $src2\n\t"
10496             "FSTP_S $dst"  %}
10497   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10498   ins_encode( Push_Reg_FPR(src1),
10499               OpcReg_FPR(src2),
10500               Pop_Mem_FPR(dst) );
10501   ins_pipe( fpu_mem_reg_reg );
10502 %}
10503 //
10504 // This instruction does not round to 24-bits
10505 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set dst (MulF src1 src2));
10508 
10509   format %{ "FLD    $src1\n\t"
10510             "FMUL   $src2\n\t"
10511             "FSTP_S $dst"  %}
10512   opcode(0xD8, 0x1); /* D8 C8+i */
10513   ins_encode( Push_Reg_FPR(src2),
10514               OpcReg_FPR(src1),
10515               Pop_Reg_FPR(dst) );
10516   ins_pipe( fpu_reg_reg_reg );
10517 %}
10518 
10519 
10520 // Spill to obtain 24-bit precision
10521 // Cisc-alternate to reg-reg multiply
10522 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10523   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10524   match(Set dst (MulF src1 (LoadF src2)));
10525 
10526   format %{ "FLD_S  $src2\n\t"
10527             "FMUL   $src1\n\t"
10528             "FSTP_S $dst"  %}
10529   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10530   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10531               OpcReg_FPR(src1),
10532               Pop_Mem_FPR(dst) );
10533   ins_pipe( fpu_mem_reg_mem );
10534 %}
10535 //
10536 // This instruction does not round to 24-bits
10537 // Cisc-alternate to reg-reg multiply
10538 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10539   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10540   match(Set dst (MulF src1 (LoadF src2)));
10541 
10542   format %{ "FMUL   $dst,$src1,$src2" %}
10543   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10544   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10545               OpcReg_FPR(src1),
10546               Pop_Reg_FPR(dst) );
10547   ins_pipe( fpu_reg_reg_mem );
10548 %}
10549 
10550 // Spill to obtain 24-bit precision
10551 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10552   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10553   match(Set dst (MulF src1 src2));
10554 
10555   format %{ "FMUL   $dst,$src1,$src2" %}
10556   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10557   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10558               set_instruction_start,
10559               OpcP, RMopc_Mem(secondary,src1),
10560               Pop_Mem_FPR(dst) );
10561   ins_pipe( fpu_mem_mem_mem );
10562 %}
10563 
10564 // Spill to obtain 24-bit precision
10565 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10566   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10567   match(Set dst (MulF src con));
10568 
10569   format %{ "FLD    $src\n\t"
10570             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10571             "FSTP_S $dst"  %}
10572   ins_encode %{
10573     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10574     __ fmul_s($constantaddress($con));
10575     __ fstp_s(Address(rsp, $dst$$disp));
10576   %}
10577   ins_pipe(fpu_mem_reg_con);
10578 %}
10579 //
10580 // This instruction does not round to 24-bits
10581 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10582   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10583   match(Set dst (MulF src con));
10584 
10585   format %{ "FLD    $src\n\t"
10586             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10587             "FSTP   $dst"  %}
10588   ins_encode %{
10589     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10590     __ fmul_s($constantaddress($con));
10591     __ fstp_d($dst$$reg);
10592   %}
10593   ins_pipe(fpu_reg_reg_con);
10594 %}
10595 
10596 
10597 //
10598 // MACRO1 -- subsume unshared load into mulFPR
10599 // This instruction does not round to 24-bits
10600 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10601   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10602   match(Set dst (MulF (LoadF mem1) src));
10603 
10604   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10605             "FMUL   ST,$src\n\t"
10606             "FSTP   $dst" %}
10607   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10608   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10609               OpcReg_FPR(src),
10610               Pop_Reg_FPR(dst) );
10611   ins_pipe( fpu_reg_reg_mem );
10612 %}
10613 //
10614 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10615 // This instruction does not round to 24-bits
10616 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10617   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10618   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10619   ins_cost(95);
10620 
10621   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10622             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10623             "FADD   ST,$src2\n\t"
10624             "FSTP   $dst" %}
10625   opcode(0xD9); /* LoadF D9 /0 */
10626   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10627               FMul_ST_reg(src1),
10628               FAdd_ST_reg(src2),
10629               Pop_Reg_FPR(dst) );
10630   ins_pipe( fpu_reg_mem_reg_reg );
10631 %}
10632 
10633 // MACRO3 -- addFPR a mulFPR
10634 // This instruction does not round to 24-bits.  It is a '2-address'
10635 // instruction in that the result goes back to src2.  This eliminates
10636 // a move from the macro; possibly the register allocator will have
10637 // to add it back (and maybe not).
10638 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10639   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10640   match(Set src2 (AddF (MulF src0 src1) src2));
10641 
10642   format %{ "FLD    $src0     ===MACRO3===\n\t"
10643             "FMUL   ST,$src1\n\t"
10644             "FADDP  $src2,ST" %}
10645   opcode(0xD9); /* LoadF D9 /0 */
10646   ins_encode( Push_Reg_FPR(src0),
10647               FMul_ST_reg(src1),
10648               FAddP_reg_ST(src2) );
10649   ins_pipe( fpu_reg_reg_reg );
10650 %}
10651 
10652 // MACRO4 -- divFPR subFPR
10653 // This instruction does not round to 24-bits
10654 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10655   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10656   match(Set dst (DivF (SubF src2 src1) src3));
10657 
10658   format %{ "FLD    $src2   ===MACRO4===\n\t"
10659             "FSUB   ST,$src1\n\t"
10660             "FDIV   ST,$src3\n\t"
10661             "FSTP  $dst" %}
10662   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10663   ins_encode( Push_Reg_FPR(src2),
10664               subFPR_divFPR_encode(src1,src3),
10665               Pop_Reg_FPR(dst) );
10666   ins_pipe( fpu_reg_reg_reg_reg );
10667 %}
10668 
10669 // Spill to obtain 24-bit precision
10670 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10671   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10672   match(Set dst (DivF src1 src2));
10673 
10674   format %{ "FDIV   $dst,$src1,$src2" %}
10675   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10676   ins_encode( Push_Reg_FPR(src1),
10677               OpcReg_FPR(src2),
10678               Pop_Mem_FPR(dst) );
10679   ins_pipe( fpu_mem_reg_reg );
10680 %}
10681 //
10682 // This instruction does not round to 24-bits
10683 instruct divFPR_reg(regFPR dst, regFPR src) %{
10684   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10685   match(Set dst (DivF dst src));
10686 
10687   format %{ "FDIV   $dst,$src" %}
10688   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10689   ins_encode( Push_Reg_FPR(src),
10690               OpcP, RegOpc(dst) );
10691   ins_pipe( fpu_reg_reg );
10692 %}
10693 
10694 
10695 // Spill to obtain 24-bit precision
10696 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10697   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10698   match(Set dst (ModF src1 src2));
10699   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10700 
10701   format %{ "FMOD   $dst,$src1,$src2" %}
10702   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10703               emitModDPR(),
10704               Push_Result_Mod_DPR(src2),
10705               Pop_Mem_FPR(dst));
10706   ins_pipe( pipe_slow );
10707 %}
10708 //
10709 // This instruction does not round to 24-bits
10710 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10711   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10712   match(Set dst (ModF dst src));
10713   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10714 
10715   format %{ "FMOD   $dst,$src" %}
10716   ins_encode(Push_Reg_Mod_DPR(dst, src),
10717               emitModDPR(),
10718               Push_Result_Mod_DPR(src),
10719               Pop_Reg_FPR(dst));
10720   ins_pipe( pipe_slow );
10721 %}
10722 
10723 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10724   predicate(UseSSE>=1);
10725   match(Set dst (ModF src0 src1));
10726   effect(KILL rax, KILL cr);
10727   format %{ "SUB    ESP,4\t # FMOD\n"
10728           "\tMOVSS  [ESP+0],$src1\n"
10729           "\tFLD_S  [ESP+0]\n"
10730           "\tMOVSS  [ESP+0],$src0\n"
10731           "\tFLD_S  [ESP+0]\n"
10732      "loop:\tFPREM\n"
10733           "\tFWAIT\n"
10734           "\tFNSTSW AX\n"
10735           "\tSAHF\n"
10736           "\tJP     loop\n"
10737           "\tFSTP_S [ESP+0]\n"
10738           "\tMOVSS  $dst,[ESP+0]\n"
10739           "\tADD    ESP,4\n"
10740           "\tFSTP   ST0\t # Restore FPU Stack"
10741     %}
10742   ins_cost(250);
10743   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10744   ins_pipe( pipe_slow );
10745 %}
10746 
10747 
10748 //----------Arithmetic Conversion Instructions---------------------------------
10749 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10750 
10751 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10752   predicate(UseSSE==0);
10753   match(Set dst (RoundFloat src));
10754   ins_cost(125);
10755   format %{ "FST_S  $dst,$src\t# F-round" %}
10756   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10757   ins_pipe( fpu_mem_reg );
10758 %}
10759 
10760 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10761   predicate(UseSSE<=1);
10762   match(Set dst (RoundDouble src));
10763   ins_cost(125);
10764   format %{ "FST_D  $dst,$src\t# D-round" %}
10765   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10766   ins_pipe( fpu_mem_reg );
10767 %}
10768 
10769 // Force rounding to 24-bit precision and 8-bit exponent
10770 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10771   predicate(UseSSE==0);
10772   match(Set dst (ConvD2F src));
10773   format %{ "FST_S  $dst,$src\t# F-round" %}
10774   expand %{
10775     roundFloat_mem_reg(dst,src);
10776   %}
10777 %}
10778 
10779 // Force rounding to 24-bit precision and 8-bit exponent
10780 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10781   predicate(UseSSE==1);
10782   match(Set dst (ConvD2F src));
10783   effect( KILL cr );
10784   format %{ "SUB    ESP,4\n\t"
10785             "FST_S  [ESP],$src\t# F-round\n\t"
10786             "MOVSS  $dst,[ESP]\n\t"
10787             "ADD ESP,4" %}
10788   ins_encode %{
10789     __ subptr(rsp, 4);
10790     if ($src$$reg != FPR1L_enc) {
10791       __ fld_s($src$$reg-1);
10792       __ fstp_s(Address(rsp, 0));
10793     } else {
10794       __ fst_s(Address(rsp, 0));
10795     }
10796     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10797     __ addptr(rsp, 4);
10798   %}
10799   ins_pipe( pipe_slow );
10800 %}
10801 
10802 // Force rounding double precision to single precision
10803 instruct convD2F_reg(regF dst, regD src) %{
10804   predicate(UseSSE>=2);
10805   match(Set dst (ConvD2F src));
10806   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10807   ins_encode %{
10808     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10809   %}
10810   ins_pipe( pipe_slow );
10811 %}
10812 
10813 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10814   predicate(UseSSE==0);
10815   match(Set dst (ConvF2D src));
10816   format %{ "FST_S  $dst,$src\t# D-round" %}
10817   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10818   ins_pipe( fpu_reg_reg );
10819 %}
10820 
10821 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10822   predicate(UseSSE==1);
10823   match(Set dst (ConvF2D src));
10824   format %{ "FST_D  $dst,$src\t# D-round" %}
10825   expand %{
10826     roundDouble_mem_reg(dst,src);
10827   %}
10828 %}
10829 
10830 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10831   predicate(UseSSE==1);
10832   match(Set dst (ConvF2D src));
10833   effect( KILL cr );
10834   format %{ "SUB    ESP,4\n\t"
10835             "MOVSS  [ESP] $src\n\t"
10836             "FLD_S  [ESP]\n\t"
10837             "ADD    ESP,4\n\t"
10838             "FSTP   $dst\t# D-round" %}
10839   ins_encode %{
10840     __ subptr(rsp, 4);
10841     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10842     __ fld_s(Address(rsp, 0));
10843     __ addptr(rsp, 4);
10844     __ fstp_d($dst$$reg);
10845   %}
10846   ins_pipe( pipe_slow );
10847 %}
10848 
10849 instruct convF2D_reg(regD dst, regF src) %{
10850   predicate(UseSSE>=2);
10851   match(Set dst (ConvF2D src));
10852   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10853   ins_encode %{
10854     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10855   %}
10856   ins_pipe( pipe_slow );
10857 %}
10858 
10859 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
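// Both the x87 path (FIST) and the SSE path (CVTTSD2SI) produce the "integer
// indefinite" value 0x80000000 for a NaN or out-of-range input.  The CMP/JNE
// on that sentinel routes such inputs (and a genuine Integer.MIN_VALUE, which
// is simply recomputed) to the d2i_wrapper stub, which applies the Java
// narrowing rules: NaN -> 0, overflow saturates to MIN_VALUE/MAX_VALUE.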
10860 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10861   predicate(UseSSE<=1);
10862   match(Set dst (ConvD2I src));
10863   effect( KILL tmp, KILL cr );
10864   format %{ "FLD    $src\t# Convert double to int \n\t"
10865             "FLDCW  trunc mode\n\t"
10866             "SUB    ESP,4\n\t"
10867             "FISTp  [ESP + #0]\n\t"
10868             "FLDCW  std/24-bit mode\n\t"
10869             "POP    EAX\n\t"
10870             "CMP    EAX,0x80000000\n\t"
10871             "JNE,s  fast\n\t"
10872             "FLD_D  $src\n\t"
10873             "CALL   d2i_wrapper\n"
10874       "fast:" %}
10875   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10876   ins_pipe( pipe_slow );
10877 %}
10878 
10879 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10880 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10881   predicate(UseSSE>=2);
10882   match(Set dst (ConvD2I src));
10883   effect( KILL tmp, KILL cr );
10884   format %{ "CVTTSD2SI $dst, $src\n\t"
10885             "CMP    $dst,0x80000000\n\t"
10886             "JNE,s  fast\n\t"
10887             "SUB    ESP, 8\n\t"
10888             "MOVSD  [ESP], $src\n\t"
10889             "FLD_D  [ESP]\n\t"
10890             "ADD    ESP, 8\n\t"
10891             "CALL   d2i_wrapper\n"
10892       "fast:" %}
10893   ins_encode %{
10894     Label fast;
10895     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10896     __ cmpl($dst$$Register, 0x80000000);
10897     __ jccb(Assembler::notEqual, fast);
10898     __ subptr(rsp, 8);
10899     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10900     __ fld_d(Address(rsp, 0));
10901     __ addptr(rsp, 8);
10902     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10903     __ bind(fast);
10904   %}
10905   ins_pipe( pipe_slow );
10906 %}
10907 
10908 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10909   predicate(UseSSE<=1);
10910   match(Set dst (ConvD2L src));
10911   effect( KILL cr );
10912   format %{ "FLD    $src\t# Convert double to long\n\t"
10913             "FLDCW  trunc mode\n\t"
10914             "SUB    ESP,8\n\t"
10915             "FISTp  [ESP + #0]\n\t"
10916             "FLDCW  std/24-bit mode\n\t"
10917             "POP    EAX\n\t"
10918             "POP    EDX\n\t"
10919             "CMP    EDX,0x80000000\n\t"
10920             "JNE,s  fast\n\t"
10921             "TEST   EAX,EAX\n\t"
10922             "JNE,s  fast\n\t"
10923             "FLD    $src\n\t"
10924             "CALL   d2l_wrapper\n"
10925       "fast:" %}
10926   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10927   ins_pipe( pipe_slow );
10928 %}
10929 
10930 // XMM lacks a float/double->long conversion, so use the old FPU stack.
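// The 64-bit FISTp result is popped as two 32-bit halves, low into EAX and
// high into EDX; only the long "integer indefinite" pattern 0x8000000000000000
// (EDX==0x80000000 with EAX==0) takes the slow call to d2l_wrapper, which
// handles NaN and out-of-range inputs per the Java rules.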
10931 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10932   predicate (UseSSE>=2);
10933   match(Set dst (ConvD2L src));
10934   effect( KILL cr );
10935   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10936             "MOVSD  [ESP],$src\n\t"
10937             "FLD_D  [ESP]\n\t"
10938             "FLDCW  trunc mode\n\t"
10939             "FISTp  [ESP + #0]\n\t"
10940             "FLDCW  std/24-bit mode\n\t"
10941             "POP    EAX\n\t"
10942             "POP    EDX\n\t"
10943             "CMP    EDX,0x80000000\n\t"
10944             "JNE,s  fast\n\t"
10945             "TEST   EAX,EAX\n\t"
10946             "JNE,s  fast\n\t"
10947             "SUB    ESP,8\n\t"
10948             "MOVSD  [ESP],$src\n\t"
10949             "FLD_D  [ESP]\n\t"
10950             "ADD    ESP,8\n\t"
10951             "CALL   d2l_wrapper\n"
10952       "fast:" %}
10953   ins_encode %{
10954     Label fast;
10955     __ subptr(rsp, 8);
10956     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10957     __ fld_d(Address(rsp, 0));
10958     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10959     __ fistp_d(Address(rsp, 0));
10960     // Restore the rounding mode, mask the exception
10961     if (Compile::current()->in_24_bit_fp_mode()) {
10962       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10963     } else {
10964       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10965     }
10966     // Load the converted long, adjust CPU stack
10967     __ pop(rax);
10968     __ pop(rdx);
10969     __ cmpl(rdx, 0x80000000);
10970     __ jccb(Assembler::notEqual, fast);
10971     __ testl(rax, rax);
10972     __ jccb(Assembler::notEqual, fast);
10973     __ subptr(rsp, 8);
10974     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10975     __ fld_d(Address(rsp, 0));
10976     __ addptr(rsp, 8);
10977     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10978     __ bind(fast);
10979   %}
10980   ins_pipe( pipe_slow );
10981 %}
10982 
10983 // Convert a float to an int.  Java semantics require we do complex
10984 // manipulations in the corner cases.  So we set the rounding mode to
10985 // 'zero', store the darned float down as an int, and reset the
10986 // rounding mode to 'nearest'.  The hardware stores a flag value down
10987 // if we overflow or convert a NaN; we check for this and go the slow
10988 // path if needed.
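// For reference, the results the Java narrowing conversion (JLS 5.1.3)
// requires the slow path to reproduce:
//   (int)Float.NaN  == 0
//   (int)  1e10f    == Integer.MAX_VALUE  (0x7fffffff)
//   (int) -1e10f    == Integer.MIN_VALUE  (0x80000000)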
10989 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10990   predicate(UseSSE==0);
10991   match(Set dst (ConvF2I src));
10992   effect( KILL tmp, KILL cr );
10993   format %{ "FLD    $src\t# Convert float to int \n\t"
10994             "FLDCW  trunc mode\n\t"
10995             "SUB    ESP,4\n\t"
10996             "FISTp  [ESP + #0]\n\t"
10997             "FLDCW  std/24-bit mode\n\t"
10998             "POP    EAX\n\t"
10999             "CMP    EAX,0x80000000\n\t"
11000             "JNE,s  fast\n\t"
11001             "FLD    $src\n\t"
11002             "CALL   d2i_wrapper\n"
11003       "fast:" %}
11004   // DPR2I_encoding works for FPR2I
11005   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11006   ins_pipe( pipe_slow );
11007 %}
11008 
11009 // Convert a float in xmm to an int reg.
11010 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11011   predicate(UseSSE>=1);
11012   match(Set dst (ConvF2I src));
11013   effect( KILL tmp, KILL cr );
11014   format %{ "CVTTSS2SI $dst, $src\n\t"
11015             "CMP    $dst,0x80000000\n\t"
11016             "JNE,s  fast\n\t"
11017             "SUB    ESP, 4\n\t"
11018             "MOVSS  [ESP], $src\n\t"
11019             "FLD    [ESP]\n\t"
11020             "ADD    ESP, 4\n\t"
11021             "CALL   d2i_wrapper\n"
11022       "fast:" %}
11023   ins_encode %{
11024     Label fast;
11025     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11026     __ cmpl($dst$$Register, 0x80000000);
11027     __ jccb(Assembler::notEqual, fast);
11028     __ subptr(rsp, 4);
11029     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11030     __ fld_s(Address(rsp, 0));
11031     __ addptr(rsp, 4);
11032     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11033     __ bind(fast);
11034   %}
11035   ins_pipe( pipe_slow );
11036 %}
11037 
11038 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11039   predicate(UseSSE==0);
11040   match(Set dst (ConvF2L src));
11041   effect( KILL cr );
11042   format %{ "FLD    $src\t# Convert float to long\n\t"
11043             "FLDCW  trunc mode\n\t"
11044             "SUB    ESP,8\n\t"
11045             "FISTp  [ESP + #0]\n\t"
11046             "FLDCW  std/24-bit mode\n\t"
11047             "POP    EAX\n\t"
11048             "POP    EDX\n\t"
11049             "CMP    EDX,0x80000000\n\t"
11050             "JNE,s  fast\n\t"
11051             "TEST   EAX,EAX\n\t"
11052             "JNE,s  fast\n\t"
11053             "FLD    $src\n\t"
11054             "CALL   d2l_wrapper\n"
11055       "fast:" %}
11056   // DPR2L_encoding works for FPR2L
11057   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11058   ins_pipe( pipe_slow );
11059 %}
11060 
11061 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11062 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11063   predicate (UseSSE>=1);
11064   match(Set dst (ConvF2L src));
11065   effect( KILL cr );
11066   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11067             "MOVSS  [ESP],$src\n\t"
11068             "FLD_S  [ESP]\n\t"
11069             "FLDCW  trunc mode\n\t"
11070             "FISTp  [ESP + #0]\n\t"
11071             "FLDCW  std/24-bit mode\n\t"
11072             "POP    EAX\n\t"
11073             "POP    EDX\n\t"
11074             "CMP    EDX,0x80000000\n\t"
11075             "JNE,s  fast\n\t"
11076             "TEST   EAX,EAX\n\t"
11077             "JNE,s  fast\n\t"
11078             "SUB    ESP,4\t# Convert float to long\n\t"
11079             "MOVSS  [ESP],$src\n\t"
11080             "FLD_S  [ESP]\n\t"
11081             "ADD    ESP,4\n\t"
11082             "CALL   d2l_wrapper\n"
11083       "fast:" %}
11084   ins_encode %{
11085     Label fast;
11086     __ subptr(rsp, 8);
11087     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11088     __ fld_s(Address(rsp, 0));
11089     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11090     __ fistp_d(Address(rsp, 0));
11091     // Restore the rounding mode, mask the exception
11092     if (Compile::current()->in_24_bit_fp_mode()) {
11093       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11094     } else {
11095       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11096     }
11097     // Load the converted long, adjust CPU stack
11098     __ pop(rax);
11099     __ pop(rdx);
11100     __ cmpl(rdx, 0x80000000);
11101     __ jccb(Assembler::notEqual, fast);
11102     __ testl(rax, rax);
11103     __ jccb(Assembler::notEqual, fast);
11104     __ subptr(rsp, 4);
11105     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11106     __ fld_s(Address(rsp, 0));
11107     __ addptr(rsp, 4);
11108     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11109     __ bind(fast);
11110   %}
11111   ins_pipe( pipe_slow );
11112 %}
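
// Note that the fast/slow decision above only re-runs the conversion when the
// stored result is exactly 0x80000000:00000000 in EDX:EAX, i.e. min_jlong, the
// value the FPU produces on overflow or NaN. Roughly (illustrative names):
//
//   jlong conv_f2l(float f) {
//     jlong r = fistp_trunc(f);       // FLDCW trunc, FISTP, FLDCW std
//     if (r != min_jlong) return r;   // any other bit pattern is already correct
//     return d2l_wrapper(f);          // maybe overflow/NaN: slow path
//   }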
11113 
11114 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11115   predicate( UseSSE<=1 );
11116   match(Set dst (ConvI2D src));
11117   format %{ "FILD   $src\n\t"
11118             "FSTP   $dst" %}
11119   opcode(0xDB, 0x0);  /* DB /0 */
11120   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11121   ins_pipe( fpu_reg_mem );
11122 %}
11123 
11124 instruct convI2D_reg(regD dst, rRegI src) %{
11125   predicate( UseSSE>=2 && !UseXmmI2D );
11126   match(Set dst (ConvI2D src));
11127   format %{ "CVTSI2SD $dst,$src" %}
11128   ins_encode %{
11129     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11130   %}
11131   ins_pipe( pipe_slow );
11132 %}
11133 
11134 instruct convI2D_mem(regD dst, memory mem) %{
11135   predicate( UseSSE>=2 );
11136   match(Set dst (ConvI2D (LoadI mem)));
11137   format %{ "CVTSI2SD $dst,$mem" %}
11138   ins_encode %{
11139     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11140   %}
11141   ins_pipe( pipe_slow );
11142 %}
11143 
11144 instruct convXI2D_reg(regD dst, rRegI src)
11145 %{
11146   predicate( UseSSE>=2 && UseXmmI2D );
11147   match(Set dst (ConvI2D src));
11148 
11149   format %{ "MOVD  $dst,$src\n\t"
11150             "CVTDQ2PD $dst,$dst\t# i2d" %}
11151   ins_encode %{
11152     __ movdl($dst$$XMMRegister, $src$$Register);
11153     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11154   %}
11155   ins_pipe(pipe_slow); // XXX
11156 %}
11157 
11158 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11159   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11160   match(Set dst (ConvI2D (LoadI mem)));
11161   format %{ "FILD   $mem\n\t"
11162             "FSTP   $dst" %}
11163   opcode(0xDB);      /* DB /0 */
11164   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11165               Pop_Reg_DPR(dst));
11166   ins_pipe( fpu_reg_mem );
11167 %}
11168 
11169 // Convert a byte to a float; no rounding step needed.
11170 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11171   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11172   match(Set dst (ConvI2F src));
11173   format %{ "FILD   $src\n\t"
11174             "FSTP   $dst" %}
11175 
11176   opcode(0xDB, 0x0);  /* DB /0 */
11177   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11178   ins_pipe( fpu_reg_mem );
11179 %}
11180 
11181 // In 24-bit mode, force exponent rounding by storing back out
11182 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11183   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11184   match(Set dst (ConvI2F src));
11185   ins_cost(200);
11186   format %{ "FILD   $src\n\t"
11187             "FSTP_S $dst" %}
11188   opcode(0xDB, 0x0);  /* DB /0 */
11189   ins_encode( Push_Mem_I(src),
11190               Pop_Mem_FPR(dst));
11191   ins_pipe( fpu_mem_mem );
11192 %}
11193 
11194 // In 24-bit mode, force exponent rounding by storing back out
11195 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11196   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11197   match(Set dst (ConvI2F (LoadI mem)));
11198   ins_cost(200);
11199   format %{ "FILD   $mem\n\t"
11200             "FSTP_S $dst" %}
11201   opcode(0xDB);  /* DB /0 */
11202   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11203               Pop_Mem_FPR(dst));
11204   ins_pipe( fpu_mem_mem );
11205 %}
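
// Both 24-bit ConvI2F variants above rely on the same trick: FILD loads the
// integer exactly onto the x87 stack, and the FSTP_S to a 32-bit slot is what
// actually rounds it to genuine single precision. Roughly:
//
//   float conv_i2f(jint i) {
//     return (float) i;   // FILD src, FSTP_S dst; dst is a stackSlotF
//   }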
11206 
11207 // This instruction does not round to 24-bits
11208 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11209   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11210   match(Set dst (ConvI2F src));
11211   format %{ "FILD   $src\n\t"
11212             "FSTP   $dst" %}
11213   opcode(0xDB, 0x0);  /* DB /0 */
11214   ins_encode( Push_Mem_I(src),
11215               Pop_Reg_FPR(dst));
11216   ins_pipe( fpu_reg_mem );
11217 %}
11218 
11219 // This instruction does not round to 24-bits
11220 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11221   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11222   match(Set dst (ConvI2F (LoadI mem)));
11223   format %{ "FILD   $mem\n\t"
11224             "FSTP   $dst" %}
11225   opcode(0xDB);      /* DB /0 */
11226   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11227               Pop_Reg_FPR(dst));
11228   ins_pipe( fpu_reg_mem );
11229 %}
11230 
11231 // Convert an int to a float in xmm; no rounding step needed.
11232 instruct convI2F_reg(regF dst, rRegI src) %{
11233   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11234   match(Set dst (ConvI2F src));
11235   format %{ "CVTSI2SS $dst, $src" %}
11236   ins_encode %{
11237     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11238   %}
11239   ins_pipe( pipe_slow );
11240 %}
11241 
11242 instruct convXI2F_reg(regF dst, rRegI src)
11243 %{
11244   predicate( UseSSE>=2 && UseXmmI2F );
11245   match(Set dst (ConvI2F src));
11246 
11247   format %{ "MOVD  $dst,$src\n\t"
11248             "CVTDQ2PS $dst,$dst\t# i2f" %}
11249   ins_encode %{
11250     __ movdl($dst$$XMMRegister, $src$$Register);
11251     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11252   %}
11253   ins_pipe(pipe_slow); // XXX
11254 %}
11255 
11256 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11257   match(Set dst (ConvI2L src));
11258   effect(KILL cr);
11259   ins_cost(375);
11260   format %{ "MOV    $dst.lo,$src\n\t"
11261             "MOV    $dst.hi,$src\n\t"
11262             "SAR    $dst.hi,31" %}
11263   ins_encode(convert_int_long(dst,src));
11264   ins_pipe( ialu_reg_reg_long );
11265 %}
11266 
11267 // Zero-extend convert int to long
11268 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11269   match(Set dst (AndL (ConvI2L src) mask) );
11270   effect( KILL flags );
11271   ins_cost(250);
11272   format %{ "MOV    $dst.lo,$src\n\t"
11273             "XOR    $dst.hi,$dst.hi" %}
11274   opcode(0x33); // XOR
11275   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11276   ins_pipe( ialu_reg_reg_long );
11277 %}
11278 
11279 // Zero-extend long
11280 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11281   match(Set dst (AndL src mask) );
11282   effect( KILL flags );
11283   ins_cost(250);
11284   format %{ "MOV    $dst.lo,$src.lo\n\t"
11285             "XOR    $dst.hi,$dst.hi" %}
11286   opcode(0x33); // XOR
11287   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11288   ins_pipe( ialu_reg_reg_long );
11289 %}
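
// The two zero-extension forms above match the idiom of masking with a 32-bit
// all-ones constant and implement it without any AND: the low word is a plain
// copy and the high word is cleared with XOR. Sketch of the matched patterns:
//
//   jlong zext_int (jint x)  { return ((jlong) x) & 0xFFFFFFFF; }  // convI2L_reg_zex
//   jlong zext_long(jlong x) { return x & 0xFFFFFFFF; }            // zerox_long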
11290 
11291 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11292   predicate (UseSSE<=1);
11293   match(Set dst (ConvL2D src));
11294   effect( KILL cr );
11295   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11296             "PUSH   $src.lo\n\t"
11297             "FILD   ST,[ESP + #0]\n\t"
11298             "ADD    ESP,8\n\t"
11299             "FSTP_D $dst\t# D-round" %}
11300   opcode(0xDF, 0x5);  /* DF /5 */
11301   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11302   ins_pipe( pipe_slow );
11303 %}
11304 
11305 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11306   predicate (UseSSE>=2);
11307   match(Set dst (ConvL2D src));
11308   effect( KILL cr );
11309   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11310             "PUSH   $src.lo\n\t"
11311             "FILD_D [ESP]\n\t"
11312             "FSTP_D [ESP]\n\t"
11313             "MOVSD  $dst,[ESP]\n\t"
11314             "ADD    ESP,8" %}
11315   opcode(0xDF, 0x5);  /* DF /5 */
11316   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11317   ins_pipe( pipe_slow );
11318 %}
11319 
11320 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11321   predicate (UseSSE>=1);
11322   match(Set dst (ConvL2F src));
11323   effect( KILL cr );
11324   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11325             "PUSH   $src.lo\n\t"
11326             "FILD_D [ESP]\n\t"
11327             "FSTP_S [ESP]\n\t"
11328             "MOVSS  $dst,[ESP]\n\t"
11329             "ADD    ESP,8" %}
11330   opcode(0xDF, 0x5);  /* DF /5 */
11331   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11332   ins_pipe( pipe_slow );
11333 %}
11334 
11335 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11336   match(Set dst (ConvL2F src));
11337   effect( KILL cr );
11338   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11339             "PUSH   $src.lo\n\t"
11340             "FILD   ST,[ESP + #0]\n\t"
11341             "ADD    ESP,8\n\t"
11342             "FSTP_S $dst\t# F-round" %}
11343   opcode(0xDF, 0x5);  /* DF /5 */
11344   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11345   ins_pipe( pipe_slow );
11346 %}
11347 
11348 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11349   match(Set dst (ConvL2I src));
11350   effect( DEF dst, USE src );
11351   format %{ "MOV    $dst,$src.lo" %}
11352   ins_encode(enc_CopyL_Lo(dst,src));
11353   ins_pipe( ialu_reg_reg );
11354 %}
11355 
11356 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11357   match(Set dst (MoveF2I src));
11358   effect( DEF dst, USE src );
11359   ins_cost(100);
11360   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11361   ins_encode %{
11362     __ movl($dst$$Register, Address(rsp, $src$$disp));
11363   %}
11364   ins_pipe( ialu_reg_mem );
11365 %}
11366 
11367 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11368   predicate(UseSSE==0);
11369   match(Set dst (MoveF2I src));
11370   effect( DEF dst, USE src );
11371 
11372   ins_cost(125);
11373   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11374   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11375   ins_pipe( fpu_mem_reg );
11376 %}
11377 
11378 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11379   predicate(UseSSE>=1);
11380   match(Set dst (MoveF2I src));
11381   effect( DEF dst, USE src );
11382 
11383   ins_cost(95);
11384   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11385   ins_encode %{
11386     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11387   %}
11388   ins_pipe( pipe_slow );
11389 %}
11390 
11391 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveF2I src));
11394   effect( DEF dst, USE src );
11395   ins_cost(85);
11396   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11397   ins_encode %{
11398     __ movdl($dst$$Register, $src$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11404   match(Set dst (MoveI2F src));
11405   effect( DEF dst, USE src );
11406 
11407   ins_cost(100);
11408   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11409   ins_encode %{
11410     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11411   %}
11412   ins_pipe( ialu_mem_reg );
11413 %}
11414 
11415 
11416 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11417   predicate(UseSSE==0);
11418   match(Set dst (MoveI2F src));
11419   effect(DEF dst, USE src);
11420 
11421   ins_cost(125);
11422   format %{ "FLD_S  $src\n\t"
11423             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11424   opcode(0xD9);               /* D9 /0, FLD m32real */
11425   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11426               Pop_Reg_FPR(dst) );
11427   ins_pipe( fpu_reg_mem );
11428 %}
11429 
11430 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11431   predicate(UseSSE>=1);
11432   match(Set dst (MoveI2F src));
11433   effect( DEF dst, USE src );
11434 
11435   ins_cost(95);
11436   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11437   ins_encode %{
11438     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11439   %}
11440   ins_pipe( pipe_slow );
11441 %}
11442 
11443 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11444   predicate(UseSSE>=2);
11445   match(Set dst (MoveI2F src));
11446   effect( DEF dst, USE src );
11447 
11448   ins_cost(85);
11449   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11450   ins_encode %{
11451     __ movdl($dst$$XMMRegister, $src$$Register);
11452   %}
11453   ins_pipe( pipe_slow );
11454 %}
11455 
11456 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11457   match(Set dst (MoveD2L src));
11458   effect(DEF dst, USE src);
11459 
11460   ins_cost(250);
11461   format %{ "MOV    $dst.lo,$src\n\t"
11462             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11463   opcode(0x8B, 0x8B);
11464   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11465   ins_pipe( ialu_mem_long_reg );
11466 %}
11467 
11468 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11469   predicate(UseSSE<=1);
11470   match(Set dst (MoveD2L src));
11471   effect(DEF dst, USE src);
11472 
11473   ins_cost(125);
11474   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11475   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11476   ins_pipe( fpu_mem_reg );
11477 %}
11478 
11479 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11480   predicate(UseSSE>=2);
11481   match(Set dst (MoveD2L src));
11482   effect(DEF dst, USE src);
11483   ins_cost(95);
11484   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11485   ins_encode %{
11486     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11487   %}
11488   ins_pipe( pipe_slow );
11489 %}
11490 
11491 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11492   predicate(UseSSE>=2);
11493   match(Set dst (MoveD2L src));
11494   effect(DEF dst, USE src, TEMP tmp);
11495   ins_cost(85);
11496   format %{ "MOVD   $dst.lo,$src\n\t"
11497             "PSHUFLW $tmp,$src,0x4E\n\t"
11498             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11499   ins_encode %{
11500     __ movdl($dst$$Register, $src$$XMMRegister);
11501     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11502     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11503   %}
11504   ins_pipe( pipe_slow );
11505 %}
11506 
11507 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11508   match(Set dst (MoveL2D src));
11509   effect(DEF dst, USE src);
11510 
11511   ins_cost(200);
11512   format %{ "MOV    $dst,$src.lo\n\t"
11513             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11514   opcode(0x89, 0x89);
11515   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11516   ins_pipe( ialu_mem_long_reg );
11517 %}
11518 
11519 
11520 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11521   predicate(UseSSE<=1);
11522   match(Set dst (MoveL2D src));
11523   effect(DEF dst, USE src);
11524   ins_cost(125);
11525 
11526   format %{ "FLD_D  $src\n\t"
11527             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11528   opcode(0xDD);               /* DD /0, FLD m64real */
11529   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11530               Pop_Reg_DPR(dst) );
11531   ins_pipe( fpu_reg_mem );
11532 %}
11533 
11534 
11535 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11536   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11537   match(Set dst (MoveL2D src));
11538   effect(DEF dst, USE src);
11539 
11540   ins_cost(95);
11541   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11542   ins_encode %{
11543     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11544   %}
11545   ins_pipe( pipe_slow );
11546 %}
11547 
11548 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11549   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11550   match(Set dst (MoveL2D src));
11551   effect(DEF dst, USE src);
11552 
11553   ins_cost(95);
11554   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11555   ins_encode %{
11556     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11557   %}
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11562   predicate(UseSSE>=2);
11563   match(Set dst (MoveL2D src));
11564   effect(TEMP dst, USE src, TEMP tmp);
11565   ins_cost(85);
11566   format %{ "MOVD   $dst,$src.lo\n\t"
11567             "MOVD   $tmp,$src.hi\n\t"
11568             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11569   ins_encode %{
11570     __ movdl($dst$$XMMRegister, $src$$Register);
11571     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11572     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 
11578 // =======================================================================
11579 // fast clearing of an array
11580 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11581   predicate(!((ClearArrayNode*)n)->is_large());
11582   match(Set dummy (ClearArray cnt base));
11583   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11584 
11585   format %{ $$template
11586     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11587     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11588     $$emit$$"JG     LARGE\n\t"
11589     $$emit$$"SHL    ECX, 1\n\t"
11590     $$emit$$"DEC    ECX\n\t"
11591     $$emit$$"JS     DONE\t# Zero length\n\t"
11592     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11593     $$emit$$"DEC    ECX\n\t"
11594     $$emit$$"JGE    LOOP\n\t"
11595     $$emit$$"JMP    DONE\n\t"
11596     $$emit$$"# LARGE:\n\t"
11597     if (UseFastStosb) {
11598        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11599        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11600     } else if (UseXMMForObjInit) {
11601        $$emit$$"MOV     RDI,RAX\n\t"
11602        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11603        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11604        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11605        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11606        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11607        $$emit$$"ADD     0x40,RAX\n\t"
11608        $$emit$$"# L_zero_64_bytes:\n\t"
11609        $$emit$$"SUB     0x8,RCX\n\t"
11610        $$emit$$"JGE     L_loop\n\t"
11611        $$emit$$"ADD     0x4,RCX\n\t"
11612        $$emit$$"JL      L_tail\n\t"
11613        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11614        $$emit$$"ADD     0x20,RAX\n\t"
11615        $$emit$$"SUB     0x4,RCX\n\t"
11616        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11617        $$emit$$"ADD     0x4,RCX\n\t"
11618        $$emit$$"JLE     L_end\n\t"
11619        $$emit$$"DEC     RCX\n\t"
11620        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11621        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11622        $$emit$$"ADD     0x8,RAX\n\t"
11623        $$emit$$"DEC     RCX\n\t"
11624        $$emit$$"JGE     L_sloop\n\t"
11625        $$emit$$"# L_end:\n\t"
11626     } else {
11627        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11628        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11629     }
11630     $$emit$$"# DONE"
11631   %}
11632   ins_encode %{
11633     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11634                  $tmp$$XMMRegister, false);
11635   %}
11636   ins_pipe( pipe_slow );
11637 %}
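
// A rough outline of the strategy clear_mem() picks for this small-array case
// (the count is in 8-byte units, cf. the SHL ECX,3 above; names illustrative):
//
//   if (count is small, compared against InitArrayShortSize)
//     zero the words in a short inline loop;
//   else if (UseFastStosb)
//     REP STOSB with the count scaled to bytes (count * 8);
//   else if (UseXMMForObjInit)
//     zero 64/32/8 bytes at a time with wide XMM/YMM stores;
//   else
//     REP STOS with the count scaled to 4-byte words (count * 2).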
11638 
11639 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11640   predicate(((ClearArrayNode*)n)->is_large());
11641   match(Set dummy (ClearArray cnt base));
11642   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11643   format %{ $$template
11644     if (UseFastStosb) {
11645        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11646        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11647        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11648     } else if (UseXMMForObjInit) {
11649        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11650        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11651        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11652        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11653        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11654        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11655        $$emit$$"ADD     0x40,RAX\n\t"
11656        $$emit$$"# L_zero_64_bytes:\n\t"
11657        $$emit$$"SUB     0x8,RCX\n\t"
11658        $$emit$$"JGE     L_loop\n\t"
11659        $$emit$$"ADD     0x4,RCX\n\t"
11660        $$emit$$"JL      L_tail\n\t"
11661        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11662        $$emit$$"ADD     0x20,RAX\n\t"
11663        $$emit$$"SUB     0x4,RCX\n\t"
11664        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11665        $$emit$$"ADD     0x4,RCX\n\t"
11666        $$emit$$"JLE     L_end\n\t"
11667        $$emit$$"DEC     RCX\n\t"
11668        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11669        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11670        $$emit$$"ADD     0x8,RAX\n\t"
11671        $$emit$$"DEC     RCX\n\t"
11672        $$emit$$"JGE     L_sloop\n\t"
11673        $$emit$$"# L_end:\n\t"
11674     } else {
11675        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11676        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11677        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11678     }
11679     $$emit$$"# DONE"
11680   %}
11681   ins_encode %{
11682     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11683                  $tmp$$XMMRegister, true);
11684   %}
11685   ins_pipe( pipe_slow );
11686 %}
11687 
11688 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11689                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11690   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11691   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11692   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11693 
11694   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11695   ins_encode %{
11696     __ string_compare($str1$$Register, $str2$$Register,
11697                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11698                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11699   %}
11700   ins_pipe( pipe_slow );
11701 %}
11702 
11703 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11704                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11705   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11706   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11707   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11708 
11709   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11710   ins_encode %{
11711     __ string_compare($str1$$Register, $str2$$Register,
11712                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11713                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11714   %}
11715   ins_pipe( pipe_slow );
11716 %}
11717 
11718 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11719                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11720   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11721   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11722   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11723 
11724   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11725   ins_encode %{
11726     __ string_compare($str1$$Register, $str2$$Register,
11727                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11728                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11734                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11735   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11736   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11737   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11738 
11739   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11740   ins_encode %{
11741     __ string_compare($str2$$Register, $str1$$Register,
11742                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11743                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11744   %}
11745   ins_pipe( pipe_slow );
11746 %}
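
// The four StrComp variants above cover the Latin-1 (L) and UTF-16 (U)
// encodings and implement the java.lang.String.compareTo contract. Roughly
// (illustrative sketch, element type per encoding):
//
//   jint str_comp(e1* s1, jint cnt1, e2* s2, jint cnt2) {
//     for (jint i = 0; i < MIN2(cnt1, cnt2); i++) {
//       if (s1[i] != s2[i]) return s1[i] - s2[i];   // first differing element
//     }
//     return cnt1 - cnt2;                           // equal prefix: shorter sorts first
//   }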
11747 
11748 // fast string equals
11749 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11750                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11751   match(Set result (StrEquals (Binary str1 str2) cnt));
11752   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11753 
11754   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11755   ins_encode %{
11756     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11757                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11758                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11759   %}
11760 
11761   ins_pipe( pipe_slow );
11762 %}
11763 
11764 // fast search of substring with known size.
11765 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11766                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11767   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11768   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11769   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11770 
11771   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11772   ins_encode %{
11773     int icnt2 = (int)$int_cnt2$$constant;
11774     if (icnt2 >= 16) {
11775       // IndexOf for constant substrings with size >= 16 elements
11776       // which don't need to be loaded through the stack.
11777       __ string_indexofC8($str1$$Register, $str2$$Register,
11778                           $cnt1$$Register, $cnt2$$Register,
11779                           icnt2, $result$$Register,
11780                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11781     } else {
11782       // Small strings are loaded through the stack if they cross a page boundary.
11783       __ string_indexof($str1$$Register, $str2$$Register,
11784                         $cnt1$$Register, $cnt2$$Register,
11785                         icnt2, $result$$Register,
11786                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11787     }
11788   %}
11789   ins_pipe( pipe_slow );
11790 %}
11791 
11792 // fast search of substring with known size.
11793 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11794                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11795   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11796   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11797   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11798 
11799   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11800   ins_encode %{
11801     int icnt2 = (int)$int_cnt2$$constant;
11802     if (icnt2 >= 8) {
11803       // IndexOf for constant substrings with size >= 8 elements
11804       // which don't need to be loaded through the stack.
11805       __ string_indexofC8($str1$$Register, $str2$$Register,
11806                           $cnt1$$Register, $cnt2$$Register,
11807                           icnt2, $result$$Register,
11808                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11809     } else {
11810       // Small strings are loaded through the stack if they cross a page boundary.
11811       __ string_indexof($str1$$Register, $str2$$Register,
11812                         $cnt1$$Register, $cnt2$$Register,
11813                         icnt2, $result$$Register,
11814                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11815     }
11816   %}
11817   ins_pipe( pipe_slow );
11818 %}
11819 
11820 // fast search of substring with known size.
11821 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11822                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11823   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11824   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11825   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11826 
11827   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11828   ins_encode %{
11829     int icnt2 = (int)$int_cnt2$$constant;
11830     if (icnt2 >= 8) {
11831       // IndexOf for constant substrings with size >= 8 elements
11832       // which don't need to be loaded through the stack.
11833       __ string_indexofC8($str1$$Register, $str2$$Register,
11834                           $cnt1$$Register, $cnt2$$Register,
11835                           icnt2, $result$$Register,
11836                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11837     } else {
11838       // Small strings are loaded through the stack if they cross a page boundary.
11839       __ string_indexof($str1$$Register, $str2$$Register,
11840                         $cnt1$$Register, $cnt2$$Register,
11841                         icnt2, $result$$Register,
11842                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11843     }
11844   %}
11845   ins_pipe( pipe_slow );
11846 %}
11847 
11848 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11849                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11850   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11851   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11852   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11853 
11854   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11855   ins_encode %{
11856     __ string_indexof($str1$$Register, $str2$$Register,
11857                       $cnt1$$Register, $cnt2$$Register,
11858                       (-1), $result$$Register,
11859                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11860   %}
11861   ins_pipe( pipe_slow );
11862 %}
11863 
11864 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11865                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11866   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11867   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11868   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11869 
11870   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11871   ins_encode %{
11872     __ string_indexof($str1$$Register, $str2$$Register,
11873                       $cnt1$$Register, $cnt2$$Register,
11874                       (-1), $result$$Register,
11875                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11876   %}
11877   ins_pipe( pipe_slow );
11878 %}
11879 
11880 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11881                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11882   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11883   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11884   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11885 
11886   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11887   ins_encode %{
11888     __ string_indexof($str1$$Register, $str2$$Register,
11889                       $cnt1$$Register, $cnt2$$Register,
11890                       (-1), $result$$Register,
11891                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11892   %}
11893   ins_pipe( pipe_slow );
11894 %}
11895 
11896 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11897                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11898   predicate(UseSSE42Intrinsics);
11899   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11900   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11901   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11902   ins_encode %{
11903     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11904                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11905   %}
11906   ins_pipe( pipe_slow );
11907 %}
11908 
11909 // fast array equals
11910 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11911                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11912 %{
11913   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11914   match(Set result (AryEq ary1 ary2));
11915   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11916   //ins_cost(300);
11917 
11918   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11919   ins_encode %{
11920     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11921                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11922                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11923   %}
11924   ins_pipe( pipe_slow );
11925 %}
11926 
11927 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11928                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11929 %{
11930   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11931   match(Set result (AryEq ary1 ary2));
11932   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11933   //ins_cost(300);
11934 
11935   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11936   ins_encode %{
11937     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11938                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11939                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11940   %}
11941   ins_pipe( pipe_slow );
11942 %}
11943 
11944 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11945                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11946 %{
11947   match(Set result (HasNegatives ary1 len));
11948   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11949 
11950   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11951   ins_encode %{
11952     __ has_negatives($ary1$$Register, $len$$Register,
11953                      $result$$Register, $tmp3$$Register,
11954                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11955   %}
11956   ins_pipe( pipe_slow );
11957 %}
11958 
11959 // fast char[] to byte[] compression
11960 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11961                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11962   match(Set result (StrCompressedCopy src (Binary dst len)));
11963   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11964 
11965   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11966   ins_encode %{
11967     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11968                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11969                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11970   %}
11971   ins_pipe( pipe_slow );
11972 %}
11973 
11974 // fast byte[] to char[] inflation
11975 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11976                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11977   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11978   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11979 
11980   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11981   ins_encode %{
11982     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11983                           $tmp1$$XMMRegister, $tmp2$$Register);
11984   %}
11985   ins_pipe( pipe_slow );
11986 %}
11987 
11988 // encode char[] to byte[] in ISO_8859_1
11989 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11990                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11991                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11992   match(Set result (EncodeISOArray src (Binary dst len)));
11993   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11994 
11995   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11996   ins_encode %{
11997     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11998                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11999                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12000   %}
12001   ins_pipe( pipe_slow );
12002 %}
12003 
12004 
12005 //----------Control Flow Instructions------------------------------------------
12006 // Signed compare Instructions
12007 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12008   match(Set cr (CmpI op1 op2));
12009   effect( DEF cr, USE op1, USE op2 );
12010   format %{ "CMP    $op1,$op2" %}
12011   opcode(0x3B);  /* Opcode 3B /r */
12012   ins_encode( OpcP, RegReg( op1, op2) );
12013   ins_pipe( ialu_cr_reg_reg );
12014 %}
12015 
12016 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12017   match(Set cr (CmpI op1 op2));
12018   effect( DEF cr, USE op1 );
12019   format %{ "CMP    $op1,$op2" %}
12020   opcode(0x81,0x07);  /* Opcode 81 /7 */
12021   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12022   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12023   ins_pipe( ialu_cr_reg_imm );
12024 %}
12025 
12026 // Cisc-spilled version of cmpI_eReg
12027 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12028   match(Set cr (CmpI op1 (LoadI op2)));
12029 
12030   format %{ "CMP    $op1,$op2" %}
12031   ins_cost(500);
12032   opcode(0x3B);  /* Opcode 3B /r */
12033   ins_encode( OpcP, RegMem( op1, op2) );
12034   ins_pipe( ialu_cr_reg_mem );
12035 %}
12036 
12037 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12038   match(Set cr (CmpI src zero));
12039   effect( DEF cr, USE src );
12040 
12041   format %{ "TEST   $src,$src" %}
12042   opcode(0x85);
12043   ins_encode( OpcP, RegReg( src, src ) );
12044   ins_pipe( ialu_cr_reg_imm );
12045 %}
12046 
12047 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12048   match(Set cr (CmpI (AndI src con) zero));
12049 
12050   format %{ "TEST   $src,$con" %}
12051   opcode(0xF7,0x00);
12052   ins_encode( OpcP, RegOpc(src), Con32(con) );
12053   ins_pipe( ialu_cr_reg_imm );
12054 %}
12055 
12056 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12057   match(Set cr (CmpI (AndI src mem) zero));
12058 
12059   format %{ "TEST   $src,$mem" %}
12060   opcode(0x85);
12061   ins_encode( OpcP, RegMem( src, mem ) );
12062   ins_pipe( ialu_cr_reg_mem );
12063 %}
12064 
12065 // Unsigned compare Instructions; really, same as signed except they
12066 // produce an eFlagsRegU instead of eFlagsReg.
12067 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12068   match(Set cr (CmpU op1 op2));
12069 
12070   format %{ "CMPu   $op1,$op2" %}
12071   opcode(0x3B);  /* Opcode 3B /r */
12072   ins_encode( OpcP, RegReg( op1, op2) );
12073   ins_pipe( ialu_cr_reg_reg );
12074 %}
12075 
12076 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12077   match(Set cr (CmpU op1 op2));
12078 
12079   format %{ "CMPu   $op1,$op2" %}
12080   opcode(0x81,0x07);  /* Opcode 81 /7 */
12081   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12082   ins_pipe( ialu_cr_reg_imm );
12083 %}
12084 
12085 // Cisc-spilled version of cmpU_eReg
12086 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12087   match(Set cr (CmpU op1 (LoadI op2)));
12088 
12089   format %{ "CMPu   $op1,$op2" %}
12090   ins_cost(500);
12091   opcode(0x3B);  /* Opcode 3B /r */
12092   ins_encode( OpcP, RegMem( op1, op2) );
12093   ins_pipe( ialu_cr_reg_mem );
12094 %}
12095 
12096 // // Cisc-spilled version of cmpU_eReg
12097 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12098 //  match(Set cr (CmpU (LoadI op1) op2));
12099 //
12100 //  format %{ "CMPu   $op1,$op2" %}
12101 //  ins_cost(500);
12102 //  opcode(0x39);  /* Opcode 39 /r */
12103 //  ins_encode( OpcP, RegMem( op1, op2) );
12104 //%}
12105 
12106 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12107   match(Set cr (CmpU src zero));
12108 
12109   format %{ "TESTu  $src,$src" %}
12110   opcode(0x85);
12111   ins_encode( OpcP, RegReg( src, src ) );
12112   ins_pipe( ialu_cr_reg_imm );
12113 %}
12114 
12115 // Unsigned pointer compare Instructions
12116 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12117   match(Set cr (CmpP op1 op2));
12118 
12119   format %{ "CMPu   $op1,$op2" %}
12120   opcode(0x3B);  /* Opcode 3B /r */
12121   ins_encode( OpcP, RegReg( op1, op2) );
12122   ins_pipe( ialu_cr_reg_reg );
12123 %}
12124 
12125 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12126   match(Set cr (CmpP op1 op2));
12127 
12128   format %{ "CMPu   $op1,$op2" %}
12129   opcode(0x81,0x07);  /* Opcode 81 /7 */
12130   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12131   ins_pipe( ialu_cr_reg_imm );
12132 %}
12133 
12134 // Cisc-spilled version of cmpP_eReg
12135 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12136   match(Set cr (CmpP op1 (LoadP op2)));
12137 
12138   format %{ "CMPu   $op1,$op2" %}
12139   ins_cost(500);
12140   opcode(0x3B);  /* Opcode 3B /r */
12141   ins_encode( OpcP, RegMem( op1, op2) );
12142   ins_pipe( ialu_cr_reg_mem );
12143 %}
12144 
12145 // // Cisc-spilled version of cmpP_eReg
12146 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12147 //  match(Set cr (CmpP (LoadP op1) op2));
12148 //
12149 //  format %{ "CMPu   $op1,$op2" %}
12150 //  ins_cost(500);
12151 //  opcode(0x39);  /* Opcode 39 /r */
12152 //  ins_encode( OpcP, RegMem( op1, op2) );
12153 //%}
12154 
12155 // Compare raw pointer (used in out-of-heap check).
12156 // Only works because non-oop pointers must be raw pointers
12157 // and raw pointers have no anti-dependencies.
12158 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12159   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12160   match(Set cr (CmpP op1 (LoadP op2)));
12161 
12162   format %{ "CMPu   $op1,$op2" %}
12163   opcode(0x3B);  /* Opcode 3B /r */
12164   ins_encode( OpcP, RegMem( op1, op2) );
12165   ins_pipe( ialu_cr_reg_mem );
12166 %}
12167 
12168 //
12169 // This will generate a signed flags result. This should be ok
12170 // since any compare to a zero should be eq/neq.
12171 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12172   match(Set cr (CmpP src zero));
12173 
12174   format %{ "TEST   $src,$src" %}
12175   opcode(0x85);
12176   ins_encode( OpcP, RegReg( src, src ) );
12177   ins_pipe( ialu_cr_reg_imm );
12178 %}
12179 
12180 // Cisc-spilled version of testP_reg
12181 // This will generate a signed flags result. This should be ok
12182 // since any compare to a zero should be eq/neq.
12183 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12184   match(Set cr (CmpP (LoadP op) zero));
12185 
12186   format %{ "TEST   $op,0xFFFFFFFF" %}
12187   ins_cost(500);
12188   opcode(0xF7);               /* Opcode F7 /0 */
12189   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12190   ins_pipe( ialu_cr_reg_imm );
12191 %}
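
// TEST with an all-ones immediate ANDs the loaded word with 0xFFFFFFFF and
// sets the flags on the result, so ZF is set exactly when the pointer in
// memory is null; this avoids loading it into a register first. In effect:
//
//   ZF = ((*op & 0xFFFFFFFF) == 0);   // i.e. the loaded pointer is NULL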
12192 
12193 // Yanked all unsigned pointer compare operations.
12194 // Pointer compares are done with CmpP which is already unsigned.
12195 
12196 //----------Max and Min--------------------------------------------------------
12197 // Min Instructions
12198 ////
12199 //   *** Min and Max using the conditional move are slower than the
12200 //   *** branch version on a Pentium III.
12201 // // Conditional move for min
12202 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12203 //  effect( USE_DEF op2, USE op1, USE cr );
12204 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12205 //  opcode(0x4C,0x0F);
12206 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12207 //  ins_pipe( pipe_cmov_reg );
12208 //%}
12209 //
12210 //// Min Register with Register (P6 version)
12211 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12212 //  predicate(VM_Version::supports_cmov() );
12213 //  match(Set op2 (MinI op1 op2));
12214 //  ins_cost(200);
12215 //  expand %{
12216 //    eFlagsReg cr;
12217 //    compI_eReg(cr,op1,op2);
12218 //    cmovI_reg_lt(op2,op1,cr);
12219 //  %}
12220 //%}
12221 
12222 // Min Register with Register (generic version)
12223 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12224   match(Set dst (MinI dst src));
12225   effect(KILL flags);
12226   ins_cost(300);
12227 
12228   format %{ "MIN    $dst,$src" %}
12229   opcode(0xCC);
12230   ins_encode( min_enc(dst,src) );
12231   ins_pipe( pipe_slow );
12232 %}
12233 
12234 // Max Register with Register
12235 //   *** Min and Max using the conditional move are slower than the
12236 //   *** branch version on a Pentium III.
12237 // // Conditional move for max
12238 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12239 //  effect( USE_DEF op2, USE op1, USE cr );
12240 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12241 //  opcode(0x4F,0x0F);
12242 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12243 //  ins_pipe( pipe_cmov_reg );
12244 //%}
12245 //
12246 // // Max Register with Register (P6 version)
12247 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12248 //  predicate(VM_Version::supports_cmov() );
12249 //  match(Set op2 (MaxI op1 op2));
12250 //  ins_cost(200);
12251 //  expand %{
12252 //    eFlagsReg cr;
12253 //    compI_eReg(cr,op1,op2);
12254 //    cmovI_reg_gt(op2,op1,cr);
12255 //  %}
12256 //%}
12257 
12258 // Max Register with Register (generic version)
12259 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12260   match(Set dst (MaxI dst src));
12261   effect(KILL flags);
12262   ins_cost(300);
12263 
12264   format %{ "MAX    $dst,$src" %}
12265   opcode(0xCC);
12266   ins_encode( max_enc(dst,src) );
12267   ins_pipe( pipe_slow );
12268 %}
12269 
12270 // ============================================================================
12271 // Counted Loop limit node which represents exact final iterator value.
12272 // Note: the resulting value should fit into the integer range, since
12273 // counted loops have a limit check for overflow.
12274 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12275   match(Set limit (LoopLimit (Binary init limit) stride));
12276   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12277   ins_cost(300);
12278 
12279   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12280   ins_encode %{
12281     int strd = (int)$stride$$constant;
12282     assert(strd != 1 && strd != -1, "sanity");
12283     int m1 = (strd > 0) ? 1 : -1;
12284     // Convert limit to long (EAX:EDX)
12285     __ cdql();
12286     // Convert init to long (init:tmp)
12287     __ movl($tmp$$Register, $init$$Register);
12288     __ sarl($tmp$$Register, 31);
12289     // $limit - $init
12290     __ subl($limit$$Register, $init$$Register);
12291     __ sbbl($limit_hi$$Register, $tmp$$Register);
12292     // + ($stride - 1)
12293     if (strd > 0) {
12294       __ addl($limit$$Register, (strd - 1));
12295       __ adcl($limit_hi$$Register, 0);
12296       __ movl($tmp$$Register, strd);
12297     } else {
12298       __ addl($limit$$Register, (strd + 1));
12299       __ adcl($limit_hi$$Register, -1);
12300       __ lneg($limit_hi$$Register, $limit$$Register);
12301       __ movl($tmp$$Register, -strd);
12302     }
12303     // signed division: (EAX:EDX) / pos_stride
12304     __ idivl($tmp$$Register);
12305     if (strd < 0) {
12306       // restore sign
12307       __ negl($tmp$$Register);
12308     }
12309     // (EAX) * stride
12310     __ mull($tmp$$Register);
12311     // + init (ignore upper bits)
12312     __ addl($limit$$Register, $init$$Register);
12313   %}
12314   ins_pipe( pipe_slow );
12315 %}
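
// Worked example of the formula in the format string above, for a positive
// stride: init = 0, limit = 10, stride = 3 gives
//   new_limit = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// i.e. the exact final value of the induction variable (the first value
// reachable from init in whole strides that is >= limit). Negative strides are
// handled above by mirroring the computation with the negated stride.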
12316 
12317 // ============================================================================
12318 // Branch Instructions
12319 // Jump Table
12320 instruct jumpXtnd(rRegI switch_val) %{
12321   match(Jump switch_val);
12322   ins_cost(350);
12323   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12324   ins_encode %{
12325     // Jump to Address(table_base + switch_reg)
12326     Address index(noreg, $switch_val$$Register, Address::times_1);
12327     __ jump(ArrayAddress($constantaddress, index));
12328   %}
12329   ins_pipe(pipe_jmp);
12330 %}
12331 
12332 // Jump Direct - Label defines a relative address from JMP+1
12333 instruct jmpDir(label labl) %{
12334   match(Goto);
12335   effect(USE labl);
12336 
12337   ins_cost(300);
12338   format %{ "JMP    $labl" %}
12339   size(5);
12340   ins_encode %{
12341     Label* L = $labl$$label;
12342     __ jmp(*L, false); // Always long jump
12343   %}
12344   ins_pipe( pipe_jmp );
12345 %}
12346 
12347 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12348 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12349   match(If cop cr);
12350   effect(USE labl);
12351 
12352   ins_cost(300);
12353   format %{ "J$cop    $labl" %}
12354   size(6);
12355   ins_encode %{
12356     Label* L = $labl$$label;
12357     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12358   %}
12359   ins_pipe( pipe_jcc );
12360 %}
12361 
12362 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12363 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12364   predicate(!n->has_vector_mask_set());
12365   match(CountedLoopEnd cop cr);
12366   effect(USE labl);
12367 
12368   ins_cost(300);
12369   format %{ "J$cop    $labl\t# Loop end" %}
12370   size(6);
12371   ins_encode %{
12372     Label* L = $labl$$label;
12373     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12374   %}
12375   ins_pipe( pipe_jcc );
12376 %}
12377 
12378 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12379 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12380   predicate(!n->has_vector_mask_set());
12381   match(CountedLoopEnd cop cmp);
12382   effect(USE labl);
12383 
12384   ins_cost(300);
12385   format %{ "J$cop,u  $labl\t# Loop end" %}
12386   size(6);
12387   ins_encode %{
12388     Label* L = $labl$$label;
12389     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12390   %}
12391   ins_pipe( pipe_jcc );
12392 %}
12393 
12394 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12395   predicate(!n->has_vector_mask_set());
12396   match(CountedLoopEnd cop cmp);
12397   effect(USE labl);
12398 
12399   ins_cost(200);
12400   format %{ "J$cop,u  $labl\t# Loop end" %}
12401   size(6);
12402   ins_encode %{
12403     Label* L = $labl$$label;
12404     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12405   %}
12406   ins_pipe( pipe_jcc );
12407 %}
12408 
12409 // mask version
12410 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12411 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12412   predicate(n->has_vector_mask_set());
12413   match(CountedLoopEnd cop cr);
12414   effect(USE labl);
12415 
12416   ins_cost(400);
12417   format %{ "J$cop    $labl\t# Loop end\n\t"
12418             "restorevectmask \t# vector mask restore for loops" %}
12419   size(10);
12420   ins_encode %{
12421     Label* L = $labl$$label;
12422     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12423     __ restorevectmask();
12424   %}
12425   ins_pipe( pipe_jcc );
12426 %}
12427 
12428 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12429 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12430   predicate(n->has_vector_mask_set());
12431   match(CountedLoopEnd cop cmp);
12432   effect(USE labl);
12433 
12434   ins_cost(400);
12435   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12436             "restorevectmask \t# vector mask restore for loops" %}
12437   size(10);
12438   ins_encode %{
12439     Label* L = $labl$$label;
12440     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12441     __ restorevectmask();
12442   %}
12443   ins_pipe( pipe_jcc );
12444 %}
12445 
12446 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12447   predicate(n->has_vector_mask_set());
12448   match(CountedLoopEnd cop cmp);
12449   effect(USE labl);
12450 
12451   ins_cost(300);
12452   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12453             "restorevectmask \t# vector mask restore for loops" %}
12454   size(10);
12455   ins_encode %{
12456     Label* L = $labl$$label;
12457     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12458     __ restorevectmask();
12459   %}
12460   ins_pipe( pipe_jcc );
12461 %}
12462 
12463 // Jump Direct Conditional - using unsigned comparison
12464 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12465   match(If cop cmp);
12466   effect(USE labl);
12467 
12468   ins_cost(300);
12469   format %{ "J$cop,u  $labl" %}
12470   size(6);
12471   ins_encode %{
12472     Label* L = $labl$$label;
12473     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12474   %}
12475   ins_pipe(pipe_jcc);
12476 %}
12477 
12478 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12479   match(If cop cmp);
12480   effect(USE labl);
12481 
12482   ins_cost(200);
12483   format %{ "J$cop,u  $labl" %}
12484   size(6);
12485   ins_encode %{
12486     Label* L = $labl$$label;
12487     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12488   %}
12489   ins_pipe(pipe_jcc);
12490 %}
12491 
12492 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12493   match(If cop cmp);
12494   effect(USE labl);
12495 
12496   ins_cost(200);
12497   format %{ $$template
12498     if ($cop$$cmpcode == Assembler::notEqual) {
12499       $$emit$$"JP,u   $labl\n\t"
12500       $$emit$$"J$cop,u   $labl"
12501     } else {
12502       $$emit$$"JP,u   done\n\t"
12503       $$emit$$"J$cop,u   $labl\n\t"
12504       $$emit$$"done:"
12505     }
12506   %}
12507   ins_encode %{
12508     Label* l = $labl$$label;
12509     if ($cop$$cmpcode == Assembler::notEqual) {
12510       __ jcc(Assembler::parity, *l, false);
12511       __ jcc(Assembler::notEqual, *l, false);
12512     } else if ($cop$$cmpcode == Assembler::equal) {
12513       Label done;
12514       __ jccb(Assembler::parity, done);
12515       __ jcc(Assembler::equal, *l, false);
12516       __ bind(done);
12517     } else {
12518        ShouldNotReachHere();
12519     }
12520   %}
12521   ins_pipe(pipe_jcc);
12522 %}
12523 
12524 // ============================================================================
12525 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12526 // array for an instance of the superklass.  Set a hidden internal cache on a
12527 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12528 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
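//
// As a rough sketch (not the emitted code, and with approximate accessor
// names), the scan performed by enc_PartialSubtypeCheck is equivalent to:
//
//   // assumed: secondary_supers() is the 2ndary superklass array and
//   // secondary_super_cache is the hidden cache slot mentioned above
//   for (int i = 0; i < sub->secondary_supers()->length(); i++) {
//     if (sub->secondary_supers()->at(i) == super) {
//       sub->secondary_super_cache = super;  // hit: update cache
//       return 0;                            // flags Z, $result zero
//     }
//   }
//   return non_zero;                         // miss: flags NZ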
12529 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12530   match(Set result (PartialSubtypeCheck sub super));
12531   effect( KILL rcx, KILL cr );
12532 
12533   ins_cost(1100);  // slightly larger than the next version
12534   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12535             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12536             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12537             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12538             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12539             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12540             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12541      "miss:\t" %}
12542 
12543   opcode(0x1); // Force a XOR of EDI
12544   ins_encode( enc_PartialSubtypeCheck() );
12545   ins_pipe( pipe_slow );
12546 %}
12547 
12548 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12549   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12550   effect( KILL rcx, KILL result );
12551 
12552   ins_cost(1000);
12553   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12554             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12555             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12556             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12557             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12558             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12559      "miss:\t" %}
12560 
12561   opcode(0x0);  // No need to XOR EDI
12562   ins_encode( enc_PartialSubtypeCheck() );
12563   ins_pipe( pipe_slow );
12564 %}
12565 
12566 // ============================================================================
12567 // Branch Instructions -- short offset versions
12568 //
12569 // These instructions are used to replace jumps of a long offset (the default
12570 // match) with jumps of a shorter offset.  These instructions are all tagged
12571 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12572 // match rules in general matching.  Instead, the ADLC generates a conversion
12573 // method in the MachNode which can be used to do in-place replacement of the
12574 // long variant with the shorter variant.  The compiler decides whether the
12575 // short variant can actually be used via the is_short_branch_offset() predicate
12576 // in the machine-specific code section of the file.
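//
// For the conditional jumps in this file, the long form is the two-byte
// opcode 0x0F 0x8x followed by a 32-bit displacement (6 bytes, hence the
// size(6) on the instructions above), while the short form is a single
// 0x7x opcode with an 8-bit displacement (2 bytes, hence the size(2) below).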
12577 
12578 // Jump Direct - Label defines a relative address from JMP+1
12579 instruct jmpDir_short(label labl) %{
12580   match(Goto);
12581   effect(USE labl);
12582 
12583   ins_cost(300);
12584   format %{ "JMP,s  $labl" %}
12585   size(2);
12586   ins_encode %{
12587     Label* L = $labl$$label;
12588     __ jmpb(*L);
12589   %}
12590   ins_pipe( pipe_jmp );
12591   ins_short_branch(1);
12592 %}
12593 
12594 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12595 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12596   match(If cop cr);
12597   effect(USE labl);
12598 
12599   ins_cost(300);
12600   format %{ "J$cop,s  $labl" %}
12601   size(2);
12602   ins_encode %{
12603     Label* L = $labl$$label;
12604     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12605   %}
12606   ins_pipe( pipe_jcc );
12607   ins_short_branch(1);
12608 %}
12609 
12610 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12611 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12612   match(CountedLoopEnd cop cr);
12613   effect(USE labl);
12614 
12615   ins_cost(300);
12616   format %{ "J$cop,s  $labl\t# Loop end" %}
12617   size(2);
12618   ins_encode %{
12619     Label* L = $labl$$label;
12620     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12621   %}
12622   ins_pipe( pipe_jcc );
12623   ins_short_branch(1);
12624 %}
12625 
12626 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12627 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12628   match(CountedLoopEnd cop cmp);
12629   effect(USE labl);
12630 
12631   ins_cost(300);
12632   format %{ "J$cop,us $labl\t# Loop end" %}
12633   size(2);
12634   ins_encode %{
12635     Label* L = $labl$$label;
12636     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12637   %}
12638   ins_pipe( pipe_jcc );
12639   ins_short_branch(1);
12640 %}
12641 
12642 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12643   match(CountedLoopEnd cop cmp);
12644   effect(USE labl);
12645 
12646   ins_cost(300);
12647   format %{ "J$cop,us $labl\t# Loop end" %}
12648   size(2);
12649   ins_encode %{
12650     Label* L = $labl$$label;
12651     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12652   %}
12653   ins_pipe( pipe_jcc );
12654   ins_short_branch(1);
12655 %}
12656 
12657 // Jump Direct Conditional - using unsigned comparison
12658 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12659   match(If cop cmp);
12660   effect(USE labl);
12661 
12662   ins_cost(300);
12663   format %{ "J$cop,us $labl" %}
12664   size(2);
12665   ins_encode %{
12666     Label* L = $labl$$label;
12667     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12668   %}
12669   ins_pipe( pipe_jcc );
12670   ins_short_branch(1);
12671 %}
12672 
12673 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12674   match(If cop cmp);
12675   effect(USE labl);
12676 
12677   ins_cost(300);
12678   format %{ "J$cop,us $labl" %}
12679   size(2);
12680   ins_encode %{
12681     Label* L = $labl$$label;
12682     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12683   %}
12684   ins_pipe( pipe_jcc );
12685   ins_short_branch(1);
12686 %}
12687 
12688 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12689   match(If cop cmp);
12690   effect(USE labl);
12691 
12692   ins_cost(300);
12693   format %{ $$template
12694     if ($cop$$cmpcode == Assembler::notEqual) {
12695       $$emit$$"JP,u,s   $labl\n\t"
12696       $$emit$$"J$cop,u,s   $labl"
12697     } else {
12698       $$emit$$"JP,u,s   done\n\t"
12699       $$emit$$"J$cop,u,s  $labl\n\t"
12700       $$emit$$"done:"
12701     }
12702   %}
12703   size(4);
12704   ins_encode %{
12705     Label* l = $labl$$label;
12706     if ($cop$$cmpcode == Assembler::notEqual) {
12707       __ jccb(Assembler::parity, *l);
12708       __ jccb(Assembler::notEqual, *l);
12709     } else if ($cop$$cmpcode == Assembler::equal) {
12710       Label done;
12711       __ jccb(Assembler::parity, done);
12712       __ jccb(Assembler::equal, *l);
12713       __ bind(done);
12714     } else {
12715        ShouldNotReachHere();
12716     }
12717   %}
12718   ins_pipe(pipe_jcc);
12719   ins_short_branch(1);
12720 %}
12721 
12722 // ============================================================================
12723 // Long Compare
12724 //
12725 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12726 // is tricky.  The flavor of compare used depends on whether we are testing
12727 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12728 // The GE test is the negated LT test.  The LE test is had by commuting the
12729 // operands (x <= y becomes y >= x) and using the GE form; negating that
12730 // commuted test gives the GT test.  The EQ test is done by OR'ing the high
12731 // and low halves (which sets the flags), and the NE test is negated from that.
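//
// For example (a sketch of the flag computation used by the LTGE encodings
// below, not additional code), a signed 64-bit x < y on 32-bit halves is:
//
//   CMP  x.lo, y.lo      // borrow out of the low halves
//   MOV  tmp,  x.hi
//   SBB  tmp,  y.hi      // tmp = x.hi - y.hi - borrow; the signed flags now
//   JL   taken           // reflect the full 64-bit x < y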
12732 
12733 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12734 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12735 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12736 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12737 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12738 // foo match ends up with the wrong leaf.  One fix is to not match both
12739 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12740 // both forms beat the trinary form of long-compare and both are very useful
12741 // on Intel which has so few registers.
12742 
12743 // Manifest a CmpL result in an integer register.  Very painful.
12744 // This is the test to avoid.
12745 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12746   match(Set dst (CmpL3 src1 src2));
12747   effect( KILL flags );
12748   ins_cost(1000);
12749   format %{ "XOR    $dst,$dst\n\t"
12750             "CMP    $src1.hi,$src2.hi\n\t"
12751             "JLT,s  m_one\n\t"
12752             "JGT,s  p_one\n\t"
12753             "CMP    $src1.lo,$src2.lo\n\t"
12754             "JB,s   m_one\n\t"
12755             "JEQ,s  done\n"
12756     "p_one:\tINC    $dst\n\t"
12757             "JMP,s  done\n"
12758     "m_one:\tDEC    $dst\n"
12759      "done:" %}
12760   ins_encode %{
12761     Label p_one, m_one, done;
12762     __ xorptr($dst$$Register, $dst$$Register);
12763     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12764     __ jccb(Assembler::less,    m_one);
12765     __ jccb(Assembler::greater, p_one);
12766     __ cmpl($src1$$Register, $src2$$Register);
12767     __ jccb(Assembler::below,   m_one);
12768     __ jccb(Assembler::equal,   done);
12769     __ bind(p_one);
12770     __ incrementl($dst$$Register);
12771     __ jmpb(done);
12772     __ bind(m_one);
12773     __ decrementl($dst$$Register);
12774     __ bind(done);
12775   %}
12776   ins_pipe( pipe_slow );
12777 %}
12778 
12779 //======
12780 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12781 // compares.  Can be used for LE or GT compares by reversing arguments.
12782 // NOT GOOD FOR EQ/NE tests.
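// (For a two-register long, src < 0 depends only on the sign bit of the high
// half, so the single TEST of $src.hi below is enough.)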
12783 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12784   match( Set flags (CmpL src zero ));
12785   ins_cost(100);
12786   format %{ "TEST   $src.hi,$src.hi" %}
12787   opcode(0x85);
12788   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12789   ins_pipe( ialu_cr_reg_reg );
12790 %}
12791 
12792 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12793 // compares.  Can be used for LE or GT compares by reversing arguments.
12794 // NOT GOOD FOR EQ/NE tests.
12795 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12796   match( Set flags (CmpL src1 src2 ));
12797   effect( TEMP tmp );
12798   ins_cost(300);
12799   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12800             "MOV    $tmp,$src1.hi\n\t"
12801             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12802   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12803   ins_pipe( ialu_cr_reg_reg );
12804 %}
12805 
12806 // Long compares reg < zero/reg OR reg >= zero/reg.
12807 // Just a wrapper for a normal branch, plus the predicate test.
12808 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12809   match(If cmp flags);
12810   effect(USE labl);
12811   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12812   expand %{
12813     jmpCon(cmp,flags,labl);    // JLT or JGE...
12814   %}
12815 %}
12816 
12817 //======
12818 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12819 // compares.  Can be used for LE or GT compares by reversing arguments.
12820 // NOT GOOD FOR EQ/NE tests.
12821 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12822   match(Set flags (CmpUL src zero));
12823   ins_cost(100);
12824   format %{ "TEST   $src.hi,$src.hi" %}
12825   opcode(0x85);
12826   ins_encode(OpcP, RegReg_Hi2(src, src));
12827   ins_pipe(ialu_cr_reg_reg);
12828 %}
12829 
12830 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12831 // compares.  Can be used for LE or GT compares by reversing arguments.
12832 // NOT GOOD FOR EQ/NE tests.
12833 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12834   match(Set flags (CmpUL src1 src2));
12835   effect(TEMP tmp);
12836   ins_cost(300);
12837   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12838             "MOV    $tmp,$src1.hi\n\t"
12839             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12840   ins_encode(long_cmp_flags2(src1, src2, tmp));
12841   ins_pipe(ialu_cr_reg_reg);
12842 %}
12843 
12844 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12845 // Just a wrapper for a normal branch, plus the predicate test.
12846 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12847   match(If cmp flags);
12848   effect(USE labl);
12849   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12850   expand %{
12851     jmpCon(cmp, flags, labl);    // JLT or JGE...
12852   %}
12853 %}
12854 
12855 // Compare 2 longs and CMOVE longs.
12856 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12857   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12858   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12859   ins_cost(400);
12860   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12861             "CMOV$cmp $dst.hi,$src.hi" %}
12862   opcode(0x0F,0x40);
12863   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12864   ins_pipe( pipe_cmov_reg_long );
12865 %}
12866 
12867 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12868   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12869   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12870   ins_cost(500);
12871   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12872             "CMOV$cmp $dst.hi,$src.hi" %}
12873   opcode(0x0F,0x40);
12874   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12875   ins_pipe( pipe_cmov_reg_long );
12876 %}
12877 
12878 // Compare 2 longs and CMOVE ints.
12879 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12880   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12881   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12882   ins_cost(200);
12883   format %{ "CMOV$cmp $dst,$src" %}
12884   opcode(0x0F,0x40);
12885   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12886   ins_pipe( pipe_cmov_reg );
12887 %}
12888 
12889 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12890   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12891   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12892   ins_cost(250);
12893   format %{ "CMOV$cmp $dst,$src" %}
12894   opcode(0x0F,0x40);
12895   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12896   ins_pipe( pipe_cmov_mem );
12897 %}
12898 
12899 // Compare 2 longs and CMOVE ptrs.
12900 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12901   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12902   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12903   ins_cost(200);
12904   format %{ "CMOV$cmp $dst,$src" %}
12905   opcode(0x0F,0x40);
12906   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12907   ins_pipe( pipe_cmov_reg );
12908 %}
12909 
12910 // Compare 2 longs and CMOVE doubles
12911 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12912   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12913   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12914   ins_cost(200);
12915   expand %{
12916     fcmovDPR_regS(cmp,flags,dst,src);
12917   %}
12918 %}
12919 
12920 // Compare 2 longs and CMOVE doubles
12921 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12922   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12923   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12924   ins_cost(200);
12925   expand %{
12926     fcmovD_regS(cmp,flags,dst,src);
12927   %}
12928 %}
12929 
12930 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12931   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12932   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12933   ins_cost(200);
12934   expand %{
12935     fcmovFPR_regS(cmp,flags,dst,src);
12936   %}
12937 %}
12938 
12939 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12940   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12941   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12942   ins_cost(200);
12943   expand %{
12944     fcmovF_regS(cmp,flags,dst,src);
12945   %}
12946 %}
12947 
12948 //======
12949 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12950 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12951   match( Set flags (CmpL src zero ));
12952   effect(TEMP tmp);
12953   ins_cost(200);
12954   format %{ "MOV    $tmp,$src.lo\n\t"
12955             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12956   ins_encode( long_cmp_flags0( src, tmp ) );
12957   ins_pipe( ialu_reg_reg_long );
12958 %}
12959 
12960 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12961 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12962   match( Set flags (CmpL src1 src2 ));
12963   ins_cost(200+300);
12964   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12965             "JNE,s  skip\n\t"
12966             "CMP    $src1.hi,$src2.hi\n\t"
12967      "skip:\t" %}
12968   ins_encode( long_cmp_flags1( src1, src2 ) );
12969   ins_pipe( ialu_cr_reg_reg );
12970 %}
12971 
12972 // Long compare reg == zero/reg OR reg != zero/reg
12973 // Just a wrapper for a normal branch, plus the predicate test.
12974 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12975   match(If cmp flags);
12976   effect(USE labl);
12977   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12978   expand %{
12979     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12980   %}
12981 %}
12982 
12983 //======
12984 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12985 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12986   match(Set flags (CmpUL src zero));
12987   effect(TEMP tmp);
12988   ins_cost(200);
12989   format %{ "MOV    $tmp,$src.lo\n\t"
12990             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12991   ins_encode(long_cmp_flags0(src, tmp));
12992   ins_pipe(ialu_reg_reg_long);
12993 %}
12994 
12995 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12996 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12997   match(Set flags (CmpUL src1 src2));
12998   ins_cost(200+300);
12999   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13000             "JNE,s  skip\n\t"
13001             "CMP    $src1.hi,$src2.hi\n\t"
13002      "skip:\t" %}
13003   ins_encode(long_cmp_flags1(src1, src2));
13004   ins_pipe(ialu_cr_reg_reg);
13005 %}
13006 
13007 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13008 // Just a wrapper for a normal branch, plus the predicate test.
13009 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13010   match(If cmp flags);
13011   effect(USE labl);
13012   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13013   expand %{
13014     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13015   %}
13016 %}
13017 
13018 // Compare 2 longs and CMOVE longs.
13019 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13020   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13021   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13022   ins_cost(400);
13023   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13024             "CMOV$cmp $dst.hi,$src.hi" %}
13025   opcode(0x0F,0x40);
13026   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13027   ins_pipe( pipe_cmov_reg_long );
13028 %}
13029 
13030 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13031   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13032   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13033   ins_cost(500);
13034   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13035             "CMOV$cmp $dst.hi,$src.hi" %}
13036   opcode(0x0F,0x40);
13037   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13038   ins_pipe( pipe_cmov_reg_long );
13039 %}
13040 
13041 // Compare 2 longs and CMOVE ints.
13042 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13043   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13044   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13045   ins_cost(200);
13046   format %{ "CMOV$cmp $dst,$src" %}
13047   opcode(0x0F,0x40);
13048   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13049   ins_pipe( pipe_cmov_reg );
13050 %}
13051 
13052 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13053   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13054   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13055   ins_cost(250);
13056   format %{ "CMOV$cmp $dst,$src" %}
13057   opcode(0x0F,0x40);
13058   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13059   ins_pipe( pipe_cmov_mem );
13060 %}
13061 
13062 // Compare 2 longs and CMOVE ptrs.
13063 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13064   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13065   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13066   ins_cost(200);
13067   format %{ "CMOV$cmp $dst,$src" %}
13068   opcode(0x0F,0x40);
13069   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13070   ins_pipe( pipe_cmov_reg );
13071 %}
13072 
13073 // Compare 2 longs and CMOVE doubles
13074 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13075   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13076   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13077   ins_cost(200);
13078   expand %{
13079     fcmovDPR_regS(cmp,flags,dst,src);
13080   %}
13081 %}
13082 
13083 // Compare 2 longs and CMOVE doubles
13084 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13085   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13086   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13087   ins_cost(200);
13088   expand %{
13089     fcmovD_regS(cmp,flags,dst,src);
13090   %}
13091 %}
13092 
13093 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13094   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13095   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13096   ins_cost(200);
13097   expand %{
13098     fcmovFPR_regS(cmp,flags,dst,src);
13099   %}
13100 %}
13101 
13102 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13103   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13104   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13105   ins_cost(200);
13106   expand %{
13107     fcmovF_regS(cmp,flags,dst,src);
13108   %}
13109 %}
13110 
13111 //======
13112 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13113 // Same as cmpL_reg_flags_LEGT except must negate src
13114 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13115   match( Set flags (CmpL src zero ));
13116   effect( TEMP tmp );
13117   ins_cost(300);
13118   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13119             "CMP    $tmp,$src.lo\n\t"
13120             "SBB    $tmp,$src.hi\n\t" %}
13121   ins_encode( long_cmp_flags3(src, tmp) );
13122   ins_pipe( ialu_reg_reg_long );
13123 %}
13124 
13125 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13126 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13127 // requires a commuted test to get the same result.
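// (For example, x > y is evaluated as y < x after the swap; the cmpOp_commute
// operand supplies the correspondingly commuted condition code.)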
13128 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13129   match( Set flags (CmpL src1 src2 ));
13130   effect( TEMP tmp );
13131   ins_cost(300);
13132   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13133             "MOV    $tmp,$src2.hi\n\t"
13134             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13135   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13136   ins_pipe( ialu_cr_reg_reg );
13137 %}
13138 
13139 // Long compares reg <= zero/reg OR reg > zero/reg.
13140 // Just a wrapper for a normal branch, plus the predicate test
13141 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13142   match(If cmp flags);
13143   effect(USE labl);
13144   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13145   ins_cost(300);
13146   expand %{
13147     jmpCon(cmp,flags,labl);    // JGT or JLE...
13148   %}
13149 %}
13150 
13151 //======
13152 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13153 // Same as cmpUL_reg_flags_LEGT except must negate src
13154 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13155   match(Set flags (CmpUL src zero));
13156   effect(TEMP tmp);
13157   ins_cost(300);
13158   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13159             "CMP    $tmp,$src.lo\n\t"
13160             "SBB    $tmp,$src.hi\n\t" %}
13161   ins_encode(long_cmp_flags3(src, tmp));
13162   ins_pipe(ialu_reg_reg_long);
13163 %}
13164 
13165 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13166 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13167 // requires a commuted test to get the same result.
13168 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13169   match(Set flags (CmpUL src1 src2));
13170   effect(TEMP tmp);
13171   ins_cost(300);
13172   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13173             "MOV    $tmp,$src2.hi\n\t"
13174             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13175   ins_encode(long_cmp_flags2( src2, src1, tmp));
13176   ins_pipe(ialu_cr_reg_reg);
13177 %}
13178 
13179 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13180 // Just a wrapper for a normal branch, plus the predicate test
13181 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13182   match(If cmp flags);
13183   effect(USE labl);
13184   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13185   ins_cost(300);
13186   expand %{
13187     jmpCon(cmp, flags, labl);    // JGT or JLE...
13188   %}
13189 %}
13190 
13191 // Compare 2 longs and CMOVE longs.
13192 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13193   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13194   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13195   ins_cost(400);
13196   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13197             "CMOV$cmp $dst.hi,$src.hi" %}
13198   opcode(0x0F,0x40);
13199   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13200   ins_pipe( pipe_cmov_reg_long );
13201 %}
13202 
13203 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13204   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13205   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13206   ins_cost(500);
13207   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13208             "CMOV$cmp $dst.hi,$src.hi" %}
13209   opcode(0x0F,0x40);
13210   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13211   ins_pipe( pipe_cmov_reg_long );
13212 %}
13213 
13214 // Compare 2 longs and CMOVE ints.
13215 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13216   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13217   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13218   ins_cost(200);
13219   format %{ "CMOV$cmp $dst,$src" %}
13220   opcode(0x0F,0x40);
13221   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13222   ins_pipe( pipe_cmov_reg );
13223 %}
13224 
13225 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13226   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13227   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13228   ins_cost(250);
13229   format %{ "CMOV$cmp $dst,$src" %}
13230   opcode(0x0F,0x40);
13231   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13232   ins_pipe( pipe_cmov_mem );
13233 %}
13234 
13235 // Compare 2 longs and CMOVE ptrs.
13236 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13237   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13238   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13239   ins_cost(200);
13240   format %{ "CMOV$cmp $dst,$src" %}
13241   opcode(0x0F,0x40);
13242   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13243   ins_pipe( pipe_cmov_reg );
13244 %}
13245 
13246 // Compare 2 longs and CMOVE doubles
13247 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13248   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13249   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13250   ins_cost(200);
13251   expand %{
13252     fcmovDPR_regS(cmp,flags,dst,src);
13253   %}
13254 %}
13255 
13256 // Compare 2 longs and CMOVE doubles
13257 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13258   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13259   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13260   ins_cost(200);
13261   expand %{
13262     fcmovD_regS(cmp,flags,dst,src);
13263   %}
13264 %}
13265 
13266 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13267   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13268   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13269   ins_cost(200);
13270   expand %{
13271     fcmovFPR_regS(cmp,flags,dst,src);
13272   %}
13273 %}
13274 
13275 
13276 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13277   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13278   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13279   ins_cost(200);
13280   expand %{
13281     fcmovF_regS(cmp,flags,dst,src);
13282   %}
13283 %}
13284 
13285 
13286 // ============================================================================
13287 // Procedure Call/Return Instructions
13288 // Call Java Static Instruction
13289 // Note: If this code changes, the corresponding ret_addr_offset() and
13290 //       compute_padding() functions will have to be adjusted.
13291 instruct CallStaticJavaDirect(method meth) %{
13292   match(CallStaticJava);
13293   effect(USE meth);
13294 
13295   ins_cost(300);
13296   format %{ "CALL,static " %}
13297   opcode(0xE8); /* E8 cd */
13298   ins_encode( pre_call_resets,
13299               Java_Static_Call( meth ),
13300               call_epilog,
13301               post_call_FPU );
13302   ins_pipe( pipe_slow );
13303   ins_alignment(4);
13304 %}
13305 
13306 // Call Java Dynamic Instruction
13307 // Note: If this code changes, the corresponding ret_addr_offset() and
13308 //       compute_padding() functions will have to be adjusted.
13309 instruct CallDynamicJavaDirect(method meth) %{
13310   match(CallDynamicJava);
13311   effect(USE meth);
13312 
13313   ins_cost(300);
13314   format %{ "MOV    EAX,(oop)-1\n\t"
13315             "CALL,dynamic" %}
13316   opcode(0xE8); /* E8 cd */
13317   ins_encode( pre_call_resets,
13318               Java_Dynamic_Call( meth ),
13319               call_epilog,
13320               post_call_FPU );
13321   ins_pipe( pipe_slow );
13322   ins_alignment(4);
13323 %}
13324 
13325 // Call Runtime Instruction
13326 instruct CallRuntimeDirect(method meth) %{
13327   match(CallRuntime );
13328   effect(USE meth);
13329 
13330   ins_cost(300);
13331   format %{ "CALL,runtime " %}
13332   opcode(0xE8); /* E8 cd */
13333   // Use FFREEs to clear entries in float stack
13334   ins_encode( pre_call_resets,
13335               FFree_Float_Stack_All,
13336               Java_To_Runtime( meth ),
13337               post_call_FPU );
13338   ins_pipe( pipe_slow );
13339 %}
13340 
13341 // Call runtime without safepoint
13342 instruct CallLeafDirect(method meth) %{
13343   match(CallLeaf);
13344   effect(USE meth);
13345 
13346   ins_cost(300);
13347   format %{ "CALL_LEAF,runtime " %}
13348   opcode(0xE8); /* E8 cd */
13349   ins_encode( pre_call_resets,
13350               FFree_Float_Stack_All,
13351               Java_To_Runtime( meth ),
13352               Verify_FPU_For_Leaf, post_call_FPU );
13353   ins_pipe( pipe_slow );
13354 %}
13355 
13356 instruct CallLeafNoFPDirect(method meth) %{
13357   match(CallLeafNoFP);
13358   effect(USE meth);
13359 
13360   ins_cost(300);
13361   format %{ "CALL_LEAF_NOFP,runtime " %}
13362   opcode(0xE8); /* E8 cd */
13363   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13364   ins_pipe( pipe_slow );
13365 %}
13366 
13367 
13368 // Return Instruction
13369 // Remove the return address & jump to it.
13370 instruct Ret() %{
13371   match(Return);
13372   format %{ "RET" %}
13373   opcode(0xC3);
13374   ins_encode(OpcP);
13375   ins_pipe( pipe_jmp );
13376 %}
13377 
13378 // Tail Call; Jump from runtime stub to Java code.
13379 // Also known as an 'interprocedural jump'.
13380 // Target of jump will eventually return to caller.
13381 // TailJump below removes the return address.
13382 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13383   match(TailCall jump_target method_oop );
13384   ins_cost(300);
13385   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13386   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13387   ins_encode( OpcP, RegOpc(jump_target) );
13388   ins_pipe( pipe_jmp );
13389 %}
13390 
13391 
13392 // Tail Jump; remove the return address; jump to target.
13393 // TailCall above leaves the return address around.
13394 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13395   match( TailJump jump_target ex_oop );
13396   ins_cost(300);
13397   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13398             "JMP    $jump_target " %}
13399   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13400   ins_encode( enc_pop_rdx,
13401               OpcP, RegOpc(jump_target) );
13402   ins_pipe( pipe_jmp );
13403 %}
13404 
13405 // Create exception oop: created by stack-crawling runtime code.
13406 // The created exception is now available to this handler and is set up
13407 // just prior to jumping to this handler.  No code emitted.
13408 instruct CreateException( eAXRegP ex_oop )
13409 %{
13410   match(Set ex_oop (CreateEx));
13411 
13412   size(0);
13413   // use the following format syntax
13414   format %{ "# exception oop is in EAX; no code emitted" %}
13415   ins_encode();
13416   ins_pipe( empty );
13417 %}
13418 
13419 
13420 // Rethrow exception:
13421 // The exception oop will come in the first argument position.
13422 // Then JUMP (not call) to the rethrow stub code.
13423 instruct RethrowException()
13424 %{
13425   match(Rethrow);
13426 
13427   // use the following format syntax
13428   format %{ "JMP    rethrow_stub" %}
13429   ins_encode(enc_rethrow);
13430   ins_pipe( pipe_jmp );
13431 %}
13432 
13433 // inlined locking and unlocking
13434 
13435 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13436   predicate(Compile::current()->use_rtm());
13437   match(Set cr (FastLock object box));
13438   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13439   ins_cost(300);
13440   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13441   ins_encode %{
13442     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13443                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13444                  _counters, _rtm_counters, _stack_rtm_counters,
13445                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13446                  true, ra_->C->profile_rtm());
13447   %}
13448   ins_pipe(pipe_slow);
13449 %}
13450 
13451 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13452   predicate(!Compile::current()->use_rtm());
13453   match(Set cr (FastLock object box));
13454   effect(TEMP tmp, TEMP scr, USE_KILL box);
13455   ins_cost(300);
13456   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13457   ins_encode %{
13458     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13459                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13460   %}
13461   ins_pipe(pipe_slow);
13462 %}
13463 
13464 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13465   match(Set cr (FastUnlock object box));
13466   effect(TEMP tmp, USE_KILL box);
13467   ins_cost(300);
13468   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13469   ins_encode %{
13470     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13471   %}
13472   ins_pipe(pipe_slow);
13473 %}
13474 
13475 
13476 
13477 // ============================================================================
13478 // Safepoint Instruction
13479 instruct safePoint_poll(eFlagsReg cr) %{
13480   predicate(SafepointMechanism::uses_global_page_poll());
13481   match(SafePoint);
13482   effect(KILL cr);
13483 
13484   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13485   // On SPARC that might be acceptable as we can generate the address with
13486   // just a sethi, saving an or.  By polling at offset 0 we can end up
13487   // putting additional pressure on the index-0 in the D$.  Because of
13488   // alignment (just like the situation at hand) the lower indices tend
13489   // to see more traffic.  It'd be better to change the polling address
13490   // to offset 0 of the last $line in the polling page.
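  // The poll is just a read of the polling page: when a safepoint is
  // requested the VM protects the page, so the TESTL below faults and the
  // signal handler brings this thread to the safepoint.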
13491 
13492   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13493   ins_cost(125);
13494   size(6) ;
13495   ins_encode( Safepoint_Poll() );
13496   ins_pipe( ialu_reg_mem );
13497 %}
13498 
13499 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13500   predicate(SafepointMechanism::uses_thread_local_poll());
13501   match(SafePoint poll);
13502   effect(KILL cr, USE poll);
13503 
13504   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13505   ins_cost(125);
13506   // EBP would need size(3)
13507   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13508   ins_encode %{
13509     __ relocate(relocInfo::poll_type);
13510     address pre_pc = __ pc();
13511     __ testl(rax, Address($poll$$Register, 0));
13512     address post_pc = __ pc();
13513     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13514   %}
13515   ins_pipe(ialu_reg_mem);
13516 %}
13517 
13518 
13519 // ============================================================================
13520 // This name is KNOWN by the ADLC and cannot be changed.
13521 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13522 // for this guy.
13523 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13524   match(Set dst (ThreadLocal));
13525   effect(DEF dst, KILL cr);
13526 
13527   format %{ "MOV    $dst, Thread::current()" %}
13528   ins_encode %{
13529     Register dstReg = as_Register($dst$$reg);
13530     __ get_thread(dstReg);
13531   %}
13532   ins_pipe( ialu_reg_fat );
13533 %}
13534 
13535 
13536 
13537 //----------PEEPHOLE RULES-----------------------------------------------------
13538 // These must follow all instruction definitions as they use the names
13539 // defined in the instruction definitions.
13540 //
13541 // peepmatch ( root_instr_name [preceding_instruction]* );
13542 //
13543 // peepconstraint %{
13544 // (instruction_number.operand_name relational_op instruction_number.operand_name
13545 //  [, ...] );
13546 // // instruction numbers are zero-based using left to right order in peepmatch
13547 //
13548 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13549 // // provide an instruction_number.operand_name for each operand that appears
13550 // // in the replacement instruction's match rule
13551 //
13552 // ---------VM FLAGS---------------------------------------------------------
13553 //
13554 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13555 //
13556 // Each peephole rule is given an identifying number starting with zero and
13557 // increasing by one in the order seen by the parser.  An individual peephole
13558 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13559 // on the command-line.
13560 //
13561 // ---------CURRENT LIMITATIONS----------------------------------------------
13562 //
13563 // Only match adjacent instructions in same basic block
13564 // Only equality constraints
13565 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13566 // Only one replacement instruction
13567 //
13568 // ---------EXAMPLE----------------------------------------------------------
13569 //
13570 // // pertinent parts of existing instructions in architecture description
13571 // instruct movI(rRegI dst, rRegI src) %{
13572 //   match(Set dst (CopyI src));
13573 // %}
13574 //
13575 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13576 //   match(Set dst (AddI dst src));
13577 //   effect(KILL cr);
13578 // %}
13579 //
13580 // // Change (inc mov) to lea
13581 // peephole %{
13582 //   // increment preceded by register-register move
13583 //   peepmatch ( incI_eReg movI );
13584 //   // require that the destination register of the increment
13585 //   // match the destination register of the move
13586 //   peepconstraint ( 0.dst == 1.dst );
13587 //   // construct a replacement instruction that sets
13588 //   // the destination to ( move's source register + one )
13589 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13590 // %}
13591 //
13592 // Implementation no longer uses movX instructions since
13593 // machine-independent system no longer uses CopyX nodes.
13594 //
13595 // peephole %{
13596 //   peepmatch ( incI_eReg movI );
13597 //   peepconstraint ( 0.dst == 1.dst );
13598 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13599 // %}
13600 //
13601 // peephole %{
13602 //   peepmatch ( decI_eReg movI );
13603 //   peepconstraint ( 0.dst == 1.dst );
13604 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13605 // %}
13606 //
13607 // peephole %{
13608 //   peepmatch ( addI_eReg_imm movI );
13609 //   peepconstraint ( 0.dst == 1.dst );
13610 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13611 // %}
13612 //
13613 // peephole %{
13614 //   peepmatch ( addP_eReg_imm movP );
13615 //   peepconstraint ( 0.dst == 1.dst );
13616 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13617 // %}
13618 
13619 // // Change load of spilled value to only a spill
13620 // instruct storeI(memory mem, rRegI src) %{
13621 //   match(Set mem (StoreI mem src));
13622 // %}
13623 //
13624 // instruct loadI(rRegI dst, memory mem) %{
13625 //   match(Set dst (LoadI mem));
13626 // %}
13627 //
13628 peephole %{
13629   peepmatch ( loadI storeI );
13630   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13631   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13632 %}
13633 
13634 //----------SMARTSPILL RULES---------------------------------------------------
13635 // These must follow all instruction definitions as they use the names
13636 // defined in the instruction definitions.