1 //
   2 // Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
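//
// For example, "reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg())" below
// declares ECX as save-on-call both for the register allocator and for the C
// calling convention, spilled with LoadI/StoreI, and with hardware encoding 1.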
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously save-on-entry for Java code, but SOE was
// turned off due to the frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially: it is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode. During emission the FPU stack is pushed,
// making FPR1 == st(1) temporarily. However, at any safepoint the stack will
// not have this extra element, so FPR1 == st(0) from the oopMap viewpoint.
// This same weirdness with the numbering forces the instruction encoding to
// play games with the register encode to correct for the 0/1 issue. See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for the 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
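
// As the note above says, these masks implement fast AbsF/AbsD and NegF/NegD:
// AND-ing with *float_signmask_pool / *double_signmask_pool clears the sign
// bit (absolute value), while XOR-ing with *float_signflip_pool /
// *double_signflip_pool flips it (negation).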
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 //
 318 // Compute padding required for nodes which need alignment
 319 //
 320 
 321 // The address of the call instruction needs to be 4-byte aligned to
 322 // ensure that it does not span a cache line so that it can be patched.
 323 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 324   current_offset += pre_call_resets_size();  // skip fldcw, if any
 325   current_offset += 1;      // skip call opcode byte
 326   return align_up(current_offset, alignment_required()) - current_offset;
 327 }
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 5;      // skip MOV instruction
 334   current_offset += 1;      // skip call opcode byte
 335   return align_up(current_offset, alignment_required()) - current_offset;
 336 }
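
// Worked example for the dynamic case: with no FLDCW or VZEROUPPER resets and
// a call sequence that would start at offset 7, the 4-byte displacement would
// begin at 7 + 5 + 1 = 13.  align_up(13, 4) - 13 = 3 bytes of padding are
// emitted first, so the displacement starts at offset 16 and cannot span a
// cache line.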
 337 
 338 // EMIT_RM()
 339 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 340   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 341   cbuf.insts()->emit_int8(c);
 342 }
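
// Note: the three arguments are the 2-bit mod, 3-bit reg and 3-bit r/m fields
// of a ModRM byte (the same helper is also used to build SIB bytes).  For
// instance, emit_rm(cbuf, 0x3, dst, src) emits 0xC0 | (dst << 3) | src, the
// register-direct form used by encode_Copy() below.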
 343 
 344 // EMIT_CC()
 345 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 346   unsigned char c = (unsigned char)( f1 | f2 );
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_OPCODE()
 351 void emit_opcode(CodeBuffer &cbuf, int code) {
 352   cbuf.insts()->emit_int8((unsigned char) code);
 353 }
 354 
 355 // EMIT_OPCODE() w/ relocation information
 356 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 357   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 358   emit_opcode(cbuf, code);
 359 }
 360 
 361 // EMIT_D8()
 362 void emit_d8(CodeBuffer &cbuf, int d8) {
 363   cbuf.insts()->emit_int8((unsigned char) d8);
 364 }
 365 
 366 // EMIT_D16()
 367 void emit_d16(CodeBuffer &cbuf, int d16) {
 368   cbuf.insts()->emit_int16(d16);
 369 }
 370 
 371 // EMIT_D32()
 372 void emit_d32(CodeBuffer &cbuf, int d32) {
 373   cbuf.insts()->emit_int32(d32);
 374 }
 375 
 376 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 377 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 378         int format) {
 379   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 380   cbuf.insts()->emit_int32(d32);
 381 }
 382 
 383 // emit 32 bit value and construct relocation entry from RelocationHolder
 384 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 385         int format) {
 386 #ifdef ASSERT
 387   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 388     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 389   }
 390 #endif
 391   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 392   cbuf.insts()->emit_int32(d32);
 393 }
 394 
 395 // Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // 8-bit displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // 32-bit displacement
  }
}
 408 
// Emits the ModRM, optional SIB, and displacement bytes for a (register, memory)
// operand -- used by the emit_reg_mem encodings.
 410 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 411   // There is no index & no scale, use form without SIB byte
 412   if ((index == 0x4) &&
 413       (scale == 0) && (base != ESP_enc)) {
 414     // If no displacement, mode is 0x0; unless base is [EBP]
 415     if ( (displace == 0) && (base != EBP_enc) ) {
 416       emit_rm(cbuf, 0x0, reg_encoding, base);
 417     }
 418     else {                    // If 8-bit displacement, mode 0x1
 419       if ((displace >= -128) && (displace <= 127)
 420           && (disp_reloc == relocInfo::none) ) {
 421         emit_rm(cbuf, 0x1, reg_encoding, base);
 422         emit_d8(cbuf, displace);
 423       }
 424       else {                  // If 32-bit displacement
 425         if (base == -1) { // Special flag for absolute address
 426           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 427           // (manual lies; no SIB needed here)
 428           if ( disp_reloc != relocInfo::none ) {
 429             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 430           } else {
 431             emit_d32      (cbuf, displace);
 432           }
 433         }
 434         else {                // Normal base + offset
 435           emit_rm(cbuf, 0x2, reg_encoding, base);
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442       }
 443     }
 444   }
 445   else {                      // Else, encode with the SIB byte
 446     // If no displacement, mode is 0x0; unless base is [EBP]
 447     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 448       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 449       emit_rm(cbuf, scale, index, base);
 450     }
 451     else {                    // If 8-bit displacement, mode 0x1
 452       if ((displace >= -128) && (displace <= 127)
 453           && (disp_reloc == relocInfo::none) ) {
 454         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 455         emit_rm(cbuf, scale, index, base);
 456         emit_d8(cbuf, displace);
 457       }
 458       else {                  // If 32-bit displacement
 459         if (base == 0x04 ) {
 460           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 461           emit_rm(cbuf, scale, index, 0x04);
 462         } else {
 463           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 464           emit_rm(cbuf, scale, index, base);
 465         }
 466         if ( disp_reloc != relocInfo::none ) {
 467           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 468         } else {
 469           emit_d32      (cbuf, displace);
 470         }
 471       }
 472     }
 473   }
 474 }
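
// Background for the special cases above: in IA-32 encoding, mod=00 with
// r/m=101 (and likewise a SIB base of 101) means "disp32, no base register".
// That is why a zero displacement off EBP still takes the 8-bit displacement
// path (mod=01, disp8=0), and why base == -1 (an absolute address) is encoded
// as mod=00, r/m=101 followed by the 32-bit displacement.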
 475 
 476 
 477 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 478   if( dst_encoding == src_encoding ) {
 479     // reg-reg copy, use an empty encoding
 480   } else {
 481     emit_opcode( cbuf, 0x8B );
 482     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 483   }
 484 }
 485 
 486 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 487   Label exit;
 488   __ jccb(Assembler::noParity, exit);
 489   __ pushf();
 490   //
 491   // comiss/ucomiss instructions set ZF,PF,CF flags and
 492   // zero OF,AF,SF for NaN values.
 493   // Fixup flags by zeroing ZF,PF so that compare of NaN
 494   // values returns 'less than' result (CF is set).
 495   // Leave the rest of flags unchanged.
 496   //
 497   //    7 6 5 4 3 2 1 0
 498   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 499   //    0 0 1 0 1 0 1 1   (0x2B)
 500   //
 501   __ andl(Address(rsp, 0), 0xffffff2b);
 502   __ popf();
 503   __ bind(exit);
 504 }
 505 
 506 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 507   Label done;
 508   __ movl(dst, -1);
 509   __ jcc(Assembler::parity, done);
 510   __ jcc(Assembler::below, done);
 511   __ setb(Assembler::notEqual, dst);
 512   __ movzbl(dst, dst);
 513   __ bind(done);
 514 }
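
// Net effect: dst is set to -1 if the comparison is unordered (NaN) or
// 'below', 0 if the operands are equal, and +1 if 'above', i.e. a three-way
// -1/0/+1 compare result.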
 515 
 516 
 517 //=============================================================================
 518 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 519 
 520 int ConstantTable::calculate_table_base_offset() const {
 521   return 0;  // absolute addressing, no offset
 522 }
 523 
 524 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 525 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 526   ShouldNotReachHere();
 527 }
 528 
 529 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 530   // Empty encoding
 531 }
 532 
 533 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 534   return 0;
 535 }
 536 
 537 #ifndef PRODUCT
 538 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 539   st->print("# MachConstantBaseNode (empty encoding)");
 540 }
 541 #endif
 542 
 543 
 544 //=============================================================================
 545 #ifndef PRODUCT
 546 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   Compile* C = ra_->C;
 548 
 549   int framesize = C->output()->frame_size_in_bytes();
 550   int bangsize = C->output()->bang_size_in_bytes();
 551   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 552   // Remove wordSize for return addr which is already pushed.
 553   framesize -= wordSize;
 554 
 555   if (C->output()->need_stack_bang(bangsize)) {
 556     framesize -= wordSize;
 557     st->print("# stack bang (%d bytes)", bangsize);
 558     st->print("\n\t");
 559     st->print("PUSH   EBP\t# Save EBP");
 560     if (PreserveFramePointer) {
 561       st->print("\n\t");
 562       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 563     }
 564     if (framesize) {
 565       st->print("\n\t");
 566       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 567     }
 568   } else {
 569     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 570     st->print("\n\t");
 571     framesize -= wordSize;
 572     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 573     if (PreserveFramePointer) {
 574       st->print("\n\t");
 575       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 576       if (framesize > 0) {
 577         st->print("\n\t");
 578         st->print("ADD    EBP, #%d", framesize);
 579       }
 580     }
 581   }
 582 
 583   if (VerifyStackAtCalls) {
 584     st->print("\n\t");
 585     framesize -= wordSize;
 586     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 587   }
 588 
 589   if( C->in_24_bit_fp_mode() ) {
 590     st->print("\n\t");
 591     st->print("FLDCW  \t# load 24 bit fpu control word");
 592   }
 593   if (UseSSE >= 2 && VerifyFPU) {
 594     st->print("\n\t");
 595     st->print("# verify FPU stack (must be clean on entry)");
 596   }
 597 
 598 #ifdef ASSERT
 599   if (VerifyStackAtCalls) {
 600     st->print("\n\t");
 601     st->print("# stack alignment check");
 602   }
 603 #endif
 604   st->cr();
 605 }
 606 #endif
 607 
 608 
 609 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 610   Compile* C = ra_->C;
 611   MacroAssembler _masm(&cbuf);
 612 
 613   int framesize = C->output()->frame_size_in_bytes();
 614   int bangsize = C->output()->bang_size_in_bytes();
 615 
 616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 617 
 618   C->output()->set_frame_complete(cbuf.insts_size());
 619 
 620   if (C->has_mach_constant_base_node()) {
 621     // NOTE: We set the table base offset here because users might be
 622     // emitted before MachConstantBaseNode.
 623     ConstantTable& constant_table = C->output()->constant_table();
 624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 625   }
 626 }
 627 
 628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 629   return MachNode::size(ra_); // too many variables; just compute it the hard way
 630 }
 631 
 632 int MachPrologNode::reloc() const {
 633   return 0; // a large enough number
 634 }
 635 
 636 //=============================================================================
 637 #ifndef PRODUCT
 638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 639   Compile *C = ra_->C;
 640   int framesize = C->output()->frame_size_in_bytes();
 641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 642   // Remove two words for return addr and rbp,
 643   framesize -= 2*wordSize;
 644 
 645   if (C->max_vector_size() > 16) {
 646     st->print("VZEROUPPER");
 647     st->cr(); st->print("\t");
 648   }
 649   if (C->in_24_bit_fp_mode()) {
 650     st->print("FLDCW  standard control word");
 651     st->cr(); st->print("\t");
 652   }
 653   if (framesize) {
 654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 655     st->cr(); st->print("\t");
 656   }
 657   st->print_cr("POPL   EBP"); st->print("\t");
 658   if (do_polling() && C->is_method_compilation()) {
 659     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 660     st->cr(); st->print("\t");
 661   }
 662 }
 663 #endif
 664 
 665 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 666   Compile *C = ra_->C;
 667   MacroAssembler _masm(&cbuf);
 668 
 669   if (C->max_vector_size() > 16) {
 670     // Clear upper bits of YMM registers when current compiled code uses
 671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 672     _masm.vzeroupper();
 673   }
 674   // If method set FPU control word, restore to standard control word
 675   if (C->in_24_bit_fp_mode()) {
 676     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 677   }
 678 
 679   int framesize = C->output()->frame_size_in_bytes();
 680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 681   // Remove two words for return addr and rbp,
 682   framesize -= 2*wordSize;
 683 
 684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 685 
 686   if (framesize >= 128) {
 687     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 688     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 689     emit_d32(cbuf, framesize);
 690   } else if (framesize) {
 691     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 692     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 693     emit_d8(cbuf, framesize);
 694   }
 695 
 696   emit_opcode(cbuf, 0x58 | EBP_enc);
 697 
 698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 699     __ reserved_stack_check();
 700   }
 701 
 702   if (do_polling() && C->is_method_compilation()) {
 703     Register pollReg = as_Register(EBX_enc);
 704     MacroAssembler masm(&cbuf);
 705     masm.get_thread(pollReg);
 706     masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 707     masm.relocate(relocInfo::poll_return_type);
 708     masm.testl(rax, Address(pollReg, 0));
 709   }
 710 }
 711 
 712 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 713   return MachNode::size(ra_); // too many variables; just compute it
 714                               // the hard way
 715 }
 716 
 717 int MachEpilogNode::reloc() const {
 718   return 0; // a large enough number
 719 }
 720 
 721 const Pipeline * MachEpilogNode::pipeline() const {
 722   return MachNode::pipeline_class();
 723 }
 724 
 725 //=============================================================================
 726 
 727 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 728 static enum RC rc_class( OptoReg::Name reg ) {
 729 
 730   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 731   if (OptoReg::is_stack(reg)) return rc_stack;
 732 
 733   VMReg r = OptoReg::as_VMReg(reg);
 734   if (r->is_Register()) return rc_int;
 735   if (r->is_FloatRegister()) {
 736     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 737     return rc_float;
 738   }
 739   assert(r->is_XMMRegister(), "must be");
 740   return rc_xmm;
 741 }
 742 
 743 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 744                         int opcode, const char *op_str, int size, outputStream* st ) {
 745   if( cbuf ) {
 746     emit_opcode  (*cbuf, opcode );
 747     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 748 #ifndef PRODUCT
 749   } else if( !do_size ) {
 750     if( size != 0 ) st->print("\n\t");
 751     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 752       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 753       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 754     } else { // FLD, FST, PUSH, POP
 755       st->print("%s [ESP + #%d]",op_str,offset);
 756     }
 757 #endif
 758   }
 759   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 760   return size+3+offset_size;
 761 }
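
// Size accounting: one opcode byte, a ModRM byte, a SIB byte (ESP-based
// addressing always needs one), and 0, 1 or 4 displacement bytes -- hence
// size + 3 + offset_size.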
 762 
 763 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 764 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 765                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 766   int in_size_in_bits = Assembler::EVEX_32bit;
 767   int evex_encoding = 0;
 768   if (reg_lo+1 == reg_hi) {
 769     in_size_in_bits = Assembler::EVEX_64bit;
 770     evex_encoding = Assembler::VEX_W;
 771   }
 772   if (cbuf) {
 773     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to a single-byte displacement.
 776     _masm.set_managed();
 777     if (reg_lo+1 == reg_hi) { // double move?
 778       if (is_load) {
 779         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 780       } else {
 781         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 782       }
 783     } else {
 784       if (is_load) {
 785         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 786       } else {
 787         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 788       }
 789     }
 790 #ifndef PRODUCT
 791   } else if (!do_size) {
 792     if (size != 0) st->print("\n\t");
 793     if (reg_lo+1 == reg_hi) { // double move?
 794       if (is_load) st->print("%s %s,[ESP + #%d]",
 795                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 796                               Matcher::regName[reg_lo], offset);
 797       else         st->print("MOVSD  [ESP + #%d],%s",
 798                               offset, Matcher::regName[reg_lo]);
 799     } else {
 800       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 801                               Matcher::regName[reg_lo], offset);
 802       else         st->print("MOVSS  [ESP + #%d],%s",
 803                               offset, Matcher::regName[reg_lo]);
 804     }
 805 #endif
 806   }
 807   bool is_single_byte = false;
 808   if ((UseAVX > 2) && (offset != 0)) {
 809     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 810   }
 811   int offset_size = 0;
 812   if (UseAVX > 2 ) {
 813     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 814   } else {
 815     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 816   }
 817   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 818   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 819   return size+5+offset_size;
 820 }
 821 
 822 
 823 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 824                             int src_hi, int dst_hi, int size, outputStream* st ) {
 825   if (cbuf) {
 826     MacroAssembler _masm(cbuf);
 827     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 828     _masm.set_managed();
 829     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 830       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 831                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 832     } else {
 833       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 834                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 835     }
 836 #ifndef PRODUCT
 837   } else if (!do_size) {
 838     if (size != 0) st->print("\n\t");
 839     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 840       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 841         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 842       } else {
 843         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 844       }
 845     } else {
 846       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 847         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 848       } else {
 849         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 850       }
 851     }
 852 #endif
 853   }
 854   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 855   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 856   int sz = (UseAVX > 2) ? 6 : 4;
 857   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 858       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 859   return size + sz;
 860 }
 861 
 862 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 863                             int src_hi, int dst_hi, int size, outputStream* st ) {
 864   // 32-bit
 865   if (cbuf) {
 866     MacroAssembler _masm(cbuf);
 867     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 868     _masm.set_managed();
 869     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 870              as_Register(Matcher::_regEncode[src_lo]));
 871 #ifndef PRODUCT
 872   } else if (!do_size) {
 873     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 874 #endif
 875   }
 876   return (UseAVX> 2) ? 6 : 4;
 877 }
 878 
 879 
 880 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 881                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 882   // 32-bit
 883   if (cbuf) {
 884     MacroAssembler _masm(cbuf);
 885     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 886     _masm.set_managed();
 887     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 888              as_XMMRegister(Matcher::_regEncode[src_lo]));
 889 #ifndef PRODUCT
 890   } else if (!do_size) {
 891     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 892 #endif
 893   }
 894   return (UseAVX> 2) ? 6 : 4;
 895 }
 896 
 897 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 898   if( cbuf ) {
 899     emit_opcode(*cbuf, 0x8B );
 900     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 901 #ifndef PRODUCT
 902   } else if( !do_size ) {
 903     if( size != 0 ) st->print("\n\t");
 904     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 905 #endif
 906   }
 907   return size+2;
 908 }
 909 
 910 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 911                                  int offset, int size, outputStream* st ) {
 912   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 913     if( cbuf ) {
 914       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 915       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 916 #ifndef PRODUCT
 917     } else if( !do_size ) {
 918       if( size != 0 ) st->print("\n\t");
 919       st->print("FLD    %s",Matcher::regName[src_lo]);
 920 #endif
 921     }
 922     size += 2;
 923   }
 924 
 925   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 926   const char *op_str;
 927   int op;
 928   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 929     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 930     op = 0xDD;
 931   } else {                   // 32-bit store
 932     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 933     op = 0xD9;
 934     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 935   }
 936 
 937   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 938 }
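
// Note: st_op is passed to impl_helper() as its 'reg' argument, so the value
// that lands in the ModRM reg field is Matcher::_regEncode[st_op].  EBX
// encodes as 3 and EDX as 2 (see the reg_defs above), which are exactly the
// /3 (FSTP) and /2 (FST) opcode extensions of the x87 store instructions.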
 939 
 940 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 941 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 942                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 943 
 944 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 945                             int stack_offset, int reg, uint ireg, outputStream* st);
 946 
 947 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 948                                      int dst_offset, uint ireg, outputStream* st) {
 949   int calc_size = 0;
 950   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 951   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 952   switch (ireg) {
 953   case Op_VecS:
 954     calc_size = 3+src_offset_size + 3+dst_offset_size;
 955     break;
 956   case Op_VecD: {
 957     calc_size = 3+src_offset_size + 3+dst_offset_size;
 958     int tmp_src_offset = src_offset + 4;
 959     int tmp_dst_offset = dst_offset + 4;
 960     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 961     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 962     calc_size += 3+src_offset_size + 3+dst_offset_size;
 963     break;
 964   }
 965   case Op_VecX:
 966   case Op_VecY:
 967   case Op_VecZ:
 968     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 969     break;
 970   default:
 971     ShouldNotReachHere();
 972   }
 973   if (cbuf) {
 974     MacroAssembler _masm(cbuf);
 975     int offset = __ offset();
 976     switch (ireg) {
 977     case Op_VecS:
 978       __ pushl(Address(rsp, src_offset));
 979       __ popl (Address(rsp, dst_offset));
 980       break;
 981     case Op_VecD:
 982       __ pushl(Address(rsp, src_offset));
 983       __ popl (Address(rsp, dst_offset));
 984       __ pushl(Address(rsp, src_offset+4));
 985       __ popl (Address(rsp, dst_offset+4));
 986       break;
 987     case Op_VecX:
 988       __ movdqu(Address(rsp, -16), xmm0);
 989       __ movdqu(xmm0, Address(rsp, src_offset));
 990       __ movdqu(Address(rsp, dst_offset), xmm0);
 991       __ movdqu(xmm0, Address(rsp, -16));
 992       break;
 993     case Op_VecY:
 994       __ vmovdqu(Address(rsp, -32), xmm0);
 995       __ vmovdqu(xmm0, Address(rsp, src_offset));
 996       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 997       __ vmovdqu(xmm0, Address(rsp, -32));
 998       break;
 999     case Op_VecZ:
1000       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1001       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1002       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1003       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1004       break;
1005     default:
1006       ShouldNotReachHere();
1007     }
1008     int size = __ offset() - offset;
1009     assert(size == calc_size, "incorrect size calculation");
1010     return size;
1011 #ifndef PRODUCT
1012   } else if (!do_size) {
1013     switch (ireg) {
1014     case Op_VecS:
1015       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1016                 "popl    [rsp + #%d]",
1017                 src_offset, dst_offset);
1018       break;
    case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
1025       break;
1026      case Op_VecX:
1027       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1028                 "movdqu  xmm0, [rsp + #%d]\n\t"
1029                 "movdqu  [rsp + #%d], xmm0\n\t"
1030                 "movdqu  xmm0, [rsp - #16]",
1031                 src_offset, dst_offset);
1032       break;
1033     case Op_VecY:
1034       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1035                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1036                 "vmovdqu [rsp + #%d], xmm0\n\t"
1037                 "vmovdqu xmm0, [rsp - #32]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecZ:
1041       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #64]",
1045                 src_offset, dst_offset);
1046       break;
1047     default:
1048       ShouldNotReachHere();
1049     }
1050 #endif
1051   }
1052   return calc_size;
1053 }
1054 
1055 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1056   // Get registers to move
1057   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1058   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1059   OptoReg::Name dst_second = ra_->get_reg_second(this );
1060   OptoReg::Name dst_first = ra_->get_reg_first(this );
1061 
1062   enum RC src_second_rc = rc_class(src_second);
1063   enum RC src_first_rc = rc_class(src_first);
1064   enum RC dst_second_rc = rc_class(dst_second);
1065   enum RC dst_first_rc = rc_class(dst_first);
1066 
1067   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1068 
1069   // Generate spill code!
1070   int size = 0;
1071 
1072   if( src_first == dst_first && src_second == dst_second )
1073     return size;            // Self copy, no move
1074 
1075   if (bottom_type()->isa_vect() != NULL) {
1076     uint ireg = ideal_reg();
1077     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1078     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1079     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1080     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1081       // mem -> mem
1082       int src_offset = ra_->reg2offset(src_first);
1083       int dst_offset = ra_->reg2offset(dst_first);
1084       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1085     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1086       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1087     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1088       int stack_offset = ra_->reg2offset(dst_first);
1089       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1090     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1091       int stack_offset = ra_->reg2offset(src_first);
1092       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1093     } else {
1094       ShouldNotReachHere();
1095     }
1096   }
1097 
1098   // --------------------------------------
1099   // Check for mem-mem move.  push/pop to move.
1100   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1101     if( src_second == dst_first ) { // overlapping stack copy ranges
1102       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1103       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1104       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1105       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1106     }
1107     // move low bits
1108     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1109     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1110     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1111       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1112       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1113     }
1114     return size;
1115   }
1116 
1117   // --------------------------------------
1118   // Check for integer reg-reg copy
1119   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1120     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1121 
1122   // Check for integer store
1123   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1125 
1126   // Check for integer load
1127   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1128     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1129 
1130   // Check for integer reg-xmm reg copy
1131   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1132     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1133             "no 64 bit integer-float reg moves" );
1134     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1135   }
1136   // --------------------------------------
1137   // Check for float reg-reg copy
1138   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1139     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1140             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1141     if( cbuf ) {
1142 
1143       // Note the mucking with the register encode to compensate for the 0/1
1144       // indexing issue mentioned in a comment in the reg_def sections
1145       // for FPR registers many lines above here.
1146 
1147       if( src_first != FPR1L_num ) {
1148         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1149         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1150         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1151         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1152      } else {
1153         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1154         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1155      }
1156 #ifndef PRODUCT
1157     } else if( !do_size ) {
1158       if( size != 0 ) st->print("\n\t");
1159       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1160       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1161 #endif
1162     }
1163     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1164   }
1165 
1166   // Check for float store
1167   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1168     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1169   }
1170 
1171   // Check for float load
1172   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1173     int offset = ra_->reg2offset(src_first);
1174     const char *op_str;
1175     int op;
1176     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1177       op_str = "FLD_D";
1178       op = 0xDD;
1179     } else {                   // 32-bit load
1180       op_str = "FLD_S";
1181       op = 0xD9;
1182       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1183     }
1184     if( cbuf ) {
1185       emit_opcode  (*cbuf, op );
1186       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1187       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1188       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1189 #ifndef PRODUCT
1190     } else if( !do_size ) {
1191       if( size != 0 ) st->print("\n\t");
1192       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1193 #endif
1194     }
1195     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1196     return size + 3+offset_size+2;
1197   }
1198 
1199   // Check for xmm reg-reg copy
1200   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1201     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1202             (src_first+1 == src_second && dst_first+1 == dst_second),
1203             "no non-adjacent float-moves" );
1204     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1205   }
1206 
1207   // Check for xmm reg-integer reg copy
1208   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1209     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1210             "no 64 bit float-integer reg moves" );
1211     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1212   }
1213 
1214   // Check for xmm store
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1216     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1217   }
1218 
1219   // Check for float xmm load
1220   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1221     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1222   }
1223 
1224   // Copy from float reg to xmm reg
1225   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1226     // copy to the top of stack from floating point reg
1227     // and use LEA to preserve flags
1228     if( cbuf ) {
1229       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1230       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1231       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1232       emit_d8(*cbuf,0xF8);
1233 #ifndef PRODUCT
1234     } else if( !do_size ) {
1235       if( size != 0 ) st->print("\n\t");
1236       st->print("LEA    ESP,[ESP-8]");
1237 #endif
1238     }
1239     size += 4;
1240 
1241     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1242 
1243     // Copy from the temp memory to the xmm reg.
1244     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1245 
1246     if( cbuf ) {
1247       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1248       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1249       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1250       emit_d8(*cbuf,0x08);
1251 #ifndef PRODUCT
1252     } else if( !do_size ) {
1253       if( size != 0 ) st->print("\n\t");
1254       st->print("LEA    ESP,[ESP+8]");
1255 #endif
1256     }
1257     size += 4;
1258     return size;
1259   }
1260 
1261   assert( size > 0, "missed a case" );
1262 
1263   // --------------------------------------------------------------------
1264   // Check for second bits still needing moving.
1265   if( src_second == dst_second )
1266     return size;               // Self copy; no move
1267   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1268 
1269   // Check for second word int-int move
1270   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1271     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1272 
1273   // Check for second word integer store
1274   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1275     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1276 
1277   // Check for second word integer load
1278   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1279     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1280 
1281 
1282   Unimplemented();
1283   return 0; // Mute compiler
1284 }
1285 
1286 #ifndef PRODUCT
1287 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1288   implementation( NULL, ra_, false, st );
1289 }
1290 #endif
1291 
1292 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1293   implementation( &cbuf, ra_, false, NULL );
1294 }
1295 
1296 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1297   return MachNode::size(ra_);
1298 }
1299 
1300 
1301 //=============================================================================
1302 #ifndef PRODUCT
1303 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1304   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1305   int reg = ra_->get_reg_first(this);
1306   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1307 }
1308 #endif
1309 
1310 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1311   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1312   int reg = ra_->get_encode(this);
1313   if( offset >= 128 ) {
1314     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1315     emit_rm(cbuf, 0x2, reg, 0x04);
1316     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1317     emit_d32(cbuf, offset);
1318   }
1319   else {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x1, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d8(cbuf, offset);
1324   }
1325 }
1326 
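// Byte accounting for the LEA emitted above (illustrative sketch): with a
// disp32 the encoding is opcode(1) + ModRM(1) + SIB(1) + disp32(4) = 7 bytes;
// with a disp8 it is opcode(1) + ModRM(1) + SIB(1) + disp8(1) = 4 bytes.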
1327 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   if( offset >= 128 ) {
1330     return 7;
1331   }
1332   else {
1333     return 4;
1334   }
1335 }
1336 
1337 //=============================================================================
1338 #ifndef PRODUCT
1339 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1340   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1341   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1342   st->print_cr("\tNOP");
1343   st->print_cr("\tNOP");
1344   if( !OptoBreakpoint )
1345     st->print_cr("\tNOP");
1346 }
1347 #endif
1348 
1349 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1350   MacroAssembler masm(&cbuf);
1351 #ifdef ASSERT
1352   uint insts_size = cbuf.insts_size();
1353 #endif
1354   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1355   masm.jump_cc(Assembler::notEqual,
1356                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
1359   int nops_cnt = 2;
1360   if( !OptoBreakpoint ) // Leave space for int3
1361      nops_cnt += 1;
1362   masm.nop(nops_cnt);
1363 
1364   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1365 }
1366 
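// Byte accounting (illustrative sketch): CMP EAX,[ECX+4] is 3 bytes, the JNE
// to the ic-miss stub is 6 bytes (0F 85 rel32), plus 2 or 3 NOPs, giving the
// 11 or 12 bytes returned below.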
1367 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1368   return OptoBreakpoint ? 11 : 12;
1369 }
1370 
1371 
1372 //=============================================================================
1373 
1374 int Matcher::regnum_to_fpu_offset(int regnum) {
1375   return regnum - 32; // The FP registers are in the second chunk
1376 }
1377 
// This is UltraSparc-specific; returning true just means we have fast l2f conversion.
1379 const bool Matcher::convL2FSupported(void) {
1380   return true;
1381 }
1382 
1383 // Is this branch offset short enough that a short branch can be used?
1384 //
1385 // NOTE: If the platform does not provide any short branch variants, then
1386 //       this method should return false for offset 0.
1387 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1391   offset -= br_size;
1392 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1395   if (rule == jmpConUCF2_rule)
1396     return (-126 <= offset && offset <= 125);
1397   return (-128 <= offset && offset <= 127);
1398 }
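// Worked example (illustrative): a 2-byte short JMP whose target lies 100
// bytes past the branch's own address encodes a displacement of 100 - 2 = 98,
// comfortably inside the [-128, 127] range checked above.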
1399 
1400 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1402   return false;
1403 }
1404 
1405 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1406 const bool Matcher::init_array_count_is_in_bytes = false;
1407 
1408 // Needs 2 CMOV's for longs.
1409 const int Matcher::long_cmove_cost() { return 1; }
1410 
1411 // No CMOVF/CMOVD with SSE/SSE2
1412 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1413 
1414 // Does the CPU require late expand (see block.cpp for description of late expand)?
1415 const bool Matcher::require_postalloc_expand = false;
1416 
1417 // Do we need to mask the count passed to shift instructions or does
1418 // the cpu only look at the lower 5/6 bits anyway?
1419 const bool Matcher::need_masked_shift_count = false;
1420 
1421 bool Matcher::narrow_oop_use_complex_address() {
1422   ShouldNotCallThis();
1423   return true;
1424 }
1425 
1426 bool Matcher::narrow_klass_use_complex_address() {
1427   ShouldNotCallThis();
1428   return true;
1429 }
1430 
1431 bool Matcher::const_oop_prefer_decode() {
1432   ShouldNotCallThis();
1433   return true;
1434 }
1435 
1436 bool Matcher::const_klass_prefer_decode() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 // Is it better to copy float constants, or load them directly from memory?
1442 // Intel can load a float constant from a direct address, requiring no
1443 // extra registers.  Most RISCs will have to materialize an address into a
1444 // register first, so they would do better to copy the constant from stack.
1445 const bool Matcher::rematerialize_float_constants = true;
1446 
1447 // If CPU can load and store mis-aligned doubles directly then no fixup is
1448 // needed.  Else we split the double into 2 integer pieces and move it
1449 // piece-by-piece.  Only happens when passing doubles into C code as the
1450 // Java calling convention forces doubles to be aligned.
1451 const bool Matcher::misaligned_doubles_ok = true;
1452 
1453 
1454 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1455   // Get the memory operand from the node
1456   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1457   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1458   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1459   uint opcnt     = 1;                 // First operand
1460   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1461   while( idx >= skipped+num_edges ) {
1462     skipped += num_edges;
1463     opcnt++;                          // Bump operand count
1464     assert( opcnt < numopnds, "Accessing non-existent operand" );
1465     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1466   }
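  // At this point _opnds[opcnt] is the operand that supplies input edge 'idx'.
  // For example (illustrative): with oper_input_base() == 1 and a first operand
  // contributing 2 edges, idx == 3 lands in the second operand (opcnt == 2).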
1467 
1468   MachOper *memory = node->_opnds[opcnt];
1469   MachOper *new_memory = NULL;
1470   switch (memory->opcode()) {
1471   case DIRECT:
1472   case INDOFFSET32X:
1473     // No transformation necessary.
1474     return;
1475   case INDIRECT:
1476     new_memory = new indirect_win95_safeOper( );
1477     break;
1478   case INDOFFSET8:
1479     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1480     break;
1481   case INDOFFSET32:
1482     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1483     break;
1484   case INDINDEXOFFSET:
1485     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1486     break;
1487   case INDINDEXSCALE:
1488     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1489     break;
1490   case INDINDEXSCALEOFFSET:
1491     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1492     break;
1493   case LOAD_LONG_INDIRECT:
1494   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1496     return;
1497   default:
1498     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1499     return;
1500   }
1501   node->_opnds[opcnt] = new_memory;
1502 }
1503 
1504 // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
1505 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1506 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86_32 they are stored with conversion only when the FPU is used for floats.
1509 bool Matcher::float_in_double() { return (UseSSE == 0); }
1510 
1511 // Do ints take an entire long register or just half?
1512 const bool Matcher::int_in_long = false;
1513 
1514 // Return whether or not this register is ever used as an argument.  This
1515 // function is used on startup to build the trampoline stubs in generateOptoStub.
1516 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1518 bool Matcher::can_be_java_arg( int reg ) {
1519   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1520   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1521   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1522   return false;
1523 }
1524 
1525 bool Matcher::is_spillable_arg( int reg ) {
1526   return can_be_java_arg(reg);
1527 }
1528 
1529 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because its negation does not
  // yield a correct positive 32-bit value).
1535   return VM_Version::has_fast_idiv() &&
1536          (divisor == (int)divisor && divisor != min_jint);
1537 }
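// Example (illustrative): a divisor of 7 or -100 passes the range test, so the
// choice reduces to VM_Version::has_fast_idiv(); min_jint or any divisor that
// needs more than 32 bits always takes the multiply-based path instead.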
1538 
1539 // Register for DIVI projection of divmodI
1540 RegMask Matcher::divI_proj_mask() {
1541   return EAX_REG_mask();
1542 }
1543 
1544 // Register for MODI projection of divmodI
1545 RegMask Matcher::modI_proj_mask() {
1546   return EDX_REG_mask();
1547 }
1548 
1549 // Register for DIVL projection of divmodL
1550 RegMask Matcher::divL_proj_mask() {
1551   ShouldNotReachHere();
1552   return RegMask();
1553 }
1554 
1555 // Register for MODL projection of divmodL
1556 RegMask Matcher::modL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1562   return NO_REG_mask();
1563 }
1564 
// Returns true if the high 32 bits of the value are known to be zero.
1566 bool is_operand_hi32_zero(Node* n) {
1567   int opc = n->Opcode();
1568   if (opc == Op_AndL) {
1569     Node* o2 = n->in(2);
1570     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1571       return true;
1572     }
1573   }
1574   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1575     return true;
1576   }
1577   return false;
1578 }
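// Examples (illustrative): (AndL x (ConL 0xFF)) and a bare (ConL 0x12345678)
// both report true; (ConL 0x100000000) does not, since bit 32 is set.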
1579 
1580 %}
1581 
1582 //----------ENCODING BLOCK-----------------------------------------------------
1583 // This block specifies the encoding classes used by the compiler to output
1584 // byte streams.  Encoding classes generate functions which are called by
1585 // Machine Instruction Nodes in order to generate the bit encoding of the
1586 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1588 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1589 // operand to generate a function which returns its register number when
1590 // queried.   CONST_INTER causes an operand to generate a function which
1591 // returns the value of the constant when queried.  MEMORY_INTER causes an
1592 // operand to generate four functions which return the Base Register, the
1593 // Index Register, the Scale Value, and the Offset Value of the operand when
1594 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1596 // associated with each basic boolean condition for a conditional instruction.
1597 // Instructions specify two basic values for encoding.  They use the
1598 // ins_encode keyword to specify their encoding class (which must be one of
1599 // the class names specified in the encoding block), and they use the
1600 // opcode keyword to specify, in order, their primary, secondary, and
1601 // tertiary opcode.  Only the opcode sections which a particular instruction
1602 // needs for encoding need to be specified.
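// For example (an illustrative sketch, not tied to any particular rule), an
// integer-add instruct might specify opcode(0x03) and
// ins_encode( OpcP, RegReg(dst, src) ), so that OpcP below emits the primary
// opcode byte and RegReg emits the ModRM byte for the two registers.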
1603 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in order,
  // so that the adlc can build the emit functions automagically.
1610 
1611   // Emit primary opcode
1612   enc_class OpcP %{
1613     emit_opcode(cbuf, $primary);
1614   %}
1615 
1616   // Emit secondary opcode
1617   enc_class OpcS %{
1618     emit_opcode(cbuf, $secondary);
1619   %}
1620 
1621   // Emit opcode directly
1622   enc_class Opcode(immI d8) %{
1623     emit_opcode(cbuf, $d8$$constant);
1624   %}
1625 
1626   enc_class SizePrefix %{
1627     emit_opcode(cbuf,0x66);
1628   %}
1629 
1630   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1631     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1632   %}
1633 
1634   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1635     emit_opcode(cbuf,$opcode$$constant);
1636     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1637   %}
1638 
1639   enc_class mov_r32_imm0( rRegI dst ) %{
1640     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1641     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1642   %}
1643 
1644   enc_class cdq_enc %{
1645     // Full implementation of Java idiv and irem; checks for
1646     // special case as described in JVM spec., p.243 & p.271.
1647     //
1648     //         normal case                           special case
1649     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
1657     //
1658     //  81 F8 00 00 00 80    cmp         rax,80000000h
1659     //  0F 85 0B 00 00 00    jne         normal_case
1660     //  33 D2                xor         rdx,edx
1661     //  83 F9 FF             cmp         rcx,0FFh
1662     //  0F 84 03 00 00 00    je          done
1663     //                  normal_case:
1664     //  99                   cdq
1665     //  F7 F9                idiv        rax,ecx
1666     //                  done:
1667     //
1668     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1669     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1670     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1671     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1672     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1673     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1674     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1675     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1676     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1677     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1678     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1679     // normal_case:
1680     emit_opcode(cbuf,0x99);                                         // cdq
1681     // idiv (note: must be emitted by the user of this rule)
1682     // normal:
1683   %}
1684 
1685   // Dense encoding for older common ops
1686   enc_class Opc_plus(immI opcode, rRegI reg) %{
1687     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1688   %}
1689 
1690 
  // Opcode enc_class for 8/32-bit immediate instructions with sign extension
1692   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1693     // Check for 8-bit immediate, and set sign extend bit in opcode
1694     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1695       emit_opcode(cbuf, $primary | 0x02);
1696     }
1697     else {                          // If 32-bit immediate
1698       emit_opcode(cbuf, $primary);
1699     }
1700   %}
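  // E.g. (illustrative): with $primary == 0x81 (the ADD/OR/ADC/SBB/AND/SUB/XOR/CMP
  // immediate group), setting the sign-extend bit yields 0x83, the form that
  // takes a sign-extended 8-bit immediate instead of a full imm32.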
1701 
1702   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1703     // Emit primary opcode and set sign-extend bit
1704     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
1710     // Emit r/m byte with secondary opcode, after primary opcode.
1711     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1712   %}
1713 
1714   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1715     // Check for 8-bit immediate, and set sign extend bit in opcode
1716     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1717       $$$emit8$imm$$constant;
1718     }
1719     else {                          // If 32-bit immediate
1720       // Output immediate
1721       $$$emit32$imm$$constant;
1722     }
1723   %}
1724 
1725   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1726     // Emit primary opcode and set sign-extend bit
1727     // Check for 8-bit immediate, and set sign extend bit in opcode
1728     int con = (int)$imm$$constant; // Throw away top bits
1729     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1730     // Emit r/m byte with secondary opcode, after primary opcode.
1731     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1732     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1733     else                               emit_d32(cbuf,con);
1734   %}
1735 
1736   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1737     // Emit primary opcode and set sign-extend bit
1738     // Check for 8-bit immediate, and set sign extend bit in opcode
1739     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1740     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1741     // Emit r/m byte with tertiary opcode, after primary opcode.
1742     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1743     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1744     else                               emit_d32(cbuf,con);
1745   %}
1746 
1747   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1748     emit_cc(cbuf, $secondary, $dst$$reg );
1749   %}
1750 
1751   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1752     int destlo = $dst$$reg;
1753     int desthi = HIGH_FROM_LOW(destlo);
1754     // bswap lo
1755     emit_opcode(cbuf, 0x0F);
1756     emit_cc(cbuf, 0xC8, destlo);
1757     // bswap hi
1758     emit_opcode(cbuf, 0x0F);
1759     emit_cc(cbuf, 0xC8, desthi);
1760     // xchg lo and hi
1761     emit_opcode(cbuf, 0x87);
1762     emit_rm(cbuf, 0x3, destlo, desthi);
1763   %}
1764 
1765   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1766     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1767   %}
1768 
1769   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1770     $$$emit8$primary;
1771     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1772   %}
1773 
1774   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1775     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1776     emit_d8(cbuf, op >> 8 );
1777     emit_d8(cbuf, op & 255);
1778   %}
1779 
1780   // emulate a CMOV with a conditional branch around a MOV
1781   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1782     // Invert sense of branch from sense of CMOV
1783     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1784     emit_d8( cbuf, $brOffs$$constant );
1785   %}
1786 
1787   enc_class enc_PartialSubtypeCheck( ) %{
1788     Register Redi = as_Register(EDI_enc); // result register
1789     Register Reax = as_Register(EAX_enc); // super class
1790     Register Recx = as_Register(ECX_enc); // killed
1791     Register Resi = as_Register(ESI_enc); // sub class
1792     Label miss;
1793 
1794     MacroAssembler _masm(&cbuf);
1795     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1796                                      NULL, &miss,
1797                                      /*set_cond_codes:*/ true);
1798     if ($primary) {
1799       __ xorptr(Redi, Redi);
1800     }
1801     __ bind(miss);
1802   %}
1803 
1804   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1805     MacroAssembler masm(&cbuf);
1806     int start = masm.offset();
1807     if (UseSSE >= 2) {
1808       if (VerifyFPU) {
1809         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1810       }
1811     } else {
1812       // External c_calling_convention expects the FPU stack to be 'clean'.
1813       // Compiled code leaves it dirty.  Do cleanup now.
1814       masm.empty_FPU_stack();
1815     }
1816     if (sizeof_FFree_Float_Stack_All == -1) {
1817       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1818     } else {
1819       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1820     }
1821   %}
1822 
1823   enc_class Verify_FPU_For_Leaf %{
1824     if( VerifyFPU ) {
1825       MacroAssembler masm(&cbuf);
1826       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1827     }
1828   %}
1829 
1830   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1831     // This is the instruction starting address for relocation info.
1832     cbuf.set_insts_mark();
1833     $$$emit8$primary;
1834     // CALL directly to the runtime
1835     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1836                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1837 
1838     if (UseSSE >= 2) {
1839       MacroAssembler _masm(&cbuf);
1840       BasicType rt = tf()->return_type();
1841 
1842       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1843         // A C runtime call where the return value is unused.  In SSE2+
1844         // mode the result needs to be removed from the FPU stack.  It's
1845         // likely that this function call could be removed by the
1846         // optimizer if the C function is a pure function.
1847         __ ffree(0);
1848       } else if (rt == T_FLOAT) {
1849         __ lea(rsp, Address(rsp, -4));
1850         __ fstp_s(Address(rsp, 0));
1851         __ movflt(xmm0, Address(rsp, 0));
1852         __ lea(rsp, Address(rsp,  4));
1853       } else if (rt == T_DOUBLE) {
1854         __ lea(rsp, Address(rsp, -8));
1855         __ fstp_d(Address(rsp, 0));
1856         __ movdbl(xmm0, Address(rsp, 0));
1857         __ lea(rsp, Address(rsp,  8));
1858       }
1859     }
1860   %}
1861 
1862   enc_class pre_call_resets %{
1863     // If method sets FPU control word restore it here
1864     debug_only(int off0 = cbuf.insts_size());
1865     if (ra_->C->in_24_bit_fp_mode()) {
1866       MacroAssembler _masm(&cbuf);
1867       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1868     }
1869     // Clear upper bits of YMM registers when current compiled code uses
1870     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1871     MacroAssembler _masm(&cbuf);
1872     __ vzeroupper();
1873     debug_only(int off1 = cbuf.insts_size());
1874     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1875   %}
1876 
1877   enc_class post_call_FPU %{
1878     // If method sets FPU control word do it here also
1879     if (Compile::current()->in_24_bit_fp_mode()) {
1880       MacroAssembler masm(&cbuf);
1881       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1882     }
1883   %}
1884 
1885   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1886     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1887     // who we intended to call.
1888     cbuf.set_insts_mark();
1889     $$$emit8$primary;
1890 
1891     if (!_method) {
1892       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1893                      runtime_call_Relocation::spec(),
1894                      RELOC_IMM32);
1895     } else {
1896       int method_index = resolved_method_index(cbuf);
1897       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1898                                                   : static_call_Relocation::spec(method_index);
1899       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1900                      rspec, RELOC_DISP32);
1901       // Emit stubs for static call.
1902       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1903       if (stub == NULL) {
1904         ciEnv::current()->record_failure("CodeCache is full");
1905         return;
1906       }
1907     }
1908   %}
1909 
1910   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1911     MacroAssembler _masm(&cbuf);
1912     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1913   %}
1914 
1915   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1916     int disp = in_bytes(Method::from_compiled_offset());
1917     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1918 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1920     cbuf.set_insts_mark();
1921     $$$emit8$primary;
1922     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1923     emit_d8(cbuf, disp);             // Displacement
1924 
1925   %}
1926 
1927 //   Following encoding is no longer used, but may be restored if calling
1928 //   convention changes significantly.
1929 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1930 //
1931 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1932 //     // int ic_reg     = Matcher::inline_cache_reg();
1933 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1934 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1935 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1936 //
1937 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1938 //     // // so we load it immediately before the call
1939 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1940 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1941 //
1942 //     // xor rbp,ebp
1943 //     emit_opcode(cbuf, 0x33);
1944 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1945 //
1946 //     // CALL to interpreter.
1947 //     cbuf.set_insts_mark();
1948 //     $$$emit8$primary;
1949 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1950 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1951 //   %}
1952 
1953   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1954     $$$emit8$primary;
1955     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1956     $$$emit8$shift$$constant;
1957   %}
1958 
1959   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1960     // Load immediate does not have a zero or sign extended version
1961     // for 8-bit immediates
1962     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1963     $$$emit32$src$$constant;
1964   %}
1965 
1966   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1967     // Load immediate does not have a zero or sign extended version
1968     // for 8-bit immediates
1969     emit_opcode(cbuf, $primary + $dst$$reg);
1970     $$$emit32$src$$constant;
1971   %}
1972 
1973   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1974     // Load immediate does not have a zero or sign extended version
1975     // for 8-bit immediates
1976     int dst_enc = $dst$$reg;
1977     int src_con = $src$$constant & 0x0FFFFFFFFL;
1978     if (src_con == 0) {
1979       // xor dst, dst
1980       emit_opcode(cbuf, 0x33);
1981       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1982     } else {
1983       emit_opcode(cbuf, $primary + dst_enc);
1984       emit_d32(cbuf, src_con);
1985     }
1986   %}
1987 
1988   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1989     // Load immediate does not have a zero or sign extended version
1990     // for 8-bit immediates
1991     int dst_enc = $dst$$reg + 2;
1992     int src_con = ((julong)($src$$constant)) >> 32;
1993     if (src_con == 0) {
1994       // xor dst, dst
1995       emit_opcode(cbuf, 0x33);
1996       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1997     } else {
1998       emit_opcode(cbuf, $primary + dst_enc);
1999       emit_d32(cbuf, src_con);
2000     }
2001   %}
2002 
2003 
2004   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2005   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2006     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2007   %}
2008 
2009   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2010     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2011   %}
2012 
2013   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2014     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2015   %}
2016 
2017   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2018     $$$emit8$primary;
2019     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2020   %}
2021 
2022   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2023     $$$emit8$secondary;
2024     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2025   %}
2026 
2027   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2028     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2029   %}
2030 
2031   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2033   %}
2034 
2035   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2036     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2040     // Output immediate
2041     $$$emit32$src$$constant;
2042   %}
2043 
2044   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2045     // Output Float immediate bits
2046     jfloat jf = $src$$constant;
2047     int    jf_as_bits = jint_cast( jf );
2048     emit_d32(cbuf, jf_as_bits);
2049   %}
2050 
2051   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2052     // Output Float immediate bits
2053     jfloat jf = $src$$constant;
2054     int    jf_as_bits = jint_cast( jf );
2055     emit_d32(cbuf, jf_as_bits);
2056   %}
2057 
2058   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2059     // Output immediate
2060     $$$emit16$src$$constant;
2061   %}
2062 
2063   enc_class Con_d32(immI src) %{
2064     emit_d32(cbuf,$src$$constant);
2065   %}
2066 
2067   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2068     // Output immediate memory reference
2069     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2070     emit_d32(cbuf, 0x00);
2071   %}
2072 
2073   enc_class lock_prefix( ) %{
2074     emit_opcode(cbuf,0xF0);         // [Lock]
2075   %}
2076 
2077   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2082   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2083 
2084     // XCHG  rbx,ecx
2085     emit_opcode(cbuf,0x87);
2086     emit_opcode(cbuf,0xD9);
2087     // [Lock]
2088     emit_opcode(cbuf,0xF0);
2089     // CMPXCHG8 [Eptr]
2090     emit_opcode(cbuf,0x0F);
2091     emit_opcode(cbuf,0xC7);
2092     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2093     // XCHG  rbx,ecx
2094     emit_opcode(cbuf,0x87);
2095     emit_opcode(cbuf,0xD9);
2096   %}
2097 
2098   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2099     // [Lock]
2100     emit_opcode(cbuf,0xF0);
2101 
2102     // CMPXCHG [Eptr]
2103     emit_opcode(cbuf,0x0F);
2104     emit_opcode(cbuf,0xB1);
2105     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2106   %}
2107 
2108   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2109     // [Lock]
2110     emit_opcode(cbuf,0xF0);
2111 
2112     // CMPXCHGB [Eptr]
2113     emit_opcode(cbuf,0x0F);
2114     emit_opcode(cbuf,0xB0);
2115     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2116   %}
2117 
2118   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2119     // [Lock]
2120     emit_opcode(cbuf,0xF0);
2121 
2122     // 16-bit mode
2123     emit_opcode(cbuf, 0x66);
2124 
2125     // CMPXCHGW [Eptr]
2126     emit_opcode(cbuf,0x0F);
2127     emit_opcode(cbuf,0xB1);
2128     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2129   %}
2130 
2131   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2132     int res_encoding = $res$$reg;
2133 
2134     // MOV  res,0
2135     emit_opcode( cbuf, 0xB8 + res_encoding);
2136     emit_d32( cbuf, 0 );
2137     // JNE,s  fail
2138     emit_opcode(cbuf,0x75);
2139     emit_d8(cbuf, 5 );
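    // (the skip distance of 5 covers the MOV res,1 below: 1 opcode byte + 4
    //  immediate bytes)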
2140     // MOV  res,1
2141     emit_opcode( cbuf, 0xB8 + res_encoding);
2142     emit_d32( cbuf, 1 );
2143     // fail:
2144   %}
2145 
2146   enc_class set_instruction_start( ) %{
2147     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2148   %}
2149 
2150   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2151     int reg_encoding = $ereg$$reg;
2152     int base  = $mem$$base;
2153     int index = $mem$$index;
2154     int scale = $mem$$scale;
2155     int displace = $mem$$disp;
2156     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2157     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2158   %}
2159 
2160   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2161     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2162     int base  = $mem$$base;
2163     int index = $mem$$index;
2164     int scale = $mem$$scale;
2165     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2166     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2167     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2168   %}
2169 
2170   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2171     int r1, r2;
2172     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2173     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2174     emit_opcode(cbuf,0x0F);
2175     emit_opcode(cbuf,$tertiary);
2176     emit_rm(cbuf, 0x3, r1, r2);
2177     emit_d8(cbuf,$cnt$$constant);
2178     emit_d8(cbuf,$primary);
2179     emit_rm(cbuf, 0x3, $secondary, r1);
2180     emit_d8(cbuf,$cnt$$constant);
2181   %}
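  // E.g. (illustrative): for a left shift by a constant cnt in [1,31] the
  // tertiary opcode is SHLD (0xA4) and the pair emitted is roughly
  // "SHLD hi,lo,cnt ; SHL lo,cnt"; the right-shift variants swap the roles of
  // the two halves and use SHRD with SHR/SAR.  The actual opcode bytes come
  // from the matching instruct rule's opcode() declaration.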
2182 
2183   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2184     emit_opcode( cbuf, 0x8B ); // Move
2185     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2186     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2187       emit_d8(cbuf,$primary);
2188       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2189       emit_d8(cbuf,$cnt$$constant-32);
2190     }
2191     emit_d8(cbuf,$primary);
2192     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2193     emit_d8(cbuf,31);
2194   %}
2195 
2196   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2197     int r1, r2;
2198     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2199     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2200 
2201     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2202     emit_rm(cbuf, 0x3, r1, r2);
2203     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2204       emit_opcode(cbuf,$primary);
2205       emit_rm(cbuf, 0x3, $secondary, r1);
2206       emit_d8(cbuf,$cnt$$constant-32);
2207     }
2208     emit_opcode(cbuf,0x33);  // XOR r2,r2
2209     emit_rm(cbuf, 0x3, r2, r2);
2210   %}
2211 
2212   // Clone of RegMem but accepts an extra parameter to access each
2213   // half of a double in memory; it never needs relocation info.
2214   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2215     emit_opcode(cbuf,$opcode$$constant);
2216     int reg_encoding = $rm_reg$$reg;
2217     int base     = $mem$$base;
2218     int index    = $mem$$index;
2219     int scale    = $mem$$scale;
2220     int displace = $mem$$disp + $disp_for_half$$constant;
2221     relocInfo::relocType disp_reloc = relocInfo::none;
2222     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2223   %}
2224 
2225   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2226   //
2227   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2228   // and it never needs relocation information.
2229   // Frequently used to move data between FPU's Stack Top and memory.
2230   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2231     int rm_byte_opcode = $rm_opcode$$constant;
2232     int base     = $mem$$base;
2233     int index    = $mem$$index;
2234     int scale    = $mem$$scale;
2235     int displace = $mem$$disp;
2236     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2237     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2238   %}
2239 
2240   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2241     int rm_byte_opcode = $rm_opcode$$constant;
2242     int base     = $mem$$base;
2243     int index    = $mem$$index;
2244     int scale    = $mem$$scale;
2245     int displace = $mem$$disp;
2246     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2247     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2248   %}
2249 
2250   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2251     int reg_encoding = $dst$$reg;
2252     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2253     int index        = 0x04;            // 0x04 indicates no index
2254     int scale        = 0x00;            // 0x00 indicates no scale
2255     int displace     = $src1$$constant; // 0x00 indicates no displacement
2256     relocInfo::relocType disp_reloc = relocInfo::none;
2257     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2258   %}
2259 
2260   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2261     // Compare dst,src
2262     emit_opcode(cbuf,0x3B);
2263     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2264     // jmp dst < src around move
2265     emit_opcode(cbuf,0x7C);
2266     emit_d8(cbuf,2);
2267     // move dst,src
2268     emit_opcode(cbuf,0x8B);
2269     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2270   %}
2271 
2272   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2273     // Compare dst,src
2274     emit_opcode(cbuf,0x3B);
2275     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2276     // jmp dst > src around move
2277     emit_opcode(cbuf,0x7F);
2278     emit_d8(cbuf,2);
2279     // move dst,src
2280     emit_opcode(cbuf,0x8B);
2281     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2282   %}
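  // In both min_enc and max_enc the 8-bit jump distance of 2 skips exactly the
  // trailing 2-byte MOV (opcode + ModRM).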
2283 
2284   enc_class enc_FPR_store(memory mem, regDPR src) %{
2285     // If src is FPR1, we can just FST to store it.
2286     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2287     int reg_encoding = 0x2; // Just store
2288     int base  = $mem$$base;
2289     int index = $mem$$index;
2290     int scale = $mem$$scale;
2291     int displace = $mem$$disp;
2292     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2293     if( $src$$reg != FPR1L_enc ) {
2294       reg_encoding = 0x3;  // Store & pop
2295       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2296       emit_d8( cbuf, 0xC0-1+$src$$reg );
2297     }
2298     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2299     emit_opcode(cbuf,$primary);
2300     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2301   %}
2302 
2303   enc_class neg_reg(rRegI dst) %{
2304     // NEG $dst
2305     emit_opcode(cbuf,0xF7);
2306     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2307   %}
2308 
2309   enc_class setLT_reg(eCXRegI dst) %{
2310     // SETLT $dst
2311     emit_opcode(cbuf,0x0F);
2312     emit_opcode(cbuf,0x9C);
2313     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2314   %}
2315 
2316   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2317     int tmpReg = $tmp$$reg;
2318 
2319     // SUB $p,$q
2320     emit_opcode(cbuf,0x2B);
2321     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2322     // SBB $tmp,$tmp
2323     emit_opcode(cbuf,0x1B);
2324     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2325     // AND $tmp,$y
2326     emit_opcode(cbuf,0x23);
2327     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2328     // ADD $p,$tmp
2329     emit_opcode(cbuf,0x03);
2330     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2331   %}
2332 
2333   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2334     // TEST shift,32
2335     emit_opcode(cbuf,0xF7);
2336     emit_rm(cbuf, 0x3, 0, ECX_enc);
2337     emit_d32(cbuf,0x20);
2338     // JEQ,s small
2339     emit_opcode(cbuf, 0x74);
2340     emit_d8(cbuf, 0x04);
2341     // MOV    $dst.hi,$dst.lo
2342     emit_opcode( cbuf, 0x8B );
2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2344     // CLR    $dst.lo
2345     emit_opcode(cbuf, 0x33);
2346     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2347 // small:
2348     // SHLD   $dst.hi,$dst.lo,$shift
2349     emit_opcode(cbuf,0x0F);
2350     emit_opcode(cbuf,0xA5);
2351     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2353     emit_opcode(cbuf,0xD3);
2354     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2355   %}
2356 
2357   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2358     // TEST shift,32
2359     emit_opcode(cbuf,0xF7);
2360     emit_rm(cbuf, 0x3, 0, ECX_enc);
2361     emit_d32(cbuf,0x20);
2362     // JEQ,s small
2363     emit_opcode(cbuf, 0x74);
2364     emit_d8(cbuf, 0x04);
2365     // MOV    $dst.lo,$dst.hi
2366     emit_opcode( cbuf, 0x8B );
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2368     // CLR    $dst.hi
2369     emit_opcode(cbuf, 0x33);
2370     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2371 // small:
2372     // SHRD   $dst.lo,$dst.hi,$shift
2373     emit_opcode(cbuf,0x0F);
2374     emit_opcode(cbuf,0xAD);
2375     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2377     emit_opcode(cbuf,0xD3);
2378     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2379   %}
2380 
2381   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2382     // TEST shift,32
2383     emit_opcode(cbuf,0xF7);
2384     emit_rm(cbuf, 0x3, 0, ECX_enc);
2385     emit_d32(cbuf,0x20);
2386     // JEQ,s small
2387     emit_opcode(cbuf, 0x74);
2388     emit_d8(cbuf, 0x05);
2389     // MOV    $dst.lo,$dst.hi
2390     emit_opcode( cbuf, 0x8B );
2391     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2392     // SAR    $dst.hi,31
2393     emit_opcode(cbuf, 0xC1);
2394     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2395     emit_d8(cbuf, 0x1F );
2396 // small:
2397     // SHRD   $dst.lo,$dst.hi,$shift
2398     emit_opcode(cbuf,0x0F);
2399     emit_opcode(cbuf,0xAD);
2400     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2402     emit_opcode(cbuf,0xD3);
2403     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2404   %}
2405 
2406 
2407   // ----------------- Encodings for floating point unit -----------------
2408   // May leave result in FPU-TOS or FPU reg depending on opcodes
2409   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2410     $$$emit8$primary;
2411     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2412   %}
2413 
2414   // Pop argument in FPR0 with FSTP ST(0)
2415   enc_class PopFPU() %{
2416     emit_opcode( cbuf, 0xDD );
2417     emit_d8( cbuf, 0xD8 );
2418   %}
2419 
2420   // !!!!! equivalent to Pop_Reg_F
2421   enc_class Pop_Reg_DPR( regDPR dst ) %{
2422     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2423     emit_d8( cbuf, 0xD8+$dst$$reg );
2424   %}
2425 
2426   enc_class Push_Reg_DPR( regDPR dst ) %{
2427     emit_opcode( cbuf, 0xD9 );
2428     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2429   %}
2430 
2431   enc_class strictfp_bias1( regDPR dst ) %{
2432     emit_opcode( cbuf, 0xDB );           // FLD m80real
2433     emit_opcode( cbuf, 0x2D );
2434     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2435     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2436     emit_opcode( cbuf, 0xC8+$dst$$reg );
2437   %}
2438 
2439   enc_class strictfp_bias2( regDPR dst ) %{
2440     emit_opcode( cbuf, 0xDB );           // FLD m80real
2441     emit_opcode( cbuf, 0x2D );
2442     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2443     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2444     emit_opcode( cbuf, 0xC8+$dst$$reg );
2445   %}
2446 
2447   // Special case for moving an integer register to a stack slot.
2448   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2449     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2450   %}
2451 
2452   // Special case for moving a register to a stack slot.
2453   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2454     // Opcode already emitted
2455     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2456     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2457     emit_d32(cbuf, $dst$$disp);   // Displacement
2458   %}
2459 
2460   // Push the integer in stackSlot 'src' onto FP-stack
2461   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2462     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2463   %}
2464 
2465   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2466   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2467     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2468   %}
2469 
2470   // Same as Pop_Mem_F except for opcode
2471   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2472   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2473     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2474   %}
2475 
2476   enc_class Pop_Reg_FPR( regFPR dst ) %{
2477     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2478     emit_d8( cbuf, 0xD8+$dst$$reg );
2479   %}
2480 
2481   enc_class Push_Reg_FPR( regFPR dst ) %{
2482     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2483     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2484   %}
2485 
2486   // Push FPU's float to a stack-slot, and pop FPU-stack
2487   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2488     int pop = 0x02;
2489     if ($src$$reg != FPR1L_enc) {
2490       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2491       emit_d8( cbuf, 0xC0-1+$src$$reg );
2492       pop = 0x03;
2493     }
2494     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2495   %}
2496 
2497   // Push FPU's double to a stack-slot, and pop FPU-stack
2498   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2499     int pop = 0x02;
2500     if ($src$$reg != FPR1L_enc) {
2501       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2502       emit_d8( cbuf, 0xC0-1+$src$$reg );
2503       pop = 0x03;
2504     }
2505     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2506   %}
2507 
2508   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2509   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2510     int pop = 0xD0 - 1; // -1 since we skip FLD
2511     if ($src$$reg != FPR1L_enc) {
2512       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2513       emit_d8( cbuf, 0xC0-1+$src$$reg );
2514       pop = 0xD8;
2515     }
2516     emit_opcode( cbuf, 0xDD );
2517     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2518   %}
2519 
2520 
2521   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2522     // load dst in FPR0
2523     emit_opcode( cbuf, 0xD9 );
2524     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2525     if ($src$$reg != FPR1L_enc) {
2526       // fincstp
2527       emit_opcode (cbuf, 0xD9);
2528       emit_opcode (cbuf, 0xF7);
2529       // swap src with FPR1:
2530       // FXCH FPR1 with src
2531       emit_opcode(cbuf, 0xD9);
2532       emit_d8(cbuf, 0xC8-1+$src$$reg );
2533       // fdecstp
2534       emit_opcode (cbuf, 0xD9);
2535       emit_opcode (cbuf, 0xF6);
2536     }
2537   %}
2538 
2539   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2540     MacroAssembler _masm(&cbuf);
2541     __ subptr(rsp, 8);
2542     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2543     __ fld_d(Address(rsp, 0));
2544     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2545     __ fld_d(Address(rsp, 0));
2546   %}
2547 
2548   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ subptr(rsp, 4);
2551     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2552     __ fld_s(Address(rsp, 0));
2553     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2554     __ fld_s(Address(rsp, 0));
2555   %}
2556 
2557   enc_class Push_ResultD(regD dst) %{
2558     MacroAssembler _masm(&cbuf);
2559     __ fstp_d(Address(rsp, 0));
2560     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2561     __ addptr(rsp, 8);
2562   %}
2563 
2564   enc_class Push_ResultF(regF dst, immI d8) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ fstp_s(Address(rsp, 0));
2567     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2568     __ addptr(rsp, $d8$$constant);
2569   %}
2570 
2571   enc_class Push_SrcD(regD src) %{
2572     MacroAssembler _masm(&cbuf);
2573     __ subptr(rsp, 8);
2574     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class push_stack_temp_qword() %{
2579     MacroAssembler _masm(&cbuf);
2580     __ subptr(rsp, 8);
2581   %}
2582 
2583   enc_class pop_stack_temp_qword() %{
2584     MacroAssembler _masm(&cbuf);
2585     __ addptr(rsp, 8);
2586   %}
2587 
2588   enc_class push_xmm_to_fpr1(regD src) %{
2589     MacroAssembler _masm(&cbuf);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class Push_Result_Mod_DPR( regDPR src) %{
2595     if ($src$$reg != FPR1L_enc) {
2596       // fincstp
2597       emit_opcode (cbuf, 0xD9);
2598       emit_opcode (cbuf, 0xF7);
2599       // FXCH FPR1 with src
2600       emit_opcode(cbuf, 0xD9);
2601       emit_d8(cbuf, 0xC8-1+$src$$reg );
2602       // fdecstp
2603       emit_opcode (cbuf, 0xD9);
2604       emit_opcode (cbuf, 0xF6);
2605     }
2606     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2607     // // FSTP   FPR$dst$$reg
2608     // emit_opcode( cbuf, 0xDD );
2609     // emit_d8( cbuf, 0xD8+$dst$$reg );
2610   %}
2611 
2612   enc_class fnstsw_sahf_skip_parity() %{
2613     // fnstsw ax
2614     emit_opcode( cbuf, 0xDF );
2615     emit_opcode( cbuf, 0xE0 );
2616     // sahf
2617     emit_opcode( cbuf, 0x9E );
2618     // jnp  ::skip
2619     emit_opcode( cbuf, 0x7B );
2620     emit_opcode( cbuf, 0x05 );
2621   %}
2622 
2623   enc_class emitModDPR() %{
2624     // fprem must be iterative
2625     // :: loop
2626     // fprem
2627     emit_opcode( cbuf, 0xD9 );
2628     emit_opcode( cbuf, 0xF8 );
2629     // wait
2630     emit_opcode( cbuf, 0x9b );
2631     // fnstsw ax
2632     emit_opcode( cbuf, 0xDF );
2633     emit_opcode( cbuf, 0xE0 );
2634     // sahf
2635     emit_opcode( cbuf, 0x9E );
2636     // jp  ::loop
2637     emit_opcode( cbuf, 0x0F );
2638     emit_opcode( cbuf, 0x8A );
2639     emit_opcode( cbuf, 0xF4 );
2640     emit_opcode( cbuf, 0xFF );
2641     emit_opcode( cbuf, 0xFF );
2642     emit_opcode( cbuf, 0xFF );
2643   %}
2644 
2645   enc_class fpu_flags() %{
2646     // fnstsw_ax
2647     emit_opcode( cbuf, 0xDF);
2648     emit_opcode( cbuf, 0xE0);
2649     // test ax,0x0400
2650     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2651     emit_opcode( cbuf, 0xA9 );
2652     emit_d16   ( cbuf, 0x0400 );
2653     // // // This sequence works, but stalls for 12-16 cycles on PPro
2654     // // test rax,0x0400
2655     // emit_opcode( cbuf, 0xA9 );
2656     // emit_d32   ( cbuf, 0x00000400 );
2657     //
2658     // jz exit (no unordered comparison)
2659     emit_opcode( cbuf, 0x74 );
2660     emit_d8    ( cbuf, 0x02 );
2661     // mov ah,1 - treat as LT case (set carry flag)
2662     emit_opcode( cbuf, 0xB4 );
2663     emit_d8    ( cbuf, 0x01 );
2664     // sahf
2665     emit_opcode( cbuf, 0x9E);
2666   %}
2667 
2668   enc_class cmpF_P6_fixup() %{
2669     // Fixup the integer flags in case comparison involved a NaN
2670     //
2671     // JNP exit (no unordered comparison, P-flag is set by NaN)
2672     emit_opcode( cbuf, 0x7B );
2673     emit_d8    ( cbuf, 0x03 );
2674     // MOV AH,1 - treat as LT case (set carry flag)
2675     emit_opcode( cbuf, 0xB4 );
2676     emit_d8    ( cbuf, 0x01 );
2677     // SAHF
2678     emit_opcode( cbuf, 0x9E);
2679     // NOP     // target for branch to avoid branch to branch
2680     emit_opcode( cbuf, 0x90);
2681   %}
2682 
2683 //     fnstsw_ax();
2684 //     sahf();
2685 //     movl(dst, nan_result);
2686 //     jcc(Assembler::parity, exit);
2687 //     movl(dst, less_result);
2688 //     jcc(Assembler::below, exit);
2689 //     movl(dst, equal_result);
2690 //     jcc(Assembler::equal, exit);
2691 //     movl(dst, greater_result);
2692 
2693 // less_result     =  1;
2694 // greater_result  = -1;
2695 // equal_result    = 0;
2696 // nan_result      = -1;
2697 
2698   enc_class CmpF_Result(rRegI dst) %{
2699     // fnstsw_ax();
2700     emit_opcode( cbuf, 0xDF);
2701     emit_opcode( cbuf, 0xE0);
2702     // sahf
2703     emit_opcode( cbuf, 0x9E);
2704     // movl(dst, nan_result);
2705     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2706     emit_d32( cbuf, -1 );
2707     // jcc(Assembler::parity, exit);
2708     emit_opcode( cbuf, 0x7A );
2709     emit_d8    ( cbuf, 0x13 );
2710     // movl(dst, less_result);
2711     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2712     emit_d32( cbuf, -1 );
2713     // jcc(Assembler::below, exit);
2714     emit_opcode( cbuf, 0x72 );
2715     emit_d8    ( cbuf, 0x0C );
2716     // movl(dst, equal_result);
2717     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2718     emit_d32( cbuf, 0 );
2719     // jcc(Assembler::equal, exit);
2720     emit_opcode( cbuf, 0x74 );
2721     emit_d8    ( cbuf, 0x05 );
2722     // movl(dst, greater_result);
2723     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2724     emit_d32( cbuf, 1 );
2725   %}
2726 
2727 
2728   // Compare the longs and set flags
2729   // BROKEN!  Do Not use as-is
2730   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2731     // CMP    $src1.hi,$src2.hi
2732     emit_opcode( cbuf, 0x3B );
2733     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2734     // JNE,s  done
2735     emit_opcode(cbuf,0x75);
2736     emit_d8(cbuf, 2 );
2737     // CMP    $src1.lo,$src2.lo
2738     emit_opcode( cbuf, 0x3B );
2739     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2740 // done:
2741   %}
2742 
2743   enc_class convert_int_long( regL dst, rRegI src ) %{
2744     // mov $dst.lo,$src
2745     int dst_encoding = $dst$$reg;
2746     int src_encoding = $src$$reg;
2747     encode_Copy( cbuf, dst_encoding  , src_encoding );
2748     // mov $dst.hi,$src
2749     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2750     // sar $dst.hi,31
2751     emit_opcode( cbuf, 0xC1 );
2752     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2753     emit_d8(cbuf, 0x1F );
2754   %}
2755 
2756   enc_class convert_long_double( eRegL src ) %{
2757     // push $src.hi
2758     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2759     // push $src.lo
2760     emit_opcode(cbuf, 0x50+$src$$reg  );
2761     // fild 64-bits at [SP]
2762     emit_opcode(cbuf,0xdf);
2763     emit_d8(cbuf, 0x6C);
2764     emit_d8(cbuf, 0x24);
2765     emit_d8(cbuf, 0x00);
2766     // pop stack
2767     emit_opcode(cbuf, 0x83); // add  SP, #8
2768     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2769     emit_d8(cbuf, 0x8);
2770   %}
2771 
2772   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2773     // IMUL   EDX:EAX,$src1
2774     emit_opcode( cbuf, 0xF7 );
2775     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2776     // SAR    EDX,$cnt-32
2777     int shift_count = ((int)$cnt$$constant) - 32;
2778     if (shift_count > 0) {
2779       emit_opcode(cbuf, 0xC1);
2780       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2781       emit_d8(cbuf, shift_count);
2782     }
2783   %}
2784 
2785   // this version doesn't have add sp, 8
2786   enc_class convert_long_double2( eRegL src ) %{
2787     // push $src.hi
2788     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2789     // push $src.lo
2790     emit_opcode(cbuf, 0x50+$src$$reg  );
2791     // fild 64-bits at [SP]
2792     emit_opcode(cbuf,0xdf);
2793     emit_d8(cbuf, 0x6C);
2794     emit_d8(cbuf, 0x24);
2795     emit_d8(cbuf, 0x00);
2796   %}
2797 
2798   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2799     // Basic idea: long = (long)int * (long)int
2800     // IMUL EDX:EAX, src
2801     emit_opcode( cbuf, 0xF7 );
2802     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2803   %}
2804 
2805   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2806     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2807     // MUL EDX:EAX, src
2808     emit_opcode( cbuf, 0xF7 );
2809     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2810   %}
2811 
2812   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2813     // Basic idea: lo(result) = lo(x_lo * y_lo)
2814     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2815     // MOV    $tmp,$src.lo
2816     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2817     // IMUL   $tmp,EDX
2818     emit_opcode( cbuf, 0x0F );
2819     emit_opcode( cbuf, 0xAF );
2820     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2821     // MOV    EDX,$src.hi
2822     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2823     // IMUL   EDX,EAX
2824     emit_opcode( cbuf, 0x0F );
2825     emit_opcode( cbuf, 0xAF );
2826     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2827     // ADD    $tmp,EDX
2828     emit_opcode( cbuf, 0x03 );
2829     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2830     // MUL   EDX:EAX,$src.lo
2831     emit_opcode( cbuf, 0xF7 );
2832     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2833     // ADD    EDX,ESI
2834     emit_opcode( cbuf, 0x03 );
2835     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2836   %}
2837 
2838   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2839     // Basic idea: lo(result) = lo(src * y_lo)
2840     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2841     // IMUL   $tmp,EDX,$src
2842     emit_opcode( cbuf, 0x6B );
2843     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2844     emit_d8( cbuf, (int)$src$$constant );
2845     // MOV    EDX,$src
2846     emit_opcode(cbuf, 0xB8 + EDX_enc);
2847     emit_d32( cbuf, (int)$src$$constant );
2848     // MUL   EDX:EAX,EDX
2849     emit_opcode( cbuf, 0xF7 );
2850     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2851     // ADD    EDX,ESI
2852     emit_opcode( cbuf, 0x03 );
2853     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2854   %}
2855 
2856   enc_class long_div( eRegL src1, eRegL src2 ) %{
2857     // PUSH src1.hi
2858     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2859     // PUSH src1.lo
2860     emit_opcode(cbuf,               0x50+$src1$$reg  );
2861     // PUSH src2.hi
2862     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2863     // PUSH src2.lo
2864     emit_opcode(cbuf,               0x50+$src2$$reg  );
2865     // CALL directly to the runtime
2866     cbuf.set_insts_mark();
2867     emit_opcode(cbuf,0xE8);       // Call into runtime
2868     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2869     // Restore stack
2870     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2871     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2872     emit_d8(cbuf, 4*4);
2873   %}
2874 
2875   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2876     // PUSH src1.hi
2877     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2878     // PUSH src1.lo
2879     emit_opcode(cbuf,               0x50+$src1$$reg  );
2880     // PUSH src2.hi
2881     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2882     // PUSH src2.lo
2883     emit_opcode(cbuf,               0x50+$src2$$reg  );
2884     // CALL directly to the runtime
2885     cbuf.set_insts_mark();
2886     emit_opcode(cbuf,0xE8);       // Call into runtime
2887     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2888     // Restore stack
2889     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2890     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2891     emit_d8(cbuf, 4*4);
2892   %}
2893 
2894   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2895     // MOV   $tmp,$src.lo
2896     emit_opcode(cbuf, 0x8B);
2897     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2898     // OR    $tmp,$src.hi
2899     emit_opcode(cbuf, 0x0B);
2900     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2901   %}
2902 
2903   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2904     // CMP    $src1.lo,$src2.lo
2905     emit_opcode( cbuf, 0x3B );
2906     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2907     // JNE,s  skip
2908     emit_cc(cbuf, 0x70, 0x5);
2909     emit_d8(cbuf,2);
2910     // CMP    $src1.hi,$src2.hi
2911     emit_opcode( cbuf, 0x3B );
2912     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2913   %}
2914 
2915   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2916     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2917     emit_opcode( cbuf, 0x3B );
2918     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2919     // MOV    $tmp,$src1.hi
2920     emit_opcode( cbuf, 0x8B );
2921     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2922     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2923     emit_opcode( cbuf, 0x1B );
2924     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2925   %}
2926 
2927   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2928     // XOR    $tmp,$tmp
2929     emit_opcode(cbuf,0x33);  // XOR
2930     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2931     // CMP    $tmp,$src.lo
2932     emit_opcode( cbuf, 0x3B );
2933     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2934     // SBB    $tmp,$src.hi
2935     emit_opcode( cbuf, 0x1B );
2936     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2937   %}
2938 
2939  // Sniff, sniff... smells like Gnu Superoptimizer
2940   enc_class neg_long( eRegL dst ) %{
2941     emit_opcode(cbuf,0xF7);    // NEG hi
2942     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2943     emit_opcode(cbuf,0xF7);    // NEG lo
2944     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2945     emit_opcode(cbuf,0x83);    // SBB hi,0
2946     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2947     emit_d8    (cbuf,0 );
2948   %}
2949 
2950   enc_class enc_pop_rdx() %{
2951     emit_opcode(cbuf,0x5A);
2952   %}
2953 
2954   enc_class enc_rethrow() %{
2955     cbuf.set_insts_mark();
2956     emit_opcode(cbuf, 0xE9);        // jmp    entry
2957     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2958                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2959   %}
2960 
2961 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'round towards zero', store the darned double down as an int, and reset
  // the rounding mode to 'round to nearest'.  If the store produced the
  // integer-indefinite value, a slow-path call into the runtime computes
  // the correct result.
2967   enc_class DPR2I_encoding( regDPR src ) %{
2968     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2971     // However, I2C adapters and other float-stack manglers leave pending
2972     // invalid-op exceptions hanging.  We would have to clear them before
2973     // enabling them and that is more expensive than just testing for the
2974     // invalid value Intel stores down in the corner cases.
2975     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2976     emit_opcode(cbuf,0x2D);
2977     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2978     // Allocate a word
2979     emit_opcode(cbuf,0x83);            // SUB ESP,4
2980     emit_opcode(cbuf,0xEC);
2981     emit_d8(cbuf,0x04);
2982     // Encoding assumes a double has been pushed into FPR0.
2983     // Store down the double as an int, popping the FPU stack
2984     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2985     emit_opcode(cbuf,0x1C);
2986     emit_d8(cbuf,0x24);
2987     // Restore the rounding mode; mask the exception
2988     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2989     emit_opcode(cbuf,0x2D);
2990     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2991         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2992         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2993 
2994     // Load the converted int; adjust CPU stack
2995     emit_opcode(cbuf,0x58);       // POP EAX
2996     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2997     emit_d32   (cbuf,0x80000000); //         0x80000000
2998     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2999     emit_d8    (cbuf,0x07);       // Size of slow_call
3000     // Push src onto stack slow-path
3001     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3002     emit_d8    (cbuf,0xC0-1+$src$$reg );
3003     // CALL directly to the runtime
3004     cbuf.set_insts_mark();
3005     emit_opcode(cbuf,0xE8);       // Call into runtime
3006     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3007     // Carry on here...
3008   %}
3009 
3010   enc_class DPR2L_encoding( regDPR src ) %{
3011     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3012     emit_opcode(cbuf,0x2D);
3013     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3014     // Allocate a word
3015     emit_opcode(cbuf,0x83);            // SUB ESP,8
3016     emit_opcode(cbuf,0xEC);
3017     emit_d8(cbuf,0x08);
3018     // Encoding assumes a double has been pushed into FPR0.
3019     // Store down the double as a long, popping the FPU stack
3020     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3021     emit_opcode(cbuf,0x3C);
3022     emit_d8(cbuf,0x24);
3023     // Restore the rounding mode; mask the exception
3024     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3025     emit_opcode(cbuf,0x2D);
3026     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3027         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3028         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3029 
3030     // Load the converted int; adjust CPU stack
3031     emit_opcode(cbuf,0x58);       // POP EAX
3032     emit_opcode(cbuf,0x5A);       // POP EDX
3033     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3034     emit_d8    (cbuf,0xFA);       // rdx
3035     emit_d32   (cbuf,0x80000000); //         0x80000000
3036     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3037     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3038     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3039     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3040     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3041     emit_d8    (cbuf,0x07);       // Size of slow_call
3042     // Push src onto stack slow-path
3043     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3044     emit_d8    (cbuf,0xC0-1+$src$$reg );
3045     // CALL directly to the runtime
3046     cbuf.set_insts_mark();
3047     emit_opcode(cbuf,0xE8);       // Call into runtime
3048     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3049     // Carry on here...
3050   %}
3051 
3052   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3053     // Operand was loaded from memory into fp ST (stack top)
3054     // FMUL   ST,$src  /* D8 C8+i */
3055     emit_opcode(cbuf, 0xD8);
3056     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3057   %}
3058 
3059   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
3061     emit_opcode(cbuf, 0xD8);
3062     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // Could use FADDP  src2,fpST  /* DE C0+i */
3064   %}
3065 
3066   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3067     // FADDP  src2,ST  /* DE C0+i */
3068     emit_opcode(cbuf, 0xDE);
3069     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3070   %}
3071 
3072   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3073     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
3081   %}
3082 
3083   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3084     // Operand was loaded from memory into fp ST (stack top)
3085     // FADD   ST,$src  /* D8 C0+i */
3086     emit_opcode(cbuf, 0xD8);
3087     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3088 
    // FMUL  ST,src2  /* D8 C8+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3092   %}
3093 
3094 
3095   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3096     // Operand was loaded from memory into fp ST (stack top)
3097     // FADD   ST,$src  /* D8 C0+i */
3098     emit_opcode(cbuf, 0xD8);
3099     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3100 
3101     // FMULP  src2,ST  /* DE C8+i */
3102     emit_opcode(cbuf, 0xDE);
3103     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3104   %}
3105 
3106   // Atomically load the volatile long
3107   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3108     emit_opcode(cbuf,0xDF);
3109     int rm_byte_opcode = 0x05;
3110     int base     = $mem$$base;
3111     int index    = $mem$$index;
3112     int scale    = $mem$$scale;
3113     int displace = $mem$$disp;
3114     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3115     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3116     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3117   %}
3118 
3119   // Volatile Store Long.  Must be atomic, so move it into
3120   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3121   // target address before the store (for null-ptr checks)
3122   // so the memory operand is used twice in the encoding.
3123   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3124     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3125     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3126     emit_opcode(cbuf,0xDF);
3127     int rm_byte_opcode = 0x07;
3128     int base     = $mem$$base;
3129     int index    = $mem$$index;
3130     int scale    = $mem$$scale;
3131     int displace = $mem$$disp;
3132     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3133     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3134   %}
3135 
3136 %}
3137 
3138 
3139 //----------FRAME--------------------------------------------------------------
3140 // Definition of frame structure and management information.
3141 //
3142 //  S T A C K   L A Y O U T    Allocators stack-slot number
3143 //                             |   (to get allocators register number
3144 //  G  Owned by    |        |  v    add OptoReg::stack0())
3145 //  r   CALLER     |        |
3146 //  o     |        +--------+      pad to even-align allocators stack-slot
3147 //  w     V        |  pad0  |        numbers; owned by CALLER
3148 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3149 //  h     ^        |   in   |  5
3150 //        |        |  args  |  4   Holes in incoming args owned by SELF
3151 //  |     |        |        |  3
3152 //  |     |        +--------+
3153 //  V     |        | old out|      Empty on Intel, window on Sparc
3154 //        |    old |preserve|      Must be even aligned.
3155 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3156 //        |        |   in   |  3   area for Intel ret address
3157 //     Owned by    |preserve|      Empty on Sparc.
3158 //       SELF      +--------+
3159 //        |        |  pad2  |  2   pad to align old SP
3160 //        |        +--------+  1
3161 //        |        | locks  |  0
3162 //        |        +--------+----> OptoReg::stack0(), even aligned
3163 //        |        |  pad1  | 11   pad to align new SP
3164 //        |        +--------+
3165 //        |        |        | 10
3166 //        |        | spills |  9   spills
3167 //        V        |        |  8   (pad0 slot for callee)
3168 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3169 //        ^        |  out   |  7
3170 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3171 //     Owned by    +--------+
3172 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3173 //        |    new |preserve|      Must be even-aligned.
3174 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3175 //        |        |        |
3176 //
3177 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3178 //         known from SELF's arguments and the Java calling convention.
3179 //         Region 6-7 is determined per call site.
3180 // Note 2: If the calling convention leaves holes in the incoming argument
3181 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3183 //         incoming area, as the Java calling convention is completely under
3184 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3186 //         varargs C calling conventions.
3187 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3188 //         even aligned with pad0 as needed.
3189 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3190 //         region 6-11 is even aligned; it may be padded out more so that
3191 //         the region from SP to FP meets the minimum stack alignment.
3192 
3193 frame %{
  // Direction in which the stack grows (assumed to be the same for C & Java)
3195   stack_direction(TOWARDS_LOW);
3196 
3197   // These three registers define part of the calling convention
3198   // between compiled code and the interpreter.
3199   inline_cache_reg(EAX);                // Inline Cache Register
3200   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3201 
3202   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3203   cisc_spilling_operand_name(indOffset32);
3204 
3205   // Number of stack slots consumed by locking an object
3206   sync_stack_slots(1);
3207 
3208   // Compiled code's Frame Pointer
3209   frame_pointer(ESP);
3210   // Interpreter stores its frame pointer in a register which is
3211   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
3213   interpreter_frame_pointer(EBP);
3214 
3215   // Stack alignment requirement
3216   // Alignment size in bytes (128-bit -> 16 bytes)
3217   stack_alignment(StackAlignmentInBytes);
3218 
3219   // Number of stack slots between incoming argument block and the start of
3220   // a new frame.  The PROLOG must add this many slots to the stack.  The
3221   // EPILOG must remove this many slots.  Intel needs one slot for
  // the return address and one for rbp (rbp must be saved).
3223   in_preserve_stack_slots(2+VerifyStackAtCalls);
3224 
3225   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3226   // for calls to C.  Supports the var-args backing area for register parms.
3227   varargs_C_out_slots_killed(0);
3228 
3229   // The after-PROLOG location of the return address.  Location of
3230   // return address specifies a type (REG or STACK) and a number
3231   // representing the register number (i.e. - use a register name) or
3232   // stack slot.
3233   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
3235   return_addr(STACK - 1 +
3236               align_up((Compile::current()->in_preserve_stack_slots() +
3237                         Compile::current()->fixed_slots()),
3238                        stack_alignment_in_slots()));
3239 
3240   // Body of function which returns an integer array locating
3241   // arguments either in registers or in stack slots.  Passed an array
3242   // of ideal registers called "sig" and a "length" count.  Stack-slot
3243   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3244   // arguments for a CALLEE.  Incoming stack arguments are
3245   // automatically biased by the preserve_stack_slots field above.
3246   calling_convention %{
    // No difference between incoming/outgoing; just pass false
3248     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3249   %}
3250 
3251 
3252   // Body of function which returns an integer array locating
3253   // arguments either in registers or in stack slots.  Passed an array
3254   // of ideal registers called "sig" and a "length" count.  Stack-slot
3255   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3256   // arguments for a CALLEE.  Incoming stack arguments are
3257   // automatically biased by the preserve_stack_slots field above.
3258   c_calling_convention %{
3259     // This is obviously always outgoing
3260     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3261   %}
3262 
3263   // Location of C & interpreter return values
3264   c_return_value %{
3265     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3266     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3267     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3268 
3269     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3270     // that C functions return float and double results in XMM0.
3271     if( ideal_reg == Op_RegD && UseSSE>=2 )
3272       return OptoRegPair(XMM0b_num,XMM0_num);
3273     if( ideal_reg == Op_RegF && UseSSE>=2 )
3274       return OptoRegPair(OptoReg::Bad,XMM0_num);
3275 
3276     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3277   %}
3278 
3279   // Location of return values
3280   return_value %{
3281     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3282     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3283     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3284     if( ideal_reg == Op_RegD && UseSSE>=2 )
3285       return OptoRegPair(XMM0b_num,XMM0_num);
3286     if( ideal_reg == Op_RegF && UseSSE>=1 )
3287       return OptoRegPair(OptoReg::Bad,XMM0_num);
3288     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3289   %}
3290 
3291 %}
3292 
3293 //----------ATTRIBUTES---------------------------------------------------------
3294 //----------Operand Attributes-------------------------------------------------
3295 op_attrib op_cost(0);        // Required cost attribute
3296 
3297 //----------Instruction Attributes---------------------------------------------
3298 ins_attrib ins_cost(100);       // Required cost attribute
3299 ins_attrib ins_size(8);         // Required size attribute (in bits)
3300 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3301                                 // non-matching short branch variant of some
                                // long branch?
3303 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3304                                 // specifies the alignment that some part of the instruction (not
3305                                 // necessarily the start) requires.  If > 1, a compute_padding()
3306                                 // function must be provided for the instruction
3307 
3308 //----------OPERANDS-----------------------------------------------------------
3309 // Operand definitions must precede instruction definitions for correct parsing
3310 // in the ADLC because operands constitute user defined types which are used in
3311 // instruction definitions.
3312 
3313 //----------Simple Operands----------------------------------------------------
3314 // Immediate Operands
3315 // Integer Immediate
3316 operand immI() %{
3317   match(ConI);
3318 
3319   op_cost(10);
3320   format %{ %}
3321   interface(CONST_INTER);
3322 %}
3323 
3324 // Constant for test vs zero
3325 operand immI0() %{
3326   predicate(n->get_int() == 0);
3327   match(ConI);
3328 
3329   op_cost(0);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for increment
3335 operand immI1() %{
3336   predicate(n->get_int() == 1);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for decrement
3345 operand immI_M1() %{
3346   predicate(n->get_int() == -1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Valid scale values for addressing modes
3355 operand immI2() %{
3356   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3357   match(ConI);
3358 
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 operand immI8() %{
3364   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3365   match(ConI);
3366 
3367   op_cost(5);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 operand immI16() %{
3373   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3374   match(ConI);
3375 
3376   op_cost(10);
3377   format %{ %}
3378   interface(CONST_INTER);
3379 %}
3380 
3381 // Int Immediate non-negative
3382 operand immU31()
3383 %{
3384   predicate(n->get_int() >= 0);
3385   match(ConI);
3386 
3387   op_cost(0);
3388   format %{ %}
3389   interface(CONST_INTER);
3390 %}
3391 
3392 // Constant for long shifts
3393 operand immI_32() %{
3394   predicate( n->get_int() == 32 );
3395   match(ConI);
3396 
3397   op_cost(0);
3398   format %{ %}
3399   interface(CONST_INTER);
3400 %}
3401 
3402 operand immI_1_31() %{
3403   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3404   match(ConI);
3405 
3406   op_cost(0);
3407   format %{ %}
3408   interface(CONST_INTER);
3409 %}
3410 
3411 operand immI_32_63() %{
3412   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3413   match(ConI);
3414   op_cost(0);
3415 
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 operand immI_1() %{
3421   predicate( n->get_int() == 1 );
3422   match(ConI);
3423 
3424   op_cost(0);
3425   format %{ %}
3426   interface(CONST_INTER);
3427 %}
3428 
3429 operand immI_2() %{
3430   predicate( n->get_int() == 2 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_3() %{
3439   predicate( n->get_int() == 3 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 // Pointer Immediate
3448 operand immP() %{
3449   match(ConP);
3450 
3451   op_cost(10);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 // NULL Pointer Immediate
3457 operand immP0() %{
3458   predicate( n->get_ptr() == 0 );
3459   match(ConP);
3460   op_cost(0);
3461 
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 // Long Immediate
3467 operand immL() %{
3468   match(ConL);
3469 
3470   op_cost(20);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Long Immediate zero
3476 operand immL0() %{
3477   predicate( n->get_long() == 0L );
3478   match(ConL);
3479   op_cost(0);
3480 
3481   format %{ %}
3482   interface(CONST_INTER);
3483 %}
3484 
// Long Immediate minus one
3486 operand immL_M1() %{
3487   predicate( n->get_long() == -1L );
3488   match(ConL);
3489   op_cost(0);
3490 
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
3495 // Long immediate from 0 to 127.
3496 // Used for a shorter form of long mul by 10.
3497 operand immL_127() %{
3498   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3499   match(ConL);
3500   op_cost(0);
3501 
3502   format %{ %}
3503   interface(CONST_INTER);
3504 %}
3505 
3506 // Long Immediate: low 32-bit mask
3507 operand immL_32bits() %{
3508   predicate(n->get_long() == 0xFFFFFFFFL);
3509   match(ConL);
3510   op_cost(0);
3511 
3512   format %{ %}
3513   interface(CONST_INTER);
3514 %}
3515 
// Long Immediate: value that fits in a signed 32-bit immediate
3517 operand immL32() %{
3518   predicate(n->get_long() == (int)(n->get_long()));
3519   match(ConL);
3520   op_cost(20);
3521 
3522   format %{ %}
3523   interface(CONST_INTER);
3524 %}
3525 
// Double Immediate zero
3527 operand immDPR0() %{
3528   // Do additional (and counter-intuitive) test against NaN to work around VC++
3529   // bug that generates code such that NaNs compare equal to 0.0
3530   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3531   match(ConD);
3532 
3533   op_cost(5);
3534   format %{ %}
3535   interface(CONST_INTER);
3536 %}
3537 
3538 // Double Immediate one
3539 operand immDPR1() %{
3540   predicate( UseSSE<=1 && n->getd() == 1.0 );
3541   match(ConD);
3542 
3543   op_cost(5);
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Double Immediate
3549 operand immDPR() %{
3550   predicate(UseSSE<=1);
3551   match(ConD);
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 operand immD() %{
3559   predicate(UseSSE>=2);
3560   match(ConD);
3561 
3562   op_cost(5);
3563   format %{ %}
3564   interface(CONST_INTER);
3565 %}
3566 
3567 // Double Immediate zero
3568 operand immD0() %{
3569   // Do additional (and counter-intuitive) test against NaN to work around VC++
3570   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3571   // compare equal to -0.0.
3572   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3573   match(ConD);
3574 
3575   format %{ %}
3576   interface(CONST_INTER);
3577 %}
3578 
3579 // Float Immediate zero
3580 operand immFPR0() %{
3581   predicate(UseSSE == 0 && n->getf() == 0.0F);
3582   match(ConF);
3583 
3584   op_cost(5);
3585   format %{ %}
3586   interface(CONST_INTER);
3587 %}
3588 
3589 // Float Immediate one
3590 operand immFPR1() %{
3591   predicate(UseSSE == 0 && n->getf() == 1.0F);
3592   match(ConF);
3593 
3594   op_cost(5);
3595   format %{ %}
3596   interface(CONST_INTER);
3597 %}
3598 
3599 // Float Immediate
3600 operand immFPR() %{
3601   predicate( UseSSE == 0 );
3602   match(ConF);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Float Immediate
3610 operand immF() %{
3611   predicate(UseSSE >= 1);
3612   match(ConF);
3613 
3614   op_cost(5);
3615   format %{ %}
3616   interface(CONST_INTER);
3617 %}
3618 
3619 // Float Immediate zero.  Zero and not -0.0
3620 operand immF0() %{
3621   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3622   match(ConF);
3623 
3624   op_cost(5);
3625   format %{ %}
3626   interface(CONST_INTER);
3627 %}
3628 
3629 // Immediates for special shifts (sign extend)
3630 
3631 // Constants for increment
3632 operand immI_16() %{
3633   predicate( n->get_int() == 16 );
3634   match(ConI);
3635 
3636   format %{ %}
3637   interface(CONST_INTER);
3638 %}
3639 
3640 operand immI_24() %{
3641   predicate( n->get_int() == 24 );
3642   match(ConI);
3643 
3644   format %{ %}
3645   interface(CONST_INTER);
3646 %}
3647 
3648 // Constant for byte-wide masking
3649 operand immI_255() %{
3650   predicate( n->get_int() == 255 );
3651   match(ConI);
3652 
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Constant for short-wide masking
3658 operand immI_65535() %{
3659   predicate(n->get_int() == 65535);
3660   match(ConI);
3661 
3662   format %{ %}
3663   interface(CONST_INTER);
3664 %}
3665 
3666 // Register Operands
3667 // Integer Register
3668 operand rRegI() %{
3669   constraint(ALLOC_IN_RC(int_reg));
3670   match(RegI);
3671   match(xRegI);
3672   match(eAXRegI);
3673   match(eBXRegI);
3674   match(eCXRegI);
3675   match(eDXRegI);
3676   match(eDIRegI);
3677   match(eSIRegI);
3678 
3679   format %{ %}
3680   interface(REG_INTER);
3681 %}
3682 
3683 // Subset of Integer Register
3684 operand xRegI(rRegI reg) %{
3685   constraint(ALLOC_IN_RC(int_x_reg));
3686   match(reg);
3687   match(eAXRegI);
3688   match(eBXRegI);
3689   match(eCXRegI);
3690   match(eDXRegI);
3691 
3692   format %{ %}
3693   interface(REG_INTER);
3694 %}
3695 
3696 // Special Registers
3697 operand eAXRegI(xRegI reg) %{
3698   constraint(ALLOC_IN_RC(eax_reg));
3699   match(reg);
3700   match(rRegI);
3701 
3702   format %{ "EAX" %}
3703   interface(REG_INTER);
3704 %}
3705 
3706 // Special Registers
3707 operand eBXRegI(xRegI reg) %{
3708   constraint(ALLOC_IN_RC(ebx_reg));
3709   match(reg);
3710   match(rRegI);
3711 
3712   format %{ "EBX" %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 operand eCXRegI(xRegI reg) %{
3717   constraint(ALLOC_IN_RC(ecx_reg));
3718   match(reg);
3719   match(rRegI);
3720 
3721   format %{ "ECX" %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 operand eDXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(edx_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EDX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 operand eDIRegI(xRegI reg) %{
3735   constraint(ALLOC_IN_RC(edi_reg));
3736   match(reg);
3737   match(rRegI);
3738 
3739   format %{ "EDI" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 operand naxRegI() %{
3744   constraint(ALLOC_IN_RC(nax_reg));
3745   match(RegI);
3746   match(eCXRegI);
3747   match(eDXRegI);
3748   match(eSIRegI);
3749   match(eDIRegI);
3750 
3751   format %{ %}
3752   interface(REG_INTER);
3753 %}
3754 
3755 operand nadxRegI() %{
3756   constraint(ALLOC_IN_RC(nadx_reg));
3757   match(RegI);
3758   match(eBXRegI);
3759   match(eCXRegI);
3760   match(eSIRegI);
3761   match(eDIRegI);
3762 
3763   format %{ %}
3764   interface(REG_INTER);
3765 %}
3766 
3767 operand ncxRegI() %{
3768   constraint(ALLOC_IN_RC(ncx_reg));
3769   match(RegI);
3770   match(eAXRegI);
3771   match(eDXRegI);
3772   match(eSIRegI);
3773   match(eDIRegI);
3774 
3775   format %{ %}
3776   interface(REG_INTER);
3777 %}
3778 
3779 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3780 // //
3781 operand eSIRegI(xRegI reg) %{
3782    constraint(ALLOC_IN_RC(esi_reg));
3783    match(reg);
3784    match(rRegI);
3785 
3786    format %{ "ESI" %}
3787    interface(REG_INTER);
3788 %}
3789 
3790 // Pointer Register
3791 operand anyRegP() %{
3792   constraint(ALLOC_IN_RC(any_reg));
3793   match(RegP);
3794   match(eAXRegP);
3795   match(eBXRegP);
3796   match(eCXRegP);
3797   match(eDIRegP);
3798   match(eRegP);
3799 
3800   format %{ %}
3801   interface(REG_INTER);
3802 %}
3803 
3804 operand eRegP() %{
3805   constraint(ALLOC_IN_RC(int_reg));
3806   match(RegP);
3807   match(eAXRegP);
3808   match(eBXRegP);
3809   match(eCXRegP);
3810   match(eDIRegP);
3811 
3812   format %{ %}
3813   interface(REG_INTER);
3814 %}
3815 
// On Windows 95, EBP is not safe to use for implicit null tests.
3817 operand eRegP_no_EBP() %{
3818   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3819   match(RegP);
3820   match(eAXRegP);
3821   match(eBXRegP);
3822   match(eCXRegP);
3823   match(eDIRegP);
3824 
3825   op_cost(100);
3826   format %{ %}
3827   interface(REG_INTER);
3828 %}
3829 
3830 operand naxRegP() %{
3831   constraint(ALLOC_IN_RC(nax_reg));
3832   match(RegP);
3833   match(eBXRegP);
3834   match(eDXRegP);
3835   match(eCXRegP);
3836   match(eSIRegP);
3837   match(eDIRegP);
3838 
3839   format %{ %}
3840   interface(REG_INTER);
3841 %}
3842 
3843 operand nabxRegP() %{
3844   constraint(ALLOC_IN_RC(nabx_reg));
3845   match(RegP);
3846   match(eCXRegP);
3847   match(eDXRegP);
3848   match(eSIRegP);
3849   match(eDIRegP);
3850 
3851   format %{ %}
3852   interface(REG_INTER);
3853 %}
3854 
3855 operand pRegP() %{
3856   constraint(ALLOC_IN_RC(p_reg));
3857   match(RegP);
3858   match(eBXRegP);
3859   match(eDXRegP);
3860   match(eSIRegP);
3861   match(eDIRegP);
3862 
3863   format %{ %}
3864   interface(REG_INTER);
3865 %}
3866 
3867 // Special Registers
3868 // Return a pointer value
3869 operand eAXRegP(eRegP reg) %{
3870   constraint(ALLOC_IN_RC(eax_reg));
3871   match(reg);
3872   format %{ "EAX" %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 // Used in AtomicAdd
3877 operand eBXRegP(eRegP reg) %{
3878   constraint(ALLOC_IN_RC(ebx_reg));
3879   match(reg);
3880   format %{ "EBX" %}
3881   interface(REG_INTER);
3882 %}
3883 
3884 // Tail-call (interprocedural jump) to interpreter
3885 operand eCXRegP(eRegP reg) %{
3886   constraint(ALLOC_IN_RC(ecx_reg));
3887   match(reg);
3888   format %{ "ECX" %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 operand eDXRegP(eRegP reg) %{
3893   constraint(ALLOC_IN_RC(edx_reg));
3894   match(reg);
3895   format %{ "EDX" %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 operand eSIRegP(eRegP reg) %{
3900   constraint(ALLOC_IN_RC(esi_reg));
3901   match(reg);
3902   format %{ "ESI" %}
3903   interface(REG_INTER);
3904 %}
3905 
3906 // Used in rep stosw
3907 operand eDIRegP(eRegP reg) %{
3908   constraint(ALLOC_IN_RC(edi_reg));
3909   match(reg);
3910   format %{ "EDI" %}
3911   interface(REG_INTER);
3912 %}
3913 
3914 operand eRegL() %{
3915   constraint(ALLOC_IN_RC(long_reg));
3916   match(RegL);
3917   match(eADXRegL);
3918 
3919   format %{ %}
3920   interface(REG_INTER);
3921 %}
3922 
3923 operand eADXRegL( eRegL reg ) %{
3924   constraint(ALLOC_IN_RC(eadx_reg));
3925   match(reg);
3926 
3927   format %{ "EDX:EAX" %}
3928   interface(REG_INTER);
3929 %}
3930 
3931 operand eBCXRegL( eRegL reg ) %{
3932   constraint(ALLOC_IN_RC(ebcx_reg));
3933   match(reg);
3934 
3935   format %{ "EBX:ECX" %}
3936   interface(REG_INTER);
3937 %}
3938 
3939 // Special case for integer high multiply
3940 operand eADXRegL_low_only() %{
3941   constraint(ALLOC_IN_RC(eadx_reg));
3942   match(RegL);
3943 
3944   format %{ "EAX" %}
3945   interface(REG_INTER);
3946 %}
3947 
3948 // Flags register, used as output of compare instructions
3949 operand eFlagsReg() %{
3950   constraint(ALLOC_IN_RC(int_flags));
3951   match(RegFlags);
3952 
3953   format %{ "EFLAGS" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Flags register, used as output of FLOATING POINT compare instructions
3958 operand eFlagsRegU() %{
3959   constraint(ALLOC_IN_RC(int_flags));
3960   match(RegFlags);
3961 
3962   format %{ "EFLAGS_U" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 operand eFlagsRegUCF() %{
3967   constraint(ALLOC_IN_RC(int_flags));
3968   match(RegFlags);
3969   predicate(false);
3970 
3971   format %{ "EFLAGS_U_CF" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 // Condition Code Register used by long compare
3976 operand flagsReg_long_LTGE() %{
3977   constraint(ALLOC_IN_RC(int_flags));
3978   match(RegFlags);
3979   format %{ "FLAGS_LTGE" %}
3980   interface(REG_INTER);
3981 %}
3982 operand flagsReg_long_EQNE() %{
3983   constraint(ALLOC_IN_RC(int_flags));
3984   match(RegFlags);
3985   format %{ "FLAGS_EQNE" %}
3986   interface(REG_INTER);
3987 %}
3988 operand flagsReg_long_LEGT() %{
3989   constraint(ALLOC_IN_RC(int_flags));
3990   match(RegFlags);
3991   format %{ "FLAGS_LEGT" %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 // Condition Code Register used by unsigned long compare
3996 operand flagsReg_ulong_LTGE() %{
3997   constraint(ALLOC_IN_RC(int_flags));
3998   match(RegFlags);
3999   format %{ "FLAGS_U_LTGE" %}
4000   interface(REG_INTER);
4001 %}
4002 operand flagsReg_ulong_EQNE() %{
4003   constraint(ALLOC_IN_RC(int_flags));
4004   match(RegFlags);
4005   format %{ "FLAGS_U_EQNE" %}
4006   interface(REG_INTER);
4007 %}
4008 operand flagsReg_ulong_LEGT() %{
4009   constraint(ALLOC_IN_RC(int_flags));
4010   match(RegFlags);
4011   format %{ "FLAGS_U_LEGT" %}
4012   interface(REG_INTER);
4013 %}
4014 
4015 // Float register operands
4016 operand regDPR() %{
4017   predicate( UseSSE < 2 );
4018   constraint(ALLOC_IN_RC(fp_dbl_reg));
4019   match(RegD);
4020   match(regDPR1);
4021   match(regDPR2);
4022   format %{ %}
4023   interface(REG_INTER);
4024 %}
4025 
4026 operand regDPR1(regDPR reg) %{
4027   predicate( UseSSE < 2 );
4028   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4029   match(reg);
4030   format %{ "FPR1" %}
4031   interface(REG_INTER);
4032 %}
4033 
4034 operand regDPR2(regDPR reg) %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4037   match(reg);
4038   format %{ "FPR2" %}
4039   interface(REG_INTER);
4040 %}
4041 
4042 operand regnotDPR1(regDPR reg) %{
4043   predicate( UseSSE < 2 );
4044   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4045   match(reg);
4046   format %{ %}
4047   interface(REG_INTER);
4048 %}
4049 
4050 // Float register operands
4051 operand regFPR() %{
4052   predicate( UseSSE < 2 );
4053   constraint(ALLOC_IN_RC(fp_flt_reg));
4054   match(RegF);
4055   match(regFPR1);
4056   format %{ %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 // Float register operands
4061 operand regFPR1(regFPR reg) %{
4062   predicate( UseSSE < 2 );
4063   constraint(ALLOC_IN_RC(fp_flt_reg0));
4064   match(reg);
4065   format %{ "FPR1" %}
4066   interface(REG_INTER);
4067 %}
4068 
4069 // XMM Float register operands
4070 operand regF() %{
4071   predicate( UseSSE>=1 );
4072   constraint(ALLOC_IN_RC(float_reg_legacy));
4073   match(RegF);
4074   format %{ %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 // Float register operands
4079 operand vlRegF() %{
4080    constraint(ALLOC_IN_RC(float_reg_vl));
4081    match(RegF);
4082 
4083    format %{ %}
4084    interface(REG_INTER);
4085 %}
4086 
4087 // XMM Double register operands
4088 operand regD() %{
4089   predicate( UseSSE>=2 );
4090   constraint(ALLOC_IN_RC(double_reg_legacy));
4091   match(RegD);
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 // Double register operands
4097 operand vlRegD() %{
4098    constraint(ALLOC_IN_RC(double_reg_vl));
4099    match(RegD);
4100 
4101    format %{ %}
4102    interface(REG_INTER);
4103 %}
4104 
4105 //----------Memory Operands----------------------------------------------------
4106 // Direct Memory Operand
4107 operand direct(immP addr) %{
4108   match(addr);
4109 
4110   format %{ "[$addr]" %}
4111   interface(MEMORY_INTER) %{
4112     base(0xFFFFFFFF);
4113     index(0x4);
4114     scale(0x0);
4115     disp($addr);
4116   %}
4117 %}
4118 
4119 // Indirect Memory Operand
4120 operand indirect(eRegP reg) %{
4121   constraint(ALLOC_IN_RC(int_reg));
4122   match(reg);
4123 
4124   format %{ "[$reg]" %}
4125   interface(MEMORY_INTER) %{
4126     base($reg);
4127     index(0x4);
4128     scale(0x0);
4129     disp(0x0);
4130   %}
4131 %}
4132 
4133 // Indirect Memory Plus Short Offset Operand
4134 operand indOffset8(eRegP reg, immI8 off) %{
4135   match(AddP reg off);
4136 
4137   format %{ "[$reg + $off]" %}
4138   interface(MEMORY_INTER) %{
4139     base($reg);
4140     index(0x4);
4141     scale(0x0);
4142     disp($off);
4143   %}
4144 %}
4145 
4146 // Indirect Memory Plus Long Offset Operand
4147 operand indOffset32(eRegP reg, immI off) %{
4148   match(AddP reg off);
4149 
4150   format %{ "[$reg + $off]" %}
4151   interface(MEMORY_INTER) %{
4152     base($reg);
4153     index(0x4);
4154     scale(0x0);
4155     disp($off);
4156   %}
4157 %}
4158 
4159 // Indirect Memory Plus Long Offset Operand
4160 operand indOffset32X(rRegI reg, immP off) %{
4161   match(AddP off reg);
4162 
4163   format %{ "[$reg + $off]" %}
4164   interface(MEMORY_INTER) %{
4165     base($reg);
4166     index(0x4);
4167     scale(0x0);
4168     disp($off);
4169   %}
4170 %}
4171 
4172 // Indirect Memory Plus Index Register Plus Offset Operand
4173 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4174   match(AddP (AddP reg ireg) off);
4175 
4176   op_cost(10);
4177   format %{"[$reg + $off + $ireg]" %}
4178   interface(MEMORY_INTER) %{
4179     base($reg);
4180     index($ireg);
4181     scale(0x0);
4182     disp($off);
4183   %}
4184 %}
4185 
4186 // Indirect Memory Plus Index Register Plus Offset Operand
4187 operand indIndex(eRegP reg, rRegI ireg) %{
4188   match(AddP reg ireg);
4189 
4190   op_cost(10);
4191   format %{"[$reg + $ireg]" %}
4192   interface(MEMORY_INTER) %{
4193     base($reg);
4194     index($ireg);
4195     scale(0x0);
4196     disp(0x0);
4197   %}
4198 %}
4199 
4200 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4202 // // -------------------------------------------------------------------------
4203 // // Scaled Memory Operands
4204 // // Indirect Memory Times Scale Plus Offset Operand
4205 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4206 //   match(AddP off (LShiftI ireg scale));
4207 //
4208 //   op_cost(10);
4209 //   format %{"[$off + $ireg << $scale]" %}
4210 //   interface(MEMORY_INTER) %{
4211 //     base(0x4);
4212 //     index($ireg);
4213 //     scale($scale);
4214 //     disp($off);
4215 //   %}
4216 // %}
4217 
4218 // Indirect Memory Times Scale Plus Index Register
4219 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4220   match(AddP reg (LShiftI ireg scale));
4221 
4222   op_cost(10);
4223   format %{"[$reg + $ireg << $scale]" %}
4224   interface(MEMORY_INTER) %{
4225     base($reg);
4226     index($ireg);
4227     scale($scale);
4228     disp(0x0);
4229   %}
4230 %}
4231 
4232 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4233 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4234   match(AddP (AddP reg (LShiftI ireg scale)) off);
4235 
4236   op_cost(10);
4237   format %{"[$reg + $off + $ireg << $scale]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index($ireg);
4241     scale($scale);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4248 // the first word of the long.  If the load-long destination overlaps with
4249 // registers used in the addressing expression, the 2nd half will be loaded
4250 // from a clobbered address.  Fix this by requiring that load-long use
4251 // address registers that do not overlap with the load-long target.
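// As a hypothetical example: if a long at [ESI] were loaded into a register
// pair containing ESI, the first 32-bit load would clobber the base register
// and the second word would then be fetched from the wrong address.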
4252 
4253 // load-long support
4254 operand load_long_RegP() %{
4255   constraint(ALLOC_IN_RC(esi_reg));
4256   match(RegP);
4257   match(eSIRegP);
4258   op_cost(100);
4259   format %{  %}
4260   interface(REG_INTER);
4261 %}
4262 
4263 // Indirect Memory Operand Long
4264 operand load_long_indirect(load_long_RegP reg) %{
4265   constraint(ALLOC_IN_RC(esi_reg));
4266   match(reg);
4267 
4268   format %{ "[$reg]" %}
4269   interface(MEMORY_INTER) %{
4270     base($reg);
4271     index(0x4);
4272     scale(0x0);
4273     disp(0x0);
4274   %}
4275 %}
4276 
4277 // Indirect Memory Plus Long Offset Operand
4278 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4279   match(AddP reg off);
4280 
4281   format %{ "[$reg + $off]" %}
4282   interface(MEMORY_INTER) %{
4283     base($reg);
4284     index(0x4);
4285     scale(0x0);
4286     disp($off);
4287   %}
4288 %}
4289 
4290 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4291 
4292 
4293 //----------Special Memory Operands--------------------------------------------
4294 // Stack Slot Operand - This operand is used for loading and storing temporary
4295 //                      values on the stack where a match requires a value to
4296 //                      flow through memory.
4297 operand stackSlotP(sRegP reg) %{
4298   constraint(ALLOC_IN_RC(stack_slots));
4299   // No match rule because this operand is only generated in matching
4300   format %{ "[$reg]" %}
4301   interface(MEMORY_INTER) %{
4302     base(0x4);   // ESP
4303     index(0x4);  // No Index
4304     scale(0x0);  // No Scale
4305     disp($reg);  // Stack Offset
4306   %}
4307 %}
4308 
4309 operand stackSlotI(sRegI reg) %{
4310   constraint(ALLOC_IN_RC(stack_slots));
4311   // No match rule because this operand is only generated in matching
4312   format %{ "[$reg]" %}
4313   interface(MEMORY_INTER) %{
4314     base(0x4);   // ESP
4315     index(0x4);  // No Index
4316     scale(0x0);  // No Scale
4317     disp($reg);  // Stack Offset
4318   %}
4319 %}
4320 
4321 operand stackSlotF(sRegF reg) %{
4322   constraint(ALLOC_IN_RC(stack_slots));
4323   // No match rule because this operand is only generated in matching
4324   format %{ "[$reg]" %}
4325   interface(MEMORY_INTER) %{
4326     base(0x4);   // ESP
4327     index(0x4);  // No Index
4328     scale(0x0);  // No Scale
4329     disp($reg);  // Stack Offset
4330   %}
4331 %}
4332 
4333 operand stackSlotD(sRegD reg) %{
4334   constraint(ALLOC_IN_RC(stack_slots));
4335   // No match rule because this operand is only generated in matching
4336   format %{ "[$reg]" %}
4337   interface(MEMORY_INTER) %{
4338     base(0x4);   // ESP
4339     index(0x4);  // No Index
4340     scale(0x0);  // No Scale
4341     disp($reg);  // Stack Offset
4342   %}
4343 %}
4344 
4345 operand stackSlotL(sRegL reg) %{
4346   constraint(ALLOC_IN_RC(stack_slots));
4347   // No match rule because this operand is only generated in matching
4348   format %{ "[$reg]" %}
4349   interface(MEMORY_INTER) %{
4350     base(0x4);   // ESP
4351     index(0x4);  // No Index
4352     scale(0x0);  // No Scale
4353     disp($reg);  // Stack Offset
4354   %}
4355 %}
4356 
4357 //----------Memory Operands - Win95 Implicit Null Variants----------------
4358 // Indirect Memory Operand
4359 operand indirect_win95_safe(eRegP_no_EBP reg)
4360 %{
4361   constraint(ALLOC_IN_RC(int_reg));
4362   match(reg);
4363 
4364   op_cost(100);
4365   format %{ "[$reg]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp(0x0);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Plus Short Offset Operand
4375 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4376 %{
4377   match(AddP reg off);
4378 
4379   op_cost(100);
4380   format %{ "[$reg + $off]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index(0x4);
4384     scale(0x0);
4385     disp($off);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Plus Long Offset Operand
4390 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4391 %{
4392   match(AddP reg off);
4393 
4394   op_cost(100);
4395   format %{ "[$reg + $off]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index(0x4);
4399     scale(0x0);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 // Indirect Memory Plus Index Register Plus Offset Operand
4405 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4406 %{
4407   match(AddP (AddP reg ireg) off);
4408 
4409   op_cost(100);
4410   format %{"[$reg + $off + $ireg]" %}
4411   interface(MEMORY_INTER) %{
4412     base($reg);
4413     index($ireg);
4414     scale(0x0);
4415     disp($off);
4416   %}
4417 %}
4418 
4419 // Indirect Memory Times Scale Plus Index Register
4420 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4421 %{
4422   match(AddP reg (LShiftI ireg scale));
4423 
4424   op_cost(100);
4425   format %{"[$reg + $ireg << $scale]" %}
4426   interface(MEMORY_INTER) %{
4427     base($reg);
4428     index($ireg);
4429     scale($scale);
4430     disp(0x0);
4431   %}
4432 %}
4433 
4434 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4435 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4436 %{
4437   match(AddP (AddP reg (LShiftI ireg scale)) off);
4438 
4439   op_cost(100);
4440   format %{"[$reg + $off + $ireg << $scale]" %}
4441   interface(MEMORY_INTER) %{
4442     base($reg);
4443     index($ireg);
4444     scale($scale);
4445     disp($off);
4446   %}
4447 %}
4448 
4449 //----------Conditional Branch Operands----------------------------------------
4450 // Comparison Op  - This is the operation of the comparison, and is limited to
4451 //                  the following set of codes:
4452 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4453 //
4454 // Other attributes of the comparison, such as unsignedness, are specified
4455 // by the comparison instruction that sets a condition code flags register.
4456 // That result is represented by a flags operand whose subtype is appropriate
4457 // to the unsignedness (etc.) of the comparison.
4458 //
4459 // Later, the instruction which matches both the Comparison Op (a Bool) and
4460 // the flags (produced by the Cmp) specifies the coding of the comparison op
4461 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4462 
// Comparison Code
4464 operand cmpOp() %{
4465   match(Bool);
4466 
4467   format %{ "" %}
4468   interface(COND_INTER) %{
4469     equal(0x4, "e");
4470     not_equal(0x5, "ne");
4471     less(0xC, "l");
4472     greater_equal(0xD, "ge");
4473     less_equal(0xE, "le");
4474     greater(0xF, "g");
4475     overflow(0x0, "o");
4476     no_overflow(0x1, "no");
4477   %}
4478 %}
4479 
4480 // Comparison Code, unsigned compare.  Used by FP also, with
4481 // C2 (unordered) turned into GT or LT already.  The other bits
4482 // C0 and C3 are turned into Carry & Zero flags.
4483 operand cmpOpU() %{
4484   match(Bool);
4485 
4486   format %{ "" %}
4487   interface(COND_INTER) %{
4488     equal(0x4, "e");
4489     not_equal(0x5, "ne");
4490     less(0x2, "b");
4491     greater_equal(0x3, "nb");
4492     less_equal(0x6, "be");
4493     greater(0x7, "nbe");
4494     overflow(0x0, "o");
4495     no_overflow(0x1, "no");
4496   %}
4497 %}
4498 
4499 // Floating comparisons that don't require any fixup for the unordered case
4500 operand cmpOpUCF() %{
4501   match(Bool);
4502   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4503             n->as_Bool()->_test._test == BoolTest::ge ||
4504             n->as_Bool()->_test._test == BoolTest::le ||
4505             n->as_Bool()->_test._test == BoolTest::gt);
4506   format %{ "" %}
4507   interface(COND_INTER) %{
4508     equal(0x4, "e");
4509     not_equal(0x5, "ne");
4510     less(0x2, "b");
4511     greater_equal(0x3, "nb");
4512     less_equal(0x6, "be");
4513     greater(0x7, "nbe");
4514     overflow(0x0, "o");
4515     no_overflow(0x1, "no");
4516   %}
4517 %}
4518 
4519 
4520 // Floating comparisons that can be fixed up with extra conditional jumps
4521 operand cmpOpUCF2() %{
4522   match(Bool);
4523   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4524             n->as_Bool()->_test._test == BoolTest::eq);
4525   format %{ "" %}
4526   interface(COND_INTER) %{
4527     equal(0x4, "e");
4528     not_equal(0x5, "ne");
4529     less(0x2, "b");
4530     greater_equal(0x3, "nb");
4531     less_equal(0x6, "be");
4532     greater(0x7, "nbe");
4533     overflow(0x0, "o");
4534     no_overflow(0x1, "no");
4535   %}
4536 %}
4537 
4538 // Comparison Code for FP conditional move
4539 operand cmpOp_fcmov() %{
4540   match(Bool);
4541 
4542   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4543             n->as_Bool()->_test._test != BoolTest::no_overflow);
4544   format %{ "" %}
4545   interface(COND_INTER) %{
4546     equal        (0x0C8);
4547     not_equal    (0x1C8);
4548     less         (0x0C0);
4549     greater_equal(0x1C0);
4550     less_equal   (0x0D0);
4551     greater      (0x1D0);
4552     overflow(0x0, "o"); // not really supported by the instruction
4553     no_overflow(0x1, "no"); // not really supported by the instruction
4554   %}
4555 %}
4556 
4557 // Comparison Code used in long compares
4558 operand cmpOp_commute() %{
4559   match(Bool);
4560 
4561   format %{ "" %}
4562   interface(COND_INTER) %{
4563     equal(0x4, "e");
4564     not_equal(0x5, "ne");
4565     less(0xF, "g");
4566     greater_equal(0xE, "le");
4567     less_equal(0xD, "ge");
4568     greater(0xC, "l");
4569     overflow(0x0, "o");
4570     no_overflow(0x1, "no");
4571   %}
4572 %}
4573 
4574 // Comparison Code used in unsigned long compares
4575 operand cmpOpU_commute() %{
4576   match(Bool);
4577 
4578   format %{ "" %}
4579   interface(COND_INTER) %{
4580     equal(0x4, "e");
4581     not_equal(0x5, "ne");
4582     less(0x7, "nbe");
4583     greater_equal(0x6, "be");
4584     less_equal(0x3, "nb");
4585     greater(0x2, "b");
4586     overflow(0x0, "o");
4587     no_overflow(0x1, "no");
4588   %}
4589 %}
4590 
4591 //----------OPERAND CLASSES----------------------------------------------------
4592 // Operand Classes are groups of operands that are used to simplify
4593 // instruction definitions by not requiring the AD writer to specify separate
4594 // instructions for every form of operand when the instruction accepts
4595 // multiple operand types with the same basic encoding and format.  The classic
4596 // case of this is memory operands.
4597 
4598 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4599                indIndex, indIndexScale, indIndexScaleOffset);
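
     // For example, an instruction declared with a 'memory mem' operand (such
     // as the load and store instructions later in this file) matches any of
     // the addressing forms listed above without a separate rule for each one.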
4600 
4601 // Long memory operations are encoded in 2 instructions and a +4 offset.
4602 // This means some kind of offset is always required and you cannot use
4603 // an oop as the offset (done when working on static globals).
4604 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4605                     indIndex, indIndexScale, indIndexScaleOffset);
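
     // For example, storeL later in this file emits "MOV    $mem,$src.lo"
     // followed by "MOV    $mem+4,$src.hi", which is why the +4 displacement
     // must always be available for these operands.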
4606 
4607 
4608 //----------PIPELINE-----------------------------------------------------------
4609 // Rules which define the behavior of the target architecture's pipeline.
4610 pipeline %{
4611 
4612 //----------ATTRIBUTES---------------------------------------------------------
4613 attributes %{
4614   variable_size_instructions;        // Variable-size instructions
4615   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4616   instruction_unit_size = 1;         // An instruction is 1 byte long
4617   instruction_fetch_unit_size = 16;  // The processor fetches one line
4618   instruction_fetch_units = 1;       // of 16 bytes
4619 
4620   // List of nop instructions
4621   nops( MachNop );
4622 %}
4623 
4624 //----------RESOURCES----------------------------------------------------------
4625 // Resources are the functional units available to the machine
4626 
4627 // Generic P2/P3 pipeline
4628 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4629 // 3 instructions decoded per cycle.
4630 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4631 // 2 ALU ops; only ALU0 handles mul/div instructions.
4632 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4633            MS0, MS1, MEM = MS0 | MS1,
4634            BR, FPU,
4635            ALU0, ALU1, ALU = ALU0 | ALU1 );
4636 
4637 //----------PIPELINE DESCRIPTION-----------------------------------------------
4638 // Pipeline Description specifies the stages in the machine's pipeline
4639 
4640 // Generic P2/P3 pipeline
4641 pipe_desc(S0, S1, S2, S3, S4, S5);
4642 
4643 //----------PIPELINE CLASSES---------------------------------------------------
4644 // Pipeline Classes describe the stages in which input and output are
4645 // referenced by the hardware pipeline.
4646 
4647 // Naming convention: ialu or fpu
4648 // Then: _reg
4649 // Then: _reg if there is a 2nd register
4650 // Then: _long if it's a pair of instructions implementing a long
4651 // Then: _fat if it requires the big decoder
4652 //   Or: _mem if it requires the big decoder and a memory unit.
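     //
     // For example, ialu_reg_long_fat below is an integer-ALU class that
     // operates on a register, implements a long as a pair of instructions,
     // and requires the big decoder.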
4653 
4654 // Integer ALU reg operation
4655 pipe_class ialu_reg(rRegI dst) %{
4656     single_instruction;
4657     dst    : S4(write);
4658     dst    : S3(read);
4659     DECODE : S0;        // any decoder
4660     ALU    : S3;        // any alu
4661 %}
4662 
4663 // Long ALU reg operation
4664 pipe_class ialu_reg_long(eRegL dst) %{
4665     instruction_count(2);
4666     dst    : S4(write);
4667     dst    : S3(read);
4668     DECODE : S0(2);     // any 2 decoders
4669     ALU    : S3(2);     // both alus
4670 %}
4671 
4672 // Integer ALU reg operation using big decoder
4673 pipe_class ialu_reg_fat(rRegI dst) %{
4674     single_instruction;
4675     dst    : S4(write);
4676     dst    : S3(read);
4677     D0     : S0;        // big decoder only
4678     ALU    : S3;        // any alu
4679 %}
4680 
4681 // Long ALU reg operation using big decoder
4682 pipe_class ialu_reg_long_fat(eRegL dst) %{
4683     instruction_count(2);
4684     dst    : S4(write);
4685     dst    : S3(read);
4686     D0     : S0(2);     // big decoder only; twice
4687     ALU    : S3(2);     // any 2 alus
4688 %}
4689 
4690 // Integer ALU reg-reg operation
4691 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4692     single_instruction;
4693     dst    : S4(write);
4694     src    : S3(read);
4695     DECODE : S0;        // any decoder
4696     ALU    : S3;        // any alu
4697 %}
4698 
4699 // Long ALU reg-reg operation
4700 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4701     instruction_count(2);
4702     dst    : S4(write);
4703     src    : S3(read);
4704     DECODE : S0(2);     // any 2 decoders
4705     ALU    : S3(2);     // both alus
4706 %}
4707 
4708 // Integer ALU reg-reg operation
4709 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4710     single_instruction;
4711     dst    : S4(write);
4712     src    : S3(read);
4713     D0     : S0;        // big decoder only
4714     ALU    : S3;        // any alu
4715 %}
4716 
4717 // Long ALU reg-reg operation
4718 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4719     instruction_count(2);
4720     dst    : S4(write);
4721     src    : S3(read);
4722     D0     : S0(2);     // big decoder only; twice
4723     ALU    : S3(2);     // both alus
4724 %}
4725 
4726 // Integer ALU reg-mem operation
4727 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4728     single_instruction;
4729     dst    : S5(write);
4730     mem    : S3(read);
4731     D0     : S0;        // big decoder only
4732     ALU    : S4;        // any alu
4733     MEM    : S3;        // any mem
4734 %}
4735 
4736 // Long ALU reg-mem operation
4737 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4738     instruction_count(2);
4739     dst    : S5(write);
4740     mem    : S3(read);
4741     D0     : S0(2);     // big decoder only; twice
4742     ALU    : S4(2);     // any 2 alus
4743     MEM    : S3(2);     // both mems
4744 %}
4745 
4746 // Integer mem operation (prefetch)
4747 pipe_class ialu_mem(memory mem)
4748 %{
4749     single_instruction;
4750     mem    : S3(read);
4751     D0     : S0;        // big decoder only
4752     MEM    : S3;        // any mem
4753 %}
4754 
4755 // Integer Store to Memory
4756 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4757     single_instruction;
4758     mem    : S3(read);
4759     src    : S5(read);
4760     D0     : S0;        // big decoder only
4761     ALU    : S4;        // any alu
4762     MEM    : S3;
4763 %}
4764 
4765 // Long Store to Memory
4766 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4767     instruction_count(2);
4768     mem    : S3(read);
4769     src    : S5(read);
4770     D0     : S0(2);     // big decoder only; twice
4771     ALU    : S4(2);     // any 2 alus
4772     MEM    : S3(2);     // Both mems
4773 %}
4774 
4775 // Integer Store to Memory
4776 pipe_class ialu_mem_imm(memory mem) %{
4777     single_instruction;
4778     mem    : S3(read);
4779     D0     : S0;        // big decoder only
4780     ALU    : S4;        // any alu
4781     MEM    : S3;
4782 %}
4783 
4784 // Integer ALU0 reg-reg operation
4785 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4786     single_instruction;
4787     dst    : S4(write);
4788     src    : S3(read);
4789     D0     : S0;        // Big decoder only
4790     ALU0   : S3;        // only alu0
4791 %}
4792 
4793 // Integer ALU0 reg-mem operation
4794 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4795     single_instruction;
4796     dst    : S5(write);
4797     mem    : S3(read);
4798     D0     : S0;        // big decoder only
4799     ALU0   : S4;        // ALU0 only
4800     MEM    : S3;        // any mem
4801 %}
4802 
4803 // Integer ALU reg-reg operation
4804 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4805     single_instruction;
4806     cr     : S4(write);
4807     src1   : S3(read);
4808     src2   : S3(read);
4809     DECODE : S0;        // any decoder
4810     ALU    : S3;        // any alu
4811 %}
4812 
4813 // Integer ALU reg-imm operation
4814 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4815     single_instruction;
4816     cr     : S4(write);
4817     src1   : S3(read);
4818     DECODE : S0;        // any decoder
4819     ALU    : S3;        // any alu
4820 %}
4821 
4822 // Integer ALU reg-mem operation
4823 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4824     single_instruction;
4825     cr     : S4(write);
4826     src1   : S3(read);
4827     src2   : S3(read);
4828     D0     : S0;        // big decoder only
4829     ALU    : S4;        // any alu
4830     MEM    : S3;
4831 %}
4832 
4833 // Conditional move reg-reg
4834 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4835     instruction_count(4);
4836     y      : S4(read);
4837     q      : S3(read);
4838     p      : S3(read);
4839     DECODE : S0(4);     // any decoder
4840 %}
4841 
4842 // Conditional move reg-reg
4843 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4844     single_instruction;
4845     dst    : S4(write);
4846     src    : S3(read);
4847     cr     : S3(read);
4848     DECODE : S0;        // any decoder
4849 %}
4850 
4851 // Conditional move reg-mem
4852 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4853     single_instruction;
4854     dst    : S4(write);
4855     src    : S3(read);
4856     cr     : S3(read);
4857     DECODE : S0;        // any decoder
4858     MEM    : S3;
4859 %}
4860 
4861 // Conditional move reg-reg long
4862 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4863     single_instruction;
4864     dst    : S4(write);
4865     src    : S3(read);
4866     cr     : S3(read);
4867     DECODE : S0(2);     // any 2 decoders
4868 %}
4869 
4870 // Conditional move double reg-reg
4871 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4872     single_instruction;
4873     dst    : S4(write);
4874     src    : S3(read);
4875     cr     : S3(read);
4876     DECODE : S0;        // any decoder
4877 %}
4878 
4879 // Float reg-reg operation
4880 pipe_class fpu_reg(regDPR dst) %{
4881     instruction_count(2);
4882     dst    : S3(read);
4883     DECODE : S0(2);     // any 2 decoders
4884     FPU    : S3;
4885 %}
4886 
4887 // Float reg-reg operation
4888 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4889     instruction_count(2);
4890     dst    : S4(write);
4891     src    : S3(read);
4892     DECODE : S0(2);     // any 2 decoders
4893     FPU    : S3;
4894 %}
4895 
4896 // Float reg-reg operation
4897 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4898     instruction_count(3);
4899     dst    : S4(write);
4900     src1   : S3(read);
4901     src2   : S3(read);
4902     DECODE : S0(3);     // any 3 decoders
4903     FPU    : S3(2);
4904 %}
4905 
4906 // Float reg-reg operation
4907 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4908     instruction_count(4);
4909     dst    : S4(write);
4910     src1   : S3(read);
4911     src2   : S3(read);
4912     src3   : S3(read);
4913     DECODE : S0(4);     // any 3 decoders
4914     FPU    : S3(2);
4915 %}
4916 
4917 // Float reg-reg operation
4918 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4919     instruction_count(4);
4920     dst    : S4(write);
4921     src1   : S3(read);
4922     src2   : S3(read);
4923     src3   : S3(read);
4924     DECODE : S1(3);     // any 3 decoders
4925     D0     : S0;        // Big decoder only
4926     FPU    : S3(2);
4927     MEM    : S3;
4928 %}
4929 
4930 // Float reg-mem operation
4931 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4932     instruction_count(2);
4933     dst    : S5(write);
4934     mem    : S3(read);
4935     D0     : S0;        // big decoder only
4936     DECODE : S1;        // any decoder for FPU POP
4937     FPU    : S4;
4938     MEM    : S3;        // any mem
4939 %}
4940 
4941 // Float reg-mem operation
4942 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4943     instruction_count(3);
4944     dst    : S5(write);
4945     src1   : S3(read);
4946     mem    : S3(read);
4947     D0     : S0;        // big decoder only
4948     DECODE : S1(2);     // any decoder for FPU POP
4949     FPU    : S4;
4950     MEM    : S3;        // any mem
4951 %}
4952 
4953 // Float mem-reg operation
4954 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4955     instruction_count(2);
4956     src    : S5(read);
4957     mem    : S3(read);
4958     DECODE : S0;        // any decoder for FPU PUSH
4959     D0     : S1;        // big decoder only
4960     FPU    : S4;
4961     MEM    : S3;        // any mem
4962 %}
4963 
4964 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4965     instruction_count(3);
4966     src1   : S3(read);
4967     src2   : S3(read);
4968     mem    : S3(read);
4969     DECODE : S0(2);     // any decoder for FPU PUSH
4970     D0     : S1;        // big decoder only
4971     FPU    : S4;
4972     MEM    : S3;        // any mem
4973 %}
4974 
4975 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4976     instruction_count(3);
4977     src1   : S3(read);
4978     src2   : S3(read);
4979     mem    : S4(read);
4980     DECODE : S0;        // any decoder for FPU PUSH
4981     D0     : S0(2);     // big decoder only
4982     FPU    : S4;
4983     MEM    : S3(2);     // any mem
4984 %}
4985 
4986 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4987     instruction_count(2);
4988     src1   : S3(read);
4989     dst    : S4(read);
4990     D0     : S0(2);     // big decoder only
4991     MEM    : S3(2);     // any mem
4992 %}
4993 
4994 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4995     instruction_count(3);
4996     src1   : S3(read);
4997     src2   : S3(read);
4998     dst    : S4(read);
4999     D0     : S0(3);     // big decoder only
5000     FPU    : S4;
5001     MEM    : S3(3);     // any mem
5002 %}
5003 
5004 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5005     instruction_count(3);
5006     src1   : S4(read);
5007     mem    : S4(read);
5008     DECODE : S0;        // any decoder for FPU PUSH
5009     D0     : S0(2);     // big decoder only
5010     FPU    : S4;
5011     MEM    : S3(2);     // any mem
5012 %}
5013 
5014 // Float load constant
5015 pipe_class fpu_reg_con(regDPR dst) %{
5016     instruction_count(2);
5017     dst    : S5(write);
5018     D0     : S0;        // big decoder only for the load
5019     DECODE : S1;        // any decoder for FPU POP
5020     FPU    : S4;
5021     MEM    : S3;        // any mem
5022 %}
5023 
5024 // Float load constant
5025 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5026     instruction_count(3);
5027     dst    : S5(write);
5028     src    : S3(read);
5029     D0     : S0;        // big decoder only for the load
5030     DECODE : S1(2);     // any decoder for FPU POP
5031     FPU    : S4;
5032     MEM    : S3;        // any mem
5033 %}
5034 
5035 // UnConditional branch
5036 pipe_class pipe_jmp( label labl ) %{
5037     single_instruction;
5038     BR   : S3;
5039 %}
5040 
5041 // Conditional branch
5042 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5043     single_instruction;
5044     cr    : S1(read);
5045     BR    : S3;
5046 %}
5047 
5048 // Allocation idiom
5049 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5050     instruction_count(1); force_serialization;
5051     fixed_latency(6);
5052     heap_ptr : S3(read);
5053     DECODE   : S0(3);
5054     D0       : S2;
5055     MEM      : S3;
5056     ALU      : S3(2);
5057     dst      : S5(write);
5058     BR       : S5;
5059 %}
5060 
5061 // Generic big/slow expanded idiom
5062 pipe_class pipe_slow(  ) %{
5063     instruction_count(10); multiple_bundles; force_serialization;
5064     fixed_latency(100);
5065     D0  : S0(2);
5066     MEM : S3(2);
5067 %}
5068 
5069 // The real do-nothing guy
5070 pipe_class empty( ) %{
5071     instruction_count(0);
5072 %}
5073 
5074 // Define the class for the Nop node
5075 define %{
5076    MachNop = empty;
5077 %}
5078 
5079 %}
5080 
5081 //----------INSTRUCTIONS-------------------------------------------------------
5082 //
5083 // match      -- States which machine-independent subtree may be replaced
5084 //               by this instruction.
5085 // ins_cost   -- The estimated cost of this instruction is used by instruction
5086 //               selection to identify a minimum cost tree of machine
5087 //               instructions that matches a tree of machine-independent
5088 //               instructions.
5089 // format     -- A string providing the disassembly for this instruction.
5090 //               The value of an instruction's operand may be inserted
5091 //               by referring to it with a '$' prefix.
5092 // opcode     -- Three instruction opcodes may be provided.  These are referred
5093 //               to within an encode class as $primary, $secondary, and $tertiary
5094 //               respectively.  The primary opcode is commonly used to
5095 //               indicate the type of machine instruction, while secondary
5096 //               and tertiary are often used for prefix options or addressing
5097 //               modes.
5098 // ins_encode -- A list of encode classes with parameters. The encode class
5099 //               name must have been defined in an 'enc_class' specification
5100 //               in the encode section of the architecture description.
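     //
     // For example (reading the first instruction below): bytes_reverse_int
     // matches (Set dst (ReverseBytesI dst)), prints "BSWAP  $dst", supplies
     // 0x0F/0xC8 as $primary/$secondary via opcode(), and emits them through
     // the OpcP and OpcSReg encode classes.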
5101 
5102 //----------BSWAP-Instruction--------------------------------------------------
5103 instruct bytes_reverse_int(rRegI dst) %{
5104   match(Set dst (ReverseBytesI dst));
5105 
5106   format %{ "BSWAP  $dst" %}
5107   opcode(0x0F, 0xC8);
5108   ins_encode( OpcP, OpcSReg(dst) );
5109   ins_pipe( ialu_reg );
5110 %}
5111 
5112 instruct bytes_reverse_long(eRegL dst) %{
5113   match(Set dst (ReverseBytesL dst));
5114 
5115   format %{ "BSWAP  $dst.lo\n\t"
5116             "BSWAP  $dst.hi\n\t"
5117             "XCHG   $dst.lo $dst.hi" %}
5118 
5119   ins_cost(125);
5120   ins_encode( bswap_long_bytes(dst) );
5121   ins_pipe( ialu_reg_reg);
5122 %}
5123 
5124 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5125   match(Set dst (ReverseBytesUS dst));
5126   effect(KILL cr);
5127 
5128   format %{ "BSWAP  $dst\n\t"
5129             "SHR    $dst,16\n\t" %}
5130   ins_encode %{
5131     __ bswapl($dst$$Register);
5132     __ shrl($dst$$Register, 16);
5133   %}
5134   ins_pipe( ialu_reg );
5135 %}
5136 
5137 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5138   match(Set dst (ReverseBytesS dst));
5139   effect(KILL cr);
5140 
5141   format %{ "BSWAP  $dst\n\t"
5142             "SAR    $dst,16\n\t" %}
5143   ins_encode %{
5144     __ bswapl($dst$$Register);
5145     __ sarl($dst$$Register, 16);
5146   %}
5147   ins_pipe( ialu_reg );
5148 %}
5149 
5150 
5151 //---------- Zeros Count Instructions ------------------------------------------
5152 
5153 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5154   predicate(UseCountLeadingZerosInstruction);
5155   match(Set dst (CountLeadingZerosI src));
5156   effect(KILL cr);
5157 
5158   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5159   ins_encode %{
5160     __ lzcntl($dst$$Register, $src$$Register);
5161   %}
5162   ins_pipe(ialu_reg);
5163 %}
5164 
5165 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5166   predicate(!UseCountLeadingZerosInstruction);
5167   match(Set dst (CountLeadingZerosI src));
5168   effect(KILL cr);
5169 
5170   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5171             "JNZ    skip\n\t"
5172             "MOV    $dst, -1\n"
5173       "skip:\n\t"
5174             "NEG    $dst\n\t"
5175             "ADD    $dst, 31" %}
5176   ins_encode %{
5177     Register Rdst = $dst$$Register;
5178     Register Rsrc = $src$$Register;
5179     Label skip;
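         // BSR leaves the index of the highest set bit in Rdst and sets ZF
         // when Rsrc is zero (leaving Rdst undefined); substituting -1 for
         // the zero case lets NEG/ADD below compute 31 - index, i.e. 32 for
         // a zero input.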
5180     __ bsrl(Rdst, Rsrc);
5181     __ jccb(Assembler::notZero, skip);
5182     __ movl(Rdst, -1);
5183     __ bind(skip);
5184     __ negl(Rdst);
5185     __ addl(Rdst, BitsPerInt - 1);
5186   %}
5187   ins_pipe(ialu_reg);
5188 %}
5189 
5190 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5191   predicate(UseCountLeadingZerosInstruction);
5192   match(Set dst (CountLeadingZerosL src));
5193   effect(TEMP dst, KILL cr);
5194 
5195   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5196             "JNC    done\n\t"
5197             "LZCNT  $dst, $src.lo\n\t"
5198             "ADD    $dst, 32\n"
5199       "done:" %}
5200   ins_encode %{
5201     Register Rdst = $dst$$Register;
5202     Register Rsrc = $src$$Register;
5203     Label done;
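         // LZCNT sets CF when its source is all zeros, so the low word is
         // counted (plus 32) only when the high word contributes no set bits.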
5204     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5205     __ jccb(Assembler::carryClear, done);
5206     __ lzcntl(Rdst, Rsrc);
5207     __ addl(Rdst, BitsPerInt);
5208     __ bind(done);
5209   %}
5210   ins_pipe(ialu_reg);
5211 %}
5212 
5213 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5214   predicate(!UseCountLeadingZerosInstruction);
5215   match(Set dst (CountLeadingZerosL src));
5216   effect(TEMP dst, KILL cr);
5217 
5218   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5219             "JZ     msw_is_zero\n\t"
5220             "ADD    $dst, 32\n\t"
5221             "JMP    not_zero\n"
5222       "msw_is_zero:\n\t"
5223             "BSR    $dst, $src.lo\n\t"
5224             "JNZ    not_zero\n\t"
5225             "MOV    $dst, -1\n"
5226       "not_zero:\n\t"
5227             "NEG    $dst\n\t"
5228             "ADD    $dst, 63\n" %}
5229  ins_encode %{
5230     Register Rdst = $dst$$Register;
5231     Register Rsrc = $src$$Register;
5232     Label msw_is_zero;
5233     Label not_zero;
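         // First compute the bit index of the highest set bit within the
         // 64-bit value (high-word index plus 32, low-word index, or -1 when
         // both words are zero); NEG/ADD then yields 63 - index, i.e. 64 for
         // a zero input.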
5234     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5235     __ jccb(Assembler::zero, msw_is_zero);
5236     __ addl(Rdst, BitsPerInt);
5237     __ jmpb(not_zero);
5238     __ bind(msw_is_zero);
5239     __ bsrl(Rdst, Rsrc);
5240     __ jccb(Assembler::notZero, not_zero);
5241     __ movl(Rdst, -1);
5242     __ bind(not_zero);
5243     __ negl(Rdst);
5244     __ addl(Rdst, BitsPerLong - 1);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5250   predicate(UseCountTrailingZerosInstruction);
5251   match(Set dst (CountTrailingZerosI src));
5252   effect(KILL cr);
5253 
5254   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5255   ins_encode %{
5256     __ tzcntl($dst$$Register, $src$$Register);
5257   %}
5258   ins_pipe(ialu_reg);
5259 %}
5260 
5261 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5262   predicate(!UseCountTrailingZerosInstruction);
5263   match(Set dst (CountTrailingZerosI src));
5264   effect(KILL cr);
5265 
5266   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5267             "JNZ    done\n\t"
5268             "MOV    $dst, 32\n"
5269       "done:" %}
5270   ins_encode %{
5271     Register Rdst = $dst$$Register;
5272     Label done;
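         // BSF returns the index of the lowest set bit, which is exactly the
         // trailing-zero count; only an all-zero input needs the explicit 32.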
5273     __ bsfl(Rdst, $src$$Register);
5274     __ jccb(Assembler::notZero, done);
5275     __ movl(Rdst, BitsPerInt);
5276     __ bind(done);
5277   %}
5278   ins_pipe(ialu_reg);
5279 %}
5280 
5281 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5282   predicate(UseCountTrailingZerosInstruction);
5283   match(Set dst (CountTrailingZerosL src));
5284   effect(TEMP dst, KILL cr);
5285 
5286   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5287             "JNC    done\n\t"
5288             "TZCNT  $dst, $src.hi\n\t"
5289             "ADD    $dst, 32\n"
5290       "done:" %}
5291   ins_encode %{
5292     Register Rdst = $dst$$Register;
5293     Register Rsrc = $src$$Register;
5294     Label done;
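         // TZCNT sets CF when its source is all zeros, so the high word is
         // counted (plus 32) only when the low word contributes no set bits.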
5295     __ tzcntl(Rdst, Rsrc);
5296     __ jccb(Assembler::carryClear, done);
5297     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5298     __ addl(Rdst, BitsPerInt);
5299     __ bind(done);
5300   %}
5301   ins_pipe(ialu_reg);
5302 %}
5303 
5304 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5305   predicate(!UseCountTrailingZerosInstruction);
5306   match(Set dst (CountTrailingZerosL src));
5307   effect(TEMP dst, KILL cr);
5308 
5309   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5310             "JNZ    done\n\t"
5311             "BSF    $dst, $src.hi\n\t"
5312             "JNZ    msw_not_zero\n\t"
5313             "MOV    $dst, 32\n"
5314       "msw_not_zero:\n\t"
5315             "ADD    $dst, 32\n"
5316       "done:" %}
5317   ins_encode %{
5318     Register Rdst = $dst$$Register;
5319     Register Rsrc = $src$$Register;
5320     Label msw_not_zero;
5321     Label done;
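         // The low-word BSF supplies the count directly; otherwise the count
         // is 32 plus the low-bit index of the high word, or 64 when both
         // words are zero.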
5322     __ bsfl(Rdst, Rsrc);
5323     __ jccb(Assembler::notZero, done);
5324     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5325     __ jccb(Assembler::notZero, msw_not_zero);
5326     __ movl(Rdst, BitsPerInt);
5327     __ bind(msw_not_zero);
5328     __ addl(Rdst, BitsPerInt);
5329     __ bind(done);
5330   %}
5331   ins_pipe(ialu_reg);
5332 %}
5333 
5334 
5335 //---------- Population Count Instructions -------------------------------------
5336 
5337 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5338   predicate(UsePopCountInstruction);
5339   match(Set dst (PopCountI src));
5340   effect(KILL cr);
5341 
5342   format %{ "POPCNT $dst, $src" %}
5343   ins_encode %{
5344     __ popcntl($dst$$Register, $src$$Register);
5345   %}
5346   ins_pipe(ialu_reg);
5347 %}
5348 
5349 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5350   predicate(UsePopCountInstruction);
5351   match(Set dst (PopCountI (LoadI mem)));
5352   effect(KILL cr);
5353 
5354   format %{ "POPCNT $dst, $mem" %}
5355   ins_encode %{
5356     __ popcntl($dst$$Register, $mem$$Address);
5357   %}
5358   ins_pipe(ialu_reg);
5359 %}
5360 
5361 // Note: Long.bitCount(long) returns an int.
5362 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5363   predicate(UsePopCountInstruction);
5364   match(Set dst (PopCountL src));
5365   effect(KILL cr, TEMP tmp, TEMP dst);
5366 
5367   format %{ "POPCNT $dst, $src.lo\n\t"
5368             "POPCNT $tmp, $src.hi\n\t"
5369             "ADD    $dst, $tmp" %}
5370   ins_encode %{
5371     __ popcntl($dst$$Register, $src$$Register);
5372     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5373     __ addl($dst$$Register, $tmp$$Register);
5374   %}
5375   ins_pipe(ialu_reg);
5376 %}
5377 
5378 // Note: Long.bitCount(long) returns an int.
5379 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5380   predicate(UsePopCountInstruction);
5381   match(Set dst (PopCountL (LoadL mem)));
5382   effect(KILL cr, TEMP tmp, TEMP dst);
5383 
5384   format %{ "POPCNT $dst, $mem\n\t"
5385             "POPCNT $tmp, $mem+4\n\t"
5386             "ADD    $dst, $tmp" %}
5387   ins_encode %{
5388     //__ popcntl($dst$$Register, $mem$$Address$$first);
5389     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5390     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5391     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5392     __ addl($dst$$Register, $tmp$$Register);
5393   %}
5394   ins_pipe(ialu_reg);
5395 %}
5396 
5397 
5398 //----------Load/Store/Move Instructions---------------------------------------
5399 //----------Load Instructions--------------------------------------------------
5400 // Load Byte (8bit signed)
5401 instruct loadB(xRegI dst, memory mem) %{
5402   match(Set dst (LoadB mem));
5403 
5404   ins_cost(125);
5405   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5406 
5407   ins_encode %{
5408     __ movsbl($dst$$Register, $mem$$Address);
5409   %}
5410 
5411   ins_pipe(ialu_reg_mem);
5412 %}
5413 
5414 // Load Byte (8bit signed) into Long Register
5415 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5416   match(Set dst (ConvI2L (LoadB mem)));
5417   effect(KILL cr);
5418 
5419   ins_cost(375);
5420   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5421             "MOV    $dst.hi,$dst.lo\n\t"
5422             "SAR    $dst.hi,7" %}
5423 
5424   ins_encode %{
5425     __ movsbl($dst$$Register, $mem$$Address);
5426     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5427     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5428   %}
5429 
5430   ins_pipe(ialu_reg_mem);
5431 %}
5432 
5433 // Load Unsigned Byte (8bit UNsigned)
5434 instruct loadUB(xRegI dst, memory mem) %{
5435   match(Set dst (LoadUB mem));
5436 
5437   ins_cost(125);
5438   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5439 
5440   ins_encode %{
5441     __ movzbl($dst$$Register, $mem$$Address);
5442   %}
5443 
5444   ins_pipe(ialu_reg_mem);
5445 %}
5446 
5447 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5448 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5449   match(Set dst (ConvI2L (LoadUB mem)));
5450   effect(KILL cr);
5451 
5452   ins_cost(250);
5453   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5454             "XOR    $dst.hi,$dst.hi" %}
5455 
5456   ins_encode %{
5457     Register Rdst = $dst$$Register;
5458     __ movzbl(Rdst, $mem$$Address);
5459     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5460   %}
5461 
5462   ins_pipe(ialu_reg_mem);
5463 %}
5464 
5465 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5466 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5467   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5468   effect(KILL cr);
5469 
5470   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5471             "XOR    $dst.hi,$dst.hi\n\t"
5472             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5473   ins_encode %{
5474     Register Rdst = $dst$$Register;
5475     __ movzbl(Rdst, $mem$$Address);
5476     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5477     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5478   %}
5479   ins_pipe(ialu_reg_mem);
5480 %}
5481 
5482 // Load Short (16bit signed)
5483 instruct loadS(rRegI dst, memory mem) %{
5484   match(Set dst (LoadS mem));
5485 
5486   ins_cost(125);
5487   format %{ "MOVSX  $dst,$mem\t# short" %}
5488 
5489   ins_encode %{
5490     __ movswl($dst$$Register, $mem$$Address);
5491   %}
5492 
5493   ins_pipe(ialu_reg_mem);
5494 %}
5495 
5496 // Load Short (16 bit signed) to Byte (8 bit signed)
5497 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5498   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5499 
5500   ins_cost(125);
5501   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5502   ins_encode %{
5503     __ movsbl($dst$$Register, $mem$$Address);
5504   %}
5505   ins_pipe(ialu_reg_mem);
5506 %}
5507 
5508 // Load Short (16bit signed) into Long Register
5509 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5510   match(Set dst (ConvI2L (LoadS mem)));
5511   effect(KILL cr);
5512 
5513   ins_cost(375);
5514   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5515             "MOV    $dst.hi,$dst.lo\n\t"
5516             "SAR    $dst.hi,15" %}
5517 
5518   ins_encode %{
5519     __ movswl($dst$$Register, $mem$$Address);
5520     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5521     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5522   %}
5523 
5524   ins_pipe(ialu_reg_mem);
5525 %}
5526 
5527 // Load Unsigned Short/Char (16bit unsigned)
5528 instruct loadUS(rRegI dst, memory mem) %{
5529   match(Set dst (LoadUS mem));
5530 
5531   ins_cost(125);
5532   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5533 
5534   ins_encode %{
5535     __ movzwl($dst$$Register, $mem$$Address);
5536   %}
5537 
5538   ins_pipe(ialu_reg_mem);
5539 %}
5540 
5541 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5542 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5543   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5544 
5545   ins_cost(125);
5546   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5547   ins_encode %{
5548     __ movsbl($dst$$Register, $mem$$Address);
5549   %}
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5554 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5555   match(Set dst (ConvI2L (LoadUS mem)));
5556   effect(KILL cr);
5557 
5558   ins_cost(250);
5559   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5560             "XOR    $dst.hi,$dst.hi" %}
5561 
5562   ins_encode %{
5563     __ movzwl($dst$$Register, $mem$$Address);
5564     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5565   %}
5566 
5567   ins_pipe(ialu_reg_mem);
5568 %}
5569 
5570 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5571 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5572   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5573   effect(KILL cr);
5574 
5575   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5576             "XOR    $dst.hi,$dst.hi" %}
5577   ins_encode %{
5578     Register Rdst = $dst$$Register;
5579     __ movzbl(Rdst, $mem$$Address);
5580     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5581   %}
5582   ins_pipe(ialu_reg_mem);
5583 %}
5584 
5585 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5586 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5587   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5588   effect(KILL cr);
5589 
5590   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5591             "XOR    $dst.hi,$dst.hi\n\t"
5592             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5593   ins_encode %{
5594     Register Rdst = $dst$$Register;
5595     __ movzwl(Rdst, $mem$$Address);
5596     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5597     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5598   %}
5599   ins_pipe(ialu_reg_mem);
5600 %}
5601 
5602 // Load Integer
5603 instruct loadI(rRegI dst, memory mem) %{
5604   match(Set dst (LoadI mem));
5605 
5606   ins_cost(125);
5607   format %{ "MOV    $dst,$mem\t# int" %}
5608 
5609   ins_encode %{
5610     __ movl($dst$$Register, $mem$$Address);
5611   %}
5612 
5613   ins_pipe(ialu_reg_mem);
5614 %}
5615 
5616 // Load Integer (32 bit signed) to Byte (8 bit signed)
5617 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5618   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5619 
5620   ins_cost(125);
5621   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5622   ins_encode %{
5623     __ movsbl($dst$$Register, $mem$$Address);
5624   %}
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5629 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5630   match(Set dst (AndI (LoadI mem) mask));
5631 
5632   ins_cost(125);
5633   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5634   ins_encode %{
5635     __ movzbl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Integer (32 bit signed) to Short (16 bit signed)
5641 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5642   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5643 
5644   ins_cost(125);
5645   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5646   ins_encode %{
5647     __ movswl($dst$$Register, $mem$$Address);
5648   %}
5649   ins_pipe(ialu_reg_mem);
5650 %}
5651 
5652 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5653 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5654   match(Set dst (AndI (LoadI mem) mask));
5655 
5656   ins_cost(125);
5657   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5658   ins_encode %{
5659     __ movzwl($dst$$Register, $mem$$Address);
5660   %}
5661   ins_pipe(ialu_reg_mem);
5662 %}
5663 
5664 // Load Integer into Long Register
5665 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5666   match(Set dst (ConvI2L (LoadI mem)));
5667   effect(KILL cr);
5668 
5669   ins_cost(375);
5670   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5671             "MOV    $dst.hi,$dst.lo\n\t"
5672             "SAR    $dst.hi,31" %}
5673 
5674   ins_encode %{
5675     __ movl($dst$$Register, $mem$$Address);
5676     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5677     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5678   %}
5679 
5680   ins_pipe(ialu_reg_mem);
5681 %}
5682 
5683 // Load Integer with mask 0xFF into Long Register
5684 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5685   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5686   effect(KILL cr);
5687 
5688   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5689             "XOR    $dst.hi,$dst.hi" %}
5690   ins_encode %{
5691     Register Rdst = $dst$$Register;
5692     __ movzbl(Rdst, $mem$$Address);
5693     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5694   %}
5695   ins_pipe(ialu_reg_mem);
5696 %}
5697 
5698 // Load Integer with mask 0xFFFF into Long Register
5699 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5700   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5701   effect(KILL cr);
5702 
5703   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5704             "XOR    $dst.hi,$dst.hi" %}
5705   ins_encode %{
5706     Register Rdst = $dst$$Register;
5707     __ movzwl(Rdst, $mem$$Address);
5708     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5709   %}
5710   ins_pipe(ialu_reg_mem);
5711 %}
5712 
5713 // Load Integer with 31-bit mask into Long Register
5714 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5715   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5716   effect(KILL cr);
5717 
5718   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5719             "XOR    $dst.hi,$dst.hi\n\t"
5720             "AND    $dst.lo,$mask" %}
5721   ins_encode %{
5722     Register Rdst = $dst$$Register;
5723     __ movl(Rdst, $mem$$Address);
5724     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5725     __ andl(Rdst, $mask$$constant);
5726   %}
5727   ins_pipe(ialu_reg_mem);
5728 %}
5729 
5730 // Load Unsigned Integer into Long Register
5731 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5732   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5733   effect(KILL cr);
5734 
5735   ins_cost(250);
5736   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5737             "XOR    $dst.hi,$dst.hi" %}
5738 
5739   ins_encode %{
5740     __ movl($dst$$Register, $mem$$Address);
5741     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5742   %}
5743 
5744   ins_pipe(ialu_reg_mem);
5745 %}
5746 
5747 // Load Long.  Cannot clobber address while loading, so restrict address
5748 // register to ESI
5749 instruct loadL(eRegL dst, load_long_memory mem) %{
5750   predicate(!((LoadLNode*)n)->require_atomic_access());
5751   match(Set dst (LoadL mem));
5752 
5753   ins_cost(250);
5754   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5755             "MOV    $dst.hi,$mem+4" %}
5756 
5757   ins_encode %{
5758     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5759     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5760     __ movl($dst$$Register, Amemlo);
5761     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5762   %}
5763 
5764   ins_pipe(ialu_reg_long_mem);
5765 %}
5766 
5767 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5768 // then store it down to the stack and reload on the int
5769 // side.
5770 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5771   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5772   match(Set dst (LoadL mem));
5773 
5774   ins_cost(200);
5775   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5776             "FISTp  $dst" %}
5777   ins_encode(enc_loadL_volatile(mem,dst));
5778   ins_pipe( fpu_reg_mem );
5779 %}
5780 
5781 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5782   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5783   match(Set dst (LoadL mem));
5784   effect(TEMP tmp);
5785   ins_cost(180);
5786   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5787             "MOVSD  $dst,$tmp" %}
5788   ins_encode %{
5789     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5790     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5791   %}
5792   ins_pipe( pipe_slow );
5793 %}
5794 
5795 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5796   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5797   match(Set dst (LoadL mem));
5798   effect(TEMP tmp);
5799   ins_cost(160);
5800   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5801             "MOVD   $dst.lo,$tmp\n\t"
5802             "PSRLQ  $tmp,32\n\t"
5803             "MOVD   $dst.hi,$tmp" %}
5804   ins_encode %{
5805     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5806     __ movdl($dst$$Register, $tmp$$XMMRegister);
5807     __ psrlq($tmp$$XMMRegister, 32);
5808     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5809   %}
5810   ins_pipe( pipe_slow );
5811 %}
5812 
5813 // Load Range
5814 instruct loadRange(rRegI dst, memory mem) %{
5815   match(Set dst (LoadRange mem));
5816 
5817   ins_cost(125);
5818   format %{ "MOV    $dst,$mem" %}
5819   opcode(0x8B);
5820   ins_encode( OpcP, RegMem(dst,mem));
5821   ins_pipe( ialu_reg_mem );
5822 %}
5823 
5824 
5825 // Load Pointer
5826 instruct loadP(eRegP dst, memory mem) %{
5827   match(Set dst (LoadP mem));
5828 
5829   ins_cost(125);
5830   format %{ "MOV    $dst,$mem" %}
5831   opcode(0x8B);
5832   ins_encode( OpcP, RegMem(dst,mem));
5833   ins_pipe( ialu_reg_mem );
5834 %}
5835 
5836 // Load Klass Pointer
5837 instruct loadKlass(eRegP dst, memory mem) %{
5838   match(Set dst (LoadKlass mem));
5839 
5840   ins_cost(125);
5841   format %{ "MOV    $dst,$mem" %}
5842   opcode(0x8B);
5843   ins_encode( OpcP, RegMem(dst,mem));
5844   ins_pipe( ialu_reg_mem );
5845 %}
5846 
5847 // Load Double
5848 instruct loadDPR(regDPR dst, memory mem) %{
5849   predicate(UseSSE<=1);
5850   match(Set dst (LoadD mem));
5851 
5852   ins_cost(150);
5853   format %{ "FLD_D  ST,$mem\n\t"
5854             "FSTP   $dst" %}
5855   opcode(0xDD);               /* DD /0 */
5856   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5857               Pop_Reg_DPR(dst) );
5858   ins_pipe( fpu_reg_mem );
5859 %}
5860 
5861 // Load Double to XMM
5862 instruct loadD(regD dst, memory mem) %{
5863   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5864   match(Set dst (LoadD mem));
5865   ins_cost(145);
5866   format %{ "MOVSD  $dst,$mem" %}
5867   ins_encode %{
5868     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5869   %}
5870   ins_pipe( pipe_slow );
5871 %}
5872 
5873 instruct loadD_partial(regD dst, memory mem) %{
5874   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5875   match(Set dst (LoadD mem));
5876   ins_cost(145);
5877   format %{ "MOVLPD $dst,$mem" %}
5878   ins_encode %{
5879     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5880   %}
5881   ins_pipe( pipe_slow );
5882 %}
5883 
5884 // Load to XMM register (single-precision floating point)
5885 // MOVSS instruction
5886 instruct loadF(regF dst, memory mem) %{
5887   predicate(UseSSE>=1);
5888   match(Set dst (LoadF mem));
5889   ins_cost(145);
5890   format %{ "MOVSS  $dst,$mem" %}
5891   ins_encode %{
5892     __ movflt ($dst$$XMMRegister, $mem$$Address);
5893   %}
5894   ins_pipe( pipe_slow );
5895 %}
5896 
5897 // Load Float
5898 instruct loadFPR(regFPR dst, memory mem) %{
5899   predicate(UseSSE==0);
5900   match(Set dst (LoadF mem));
5901 
5902   ins_cost(150);
5903   format %{ "FLD_S  ST,$mem\n\t"
5904             "FSTP   $dst" %}
5905   opcode(0xD9);               /* D9 /0 */
5906   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5907               Pop_Reg_FPR(dst) );
5908   ins_pipe( fpu_reg_mem );
5909 %}
5910 
5911 // Load Effective Address
5912 instruct leaP8(eRegP dst, indOffset8 mem) %{
5913   match(Set dst mem);
5914 
5915   ins_cost(110);
5916   format %{ "LEA    $dst,$mem" %}
5917   opcode(0x8D);
5918   ins_encode( OpcP, RegMem(dst,mem));
5919   ins_pipe( ialu_reg_reg_fat );
5920 %}
5921 
5922 instruct leaP32(eRegP dst, indOffset32 mem) %{
5923   match(Set dst mem);
5924 
5925   ins_cost(110);
5926   format %{ "LEA    $dst,$mem" %}
5927   opcode(0x8D);
5928   ins_encode( OpcP, RegMem(dst,mem));
5929   ins_pipe( ialu_reg_reg_fat );
5930 %}
5931 
5932 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5933   match(Set dst mem);
5934 
5935   ins_cost(110);
5936   format %{ "LEA    $dst,$mem" %}
5937   opcode(0x8D);
5938   ins_encode( OpcP, RegMem(dst,mem));
5939   ins_pipe( ialu_reg_reg_fat );
5940 %}
5941 
5942 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5943   match(Set dst mem);
5944 
5945   ins_cost(110);
5946   format %{ "LEA    $dst,$mem" %}
5947   opcode(0x8D);
5948   ins_encode( OpcP, RegMem(dst,mem));
5949   ins_pipe( ialu_reg_reg_fat );
5950 %}
5951 
5952 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5953   match(Set dst mem);
5954 
5955   ins_cost(110);
5956   format %{ "LEA    $dst,$mem" %}
5957   opcode(0x8D);
5958   ins_encode( OpcP, RegMem(dst,mem));
5959   ins_pipe( ialu_reg_reg_fat );
5960 %}
5961 
5962 // Load Constant
5963 instruct loadConI(rRegI dst, immI src) %{
5964   match(Set dst src);
5965 
5966   format %{ "MOV    $dst,$src" %}
5967   ins_encode( LdImmI(dst, src) );
5968   ins_pipe( ialu_reg_fat );
5969 %}
5970 
5971 // Load Constant zero
5972 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5973   match(Set dst src);
5974   effect(KILL cr);
5975 
5976   ins_cost(50);
5977   format %{ "XOR    $dst,$dst" %}
5978   opcode(0x33);  /* + rd */
5979   ins_encode( OpcP, RegReg( dst, dst ) );
5980   ins_pipe( ialu_reg );
5981 %}
5982 
5983 instruct loadConP(eRegP dst, immP src) %{
5984   match(Set dst src);
5985 
5986   format %{ "MOV    $dst,$src" %}
5987   opcode(0xB8);  /* + rd */
5988   ins_encode( LdImmP(dst, src) );
5989   ins_pipe( ialu_reg_fat );
5990 %}
5991 
5992 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5993   match(Set dst src);
5994   effect(KILL cr);
5995   ins_cost(200);
5996   format %{ "MOV    $dst.lo,$src.lo\n\t"
5997             "MOV    $dst.hi,$src.hi" %}
5998   opcode(0xB8);
5999   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6000   ins_pipe( ialu_reg_long_fat );
6001 %}
6002 
6003 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6004   match(Set dst src);
6005   effect(KILL cr);
6006   ins_cost(150);
6007   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6008             "XOR    $dst.hi,$dst.hi" %}
6009   opcode(0x33,0x33);
6010   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6011   ins_pipe( ialu_reg_long );
6012 %}
6013 
6014 // The instruction usage is guarded by predicate in operand immFPR().
6015 instruct loadConFPR(regFPR dst, immFPR con) %{
6016   match(Set dst con);
6017   ins_cost(125);
6018   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6019             "FSTP   $dst" %}
6020   ins_encode %{
6021     __ fld_s($constantaddress($con));
6022     __ fstp_d($dst$$reg);
6023   %}
6024   ins_pipe(fpu_reg_con);
6025 %}
6026 
6027 // The instruction usage is guarded by predicate in operand immFPR0().
6028 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6029   match(Set dst con);
6030   ins_cost(125);
6031   format %{ "FLDZ   ST\n\t"
6032             "FSTP   $dst" %}
6033   ins_encode %{
6034     __ fldz();
6035     __ fstp_d($dst$$reg);
6036   %}
6037   ins_pipe(fpu_reg_con);
6038 %}
6039 
6040 // The instruction usage is guarded by predicate in operand immFPR1().
6041 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6042   match(Set dst con);
6043   ins_cost(125);
6044   format %{ "FLD1   ST\n\t"
6045             "FSTP   $dst" %}
6046   ins_encode %{
6047     __ fld1();
6048     __ fstp_d($dst$$reg);
6049   %}
6050   ins_pipe(fpu_reg_con);
6051 %}
6052 
6053 // The instruction usage is guarded by predicate in operand immF().
6054 instruct loadConF(regF dst, immF con) %{
6055   match(Set dst con);
6056   ins_cost(125);
6057   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6058   ins_encode %{
6059     __ movflt($dst$$XMMRegister, $constantaddress($con));
6060   %}
6061   ins_pipe(pipe_slow);
6062 %}
6063 
6064 // The instruction usage is guarded by predicate in operand immF0().
6065 instruct loadConF0(regF dst, immF0 src) %{
6066   match(Set dst src);
6067   ins_cost(100);
6068   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6069   ins_encode %{
6070     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6071   %}
6072   ins_pipe(pipe_slow);
6073 %}
6074 
6075 // The instruction usage is guarded by predicate in operand immDPR().
6076 instruct loadConDPR(regDPR dst, immDPR con) %{
6077   match(Set dst con);
6078   ins_cost(125);
6079 
6080   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6081             "FSTP   $dst" %}
6082   ins_encode %{
6083     __ fld_d($constantaddress($con));
6084     __ fstp_d($dst$$reg);
6085   %}
6086   ins_pipe(fpu_reg_con);
6087 %}
6088 
6089 // The instruction usage is guarded by predicate in operand immDPR0().
6090 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6091   match(Set dst con);
6092   ins_cost(125);
6093 
6094   format %{ "FLDZ   ST\n\t"
6095             "FSTP   $dst" %}
6096   ins_encode %{
6097     __ fldz();
6098     __ fstp_d($dst$$reg);
6099   %}
6100   ins_pipe(fpu_reg_con);
6101 %}
6102 
6103 // The instruction usage is guarded by predicate in operand immDPR1().
6104 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6105   match(Set dst con);
6106   ins_cost(125);
6107 
6108   format %{ "FLD1   ST\n\t"
6109             "FSTP   $dst" %}
6110   ins_encode %{
6111     __ fld1();
6112     __ fstp_d($dst$$reg);
6113   %}
6114   ins_pipe(fpu_reg_con);
6115 %}
6116 
6117 // The instruction usage is guarded by predicate in operand immD().
6118 instruct loadConD(regD dst, immD con) %{
6119   match(Set dst con);
6120   ins_cost(125);
6121   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6122   ins_encode %{
6123     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6124   %}
6125   ins_pipe(pipe_slow);
6126 %}
6127 
6128 // The instruction usage is guarded by predicate in operand immD0().
6129 instruct loadConD0(regD dst, immD0 src) %{
6130   match(Set dst src);
6131   ins_cost(100);
6132   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6133   ins_encode %{
6134     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6135   %}
6136   ins_pipe( pipe_slow );
6137 %}
6138 
6139 // Load Stack Slot
6140 instruct loadSSI(rRegI dst, stackSlotI src) %{
6141   match(Set dst src);
6142   ins_cost(125);
6143 
6144   format %{ "MOV    $dst,$src" %}
6145   opcode(0x8B);
6146   ins_encode( OpcP, RegMem(dst,src));
6147   ins_pipe( ialu_reg_mem );
6148 %}
6149 
6150 instruct loadSSL(eRegL dst, stackSlotL src) %{
6151   match(Set dst src);
6152 
6153   ins_cost(200);
6154   format %{ "MOV    $dst,$src.lo\n\t"
6155             "MOV    $dst.hi,$src.hi" %}
6156   opcode(0x8B, 0x8B);
6157   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6158   ins_pipe( ialu_mem_long_reg );
6159 %}
6160 
6161 // Load Stack Slot
6162 instruct loadSSP(eRegP dst, stackSlotP src) %{
6163   match(Set dst src);
6164   ins_cost(125);
6165 
6166   format %{ "MOV    $dst,$src" %}
6167   opcode(0x8B);
6168   ins_encode( OpcP, RegMem(dst,src));
6169   ins_pipe( ialu_reg_mem );
6170 %}
6171 
6172 // Load Stack Slot
6173 instruct loadSSF(regFPR dst, stackSlotF src) %{
6174   match(Set dst src);
6175   ins_cost(125);
6176 
6177   format %{ "FLD_S  $src\n\t"
6178             "FSTP   $dst" %}
6179   opcode(0xD9);               /* D9 /0, FLD m32real */
6180   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6181               Pop_Reg_FPR(dst) );
6182   ins_pipe( fpu_reg_mem );
6183 %}
6184 
6185 // Load Stack Slot
6186 instruct loadSSD(regDPR dst, stackSlotD src) %{
6187   match(Set dst src);
6188   ins_cost(125);
6189 
6190   format %{ "FLD_D  $src\n\t"
6191             "FSTP   $dst" %}
6192   opcode(0xDD);               /* DD /0, FLD m64real */
6193   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6194               Pop_Reg_DPR(dst) );
6195   ins_pipe( fpu_reg_mem );
6196 %}
6197 
6198 // Prefetch instructions for allocation.
6199 // Must be safe to execute with invalid address (cannot fault).
6200 
6201 instruct prefetchAlloc0( memory mem ) %{
6202   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6203   match(PrefetchAllocation mem);
6204   ins_cost(0);
6205   size(0);
6206   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6207   ins_encode();
6208   ins_pipe(empty);
6209 %}
6210 
6211 instruct prefetchAlloc( memory mem ) %{
6212   predicate(AllocatePrefetchInstr==3);
6213   match( PrefetchAllocation mem );
6214   ins_cost(100);
6215 
6216   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6217   ins_encode %{
6218     __ prefetchw($mem$$Address);
6219   %}
6220   ins_pipe(ialu_mem);
6221 %}
6222 
6223 instruct prefetchAllocNTA( memory mem ) %{
6224   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6225   match(PrefetchAllocation mem);
6226   ins_cost(100);
6227 
6228   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6229   ins_encode %{
6230     __ prefetchnta($mem$$Address);
6231   %}
6232   ins_pipe(ialu_mem);
6233 %}
6234 
6235 instruct prefetchAllocT0( memory mem ) %{
6236   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6237   match(PrefetchAllocation mem);
6238   ins_cost(100);
6239 
6240   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6241   ins_encode %{
6242     __ prefetcht0($mem$$Address);
6243   %}
6244   ins_pipe(ialu_mem);
6245 %}
6246 
6247 instruct prefetchAllocT2( memory mem ) %{
6248   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6249   match(PrefetchAllocation mem);
6250   ins_cost(100);
6251 
6252   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6253   ins_encode %{
6254     __ prefetcht2($mem$$Address);
6255   %}
6256   ins_pipe(ialu_mem);
6257 %}
6258 
6259 //----------Store Instructions-------------------------------------------------
6260 
6261 // Store Byte
6262 instruct storeB(memory mem, xRegI src) %{
6263   match(Set mem (StoreB mem src));
6264 
6265   ins_cost(125);
6266   format %{ "MOV8   $mem,$src" %}
6267   opcode(0x88);
6268   ins_encode( OpcP, RegMem( src, mem ) );
6269   ins_pipe( ialu_mem_reg );
6270 %}
6271 
6272 // Store Char/Short
6273 instruct storeC(memory mem, rRegI src) %{
6274   match(Set mem (StoreC mem src));
6275 
6276   ins_cost(125);
6277   format %{ "MOV16  $mem,$src" %}
6278   opcode(0x89, 0x66);
6279   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6280   ins_pipe( ialu_mem_reg );
6281 %}
6282 
6283 // Store Integer
6284 instruct storeI(memory mem, rRegI src) %{
6285   match(Set mem (StoreI mem src));
6286 
6287   ins_cost(125);
6288   format %{ "MOV    $mem,$src" %}
6289   opcode(0x89);
6290   ins_encode( OpcP, RegMem( src, mem ) );
6291   ins_pipe( ialu_mem_reg );
6292 %}
6293 
6294 // Store Long
6295 instruct storeL(long_memory mem, eRegL src) %{
6296   predicate(!((StoreLNode*)n)->require_atomic_access());
6297   match(Set mem (StoreL mem src));
6298 
6299   ins_cost(200);
6300   format %{ "MOV    $mem,$src.lo\n\t"
6301             "MOV    $mem+4,$src.hi" %}
6302   opcode(0x89, 0x89);
6303   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6304   ins_pipe( ialu_mem_long_reg );
6305 %}
6306 
6307 // Store Long to Integer
6308 instruct storeL2I(memory mem, eRegL src) %{
6309   match(Set mem (StoreI mem (ConvL2I src)));
6310 
6311   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6312   ins_encode %{
6313     __ movl($mem$$Address, $src$$Register);
6314   %}
6315   ins_pipe(ialu_mem_reg);
6316 %}
6317 
6318 // Volatile Store Long.  Must be atomic, so move it into
6319 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6320 // target address before the store (for null-ptr checks)
6321 // so the memory operand is used twice in the encoding.
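// (The FILD/FISTp pair is what makes the store atomic: an aligned 8-byte x87
// load/store is a single memory access on the processors this port targets,
// so the two 32-bit halves can never be observed separately.)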
6322 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6323   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6324   match(Set mem (StoreL mem src));
6325   effect( KILL cr );
6326   ins_cost(400);
6327   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6328             "FILD   $src\n\t"
6329             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6330   opcode(0x3B);
6331   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6332   ins_pipe( fpu_reg_mem );
6333 %}
6334 
6335 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6336   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6337   match(Set mem (StoreL mem src));
6338   effect( TEMP tmp, KILL cr );
6339   ins_cost(380);
6340   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6341             "MOVSD  $tmp,$src\n\t"
6342             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6343   ins_encode %{
6344     __ cmpl(rax, $mem$$Address);
6345     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6346     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6347   %}
6348   ins_pipe( pipe_slow );
6349 %}
6350 
6351 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6352   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6353   match(Set mem (StoreL mem src));
6354   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6355   ins_cost(360);
6356   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6357             "MOVD   $tmp,$src.lo\n\t"
6358             "MOVD   $tmp2,$src.hi\n\t"
6359             "PUNPCKLDQ $tmp,$tmp2\n\t"
6360             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6361   ins_encode %{
6362     __ cmpl(rax, $mem$$Address);
6363     __ movdl($tmp$$XMMRegister, $src$$Register);
6364     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6365     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6366     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6367   %}
6368   ins_pipe( pipe_slow );
6369 %}
6370 
6371 // Store Pointer; for storing unknown oops and raw pointers
6372 instruct storeP(memory mem, anyRegP src) %{
6373   match(Set mem (StoreP mem src));
6374 
6375   ins_cost(125);
6376   format %{ "MOV    $mem,$src" %}
6377   opcode(0x89);
6378   ins_encode( OpcP, RegMem( src, mem ) );
6379   ins_pipe( ialu_mem_reg );
6380 %}
6381 
6382 // Store Integer Immediate
6383 instruct storeImmI(memory mem, immI src) %{
6384   match(Set mem (StoreI mem src));
6385 
6386   ins_cost(150);
6387   format %{ "MOV    $mem,$src" %}
6388   opcode(0xC7);               /* C7 /0 */
6389   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6390   ins_pipe( ialu_mem_imm );
6391 %}
6392 
6393 // Store Short/Char Immediate
6394 instruct storeImmI16(memory mem, immI16 src) %{
6395   predicate(UseStoreImmI16);
6396   match(Set mem (StoreC mem src));
6397 
6398   ins_cost(150);
6399   format %{ "MOV16  $mem,$src" %}
6400   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6401   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6402   ins_pipe( ialu_mem_imm );
6403 %}
6404 
6405 // Store Pointer Immediate; null pointers or constant oops that do not
6406 // need card-mark barriers.
6407 instruct storeImmP(memory mem, immP src) %{
6408   match(Set mem (StoreP mem src));
6409 
6410   ins_cost(150);
6411   format %{ "MOV    $mem,$src" %}
6412   opcode(0xC7);               /* C7 /0 */
6413   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6414   ins_pipe( ialu_mem_imm );
6415 %}
6416 
6417 // Store Byte Immediate
6418 instruct storeImmB(memory mem, immI8 src) %{
6419   match(Set mem (StoreB mem src));
6420 
6421   ins_cost(150);
6422   format %{ "MOV8   $mem,$src" %}
6423   opcode(0xC6);               /* C6 /0 */
6424   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6425   ins_pipe( ialu_mem_imm );
6426 %}
6427 
6428 // Store CMS card-mark Immediate
6429 instruct storeImmCM(memory mem, immI8 src) %{
6430   match(Set mem (StoreCM mem src));
6431 
6432   ins_cost(150);
6433   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6434   opcode(0xC6);               /* C6 /0 */
6435   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6436   ins_pipe( ialu_mem_imm );
6437 %}
6438 
6439 // Store Double
6440 instruct storeDPR( memory mem, regDPR1 src) %{
6441   predicate(UseSSE<=1);
6442   match(Set mem (StoreD mem src));
6443 
6444   ins_cost(100);
6445   format %{ "FST_D  $mem,$src" %}
6446   opcode(0xDD);       /* DD /2 */
6447   ins_encode( enc_FPR_store(mem,src) );
6448   ins_pipe( fpu_mem_reg );
6449 %}
6450 
6451 // Store double does rounding on x86
6452 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6453   predicate(UseSSE<=1);
6454   match(Set mem (StoreD mem (RoundDouble src)));
6455 
6456   ins_cost(100);
6457   format %{ "FST_D  $mem,$src\t# round" %}
6458   opcode(0xDD);       /* DD /2 */
6459   ins_encode( enc_FPR_store(mem,src) );
6460   ins_pipe( fpu_mem_reg );
6461 %}
6462 
// Store XMM register to memory (double-precision floating point)
6464 // MOVSD instruction
6465 instruct storeD(memory mem, regD src) %{
6466   predicate(UseSSE>=2);
6467   match(Set mem (StoreD mem src));
6468   ins_cost(95);
6469   format %{ "MOVSD  $mem,$src" %}
6470   ins_encode %{
6471     __ movdbl($mem$$Address, $src$$XMMRegister);
6472   %}
6473   ins_pipe( pipe_slow );
6474 %}
6475 
// Copy double between XMM register classes (regD -> vlRegD)
6477 instruct MoveD2VL(vlRegD dst, regD src) %{
6478   match(Set dst src);
6479   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6480   ins_encode %{
6481     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6482   %}
6483   ins_pipe( fpu_reg_reg );
6484 %}
6485 
// Copy double between XMM register classes (vlRegD -> regD)
6487 instruct MoveVL2D(regD dst, vlRegD src) %{
6488   match(Set dst src);
6489   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6490   ins_encode %{
6491     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6492   %}
6493   ins_pipe( fpu_reg_reg );
6494 %}
6495 
6496 // Store XMM register to memory (single-precision floating point)
6497 // MOVSS instruction
6498 instruct storeF(memory mem, regF src) %{
6499   predicate(UseSSE>=1);
6500   match(Set mem (StoreF mem src));
6501   ins_cost(95);
6502   format %{ "MOVSS  $mem,$src" %}
6503   ins_encode %{
6504     __ movflt($mem$$Address, $src$$XMMRegister);
6505   %}
6506   ins_pipe( pipe_slow );
6507 %}
6508 
// Copy float between XMM register classes (regF -> vlRegF)
6510 instruct MoveF2VL(vlRegF dst, regF src) %{
6511   match(Set dst src);
6512   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6513   ins_encode %{
6514     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6515   %}
6516   ins_pipe( fpu_reg_reg );
6517 %}
6518 
// Copy float between XMM register classes (vlRegF -> regF)
6520 instruct MoveVL2F(regF dst, vlRegF src) %{
6521   match(Set dst src);
6522   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6523   ins_encode %{
6524     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6525   %}
6526   ins_pipe( fpu_reg_reg );
6527 %}
6528 
6529 // Store Float
6530 instruct storeFPR( memory mem, regFPR1 src) %{
6531   predicate(UseSSE==0);
6532   match(Set mem (StoreF mem src));
6533 
6534   ins_cost(100);
6535   format %{ "FST_S  $mem,$src" %}
6536   opcode(0xD9);       /* D9 /2 */
6537   ins_encode( enc_FPR_store(mem,src) );
6538   ins_pipe( fpu_mem_reg );
6539 %}
6540 
6541 // Store Float does rounding on x86
6542 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6543   predicate(UseSSE==0);
6544   match(Set mem (StoreF mem (RoundFloat src)));
6545 
6546   ins_cost(100);
6547   format %{ "FST_S  $mem,$src\t# round" %}
6548   opcode(0xD9);       /* D9 /2 */
6549   ins_encode( enc_FPR_store(mem,src) );
6550   ins_pipe( fpu_mem_reg );
6551 %}
6552 
// Store Float from a double does rounding on x86
6554 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6555   predicate(UseSSE<=1);
6556   match(Set mem (StoreF mem (ConvD2F src)));
6557 
6558   ins_cost(100);
6559   format %{ "FST_S  $mem,$src\t# D-round" %}
6560   opcode(0xD9);       /* D9 /2 */
6561   ins_encode( enc_FPR_store(mem,src) );
6562   ins_pipe( fpu_mem_reg );
6563 %}
6564 
6565 // Store immediate Float value (it is faster than store from FPU register)
6566 // The instruction usage is guarded by predicate in operand immFPR().
6567 instruct storeFPR_imm( memory mem, immFPR src) %{
6568   match(Set mem (StoreF mem src));
6569 
6570   ins_cost(50);
6571   format %{ "MOV    $mem,$src\t# store float" %}
6572   opcode(0xC7);               /* C7 /0 */
6573   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6574   ins_pipe( ialu_mem_imm );
6575 %}
6576 
6577 // Store immediate Float value (it is faster than store from XMM register)
6578 // The instruction usage is guarded by predicate in operand immF().
6579 instruct storeF_imm( memory mem, immF src) %{
6580   match(Set mem (StoreF mem src));
6581 
6582   ins_cost(50);
6583   format %{ "MOV    $mem,$src\t# store float" %}
6584   opcode(0xC7);               /* C7 /0 */
6585   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6586   ins_pipe( ialu_mem_imm );
6587 %}
6588 
6589 // Store Integer to stack slot
6590 instruct storeSSI(stackSlotI dst, rRegI src) %{
6591   match(Set dst src);
6592 
6593   ins_cost(100);
6594   format %{ "MOV    $dst,$src" %}
6595   opcode(0x89);
6596   ins_encode( OpcPRegSS( dst, src ) );
6597   ins_pipe( ialu_mem_reg );
6598 %}
6599 
6600 // Store Integer to stack slot
6601 instruct storeSSP(stackSlotP dst, eRegP src) %{
6602   match(Set dst src);
6603 
6604   ins_cost(100);
6605   format %{ "MOV    $dst,$src" %}
6606   opcode(0x89);
6607   ins_encode( OpcPRegSS( dst, src ) );
6608   ins_pipe( ialu_mem_reg );
6609 %}
6610 
6611 // Store Long to stack slot
6612 instruct storeSSL(stackSlotL dst, eRegL src) %{
6613   match(Set dst src);
6614 
6615   ins_cost(200);
6616   format %{ "MOV    $dst,$src.lo\n\t"
6617             "MOV    $dst+4,$src.hi" %}
6618   opcode(0x89, 0x89);
6619   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6620   ins_pipe( ialu_mem_long_reg );
6621 %}
6622 
6623 //----------MemBar Instructions-----------------------------------------------
6624 // Memory barrier flavors
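// IA-32 keeps loads ordered with loads, stores ordered with stores, and does
// not reorder stores with older loads; only StoreLoad reordering can occur.
// Acquire, release and storestore barriers are therefore empty, and
// MemBarVolatile supplies the StoreLoad fence with a locked ADD of zero to
// [ESP] (see membar_volatile below).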
6625 
6626 instruct membar_acquire() %{
6627   match(MemBarAcquire);
6628   match(LoadFence);
6629   ins_cost(400);
6630 
6631   size(0);
6632   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6633   ins_encode();
6634   ins_pipe(empty);
6635 %}
6636 
6637 instruct membar_acquire_lock() %{
6638   match(MemBarAcquireLock);
6639   ins_cost(0);
6640 
6641   size(0);
6642   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6643   ins_encode( );
6644   ins_pipe(empty);
6645 %}
6646 
6647 instruct membar_release() %{
6648   match(MemBarRelease);
6649   match(StoreFence);
6650   ins_cost(400);
6651 
6652   size(0);
6653   format %{ "MEMBAR-release ! (empty encoding)" %}
6654   ins_encode( );
6655   ins_pipe(empty);
6656 %}
6657 
6658 instruct membar_release_lock() %{
6659   match(MemBarReleaseLock);
6660   ins_cost(0);
6661 
6662   size(0);
6663   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6664   ins_encode( );
6665   ins_pipe(empty);
6666 %}
6667 
6668 instruct membar_volatile(eFlagsReg cr) %{
6669   match(MemBarVolatile);
6670   effect(KILL cr);
6671   ins_cost(400);
6672 
6673   format %{
6674     $$template
6675     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6676   %}
6677   ins_encode %{
6678     __ membar(Assembler::StoreLoad);
6679   %}
6680   ins_pipe(pipe_slow);
6681 %}
6682 
6683 instruct unnecessary_membar_volatile() %{
6684   match(MemBarVolatile);
6685   predicate(Matcher::post_store_load_barrier(n));
6686   ins_cost(0);
6687 
6688   size(0);
6689   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6690   ins_encode( );
6691   ins_pipe(empty);
6692 %}
6693 
6694 instruct membar_storestore() %{
6695   match(MemBarStoreStore);
6696   ins_cost(0);
6697 
6698   size(0);
6699   format %{ "MEMBAR-storestore (empty encoding)" %}
6700   ins_encode( );
6701   ins_pipe(empty);
6702 %}
6703 
6704 //----------Move Instructions--------------------------------------------------
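// CastX2P below constrains both operands to EAX (eAXRegP/eAXRegI), so the
// int-to-pointer "conversion" is a pure register-class change and emits no code.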
6705 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6706   match(Set dst (CastX2P src));
6707   format %{ "# X2P  $dst, $src" %}
6708   ins_encode( /*empty encoding*/ );
6709   ins_cost(0);
6710   ins_pipe(empty);
6711 %}
6712 
6713 instruct castP2X(rRegI dst, eRegP src ) %{
6714   match(Set dst (CastP2X src));
6715   ins_cost(50);
6716   format %{ "MOV    $dst, $src\t# CastP2X" %}
6717   ins_encode( enc_Copy( dst, src) );
6718   ins_pipe( ialu_reg_reg );
6719 %}
6720 
6721 //----------Conditional Move---------------------------------------------------
6722 // Conditional move
6723 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6724   predicate(!VM_Version::supports_cmov() );
6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6726   ins_cost(200);
6727   format %{ "J$cop,us skip\t# signed cmove\n\t"
6728             "MOV    $dst,$src\n"
6729       "skip:" %}
6730   ins_encode %{
6731     Label Lskip;
6732     // Invert sense of branch from sense of CMOV
6733     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6734     __ movl($dst$$Register, $src$$Register);
6735     __ bind(Lskip);
6736   %}
6737   ins_pipe( pipe_cmov_reg );
6738 %}
6739 
6740 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6741   predicate(!VM_Version::supports_cmov() );
6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6743   ins_cost(200);
6744   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6745             "MOV    $dst,$src\n"
6746       "skip:" %}
6747   ins_encode %{
6748     Label Lskip;
6749     // Invert sense of branch from sense of CMOV
6750     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6751     __ movl($dst$$Register, $src$$Register);
6752     __ bind(Lskip);
6753   %}
6754   ins_pipe( pipe_cmov_reg );
6755 %}
6756 
6757 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6758   predicate(VM_Version::supports_cmov() );
6759   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6760   ins_cost(200);
6761   format %{ "CMOV$cop $dst,$src" %}
6762   opcode(0x0F,0x40);
6763   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6764   ins_pipe( pipe_cmov_reg );
6765 %}
6766 
6767 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6768   predicate(VM_Version::supports_cmov() );
6769   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6770   ins_cost(200);
6771   format %{ "CMOV$cop $dst,$src" %}
6772   opcode(0x0F,0x40);
6773   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6774   ins_pipe( pipe_cmov_reg );
6775 %}
6776 
6777 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6778   predicate(VM_Version::supports_cmov() );
6779   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6780   ins_cost(200);
6781   expand %{
6782     cmovI_regU(cop, cr, dst, src);
6783   %}
6784 %}
6785 
6786 // Conditional move
6787 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6788   predicate(VM_Version::supports_cmov() );
6789   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6790   ins_cost(250);
6791   format %{ "CMOV$cop $dst,$src" %}
6792   opcode(0x0F,0x40);
6793   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6794   ins_pipe( pipe_cmov_mem );
6795 %}
6796 
6797 // Conditional move
6798 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6799   predicate(VM_Version::supports_cmov() );
6800   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6801   ins_cost(250);
6802   format %{ "CMOV$cop $dst,$src" %}
6803   opcode(0x0F,0x40);
6804   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6805   ins_pipe( pipe_cmov_mem );
6806 %}
6807 
6808 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6809   predicate(VM_Version::supports_cmov() );
6810   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6811   ins_cost(250);
6812   expand %{
6813     cmovI_memU(cop, cr, dst, src);
6814   %}
6815 %}
6816 
6817 // Conditional move
6818 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6819   predicate(VM_Version::supports_cmov() );
6820   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6821   ins_cost(200);
6822   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6823   opcode(0x0F,0x40);
6824   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6825   ins_pipe( pipe_cmov_reg );
6826 %}
6827 
6828 // Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers regardless of
//       whether we are on a P6, so we emulate a cmov here.
6832 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6833   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6834   ins_cost(300);
6835   format %{ "Jn$cop   skip\n\t"
6836           "MOV    $dst,$src\t# pointer\n"
6837       "skip:" %}
6838   opcode(0x8b);
6839   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6840   ins_pipe( pipe_cmov_reg );
6841 %}
6842 
6843 // Conditional move
6844 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6845   predicate(VM_Version::supports_cmov() );
6846   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6847   ins_cost(200);
6848   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6849   opcode(0x0F,0x40);
6850   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6851   ins_pipe( pipe_cmov_reg );
6852 %}
6853 
6854 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6855   predicate(VM_Version::supports_cmov() );
6856   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   expand %{
6859     cmovP_regU(cop, cr, dst, src);
6860   %}
6861 %}
6862 
6863 // DISABLED: Requires the ADLC to emit a bottom_type call that
6864 // correctly meets the two pointer arguments; one is an incoming
6865 // register but the other is a memory operand.  ALSO appears to
6866 // be buggy with implicit null checks.
6867 //
6868 //// Conditional move
6869 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6870 //  predicate(VM_Version::supports_cmov() );
6871 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6872 //  ins_cost(250);
6873 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6874 //  opcode(0x0F,0x40);
6875 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6876 //  ins_pipe( pipe_cmov_mem );
6877 //%}
6878 //
6879 //// Conditional move
6880 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6881 //  predicate(VM_Version::supports_cmov() );
6882 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6883 //  ins_cost(250);
6884 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6885 //  opcode(0x0F,0x40);
6886 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6887 //  ins_pipe( pipe_cmov_mem );
6888 //%}
6889 
6890 // Conditional move
6891 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6892   predicate(UseSSE<=1);
6893   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6894   ins_cost(200);
6895   format %{ "FCMOV$cop $dst,$src\t# double" %}
6896   opcode(0xDA);
6897   ins_encode( enc_cmov_dpr(cop,src) );
6898   ins_pipe( pipe_cmovDPR_reg );
6899 %}
6900 
6901 // Conditional move
6902 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6903   predicate(UseSSE==0);
6904   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6905   ins_cost(200);
6906   format %{ "FCMOV$cop $dst,$src\t# float" %}
6907   opcode(0xDA);
6908   ins_encode( enc_cmov_dpr(cop,src) );
6909   ins_pipe( pipe_cmovDPR_reg );
6910 %}
6911 
6912 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
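// (FCMOVcc only tests CF, ZF and PF -- the flag combinations produced by
// unsigned compares -- so a signed-condition FP cmove is emulated below with
// a short branch around a register-to-register move.)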
6913 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6914   predicate(UseSSE<=1);
6915   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6916   ins_cost(200);
6917   format %{ "Jn$cop   skip\n\t"
6918             "MOV    $dst,$src\t# double\n"
6919       "skip:" %}
6920   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6921   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6922   ins_pipe( pipe_cmovDPR_reg );
6923 %}
6924 
6925 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6926 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6927   predicate(UseSSE==0);
6928   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6929   ins_cost(200);
6930   format %{ "Jn$cop    skip\n\t"
6931             "MOV    $dst,$src\t# float\n"
6932       "skip:" %}
6933   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6934   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6935   ins_pipe( pipe_cmovDPR_reg );
6936 %}
6937 
// No SSE/SSE2 conditional move instruction exists, so emulate a CMOVE with a short branch
6939 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6940   predicate (UseSSE>=1);
6941   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6942   ins_cost(200);
6943   format %{ "Jn$cop   skip\n\t"
6944             "MOVSS  $dst,$src\t# float\n"
6945       "skip:" %}
6946   ins_encode %{
6947     Label skip;
6948     // Invert sense of branch from sense of CMOV
6949     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6950     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6951     __ bind(skip);
6952   %}
6953   ins_pipe( pipe_slow );
6954 %}
6955 
// No SSE/SSE2 conditional move instruction exists, so emulate a CMOVE with a short branch
6957 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6958   predicate (UseSSE>=2);
6959   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6960   ins_cost(200);
6961   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
6963       "skip:" %}
6964   ins_encode %{
6965     Label skip;
6966     // Invert sense of branch from sense of CMOV
6967     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6968     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6969     __ bind(skip);
6970   %}
6971   ins_pipe( pipe_slow );
6972 %}
6973 
6974 // unsigned version
6975 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6976   predicate (UseSSE>=1);
6977   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6978   ins_cost(200);
6979   format %{ "Jn$cop   skip\n\t"
6980             "MOVSS  $dst,$src\t# float\n"
6981       "skip:" %}
6982   ins_encode %{
6983     Label skip;
6984     // Invert sense of branch from sense of CMOV
6985     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6986     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6987     __ bind(skip);
6988   %}
6989   ins_pipe( pipe_slow );
6990 %}
6991 
6992 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6993   predicate (UseSSE>=1);
6994   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6995   ins_cost(200);
6996   expand %{
6997     fcmovF_regU(cop, cr, dst, src);
6998   %}
6999 %}
7000 
7001 // unsigned version
7002 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7003   predicate (UseSSE>=2);
7004   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7005   ins_cost(200);
7006   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
7008       "skip:" %}
7009   ins_encode %{
7010     Label skip;
7011     // Invert sense of branch from sense of CMOV
7012     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7013     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7014     __ bind(skip);
7015   %}
7016   ins_pipe( pipe_slow );
7017 %}
7018 
7019 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7020   predicate (UseSSE>=2);
7021   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7022   ins_cost(200);
7023   expand %{
7024     fcmovD_regU(cop, cr, dst, src);
7025   %}
7026 %}
7027 
7028 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7029   predicate(VM_Version::supports_cmov() );
7030   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7031   ins_cost(200);
7032   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7033             "CMOV$cop $dst.hi,$src.hi" %}
7034   opcode(0x0F,0x40);
7035   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7036   ins_pipe( pipe_cmov_reg_long );
7037 %}
7038 
7039 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7040   predicate(VM_Version::supports_cmov() );
7041   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7042   ins_cost(200);
7043   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7044             "CMOV$cop $dst.hi,$src.hi" %}
7045   opcode(0x0F,0x40);
7046   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7047   ins_pipe( pipe_cmov_reg_long );
7048 %}
7049 
7050 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7051   predicate(VM_Version::supports_cmov() );
7052   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7053   ins_cost(200);
7054   expand %{
7055     cmovL_regU(cop, cr, dst, src);
7056   %}
7057 %}
7058 
7059 //----------Arithmetic Instructions--------------------------------------------
7060 //----------Addition Instructions----------------------------------------------
7061 
7062 // Integer Addition Instructions
7063 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7064   match(Set dst (AddI dst src));
7065   effect(KILL cr);
7066 
7067   size(2);
7068   format %{ "ADD    $dst,$src" %}
7069   opcode(0x03);
7070   ins_encode( OpcP, RegReg( dst, src) );
7071   ins_pipe( ialu_reg_reg );
7072 %}
7073 
7074 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7075   match(Set dst (AddI dst src));
7076   effect(KILL cr);
7077 
7078   format %{ "ADD    $dst,$src" %}
7079   opcode(0x81, 0x00); /* /0 id */
7080   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7081   ins_pipe( ialu_reg );
7082 %}
7083 
7084 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7085   predicate(UseIncDec);
7086   match(Set dst (AddI dst src));
7087   effect(KILL cr);
7088 
7089   size(1);
7090   format %{ "INC    $dst" %}
  opcode(0x40); /* INC r32 is encoded as 0x40 + reg */
7092   ins_encode( Opc_plus( primary, dst ) );
7093   ins_pipe( ialu_reg );
7094 %}
7095 
7096 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7097   match(Set dst (AddI src0 src1));
7098   ins_cost(110);
7099 
7100   format %{ "LEA    $dst,[$src0 + $src1]" %}
7101   opcode(0x8D); /* 0x8D /r */
7102   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7103   ins_pipe( ialu_reg_reg );
7104 %}
7105 
7106 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7107   match(Set dst (AddP src0 src1));
7108   ins_cost(110);
7109 
7110   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7111   opcode(0x8D); /* 0x8D /r */
7112   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7113   ins_pipe( ialu_reg_reg );
7114 %}
7115 
7116 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7117   predicate(UseIncDec);
7118   match(Set dst (AddI dst src));
7119   effect(KILL cr);
7120 
7121   size(1);
7122   format %{ "DEC    $dst" %}
  opcode(0x48); /* DEC r32 is encoded as 0x48 + reg */
7124   ins_encode( Opc_plus( primary, dst ) );
7125   ins_pipe( ialu_reg );
7126 %}
7127 
7128 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7129   match(Set dst (AddP dst src));
7130   effect(KILL cr);
7131 
7132   size(2);
7133   format %{ "ADD    $dst,$src" %}
7134   opcode(0x03);
7135   ins_encode( OpcP, RegReg( dst, src) );
7136   ins_pipe( ialu_reg_reg );
7137 %}
7138 
7139 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7140   match(Set dst (AddP dst src));
7141   effect(KILL cr);
7142 
7143   format %{ "ADD    $dst,$src" %}
7144   opcode(0x81,0x00); /* Opcode 81 /0 id */
7145   // ins_encode( RegImm( dst, src) );
7146   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7147   ins_pipe( ialu_reg );
7148 %}
7149 
7150 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7151   match(Set dst (AddI dst (LoadI src)));
7152   effect(KILL cr);
7153 
7154   ins_cost(125);
7155   format %{ "ADD    $dst,$src" %}
7156   opcode(0x03);
7157   ins_encode( OpcP, RegMem( dst, src) );
7158   ins_pipe( ialu_reg_mem );
7159 %}
7160 
7161 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7162   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7163   effect(KILL cr);
7164 
7165   ins_cost(150);
7166   format %{ "ADD    $dst,$src" %}
7167   opcode(0x01);  /* Opcode 01 /r */
7168   ins_encode( OpcP, RegMem( src, dst ) );
7169   ins_pipe( ialu_mem_reg );
7170 %}
7171 
7172 // Add Memory with Immediate
7173 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7174   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7175   effect(KILL cr);
7176 
7177   ins_cost(125);
7178   format %{ "ADD    $dst,$src" %}
7179   opcode(0x81);               /* Opcode 81 /0 id */
7180   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7181   ins_pipe( ialu_mem_imm );
7182 %}
7183 
7184 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7185   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7186   effect(KILL cr);
7187 
7188   ins_cost(125);
7189   format %{ "INC    $dst" %}
7190   opcode(0xFF);               /* Opcode FF /0 */
7191   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7192   ins_pipe( ialu_mem_imm );
7193 %}
7194 
7195 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7196   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7197   effect(KILL cr);
7198 
7199   ins_cost(125);
7200   format %{ "DEC    $dst" %}
7201   opcode(0xFF);               /* Opcode FF /1 */
7202   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7203   ins_pipe( ialu_mem_imm );
7204 %}
7205 
7206 
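// The CheckCastPP/CastPP/CastII/CastLL nodes below only narrow the type the
// compiler tracks for a value already in a register; they have no runtime
// effect here, so their encodings are empty.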
7207 instruct checkCastPP( eRegP dst ) %{
7208   match(Set dst (CheckCastPP dst));
7209 
7210   size(0);
7211   format %{ "#checkcastPP of $dst" %}
7212   ins_encode( /*empty encoding*/ );
7213   ins_pipe( empty );
7214 %}
7215 
7216 instruct castPP( eRegP dst ) %{
7217   match(Set dst (CastPP dst));
7218   format %{ "#castPP of $dst" %}
7219   ins_encode( /*empty encoding*/ );
7220   ins_pipe( empty );
7221 %}
7222 
7223 instruct castII( rRegI dst ) %{
7224   match(Set dst (CastII dst));
7225   format %{ "#castII of $dst" %}
7226   ins_encode( /*empty encoding*/ );
7227   ins_cost(0);
7228   ins_pipe( empty );
7229 %}
7230 
7231 instruct castLL( eRegL dst ) %{
7232   match(Set dst (CastLL dst));
7233   format %{ "#castLL of $dst" %}
7234   ins_encode( /*empty encoding*/ );
7235   ins_cost(0);
7236   ins_pipe( empty );
7237 %}
7238 
7239 
7240 // Load-locked - same as a regular pointer load when used with compare-swap
7241 instruct loadPLocked(eRegP dst, memory mem) %{
7242   match(Set dst (LoadPLocked mem));
7243 
7244   ins_cost(125);
7245   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7246   opcode(0x8B);
7247   ins_encode( OpcP, RegMem(dst,mem));
7248   ins_pipe( ialu_reg_mem );
7249 %}
7250 
7251 // Conditional-store of the updated heap-top.
7252 // Used during allocation of the shared heap.
7253 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
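// CMPXCHG implicitly compares against EAX and, on failure, reloads EAX with
// the current memory value, which is why $oldval is pinned to eAXRegP here
// and in the CompareAndSwap/CompareAndExchange instructs further down.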
7254 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7255   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7256   // EAX is killed if there is contention, but then it's also unused.
7257   // In the common case of no contention, EAX holds the new oop address.
7258   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7259   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7260   ins_pipe( pipe_cmpxchg );
7261 %}
7262 
7263 // Conditional-store of an int value.
7264 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7265 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7266   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7267   effect(KILL oldval);
7268   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7269   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7270   ins_pipe( pipe_cmpxchg );
7271 %}
7272 
7273 // Conditional-store of a long value.
7274 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7275 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7276   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7277   effect(KILL oldval);
7278   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7279             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7280             "XCHG   EBX,ECX"
7281   %}
7282   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7284     //       cmpxchg8 instruction because the instruction uses
7285     //       rcx as the high order word of the new value to store but
7286     //       our register encoding uses rbx.
7287     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7288     __ lock();
7289     __ cmpxchg8($mem$$Address);
7290     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7291   %}
7292   ins_pipe( pipe_cmpxchg );
7293 %}
7294 
7295 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7296 
7297 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7298   predicate(VM_Version::supports_cx8());
7299   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7300   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7301   effect(KILL cr, KILL oldval);
7302   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7303             "MOV    $res,0\n\t"
7304             "JNE,s  fail\n\t"
7305             "MOV    $res,1\n"
7306           "fail:" %}
7307   ins_encode( enc_cmpxchg8(mem_ptr),
7308               enc_flags_ne_to_boolean(res) );
7309   ins_pipe( pipe_cmpxchg );
7310 %}
7311 
7312 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7313   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7314   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7315   effect(KILL cr, KILL oldval);
7316   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7317             "MOV    $res,0\n\t"
7318             "JNE,s  fail\n\t"
7319             "MOV    $res,1\n"
7320           "fail:" %}
7321   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7322   ins_pipe( pipe_cmpxchg );
7323 %}
7324 
7325 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7326   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7327   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7328   effect(KILL cr, KILL oldval);
7329   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7330             "MOV    $res,0\n\t"
7331             "JNE,s  fail\n\t"
7332             "MOV    $res,1\n"
7333           "fail:" %}
7334   ins_encode( enc_cmpxchgb(mem_ptr),
7335               enc_flags_ne_to_boolean(res) );
7336   ins_pipe( pipe_cmpxchg );
7337 %}
7338 
7339 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7340   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7341   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7342   effect(KILL cr, KILL oldval);
7343   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7344             "MOV    $res,0\n\t"
7345             "JNE,s  fail\n\t"
7346             "MOV    $res,1\n"
7347           "fail:" %}
7348   ins_encode( enc_cmpxchgw(mem_ptr),
7349               enc_flags_ne_to_boolean(res) );
7350   ins_pipe( pipe_cmpxchg );
7351 %}
7352 
7353 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7354   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7355   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7356   effect(KILL cr, KILL oldval);
7357   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7358             "MOV    $res,0\n\t"
7359             "JNE,s  fail\n\t"
7360             "MOV    $res,1\n"
7361           "fail:" %}
7362   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7363   ins_pipe( pipe_cmpxchg );
7364 %}
7365 
7366 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7367   predicate(VM_Version::supports_cx8());
7368   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7369   effect(KILL cr);
7370   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7371   ins_encode( enc_cmpxchg8(mem_ptr) );
7372   ins_pipe( pipe_cmpxchg );
7373 %}
7374 
7375 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7376   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7377   effect(KILL cr);
7378   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7379   ins_encode( enc_cmpxchg(mem_ptr) );
7380   ins_pipe( pipe_cmpxchg );
7381 %}
7382 
7383 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7384   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7385   effect(KILL cr);
7386   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7387   ins_encode( enc_cmpxchgb(mem_ptr) );
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7392   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7393   effect(KILL cr);
7394   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7395   ins_encode( enc_cmpxchgw(mem_ptr) );
7396   ins_pipe( pipe_cmpxchg );
7397 %}
7398 
7399 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7400   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7401   effect(KILL cr);
7402   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7403   ins_encode( enc_cmpxchg(mem_ptr) );
7404   ins_pipe( pipe_cmpxchg );
7405 %}
7406 
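// If the fetched value is not used, a locked ADD is emitted instead of XADD:
// it is the same atomic read-modify-write but does not tie up a register for
// the (dead) old value.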
7407 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7408   predicate(n->as_LoadStore()->result_not_used());
7409   match(Set dummy (GetAndAddB mem add));
7410   effect(KILL cr);
7411   format %{ "ADDB  [$mem],$add" %}
7412   ins_encode %{
7413     __ lock();
7414     __ addb($mem$$Address, $add$$constant);
7415   %}
7416   ins_pipe( pipe_cmpxchg );
7417 %}
7418 
7419 // Important to match to xRegI: only 8-bit regs.
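// (Only EAX, EBX, ECX and EDX have 8-bit sub-registers (AL/BL/CL/DL) on IA-32,
// which is what the xRegI operand class encodes.)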
7420 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7421   match(Set newval (GetAndAddB mem newval));
7422   effect(KILL cr);
7423   format %{ "XADDB  [$mem],$newval" %}
7424   ins_encode %{
7425     __ lock();
7426     __ xaddb($mem$$Address, $newval$$Register);
7427   %}
7428   ins_pipe( pipe_cmpxchg );
7429 %}
7430 
7431 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7432   predicate(n->as_LoadStore()->result_not_used());
7433   match(Set dummy (GetAndAddS mem add));
7434   effect(KILL cr);
7435   format %{ "ADDS  [$mem],$add" %}
7436   ins_encode %{
7437     __ lock();
7438     __ addw($mem$$Address, $add$$constant);
7439   %}
7440   ins_pipe( pipe_cmpxchg );
7441 %}
7442 
7443 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7444   match(Set newval (GetAndAddS mem newval));
7445   effect(KILL cr);
7446   format %{ "XADDS  [$mem],$newval" %}
7447   ins_encode %{
7448     __ lock();
7449     __ xaddw($mem$$Address, $newval$$Register);
7450   %}
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7455   predicate(n->as_LoadStore()->result_not_used());
7456   match(Set dummy (GetAndAddI mem add));
7457   effect(KILL cr);
7458   format %{ "ADDL  [$mem],$add" %}
7459   ins_encode %{
7460     __ lock();
7461     __ addl($mem$$Address, $add$$constant);
7462   %}
7463   ins_pipe( pipe_cmpxchg );
7464 %}
7465 
7466 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7467   match(Set newval (GetAndAddI mem newval));
7468   effect(KILL cr);
7469   format %{ "XADDL  [$mem],$newval" %}
7470   ins_encode %{
7471     __ lock();
7472     __ xaddl($mem$$Address, $newval$$Register);
7473   %}
7474   ins_pipe( pipe_cmpxchg );
7475 %}
7476 
7477 // Important to match to xRegI: only 8-bit regs.
7478 instruct xchgB( memory mem, xRegI newval) %{
7479   match(Set newval (GetAndSetB mem newval));
7480   format %{ "XCHGB  $newval,[$mem]" %}
7481   ins_encode %{
7482     __ xchgb($newval$$Register, $mem$$Address);
7483   %}
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
7487 instruct xchgS( memory mem, rRegI newval) %{
7488   match(Set newval (GetAndSetS mem newval));
7489   format %{ "XCHGW  $newval,[$mem]" %}
7490   ins_encode %{
7491     __ xchgw($newval$$Register, $mem$$Address);
7492   %}
7493   ins_pipe( pipe_cmpxchg );
7494 %}
7495 
7496 instruct xchgI( memory mem, rRegI newval) %{
7497   match(Set newval (GetAndSetI mem newval));
7498   format %{ "XCHGL  $newval,[$mem]" %}
7499   ins_encode %{
7500     __ xchgl($newval$$Register, $mem$$Address);
7501   %}
7502   ins_pipe( pipe_cmpxchg );
7503 %}
7504 
7505 instruct xchgP( memory mem, pRegP newval) %{
7506   match(Set newval (GetAndSetP mem newval));
7507   format %{ "XCHGL  $newval,[$mem]" %}
7508   ins_encode %{
7509     __ xchgl($newval$$Register, $mem$$Address);
7510   %}
7511   ins_pipe( pipe_cmpxchg );
7512 %}
7513 
7514 //----------Subtraction Instructions-------------------------------------------
7515 
7516 // Integer Subtraction Instructions
7517 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7518   match(Set dst (SubI dst src));
7519   effect(KILL cr);
7520 
7521   size(2);
7522   format %{ "SUB    $dst,$src" %}
7523   opcode(0x2B);
7524   ins_encode( OpcP, RegReg( dst, src) );
7525   ins_pipe( ialu_reg_reg );
7526 %}
7527 
7528 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7529   match(Set dst (SubI dst src));
7530   effect(KILL cr);
7531 
7532   format %{ "SUB    $dst,$src" %}
7533   opcode(0x81,0x05);  /* Opcode 81 /5 */
7534   // ins_encode( RegImm( dst, src) );
7535   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7536   ins_pipe( ialu_reg );
7537 %}
7538 
7539 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7540   match(Set dst (SubI dst (LoadI src)));
7541   effect(KILL cr);
7542 
7543   ins_cost(125);
7544   format %{ "SUB    $dst,$src" %}
7545   opcode(0x2B);
7546   ins_encode( OpcP, RegMem( dst, src) );
7547   ins_pipe( ialu_reg_mem );
7548 %}
7549 
7550 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7551   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7552   effect(KILL cr);
7553 
7554   ins_cost(150);
7555   format %{ "SUB    $dst,$src" %}
7556   opcode(0x29);  /* Opcode 29 /r */
7557   ins_encode( OpcP, RegMem( src, dst ) );
7558   ins_pipe( ialu_mem_reg );
7559 %}
7560 
7561 // Subtract from a pointer
7562 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7563   match(Set dst (AddP dst (SubI zero src)));
7564   effect(KILL cr);
7565 
7566   size(2);
7567   format %{ "SUB    $dst,$src" %}
7568   opcode(0x2B);
7569   ins_encode( OpcP, RegReg( dst, src) );
7570   ins_pipe( ialu_reg_reg );
7571 %}
7572 
7573 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7574   match(Set dst (SubI zero dst));
7575   effect(KILL cr);
7576 
7577   size(2);
7578   format %{ "NEG    $dst" %}
7579   opcode(0xF7,0x03);  // Opcode F7 /3
7580   ins_encode( OpcP, RegOpc( dst ) );
7581   ins_pipe( ialu_reg );
7582 %}
7583 
7584 //----------Multiplication/Division Instructions-------------------------------
7585 // Integer Multiplication Instructions
7586 // Multiply Register
7587 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7588   match(Set dst (MulI dst src));
7589   effect(KILL cr);
7590 
7591   size(3);
7592   ins_cost(300);
7593   format %{ "IMUL   $dst,$src" %}
7594   opcode(0xAF, 0x0F);
7595   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7596   ins_pipe( ialu_reg_reg_alu0 );
7597 %}
7598 
7599 // Multiply 32-bit Immediate
7600 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7601   match(Set dst (MulI src imm));
7602   effect(KILL cr);
7603 
7604   ins_cost(300);
7605   format %{ "IMUL   $dst,$src,$imm" %}
7606   opcode(0x69);  /* 69 /r id */
7607   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7608   ins_pipe( ialu_reg_reg_alu0 );
7609 %}
7610 
7611 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7612   match(Set dst src);
7613   effect(KILL cr);
7614 
7615   // Note that this is artificially increased to make it more expensive than loadConL
7616   ins_cost(250);
7617   format %{ "MOV    EAX,$src\t// low word only" %}
7618   opcode(0xB8);
7619   ins_encode( LdImmL_Lo(dst, src) );
7620   ins_pipe( ialu_reg_fat );
7621 %}
7622 
7623 // Multiply by 32-bit Immediate, taking the shifted high order results
7624 //  (special case for shift by 32)
7625 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7626   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7627   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7629              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7630   effect(USE src1, KILL cr);
7631 
7632   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7633   ins_cost(0*100 + 1*400 - 150);
7634   format %{ "IMUL   EDX:EAX,$src1" %}
7635   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7636   ins_pipe( pipe_slow );
7637 %}
7638 
7639 // Multiply by 32-bit Immediate, taking the shifted high order results
7640 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7641   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7642   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7643              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7644              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7645   effect(USE src1, KILL cr);
7646 
7647   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7648   ins_cost(1*100 + 1*400 - 150);
7649   format %{ "IMUL   EDX:EAX,$src1\n\t"
7650             "SAR    EDX,$cnt-32" %}
7651   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7652   ins_pipe( pipe_slow );
7653 %}
7654 
7655 // Multiply Memory 32-bit Immediate
7656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7657   match(Set dst (MulI (LoadI src) imm));
7658   effect(KILL cr);
7659 
7660   ins_cost(300);
7661   format %{ "IMUL   $dst,$src,$imm" %}
7662   opcode(0x69);  /* 69 /r id */
7663   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7664   ins_pipe( ialu_reg_mem_alu0 );
7665 %}
7666 
7667 // Multiply Memory
7668 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7669   match(Set dst (MulI dst (LoadI src)));
7670   effect(KILL cr);
7671 
7672   ins_cost(350);
7673   format %{ "IMUL   $dst,$src" %}
7674   opcode(0xAF, 0x0F);
7675   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7676   ins_pipe( ialu_reg_mem_alu0 );
7677 %}
7678 
7679 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7680 %{
7681   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7682   effect(KILL cr, KILL src2);
7683 
7684   expand %{ mulI_eReg(dst, src1, cr);
7685            mulI_eReg(src2, src3, cr);
7686            addI_eReg(dst, src2, cr); %}
7687 %}
7688 
7689 // Multiply Register Int to Long
7690 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7691   // Basic Idea: long = (long)int * (long)int
7692   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7693   effect(DEF dst, USE src, USE src1, KILL flags);
7694 
7695   ins_cost(300);
7696   format %{ "IMUL   $dst,$src1" %}
7697 
7698   ins_encode( long_int_multiply( dst, src1 ) );
7699   ins_pipe( ialu_reg_reg_alu0 );
7700 %}
7701 
7702 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7703   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7704   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7705   effect(KILL flags);
7706 
7707   ins_cost(300);
7708   format %{ "MUL    $dst,$src1" %}
7709 
7710   ins_encode( long_uint_multiply(dst, src1) );
7711   ins_pipe( ialu_reg_reg_alu0 );
7712 %}
7713 
7714 // Multiply Register Long
7715 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7716   match(Set dst (MulL dst src));
7717   effect(KILL cr, TEMP tmp);
7718   ins_cost(4*100+3*400);
7719 // Basic idea: lo(result) = lo(x_lo * y_lo)
7720 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
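//             i.e. writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
//             (x*y) mod 2^64 = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32);
//             the x_hi*y_hi term falls entirely outside the low 64 bits.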
7721   format %{ "MOV    $tmp,$src.lo\n\t"
7722             "IMUL   $tmp,EDX\n\t"
7723             "MOV    EDX,$src.hi\n\t"
7724             "IMUL   EDX,EAX\n\t"
7725             "ADD    $tmp,EDX\n\t"
7726             "MUL    EDX:EAX,$src.lo\n\t"
7727             "ADD    EDX,$tmp" %}
7728   ins_encode( long_multiply( dst, src, tmp ) );
7729   ins_pipe( pipe_slow );
7730 %}
7731 
7732 // Multiply Register Long where the left operand's high 32 bits are zero
7733 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7734   predicate(is_operand_hi32_zero(n->in(1)));
7735   match(Set dst (MulL dst src));
7736   effect(KILL cr, TEMP tmp);
7737   ins_cost(2*100+2*400);
7738 // Basic idea: lo(result) = lo(x_lo * y_lo)
7739 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7740   format %{ "MOV    $tmp,$src.hi\n\t"
7741             "IMUL   $tmp,EAX\n\t"
7742             "MUL    EDX:EAX,$src.lo\n\t"
7743             "ADD    EDX,$tmp" %}
7744   ins_encode %{
7745     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7746     __ imull($tmp$$Register, rax);
7747     __ mull($src$$Register);
7748     __ addl(rdx, $tmp$$Register);
7749   %}
7750   ins_pipe( pipe_slow );
7751 %}
7752 
7753 // Multiply Register Long where the right operand's high 32 bits are zero
7754 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7755   predicate(is_operand_hi32_zero(n->in(2)));
7756   match(Set dst (MulL dst src));
7757   effect(KILL cr, TEMP tmp);
7758   ins_cost(2*100+2*400);
7759 // Basic idea: lo(result) = lo(x_lo * y_lo)
7760 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7761   format %{ "MOV    $tmp,$src.lo\n\t"
7762             "IMUL   $tmp,EDX\n\t"
7763             "MUL    EDX:EAX,$src.lo\n\t"
7764             "ADD    EDX,$tmp" %}
7765   ins_encode %{
7766     __ movl($tmp$$Register, $src$$Register);
7767     __ imull($tmp$$Register, rdx);
7768     __ mull($src$$Register);
7769     __ addl(rdx, $tmp$$Register);
7770   %}
7771   ins_pipe( pipe_slow );
7772 %}
7773 
7774 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7775 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7776   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7777   match(Set dst (MulL dst src));
7778   effect(KILL cr);
7779   ins_cost(1*400);
7780 // Basic idea: lo(result) = lo(x_lo * y_lo)
7781 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7782   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7783   ins_encode %{
7784     __ mull($src$$Register);
7785   %}
7786   ins_pipe( pipe_slow );
7787 %}
7788 
7789 // Multiply Register Long by small constant
7790 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7791   match(Set dst (MulL dst src));
7792   effect(KILL cr, TEMP tmp);
7793   ins_cost(2*100+2*400);
7794   size(12);
7795 // Basic idea: lo(result) = lo(src * EAX)
7796 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7797   format %{ "IMUL   $tmp,EDX,$src\n\t"
7798             "MOV    EDX,$src\n\t"
7799             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7800             "ADD    EDX,$tmp" %}
7801   ins_encode( long_multiply_con( dst, src, tmp ) );
7802   ins_pipe( pipe_slow );
7803 %}
7804 
7805 // Integer DIV with Register
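// The explicit 0x80000000 / -1 check below avoids the #DE exception IDIV
// would raise for min_jint / -1; skipping IDIV leaves EAX = min_jint and
// EDX = 0, which are exactly the Java quotient and remainder for that case.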
7806 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7807   match(Set rax (DivI rax div));
7808   effect(KILL rdx, KILL cr);
7809   size(26);
7810   ins_cost(30*100+10*100);
7811   format %{ "CMP    EAX,0x80000000\n\t"
7812             "JNE,s  normal\n\t"
7813             "XOR    EDX,EDX\n\t"
7814             "CMP    ECX,-1\n\t"
7815             "JE,s   done\n"
7816     "normal: CDQ\n\t"
7817             "IDIV   $div\n\t"
7818     "done:"        %}
7819   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7820   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7821   ins_pipe( ialu_reg_reg_alu0 );
7822 %}
7823 
7824 // Divide Register Long
7825 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7826   match(Set dst (DivL src1 src2));
7827   effect( KILL cr, KILL cx, KILL bx );
7828   ins_cost(10000);
7829   format %{ "PUSH   $src1.hi\n\t"
7830             "PUSH   $src1.lo\n\t"
7831             "PUSH   $src2.hi\n\t"
7832             "PUSH   $src2.lo\n\t"
7833             "CALL   SharedRuntime::ldiv\n\t"
7834             "ADD    ESP,16" %}
7835   ins_encode( long_div(src1,src2) );
7836   ins_pipe( pipe_slow );
7837 %}
7838 
7839 // Integer DIVMOD with Register, both quotient and mod results
7840 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7841   match(DivModI rax div);
7842   effect(KILL cr);
7843   size(26);
7844   ins_cost(30*100+10*100);
7845   format %{ "CMP    EAX,0x80000000\n\t"
7846             "JNE,s  normal\n\t"
7847             "XOR    EDX,EDX\n\t"
7848             "CMP    ECX,-1\n\t"
7849             "JE,s   done\n"
7850     "normal: CDQ\n\t"
7851             "IDIV   $div\n\t"
7852     "done:"        %}
7853   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7854   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7855   ins_pipe( pipe_slow );
7856 %}
7857 
7858 // Integer MOD with Register
7859 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7860   match(Set rdx (ModI rax div));
7861   effect(KILL rax, KILL cr);
7862 
7863   size(26);
7864   ins_cost(300);
7865   format %{ "CDQ\n\t"
7866             "IDIV   $div" %}
7867   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7868   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7869   ins_pipe( ialu_reg_reg_alu0 );
7870 %}
7871 
7872 // Remainder Register Long
7873 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7874   match(Set dst (ModL src1 src2));
7875   effect( KILL cr, KILL cx, KILL bx );
7876   ins_cost(10000);
7877   format %{ "PUSH   $src1.hi\n\t"
7878             "PUSH   $src1.lo\n\t"
7879             "PUSH   $src2.hi\n\t"
7880             "PUSH   $src2.lo\n\t"
7881             "CALL   SharedRuntime::lrem\n\t"
7882             "ADD    ESP,16" %}
7883   ins_encode( long_mod(src1,src2) );
7884   ins_pipe( pipe_slow );
7885 %}
7886 
7887 // Divide Register Long (no special case since divisor != -1)
7888 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7889   match(Set dst (DivL dst imm));
7890   effect( TEMP tmp, TEMP tmp2, KILL cr );
7891   ins_cost(1000);
7892   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7893             "XOR    $tmp2,$tmp2\n\t"
7894             "CMP    $tmp,EDX\n\t"
7895             "JA,s   fast\n\t"
7896             "MOV    $tmp2,EAX\n\t"
7897             "MOV    EAX,EDX\n\t"
7898             "MOV    EDX,0\n\t"
7899             "JLE,s  pos\n\t"
7900             "LNEG   EAX : $tmp2\n\t"
7901             "DIV    $tmp # unsigned division\n\t"
7902             "XCHG   EAX,$tmp2\n\t"
7903             "DIV    $tmp\n\t"
7904             "LNEG   $tmp2 : EAX\n\t"
7905             "JMP,s  done\n"
7906     "pos:\n\t"
7907             "DIV    $tmp\n\t"
7908             "XCHG   EAX,$tmp2\n"
7909     "fast:\n\t"
7910             "DIV    $tmp\n"
7911     "done:\n\t"
7912             "MOV    EDX,$tmp2\n\t"
7913             "NEG    EDX:EAX # if $imm < 0" %}
7914   ins_encode %{
7915     int con = (int)$imm$$constant;
7916     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7917     int pcon = (con > 0) ? con : -con;
7918     Label Lfast, Lpos, Ldone;
7919 
7920     __ movl($tmp$$Register, pcon);
7921     __ xorl($tmp2$$Register,$tmp2$$Register);
7922     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7923     __ jccb(Assembler::above, Lfast); // result fits into 32 bits
7924 
7925     __ movl($tmp2$$Register, $dst$$Register); // save
7926     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7927     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7928     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7929 
7930     // Negative dividend.
7931     // convert value to positive to use unsigned division
7932     __ lneg($dst$$Register, $tmp2$$Register);
7933     __ divl($tmp$$Register);
7934     __ xchgl($dst$$Register, $tmp2$$Register);
7935     __ divl($tmp$$Register);
7936     // revert result back to negative
7937     __ lneg($tmp2$$Register, $dst$$Register);
7938     __ jmpb(Ldone);
7939 
7940     __ bind(Lpos);
7941     __ divl($tmp$$Register); // Use unsigned division
7942     __ xchgl($dst$$Register, $tmp2$$Register);
7943     // Fall through to the final divide; tmp2 has the 32-bit hi result
7944 
7945     __ bind(Lfast);
7946     // fast path: src is positive
7947     __ divl($tmp$$Register); // Use unsigned division
7948 
7949     __ bind(Ldone);
7950     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7951     if (con < 0) {
7952       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7953     }
7954   %}
7955   ins_pipe( pipe_slow );
7956 %}
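
// Note: a minimal C sketch (illustration only) of the schoolbook 64-by-32
// division that the rule above performs with two hardware DIVs on the
// magnitudes; the sign handling (negate, divide unsigned, negate back) and the
// fast path for quotients that already fit in 32 bits are elided here.
//
//   #include <stdint.h>
//   static uint64_t udiv64_32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     uint32_t q_hi = n_hi / d;                                       // first DIV: high word
//     uint32_t r    = n_hi % d;                                       // remainder < d
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | n_lo) / d);   // second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;                           // q_lo fits since r < d
//   }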
7957 
7958 // Remainder Register Long (remainder fits into 32 bits)
7959 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7960   match(Set dst (ModL dst imm));
7961   effect( TEMP tmp, TEMP tmp2, KILL cr );
7962   ins_cost(1000);
7963   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7964             "CMP    $tmp,EDX\n\t"
7965             "JA,s   fast\n\t"
7966             "MOV    $tmp2,EAX\n\t"
7967             "MOV    EAX,EDX\n\t"
7968             "MOV    EDX,0\n\t"
7969             "JLE,s  pos\n\t"
7970             "LNEG   EAX : $tmp2\n\t"
7971             "DIV    $tmp # unsigned division\n\t"
7972             "MOV    EAX,$tmp2\n\t"
7973             "DIV    $tmp\n\t"
7974             "NEG    EDX\n\t"
7975             "JMP,s  done\n"
7976     "pos:\n\t"
7977             "DIV    $tmp\n\t"
7978             "MOV    EAX,$tmp2\n"
7979     "fast:\n\t"
7980             "DIV    $tmp\n"
7981     "done:\n\t"
7982             "MOV    EAX,EDX\n\t"
7983             "SAR    EDX,31\n\t" %}
7984   ins_encode %{
7985     int con = (int)$imm$$constant;
7986     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7987     int pcon = (con > 0) ? con : -con;
7988     Label  Lfast, Lpos, Ldone;
7989 
7990     __ movl($tmp$$Register, pcon);
7991     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7992     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
7993 
7994     __ movl($tmp2$$Register, $dst$$Register); // save
7995     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7996     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7997     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7998 
7999     // Negative dividend.
8000     // convert value to positive to use unsigned division
8001     __ lneg($dst$$Register, $tmp2$$Register);
8002     __ divl($tmp$$Register);
8003     __ movl($dst$$Register, $tmp2$$Register);
8004     __ divl($tmp$$Register);
8005     // revert remainder back to negative
8006     __ negl(HIGH_FROM_LOW($dst$$Register));
8007     __ jmpb(Ldone);
8008 
8009     __ bind(Lpos);
8010     __ divl($tmp$$Register);
8011     __ movl($dst$$Register, $tmp2$$Register);
8012 
8013     __ bind(Lfast);
8014     // fast path: src is positive
8015     __ divl($tmp$$Register);
8016 
8017     __ bind(Ldone);
8018     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8019     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8020 
8021   %}
8022   ins_pipe( pipe_slow );
8023 %}
8024 
8025 // Integer Shift Instructions
8026 // Shift Left by one
8027 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8028   match(Set dst (LShiftI dst shift));
8029   effect(KILL cr);
8030 
8031   size(2);
8032   format %{ "SHL    $dst,$shift" %}
8033   opcode(0xD1, 0x4);  /* D1 /4 */
8034   ins_encode( OpcP, RegOpc( dst ) );
8035   ins_pipe( ialu_reg );
8036 %}
8037 
8038 // Shift Left by 8-bit immediate
8039 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8040   match(Set dst (LShiftI dst shift));
8041   effect(KILL cr);
8042 
8043   size(3);
8044   format %{ "SHL    $dst,$shift" %}
8045   opcode(0xC1, 0x4);  /* C1 /4 ib */
8046   ins_encode( RegOpcImm( dst, shift) );
8047   ins_pipe( ialu_reg );
8048 %}
8049 
8050 // Shift Left by variable
8051 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8052   match(Set dst (LShiftI dst shift));
8053   effect(KILL cr);
8054 
8055   size(2);
8056   format %{ "SHL    $dst,$shift" %}
8057   opcode(0xD3, 0x4);  /* D3 /4 */
8058   ins_encode( OpcP, RegOpc( dst ) );
8059   ins_pipe( ialu_reg_reg );
8060 %}
8061 
8062 // Arithmetic shift right by one
8063 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8064   match(Set dst (RShiftI dst shift));
8065   effect(KILL cr);
8066 
8067   size(2);
8068   format %{ "SAR    $dst,$shift" %}
8069   opcode(0xD1, 0x7);  /* D1 /7 */
8070   ins_encode( OpcP, RegOpc( dst ) );
8071   ins_pipe( ialu_reg );
8072 %}
8073 
8074 // Arithmetic shift right by one
8075 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8076   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8077   effect(KILL cr);
8078   format %{ "SAR    $dst,$shift" %}
8079   opcode(0xD1, 0x7);  /* D1 /7 */
8080   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8081   ins_pipe( ialu_mem_imm );
8082 %}
8083 
8084 // Arithmetic Shift Right by 8-bit immediate
8085 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8086   match(Set dst (RShiftI dst shift));
8087   effect(KILL cr);
8088 
8089   size(3);
8090   format %{ "SAR    $dst,$shift" %}
8091   opcode(0xC1, 0x7);  /* C1 /7 ib */
8092   ins_encode( RegOpcImm( dst, shift ) );
8093   ins_pipe( ialu_mem_imm );
8094 %}
8095 
8096 // Arithmetic Shift Right by 8-bit immediate
8097 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8098   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8099   effect(KILL cr);
8100 
8101   format %{ "SAR    $dst,$shift" %}
8102   opcode(0xC1, 0x7);  /* C1 /7 ib */
8103   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8104   ins_pipe( ialu_mem_imm );
8105 %}
8106 
8107 // Arithmetic Shift Right by variable
8108 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8109   match(Set dst (RShiftI dst shift));
8110   effect(KILL cr);
8111 
8112   size(2);
8113   format %{ "SAR    $dst,$shift" %}
8114   opcode(0xD3, 0x7);  /* D3 /7 */
8115   ins_encode( OpcP, RegOpc( dst ) );
8116   ins_pipe( ialu_reg_reg );
8117 %}
8118 
8119 // Logical shift right by one
8120 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8121   match(Set dst (URShiftI dst shift));
8122   effect(KILL cr);
8123 
8124   size(2);
8125   format %{ "SHR    $dst,$shift" %}
8126   opcode(0xD1, 0x5);  /* D1 /5 */
8127   ins_encode( OpcP, RegOpc( dst ) );
8128   ins_pipe( ialu_reg );
8129 %}
8130 
8131 // Logical Shift Right by 8-bit immediate
8132 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8133   match(Set dst (URShiftI dst shift));
8134   effect(KILL cr);
8135 
8136   size(3);
8137   format %{ "SHR    $dst,$shift" %}
8138   opcode(0xC1, 0x5);  /* C1 /5 ib */
8139   ins_encode( RegOpcImm( dst, shift) );
8140   ins_pipe( ialu_reg );
8141 %}
8142 
8143 
8144 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8145 // This idiom is used by the compiler for the i2b bytecode.
8146 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8147   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8148 
8149   size(3);
8150   format %{ "MOVSX  $dst,$src :8" %}
8151   ins_encode %{
8152     __ movsbl($dst$$Register, $src$$Register);
8153   %}
8154   ins_pipe(ialu_reg_reg);
8155 %}
8156 
8157 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8158 // This idiom is used by the compiler for the i2s bytecode.
8159 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8160   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8161 
8162   size(3);
8163   format %{ "MOVSX  $dst,$src :16" %}
8164   ins_encode %{
8165     __ movswl($dst$$Register, $src$$Register);
8166   %}
8167   ins_pipe(ialu_reg_reg);
8168 %}
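
// Note: a minimal C sketch (illustration only) of why the two shift pairs
// above collapse into a single sign-extending move: shifting left and then
// arithmetically right by 24 (or 16) keeps only the low byte (or short) and
// replicates its sign bit, which is exactly what MOVSX does.
//
//   #include <stdint.h>
//   static int32_t i2b(int32_t x) { return (int8_t)x;  }  // matcher sees (x << 24) >> 24
//   static int32_t i2s(int32_t x) { return (int16_t)x; }  // matcher sees (x << 16) >> 16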
8169 
8170 
8171 // Logical Shift Right by variable
8172 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8173   match(Set dst (URShiftI dst shift));
8174   effect(KILL cr);
8175 
8176   size(2);
8177   format %{ "SHR    $dst,$shift" %}
8178   opcode(0xD3, 0x5);  /* D3 /5 */
8179   ins_encode( OpcP, RegOpc( dst ) );
8180   ins_pipe( ialu_reg_reg );
8181 %}
8182 
8183 
8184 //----------Logical Instructions-----------------------------------------------
8185 //----------Integer Logical Instructions---------------------------------------
8186 // And Instructions
8187 // And Register with Register
8188 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8189   match(Set dst (AndI dst src));
8190   effect(KILL cr);
8191 
8192   size(2);
8193   format %{ "AND    $dst,$src" %}
8194   opcode(0x23);
8195   ins_encode( OpcP, RegReg( dst, src) );
8196   ins_pipe( ialu_reg_reg );
8197 %}
8198 
8199 // And Register with Immediate
8200 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8201   match(Set dst (AndI dst src));
8202   effect(KILL cr);
8203 
8204   format %{ "AND    $dst,$src" %}
8205   opcode(0x81,0x04);  /* Opcode 81 /4 */
8206   // ins_encode( RegImm( dst, src) );
8207   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8208   ins_pipe( ialu_reg );
8209 %}
8210 
8211 // And Register with Memory
8212 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8213   match(Set dst (AndI dst (LoadI src)));
8214   effect(KILL cr);
8215 
8216   ins_cost(125);
8217   format %{ "AND    $dst,$src" %}
8218   opcode(0x23);
8219   ins_encode( OpcP, RegMem( dst, src) );
8220   ins_pipe( ialu_reg_mem );
8221 %}
8222 
8223 // And Memory with Register
8224 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8226   effect(KILL cr);
8227 
8228   ins_cost(150);
8229   format %{ "AND    $dst,$src" %}
8230   opcode(0x21);  /* Opcode 21 /r */
8231   ins_encode( OpcP, RegMem( src, dst ) );
8232   ins_pipe( ialu_mem_reg );
8233 %}
8234 
8235 // And Memory with Immediate
8236 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8237   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8238   effect(KILL cr);
8239 
8240   ins_cost(125);
8241   format %{ "AND    $dst,$src" %}
8242   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8243   // ins_encode( MemImm( dst, src) );
8244   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8245   ins_pipe( ialu_mem_imm );
8246 %}
8247 
8248 // BMI1 instructions
8249 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8250   match(Set dst (AndI (XorI src1 minus_1) src2));
8251   predicate(UseBMI1Instructions);
8252   effect(KILL cr);
8253 
8254   format %{ "ANDNL  $dst, $src1, $src2" %}
8255 
8256   ins_encode %{
8257     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8258   %}
8259   ins_pipe(ialu_reg);
8260 %}
8261 
8262 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8263   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8264   predicate(UseBMI1Instructions);
8265   effect(KILL cr);
8266 
8267   ins_cost(125);
8268   format %{ "ANDNL  $dst, $src1, $src2" %}
8269 
8270   ins_encode %{
8271     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8272   %}
8273   ins_pipe(ialu_reg_mem);
8274 %}
8275 
8276 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8277   match(Set dst (AndI (SubI imm_zero src) src));
8278   predicate(UseBMI1Instructions);
8279   effect(KILL cr);
8280 
8281   format %{ "BLSIL  $dst, $src" %}
8282 
8283   ins_encode %{
8284     __ blsil($dst$$Register, $src$$Register);
8285   %}
8286   ins_pipe(ialu_reg);
8287 %}
8288 
8289 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8290   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8291   predicate(UseBMI1Instructions);
8292   effect(KILL cr);
8293 
8294   ins_cost(125);
8295   format %{ "BLSIL  $dst, $src" %}
8296 
8297   ins_encode %{
8298     __ blsil($dst$$Register, $src$$Address);
8299   %}
8300   ins_pipe(ialu_reg_mem);
8301 %}
8302 
8303 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8304 %{
8305   match(Set dst (XorI (AddI src minus_1) src));
8306   predicate(UseBMI1Instructions);
8307   effect(KILL cr);
8308 
8309   format %{ "BLSMSKL $dst, $src" %}
8310 
8311   ins_encode %{
8312     __ blsmskl($dst$$Register, $src$$Register);
8313   %}
8314 
8315   ins_pipe(ialu_reg);
8316 %}
8317 
8318 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8319 %{
8320   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8321   predicate(UseBMI1Instructions);
8322   effect(KILL cr);
8323 
8324   ins_cost(125);
8325   format %{ "BLSMSKL $dst, $src" %}
8326 
8327   ins_encode %{
8328     __ blsmskl($dst$$Register, $src$$Address);
8329   %}
8330 
8331   ins_pipe(ialu_reg_mem);
8332 %}
8333 
8334 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8335 %{
8336   match(Set dst (AndI (AddI src minus_1) src) );
8337   predicate(UseBMI1Instructions);
8338   effect(KILL cr);
8339 
8340   format %{ "BLSRL  $dst, $src" %}
8341 
8342   ins_encode %{
8343     __ blsrl($dst$$Register, $src$$Register);
8344   %}
8345 
8346   ins_pipe(ialu_reg);
8347 %}
8348 
8349 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8350 %{
8351   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8352   predicate(UseBMI1Instructions);
8353   effect(KILL cr);
8354 
8355   ins_cost(125);
8356   format %{ "BLSRL  $dst, $src" %}
8357 
8358   ins_encode %{
8359     __ blsrl($dst$$Register, $src$$Address);
8360   %}
8361 
8362   ins_pipe(ialu_reg_mem);
8363 %}
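
// Note: a minimal C sketch (illustration only) of the bit-manipulation
// identities the BMI1 rules above recognize; each right-hand side is the
// ideal-graph shape that is matched, and each instruction computes it in one
// step without a separate NOT/NEG/DEC.
//
//   #include <stdint.h>
//   static uint32_t andn_  (uint32_t x, uint32_t y) { return ~x & y;      }  // ANDN
//   static uint32_t blsi_  (uint32_t x)             { return x & (0 - x); }  // isolate lowest set bit
//   static uint32_t blsmsk_(uint32_t x)             { return x ^ (x - 1); }  // mask up to lowest set bit
//   static uint32_t blsr_  (uint32_t x)             { return x & (x - 1); }  // clear lowest set bit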
8364 
8365 // Or Instructions
8366 // Or Register with Register
8367 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8368   match(Set dst (OrI dst src));
8369   effect(KILL cr);
8370 
8371   size(2);
8372   format %{ "OR     $dst,$src" %}
8373   opcode(0x0B);
8374   ins_encode( OpcP, RegReg( dst, src) );
8375   ins_pipe( ialu_reg_reg );
8376 %}
8377 
8378 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8379   match(Set dst (OrI dst (CastP2X src)));
8380   effect(KILL cr);
8381 
8382   size(2);
8383   format %{ "OR     $dst,$src" %}
8384   opcode(0x0B);
8385   ins_encode( OpcP, RegReg( dst, src) );
8386   ins_pipe( ialu_reg_reg );
8387 %}
8388 
8389 
8390 // Or Register with Immediate
8391 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8392   match(Set dst (OrI dst src));
8393   effect(KILL cr);
8394 
8395   format %{ "OR     $dst,$src" %}
8396   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8397   // ins_encode( RegImm( dst, src) );
8398   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8399   ins_pipe( ialu_reg );
8400 %}
8401 
8402 // Or Register with Memory
8403 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8404   match(Set dst (OrI dst (LoadI src)));
8405   effect(KILL cr);
8406 
8407   ins_cost(125);
8408   format %{ "OR     $dst,$src" %}
8409   opcode(0x0B);
8410   ins_encode( OpcP, RegMem( dst, src) );
8411   ins_pipe( ialu_reg_mem );
8412 %}
8413 
8414 // Or Memory with Register
8415 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8416   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8417   effect(KILL cr);
8418 
8419   ins_cost(150);
8420   format %{ "OR     $dst,$src" %}
8421   opcode(0x09);  /* Opcode 09 /r */
8422   ins_encode( OpcP, RegMem( src, dst ) );
8423   ins_pipe( ialu_mem_reg );
8424 %}
8425 
8426 // Or Memory with Immediate
8427 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8428   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8429   effect(KILL cr);
8430 
8431   ins_cost(125);
8432   format %{ "OR     $dst,$src" %}
8433   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8434   // ins_encode( MemImm( dst, src) );
8435   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8436   ins_pipe( ialu_mem_imm );
8437 %}
8438 
8439 // ROL/ROR
8440 // ROL expand
8441 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8442   effect(USE_DEF dst, USE shift, KILL cr);
8443 
8444   format %{ "ROL    $dst, $shift" %}
8445   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8446   ins_encode( OpcP, RegOpc( dst ));
8447   ins_pipe( ialu_reg );
8448 %}
8449 
8450 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8451   effect(USE_DEF dst, USE shift, KILL cr);
8452 
8453   format %{ "ROL    $dst, $shift" %}
8454   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8455   ins_encode( RegOpcImm(dst, shift) );
8456   ins_pipe(ialu_reg);
8457 %}
8458 
8459 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8460   effect(USE_DEF dst, USE shift, KILL cr);
8461 
8462   format %{ "ROL    $dst, $shift" %}
8463   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8464   ins_encode(OpcP, RegOpc(dst));
8465   ins_pipe( ialu_reg_reg );
8466 %}
8467 // end of ROL expand
8468 
8469 // ROL 32bit by one once
8470 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8471   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8472 
8473   expand %{
8474     rolI_eReg_imm1(dst, lshift, cr);
8475   %}
8476 %}
8477 
8478 // ROL 32bit var by imm8 once
8479 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8480   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8481   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8482 
8483   expand %{
8484     rolI_eReg_imm8(dst, lshift, cr);
8485   %}
8486 %}
8487 
8488 // ROL 32bit var by var once
8489 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8490   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8491 
8492   expand %{
8493     rolI_eReg_CL(dst, shift, cr);
8494   %}
8495 %}
8496 
8497 // ROL 32bit var by var once
8498 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8499   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8500 
8501   expand %{
8502     rolI_eReg_CL(dst, shift, cr);
8503   %}
8504 %}
8505 
8506 // ROR expand
8507 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8508   effect(USE_DEF dst, USE shift, KILL cr);
8509 
8510   format %{ "ROR    $dst, $shift" %}
8511   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8512   ins_encode( OpcP, RegOpc( dst ) );
8513   ins_pipe( ialu_reg );
8514 %}
8515 
8516 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8517   effect (USE_DEF dst, USE shift, KILL cr);
8518 
8519   format %{ "ROR    $dst, $shift" %}
8520   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8521   ins_encode( RegOpcImm(dst, shift) );
8522   ins_pipe( ialu_reg );
8523 %}
8524 
8525 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8526   effect(USE_DEF dst, USE shift, KILL cr);
8527 
8528   format %{ "ROR    $dst, $shift" %}
8529   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8530   ins_encode(OpcP, RegOpc(dst));
8531   ins_pipe( ialu_reg_reg );
8532 %}
8533 // end of ROR expand
8534 
8535 // ROR right once
8536 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8537   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8538 
8539   expand %{
8540     rorI_eReg_imm1(dst, rshift, cr);
8541   %}
8542 %}
8543 
8544 // ROR 32bit by immI8 once
8545 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8546   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8547   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8548 
8549   expand %{
8550     rorI_eReg_imm8(dst, rshift, cr);
8551   %}
8552 %}
8553 
8554 // ROR 32bit var by var once
8555 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8556   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8557 
8558   expand %{
8559     rorI_eReg_CL(dst, shift, cr);
8560   %}
8561 %}
8562 
8563 // ROR 32bit var by var once
8564 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8565   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8566 
8567   expand %{
8568     rorI_eReg_CL(dst, shift, cr);
8569   %}
8570 %}
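
// Note: a minimal C sketch (illustration only) of the rotate idiom the ROL/ROR
// match rules above recognize: an OR of a left shift and a logical right shift
// whose counts sum to 32 (or whose variable counts are 32-complements) is a
// rotate, so the whole expression maps onto one ROL or ROR.
//
//   #include <stdint.h>
//   static uint32_t rotl32(uint32_t x, unsigned s) {   // s in [1,31]
//     return (x << s) | (x >> (32 - s));               // -> ROL x, s
//   }
//   static uint32_t rotr32(uint32_t x, unsigned s) {   // s in [1,31]
//     return (x >> s) | (x << (32 - s));               // -> ROR x, s
//   }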
8571 
8572 // Xor Instructions
8573 // Xor Register with Register
8574 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8575   match(Set dst (XorI dst src));
8576   effect(KILL cr);
8577 
8578   size(2);
8579   format %{ "XOR    $dst,$src" %}
8580   opcode(0x33);
8581   ins_encode( OpcP, RegReg( dst, src) );
8582   ins_pipe( ialu_reg_reg );
8583 %}
8584 
8585 // Xor Register with Immediate -1
8586 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8587   match(Set dst (XorI dst imm));
8588 
8589   size(2);
8590   format %{ "NOT    $dst" %}
8591   ins_encode %{
8592      __ notl($dst$$Register);
8593   %}
8594   ins_pipe( ialu_reg );
8595 %}
8596 
8597 // Xor Register with Immediate
8598 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8599   match(Set dst (XorI dst src));
8600   effect(KILL cr);
8601 
8602   format %{ "XOR    $dst,$src" %}
8603   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8604   // ins_encode( RegImm( dst, src) );
8605   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8606   ins_pipe( ialu_reg );
8607 %}
8608 
8609 // Xor Register with Memory
8610 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8611   match(Set dst (XorI dst (LoadI src)));
8612   effect(KILL cr);
8613 
8614   ins_cost(125);
8615   format %{ "XOR    $dst,$src" %}
8616   opcode(0x33);
8617   ins_encode( OpcP, RegMem(dst, src) );
8618   ins_pipe( ialu_reg_mem );
8619 %}
8620 
8621 // Xor Memory with Register
8622 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8623   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8624   effect(KILL cr);
8625 
8626   ins_cost(150);
8627   format %{ "XOR    $dst,$src" %}
8628   opcode(0x31);  /* Opcode 31 /r */
8629   ins_encode( OpcP, RegMem( src, dst ) );
8630   ins_pipe( ialu_mem_reg );
8631 %}
8632 
8633 // Xor Memory with Immediate
8634 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8635   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8636   effect(KILL cr);
8637 
8638   ins_cost(125);
8639   format %{ "XOR    $dst,$src" %}
8640   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8641   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8642   ins_pipe( ialu_mem_imm );
8643 %}
8644 
8645 //----------Convert Int to Boolean---------------------------------------------
8646 
8647 instruct movI_nocopy(rRegI dst, rRegI src) %{
8648   effect( DEF dst, USE src );
8649   format %{ "MOV    $dst,$src" %}
8650   ins_encode( enc_Copy( dst, src) );
8651   ins_pipe( ialu_reg_reg );
8652 %}
8653 
8654 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8655   effect( USE_DEF dst, USE src, KILL cr );
8656 
8657   size(4);
8658   format %{ "NEG    $dst\n\t"
8659             "ADC    $dst,$src" %}
8660   ins_encode( neg_reg(dst),
8661               OpcRegReg(0x13,dst,src) );
8662   ins_pipe( ialu_reg_reg_long );
8663 %}
8664 
8665 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8666   match(Set dst (Conv2B src));
8667 
8668   expand %{
8669     movI_nocopy(dst,src);
8670     ci2b(dst,src,cr);
8671   %}
8672 %}
8673 
8674 instruct movP_nocopy(rRegI dst, eRegP src) %{
8675   effect( DEF dst, USE src );
8676   format %{ "MOV    $dst,$src" %}
8677   ins_encode( enc_Copy( dst, src) );
8678   ins_pipe( ialu_reg_reg );
8679 %}
8680 
8681 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8682   effect( USE_DEF dst, USE src, KILL cr );
8683   format %{ "NEG    $dst\n\t"
8684             "ADC    $dst,$src" %}
8685   ins_encode( neg_reg(dst),
8686               OpcRegReg(0x13,dst,src) );
8687   ins_pipe( ialu_reg_reg_long );
8688 %}
8689 
8690 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8691   match(Set dst (Conv2B src));
8692 
8693   expand %{
8694     movP_nocopy(dst,src);
8695     cp2b(dst,src,cr);
8696   %}
8697 %}
8698 
8699 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8700   match(Set dst (CmpLTMask p q));
8701   effect(KILL cr);
8702   ins_cost(400);
8703 
8704   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8705   format %{ "XOR    $dst,$dst\n\t"
8706             "CMP    $p,$q\n\t"
8707             "SETlt  $dst\n\t"
8708             "NEG    $dst" %}
8709   ins_encode %{
8710     Register Rp = $p$$Register;
8711     Register Rq = $q$$Register;
8712     Register Rd = $dst$$Register;
8713     Label done;
8714     __ xorl(Rd, Rd);
8715     __ cmpl(Rp, Rq);
8716     __ setb(Assembler::less, Rd);
8717     __ negl(Rd);
8718   %}
8719 
8720   ins_pipe(pipe_slow);
8721 %}
8722 
8723 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8724   match(Set dst (CmpLTMask dst zero));
8725   effect(DEF dst, KILL cr);
8726   ins_cost(100);
8727 
8728   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8729   ins_encode %{
8730   __ sarl($dst$$Register, 31);
8731   %}
8732   ins_pipe(ialu_reg);
8733 %}
8734 
8735 /* Better to save a register than to avoid a branch. */
8736 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8737   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8738   effect(KILL cr);
8739   ins_cost(400);
8740   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8741             "JGE    done\n\t"
8742             "ADD    $p,$y\n"
8743             "done:  " %}
8744   ins_encode %{
8745     Register Rp = $p$$Register;
8746     Register Rq = $q$$Register;
8747     Register Ry = $y$$Register;
8748     Label done;
8749     __ subl(Rp, Rq);
8750     __ jccb(Assembler::greaterEqual, done);
8751     __ addl(Rp, Ry);
8752     __ bind(done);
8753   %}
8754 
8755   ins_pipe(pipe_cmplt);
8756 %}
8757 
8758 /* Better to save a register than to avoid a branch. */
8759 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8760   match(Set y (AndI (CmpLTMask p q) y));
8761   effect(KILL cr);
8762 
8763   ins_cost(300);
8764 
8765   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8766             "JLT      done\n\t"
8767             "XORL     $y, $y\n"
8768             "done:  " %}
8769   ins_encode %{
8770     Register Rp = $p$$Register;
8771     Register Rq = $q$$Register;
8772     Register Ry = $y$$Register;
8773     Label done;
8774     __ cmpl(Rp, Rq);
8775     __ jccb(Assembler::less, done);
8776     __ xorl(Ry, Ry);
8777     __ bind(done);
8778   %}
8779 
8780   ins_pipe(pipe_cmplt);
8781 %}
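
// Note: a minimal C sketch (illustration only) of the CmpLTMask idioms above.
// CmpLTMask yields an all-ones mask when p < q (signed) and zero otherwise;
// the fused cadd/and forms avoid materializing the mask by branching on the
// comparison instead, trading the mask register for a short branch.
//
//   #include <stdint.h>
//   static int32_t cmpLTMask(int32_t p, int32_t q) { return -(int32_t)(p < q); }  // -1 or 0
//   // cadd_cmpLTMask:  ((p < q ? -1 : 0) & y) + (p - q)
//   static int32_t cadd(int32_t p, int32_t q, int32_t y) {
//     int32_t r = p - q;            // SUB p,q
//     if (p < q) r += y;            // JGE done / ADD p,y
//     return r;
//   }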
8782 
8783 /* If I enable this, I encourage spilling in the inner loop of compress.
8784 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8785   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8786 */
8787 //----------Overflow Math Instructions-----------------------------------------
8788 
8789 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8790 %{
8791   match(Set cr (OverflowAddI op1 op2));
8792   effect(DEF cr, USE_KILL op1, USE op2);
8793 
8794   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8795 
8796   ins_encode %{
8797     __ addl($op1$$Register, $op2$$Register);
8798   %}
8799   ins_pipe(ialu_reg_reg);
8800 %}
8801 
8802 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8803 %{
8804   match(Set cr (OverflowAddI op1 op2));
8805   effect(DEF cr, USE_KILL op1, USE op2);
8806 
8807   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8808 
8809   ins_encode %{
8810     __ addl($op1$$Register, $op2$$constant);
8811   %}
8812   ins_pipe(ialu_reg_reg);
8813 %}
8814 
8815 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8816 %{
8817   match(Set cr (OverflowSubI op1 op2));
8818 
8819   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8820   ins_encode %{
8821     __ cmpl($op1$$Register, $op2$$Register);
8822   %}
8823   ins_pipe(ialu_reg_reg);
8824 %}
8825 
8826 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8827 %{
8828   match(Set cr (OverflowSubI op1 op2));
8829 
8830   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8831   ins_encode %{
8832     __ cmpl($op1$$Register, $op2$$constant);
8833   %}
8834   ins_pipe(ialu_reg_reg);
8835 %}
8836 
8837 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8838 %{
8839   match(Set cr (OverflowSubI zero op2));
8840   effect(DEF cr, USE_KILL op2);
8841 
8842   format %{ "NEG    $op2\t# overflow check int" %}
8843   ins_encode %{
8844     __ negl($op2$$Register);
8845   %}
8846   ins_pipe(ialu_reg_reg);
8847 %}
8848 
8849 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8850 %{
8851   match(Set cr (OverflowMulI op1 op2));
8852   effect(DEF cr, USE_KILL op1, USE op2);
8853 
8854   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8855   ins_encode %{
8856     __ imull($op1$$Register, $op2$$Register);
8857   %}
8858   ins_pipe(ialu_reg_reg_alu0);
8859 %}
8860 
8861 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8862 %{
8863   match(Set cr (OverflowMulI op1 op2));
8864   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8865 
8866   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8867   ins_encode %{
8868     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8869   %}
8870   ins_pipe(ialu_reg_reg_alu0);
8871 %}
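
// Note: the Overflow* rules above only set EFLAGS (the matched ADD/CMP/NEG/IMUL
// leaves OF for the branch that consumes the flags); conceptually they are
// checked arithmetic.  A minimal C analogy (illustration only, GCC/Clang
// builtin, not anything C2 actually calls):
//
//   #include <stdbool.h>
//   static bool add_overflows(int a, int b) {
//     int r;
//     return __builtin_add_overflow(a, b, &r);   // ~ ADD op1,op2 ; JO slow_path
//   }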
8872 
8873 // Integer Absolute Instructions
8874 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8875 %{
8876   match(Set dst (AbsI src));
8877   effect(TEMP dst, TEMP tmp, KILL cr);
8878   format %{ "movl $tmp, $src\n\t"
8879             "sarl $tmp, 31\n\t"
8880             "movl $dst, $src\n\t"
8881             "xorl $dst, $tmp\n\t"
8882             "subl $dst, $tmp\n"
8883           %}
8884   ins_encode %{
8885     __ movl($tmp$$Register, $src$$Register);
8886     __ sarl($tmp$$Register, 31);
8887     __ movl($dst$$Register, $src$$Register);
8888     __ xorl($dst$$Register, $tmp$$Register);
8889     __ subl($dst$$Register, $tmp$$Register);
8890   %}
8891 
8892   ins_pipe(ialu_reg_reg);
8893 %}
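
// Note: a minimal C sketch (illustration only) of the branch-free absolute
// value emitted above: the sign is smeared across a temporary with an
// arithmetic shift, then used to conditionally complement and correct.  It
// mirrors the movl/sarl/xorl/subl sequence (and, like it, wraps for the most
// negative value).
//
//   #include <stdint.h>
//   static int32_t abs32(int32_t x) {
//     int32_t t = x >> 31;                        // 0 if x >= 0, -1 otherwise (SAR)
//     return (int32_t)(((uint32_t)x ^ t) - t);    // flip the bits and add 1 when negative
//   }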
8894 
8895 //----------Long Instructions------------------------------------------------
8896 // Add Long Register with Register
8897 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8898   match(Set dst (AddL dst src));
8899   effect(KILL cr);
8900   ins_cost(200);
8901   format %{ "ADD    $dst.lo,$src.lo\n\t"
8902             "ADC    $dst.hi,$src.hi" %}
8903   opcode(0x03, 0x13);
8904   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8905   ins_pipe( ialu_reg_reg_long );
8906 %}
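
// Note: a minimal C sketch (illustration only) of the ADD/ADC pairing used by
// the long add rules above (and, with SUB/SBB, by the subtract rules below):
// the low halves are combined first and the carry out is propagated into the
// high halves.
//
//   #include <stdint.h>
//   static void add64(uint32_t *dst_lo, uint32_t *dst_hi,
//                     uint32_t src_lo, uint32_t src_hi) {
//     uint32_t old_lo = *dst_lo;
//     *dst_lo += src_lo;                          // ADD dst.lo, src.lo
//     *dst_hi += src_hi + (*dst_lo < old_lo);     // ADC dst.hi, src.hi (carry = unsigned wrap)
//   }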
8907 
8908 // Add Long Register with Immediate
8909 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8910   match(Set dst (AddL dst src));
8911   effect(KILL cr);
8912   format %{ "ADD    $dst.lo,$src.lo\n\t"
8913             "ADC    $dst.hi,$src.hi" %}
8914   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8915   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8916   ins_pipe( ialu_reg_long );
8917 %}
8918 
8919 // Add Long Register with Memory
8920 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8921   match(Set dst (AddL dst (LoadL mem)));
8922   effect(KILL cr);
8923   ins_cost(125);
8924   format %{ "ADD    $dst.lo,$mem\n\t"
8925             "ADC    $dst.hi,$mem+4" %}
8926   opcode(0x03, 0x13);
8927   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8928   ins_pipe( ialu_reg_long_mem );
8929 %}
8930 
8931 // Subtract Long Register with Register.
8932 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8933   match(Set dst (SubL dst src));
8934   effect(KILL cr);
8935   ins_cost(200);
8936   format %{ "SUB    $dst.lo,$src.lo\n\t"
8937             "SBB    $dst.hi,$src.hi" %}
8938   opcode(0x2B, 0x1B);
8939   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8940   ins_pipe( ialu_reg_reg_long );
8941 %}
8942 
8943 // Subtract Long Register with Immediate
8944 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8945   match(Set dst (SubL dst src));
8946   effect(KILL cr);
8947   format %{ "SUB    $dst.lo,$src.lo\n\t"
8948             "SBB    $dst.hi,$src.hi" %}
8949   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8950   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8951   ins_pipe( ialu_reg_long );
8952 %}
8953 
8954 // Subtract Long Register with Memory
8955 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8956   match(Set dst (SubL dst (LoadL mem)));
8957   effect(KILL cr);
8958   ins_cost(125);
8959   format %{ "SUB    $dst.lo,$mem\n\t"
8960             "SBB    $dst.hi,$mem+4" %}
8961   opcode(0x2B, 0x1B);
8962   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8963   ins_pipe( ialu_reg_long_mem );
8964 %}
8965 
8966 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8967   match(Set dst (SubL zero dst));
8968   effect(KILL cr);
8969   ins_cost(300);
8970   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8971   ins_encode( neg_long(dst) );
8972   ins_pipe( ialu_reg_reg_long );
8973 %}
8974 
8975 // And Long Register with Register
8976 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8977   match(Set dst (AndL dst src));
8978   effect(KILL cr);
8979   format %{ "AND    $dst.lo,$src.lo\n\t"
8980             "AND    $dst.hi,$src.hi" %}
8981   opcode(0x23,0x23);
8982   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8983   ins_pipe( ialu_reg_reg_long );
8984 %}
8985 
8986 // And Long Register with Immediate
8987 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8988   match(Set dst (AndL dst src));
8989   effect(KILL cr);
8990   format %{ "AND    $dst.lo,$src.lo\n\t"
8991             "AND    $dst.hi,$src.hi" %}
8992   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8993   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8994   ins_pipe( ialu_reg_long );
8995 %}
8996 
8997 // And Long Register with Memory
8998 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8999   match(Set dst (AndL dst (LoadL mem)));
9000   effect(KILL cr);
9001   ins_cost(125);
9002   format %{ "AND    $dst.lo,$mem\n\t"
9003             "AND    $dst.hi,$mem+4" %}
9004   opcode(0x23, 0x23);
9005   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9006   ins_pipe( ialu_reg_long_mem );
9007 %}
9008 
9009 // BMI1 instructions
9010 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9011   match(Set dst (AndL (XorL src1 minus_1) src2));
9012   predicate(UseBMI1Instructions);
9013   effect(KILL cr, TEMP dst);
9014 
9015   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9016             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9017          %}
9018 
9019   ins_encode %{
9020     Register Rdst = $dst$$Register;
9021     Register Rsrc1 = $src1$$Register;
9022     Register Rsrc2 = $src2$$Register;
9023     __ andnl(Rdst, Rsrc1, Rsrc2);
9024     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9025   %}
9026   ins_pipe(ialu_reg_reg_long);
9027 %}
9028 
9029 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9030   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9031   predicate(UseBMI1Instructions);
9032   effect(KILL cr, TEMP dst);
9033 
9034   ins_cost(125);
9035   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9036             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9037          %}
9038 
9039   ins_encode %{
9040     Register Rdst = $dst$$Register;
9041     Register Rsrc1 = $src1$$Register;
9042     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9043 
9044     __ andnl(Rdst, Rsrc1, $src2$$Address);
9045     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9046   %}
9047   ins_pipe(ialu_reg_mem);
9048 %}
9049 
9050 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9051   match(Set dst (AndL (SubL imm_zero src) src));
9052   predicate(UseBMI1Instructions);
9053   effect(KILL cr, TEMP dst);
9054 
9055   format %{ "MOVL   $dst.hi, 0\n\t"
9056             "BLSIL  $dst.lo, $src.lo\n\t"
9057             "JNZ    done\n\t"
9058             "BLSIL  $dst.hi, $src.hi\n"
9059             "done:"
9060          %}
9061 
9062   ins_encode %{
9063     Label done;
9064     Register Rdst = $dst$$Register;
9065     Register Rsrc = $src$$Register;
9066     __ movl(HIGH_FROM_LOW(Rdst), 0);
9067     __ blsil(Rdst, Rsrc);
9068     __ jccb(Assembler::notZero, done);
9069     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9070     __ bind(done);
9071   %}
9072   ins_pipe(ialu_reg);
9073 %}
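
// Note: a minimal C sketch (illustration only) of how the rule above builds
// the lowest-set-bit of a 64-bit value from two 32-bit BLSIs: if the low half
// has any bit set, the answer lives entirely in the low half and the high half
// stays zero; otherwise the low half of the result is zero and the high half
// is BLSI of src.hi.  The JNZ uses the ZF that BLSIL sets when its result
// (equivalently, its source) is zero.
//
//   #include <stdint.h>
//   static void blsi64(uint32_t src_lo, uint32_t src_hi,
//                      uint32_t *dst_lo, uint32_t *dst_hi) {
//     *dst_hi = 0;                                // MOVL  dst.hi, 0
//     *dst_lo = src_lo & (0 - src_lo);            // BLSIL dst.lo, src.lo
//     if (*dst_lo == 0)                           // JNZ done
//       *dst_hi = src_hi & (0 - src_hi);          // BLSIL dst.hi, src.hi
//   }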
9074 
9075 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9076   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9077   predicate(UseBMI1Instructions);
9078   effect(KILL cr, TEMP dst);
9079 
9080   ins_cost(125);
9081   format %{ "MOVL   $dst.hi, 0\n\t"
9082             "BLSIL  $dst.lo, $src\n\t"
9083             "JNZ    done\n\t"
9084             "BLSIL  $dst.hi, $src+4\n"
9085             "done:"
9086          %}
9087 
9088   ins_encode %{
9089     Label done;
9090     Register Rdst = $dst$$Register;
9091     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9092 
9093     __ movl(HIGH_FROM_LOW(Rdst), 0);
9094     __ blsil(Rdst, $src$$Address);
9095     __ jccb(Assembler::notZero, done);
9096     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9097     __ bind(done);
9098   %}
9099   ins_pipe(ialu_reg_mem);
9100 %}
9101 
9102 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9103 %{
9104   match(Set dst (XorL (AddL src minus_1) src));
9105   predicate(UseBMI1Instructions);
9106   effect(KILL cr, TEMP dst);
9107 
9108   format %{ "MOVL    $dst.hi, 0\n\t"
9109             "BLSMSKL $dst.lo, $src.lo\n\t"
9110             "JNC     done\n\t"
9111             "BLSMSKL $dst.hi, $src.hi\n"
9112             "done:"
9113          %}
9114 
9115   ins_encode %{
9116     Label done;
9117     Register Rdst = $dst$$Register;
9118     Register Rsrc = $src$$Register;
9119     __ movl(HIGH_FROM_LOW(Rdst), 0);
9120     __ blsmskl(Rdst, Rsrc);
9121     __ jccb(Assembler::carryClear, done);
9122     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9123     __ bind(done);
9124   %}
9125 
9126   ins_pipe(ialu_reg);
9127 %}
9128 
9129 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9130 %{
9131   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9132   predicate(UseBMI1Instructions);
9133   effect(KILL cr, TEMP dst);
9134 
9135   ins_cost(125);
9136   format %{ "MOVL    $dst.hi, 0\n\t"
9137             "BLSMSKL $dst.lo, $src\n\t"
9138             "JNC     done\n\t"
9139             "BLSMSKL $dst.hi, $src+4\n"
9140             "done:"
9141          %}
9142 
9143   ins_encode %{
9144     Label done;
9145     Register Rdst = $dst$$Register;
9146     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9147 
9148     __ movl(HIGH_FROM_LOW(Rdst), 0);
9149     __ blsmskl(Rdst, $src$$Address);
9150     __ jccb(Assembler::carryClear, done);
9151     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9152     __ bind(done);
9153   %}
9154 
9155   ins_pipe(ialu_reg_mem);
9156 %}
9157 
9158 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9159 %{
9160   match(Set dst (AndL (AddL src minus_1) src) );
9161   predicate(UseBMI1Instructions);
9162   effect(KILL cr, TEMP dst);
9163 
9164   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9165             "BLSRL  $dst.lo, $src.lo\n\t"
9166             "JNC    done\n\t"
9167             "BLSRL  $dst.hi, $src.hi\n"
9168             "done:"
9169   %}
9170 
9171   ins_encode %{
9172     Label done;
9173     Register Rdst = $dst$$Register;
9174     Register Rsrc = $src$$Register;
9175     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9176     __ blsrl(Rdst, Rsrc);
9177     __ jccb(Assembler::carryClear, done);
9178     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9179     __ bind(done);
9180   %}
9181 
9182   ins_pipe(ialu_reg);
9183 %}
9184 
9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9186 %{
9187   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9188   predicate(UseBMI1Instructions);
9189   effect(KILL cr, TEMP dst);
9190 
9191   ins_cost(125);
9192   format %{ "MOVL   $dst.hi, $src+4\n\t"
9193             "BLSRL  $dst.lo, $src\n\t"
9194             "JNC    done\n\t"
9195             "BLSRL  $dst.hi, $src+4\n"
9196             "done:"
9197   %}
9198 
9199   ins_encode %{
9200     Label done;
9201     Register Rdst = $dst$$Register;
9202     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9203     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9204     __ blsrl(Rdst, $src$$Address);
9205     __ jccb(Assembler::carryClear, done);
9206     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9207     __ bind(done);
9208   %}
9209 
9210   ins_pipe(ialu_reg_mem);
9211 %}
9212 
9213 // Or Long Register with Register
9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9215   match(Set dst (OrL dst src));
9216   effect(KILL cr);
9217   format %{ "OR     $dst.lo,$src.lo\n\t"
9218             "OR     $dst.hi,$src.hi" %}
9219   opcode(0x0B,0x0B);
9220   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9221   ins_pipe( ialu_reg_reg_long );
9222 %}
9223 
9224 // Or Long Register with Immediate
9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9226   match(Set dst (OrL dst src));
9227   effect(KILL cr);
9228   format %{ "OR     $dst.lo,$src.lo\n\t"
9229             "OR     $dst.hi,$src.hi" %}
9230   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9231   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9232   ins_pipe( ialu_reg_long );
9233 %}
9234 
9235 // Or Long Register with Memory
9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9237   match(Set dst (OrL dst (LoadL mem)));
9238   effect(KILL cr);
9239   ins_cost(125);
9240   format %{ "OR     $dst.lo,$mem\n\t"
9241             "OR     $dst.hi,$mem+4" %}
9242   opcode(0x0B,0x0B);
9243   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9244   ins_pipe( ialu_reg_long_mem );
9245 %}
9246 
9247 // Xor Long Register with Register
9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9249   match(Set dst (XorL dst src));
9250   effect(KILL cr);
9251   format %{ "XOR    $dst.lo,$src.lo\n\t"
9252             "XOR    $dst.hi,$src.hi" %}
9253   opcode(0x33,0x33);
9254   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9255   ins_pipe( ialu_reg_reg_long );
9256 %}
9257 
9258 // Xor Long Register with Immediate -1
9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9260   match(Set dst (XorL dst imm));
9261   format %{ "NOT    $dst.lo\n\t"
9262             "NOT    $dst.hi" %}
9263   ins_encode %{
9264      __ notl($dst$$Register);
9265      __ notl(HIGH_FROM_LOW($dst$$Register));
9266   %}
9267   ins_pipe( ialu_reg_long );
9268 %}
9269 
9270 // Xor Long Register with Immediate
9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9272   match(Set dst (XorL dst src));
9273   effect(KILL cr);
9274   format %{ "XOR    $dst.lo,$src.lo\n\t"
9275             "XOR    $dst.hi,$src.hi" %}
9276   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9277   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9278   ins_pipe( ialu_reg_long );
9279 %}
9280 
9281 // Xor Long Register with Memory
9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9283   match(Set dst (XorL dst (LoadL mem)));
9284   effect(KILL cr);
9285   ins_cost(125);
9286   format %{ "XOR    $dst.lo,$mem\n\t"
9287             "XOR    $dst.hi,$mem+4" %}
9288   opcode(0x33,0x33);
9289   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9290   ins_pipe( ialu_reg_long_mem );
9291 %}
9292 
9293 // Shift Left Long by 1
9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9295   predicate(UseNewLongLShift);
9296   match(Set dst (LShiftL dst cnt));
9297   effect(KILL cr);
9298   ins_cost(100);
9299   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9300             "ADC    $dst.hi,$dst.hi" %}
9301   ins_encode %{
9302     __ addl($dst$$Register,$dst$$Register);
9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9304   %}
9305   ins_pipe( ialu_reg_long );
9306 %}
9307 
9308 // Shift Left Long by 2
9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9310   predicate(UseNewLongLShift);
9311   match(Set dst (LShiftL dst cnt));
9312   effect(KILL cr);
9313   ins_cost(100);
9314   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9315             "ADC    $dst.hi,$dst.hi\n\t"
9316             "ADD    $dst.lo,$dst.lo\n\t"
9317             "ADC    $dst.hi,$dst.hi" %}
9318   ins_encode %{
9319     __ addl($dst$$Register,$dst$$Register);
9320     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9321     __ addl($dst$$Register,$dst$$Register);
9322     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9323   %}
9324   ins_pipe( ialu_reg_long );
9325 %}
9326 
9327 // Shift Left Long by 3
9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9329   predicate(UseNewLongLShift);
9330   match(Set dst (LShiftL dst cnt));
9331   effect(KILL cr);
9332   ins_cost(100);
9333   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9334             "ADC    $dst.hi,$dst.hi\n\t"
9335             "ADD    $dst.lo,$dst.lo\n\t"
9336             "ADC    $dst.hi,$dst.hi\n\t"
9337             "ADD    $dst.lo,$dst.lo\n\t"
9338             "ADC    $dst.hi,$dst.hi" %}
9339   ins_encode %{
9340     __ addl($dst$$Register,$dst$$Register);
9341     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9342     __ addl($dst$$Register,$dst$$Register);
9343     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9344     __ addl($dst$$Register,$dst$$Register);
9345     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9346   %}
9347   ins_pipe( ialu_reg_long );
9348 %}
9349 
9350 // Shift Left Long by 1-31
9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9352   match(Set dst (LShiftL dst cnt));
9353   effect(KILL cr);
9354   ins_cost(200);
9355   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9356             "SHL    $dst.lo,$cnt" %}
9357   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9358   ins_encode( move_long_small_shift(dst,cnt) );
9359   ins_pipe( ialu_reg_long );
9360 %}
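
// Note: a minimal C sketch (illustration only) of the SHLD/SHL pairing above
// for constant shift counts 1..31: the high word receives the bits shifted out
// of the low word, then the low word is shifted on its own.  (The variable-
// count rule below additionally tests bit 5 of ECX and moves lo into hi first
// when the count is 32 or more.)
//
//   #include <stdint.h>
//   static void shl64_small(uint32_t *lo, uint32_t *hi, unsigned n) {  // 1 <= n <= 31
//     *hi = (*hi << n) | (*lo >> (32 - n));       // SHLD dst.hi, dst.lo, n
//     *lo <<= n;                                  // SHL  dst.lo, n
//   }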
9361 
9362 // Shift Left Long by 32-63
9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9364   match(Set dst (LShiftL dst cnt));
9365   effect(KILL cr);
9366   ins_cost(300);
9367   format %{ "MOV    $dst.hi,$dst.lo\n"
9368           "\tSHL    $dst.hi,$cnt-32\n"
9369           "\tXOR    $dst.lo,$dst.lo" %}
9370   opcode(0xC1, 0x4);  /* C1 /4 ib */
9371   ins_encode( move_long_big_shift_clr(dst,cnt) );
9372   ins_pipe( ialu_reg_long );
9373 %}
9374 
9375 // Shift Left Long by variable
9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9377   match(Set dst (LShiftL dst shift));
9378   effect(KILL cr);
9379   ins_cost(500+200);
9380   size(17);
9381   format %{ "TEST   $shift,32\n\t"
9382             "JEQ,s  small\n\t"
9383             "MOV    $dst.hi,$dst.lo\n\t"
9384             "XOR    $dst.lo,$dst.lo\n"
9385     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9386             "SHL    $dst.lo,$shift" %}
9387   ins_encode( shift_left_long( dst, shift ) );
9388   ins_pipe( pipe_slow );
9389 %}
9390 
9391 // Shift Right Long by 1-31
9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9393   match(Set dst (URShiftL dst cnt));
9394   effect(KILL cr);
9395   ins_cost(200);
9396   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9397             "SHR    $dst.hi,$cnt" %}
9398   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9399   ins_encode( move_long_small_shift(dst,cnt) );
9400   ins_pipe( ialu_reg_long );
9401 %}
9402 
9403 // Shift Right Long by 32-63
9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9405   match(Set dst (URShiftL dst cnt));
9406   effect(KILL cr);
9407   ins_cost(300);
9408   format %{ "MOV    $dst.lo,$dst.hi\n"
9409           "\tSHR    $dst.lo,$cnt-32\n"
9410           "\tXOR    $dst.hi,$dst.hi" %}
9411   opcode(0xC1, 0x5);  /* C1 /5 ib */
9412   ins_encode( move_long_big_shift_clr(dst,cnt) );
9413   ins_pipe( ialu_reg_long );
9414 %}
9415 
9416 // Shift Right Long by variable
9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9418   match(Set dst (URShiftL dst shift));
9419   effect(KILL cr);
9420   ins_cost(600);
9421   size(17);
9422   format %{ "TEST   $shift,32\n\t"
9423             "JEQ,s  small\n\t"
9424             "MOV    $dst.lo,$dst.hi\n\t"
9425             "XOR    $dst.hi,$dst.hi\n"
9426     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9427             "SHR    $dst.hi,$shift" %}
9428   ins_encode( shift_right_long( dst, shift ) );
9429   ins_pipe( pipe_slow );
9430 %}
9431 
9432 // Shift Right arithmetic Long by 1-31
9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9434   match(Set dst (RShiftL dst cnt));
9435   effect(KILL cr);
9436   ins_cost(200);
9437   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9438             "SAR    $dst.hi,$cnt" %}
9439   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9440   ins_encode( move_long_small_shift(dst,cnt) );
9441   ins_pipe( ialu_reg_long );
9442 %}
9443 
9444 // Shift Right arithmetic Long by 32-63
9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9446   match(Set dst (RShiftL dst cnt));
9447   effect(KILL cr);
9448   ins_cost(300);
9449   format %{ "MOV    $dst.lo,$dst.hi\n"
9450           "\tSAR    $dst.lo,$cnt-32\n"
9451           "\tSAR    $dst.hi,31" %}
9452   opcode(0xC1, 0x7);  /* C1 /7 ib */
9453   ins_encode( move_long_big_shift_sign(dst,cnt) );
9454   ins_pipe( ialu_reg_long );
9455 %}
9456 
9457 // Shift Right arithmetic Long by variable
9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9459   match(Set dst (RShiftL dst shift));
9460   effect(KILL cr);
9461   ins_cost(600);
9462   size(18);
9463   format %{ "TEST   $shift,32\n\t"
9464             "JEQ,s  small\n\t"
9465             "MOV    $dst.lo,$dst.hi\n\t"
9466             "SAR    $dst.hi,31\n"
9467     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9468             "SAR    $dst.hi,$shift" %}
9469   ins_encode( shift_right_arith_long( dst, shift ) );
9470   ins_pipe( pipe_slow );
9471 %}
9472 
9473 
9474 //----------Double Instructions------------------------------------------------
9475 // Double Math
9476 
9477 // Compare & branch
9478 
9479 // P6 version of double compare, sets condition codes in EFLAGS
9480 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9481   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9482   match(Set cr (CmpD src1 src2));
9483   effect(KILL rax);
9484   ins_cost(150);
9485   format %{ "FLD    $src1\n\t"
9486             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9487             "JNP    exit\n\t"
9488             "MOV    ah,1       // saw a NaN, set CF\n\t"
9489             "SAHF\n"
9490      "exit:\tNOP               // avoid branch to branch" %}
9491   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9492   ins_encode( Push_Reg_DPR(src1),
9493               OpcP, RegOpc(src2),
9494               cmpF_P6_fixup );
9495   ins_pipe( pipe_slow );
9496 %}
9497 
9498 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9499   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9500   match(Set cr (CmpD src1 src2));
9501   ins_cost(150);
9502   format %{ "FLD    $src1\n\t"
9503             "FUCOMIP ST,$src2  // P6 instruction" %}
9504   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9505   ins_encode( Push_Reg_DPR(src1),
9506               OpcP, RegOpc(src2));
9507   ins_pipe( pipe_slow );
9508 %}
9509 
9510 // Compare & branch
9511 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9512   predicate(UseSSE<=1);
9513   match(Set cr (CmpD src1 src2));
9514   effect(KILL rax);
9515   ins_cost(200);
9516   format %{ "FLD    $src1\n\t"
9517             "FCOMp  $src2\n\t"
9518             "FNSTSW AX\n\t"
9519             "TEST   AX,0x400\n\t"
9520             "JZ,s   flags\n\t"
9521             "MOV    AH,1\t# unordered treat as LT\n"
9522     "flags:\tSAHF" %}
9523   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9524   ins_encode( Push_Reg_DPR(src1),
9525               OpcP, RegOpc(src2),
9526               fpu_flags);
9527   ins_pipe( pipe_slow );
9528 %}
9529 
9530 // Compare vs zero into -1,0,1
9531 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9532   predicate(UseSSE<=1);
9533   match(Set dst (CmpD3 src1 zero));
9534   effect(KILL cr, KILL rax);
9535   ins_cost(280);
9536   format %{ "FTSTD  $dst,$src1" %}
9537   opcode(0xE4, 0xD9);
9538   ins_encode( Push_Reg_DPR(src1),
9539               OpcS, OpcP, PopFPU,
9540               CmpF_Result(dst));
9541   ins_pipe( pipe_slow );
9542 %}
9543 
9544 // Compare into -1,0,1
9545 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9546   predicate(UseSSE<=1);
9547   match(Set dst (CmpD3 src1 src2));
9548   effect(KILL cr, KILL rax);
9549   ins_cost(300);
9550   format %{ "FCMPD  $dst,$src1,$src2" %}
9551   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9552   ins_encode( Push_Reg_DPR(src1),
9553               OpcP, RegOpc(src2),
9554               CmpF_Result(dst));
9555   ins_pipe( pipe_slow );
9556 %}
9557 
9558 // float compare and set condition codes in EFLAGS by XMM regs
9559 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9560   predicate(UseSSE>=2);
9561   match(Set cr (CmpD src1 src2));
9562   ins_cost(145);
9563   format %{ "UCOMISD $src1,$src2\n\t"
9564             "JNP,s   exit\n\t"
9565             "PUSHF\t# saw NaN, set CF\n\t"
9566             "AND     [rsp], #0xffffff2b\n\t"
9567             "POPF\n"
9568     "exit:" %}
9569   ins_encode %{
9570     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9571     emit_cmpfp_fixup(_masm);
9572   %}
9573   ins_pipe( pipe_slow );
9574 %}
9575 
9576 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9577   predicate(UseSSE>=2);
9578   match(Set cr (CmpD src1 src2));
9579   ins_cost(100);
9580   format %{ "UCOMISD $src1,$src2" %}
9581   ins_encode %{
9582     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9583   %}
9584   ins_pipe( pipe_slow );
9585 %}
9586 
9587 // float compare and set condition codes in EFLAGS by XMM regs
9588 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9589   predicate(UseSSE>=2);
9590   match(Set cr (CmpD src1 (LoadD src2)));
9591   ins_cost(145);
9592   format %{ "UCOMISD $src1,$src2\n\t"
9593             "JNP,s   exit\n\t"
9594             "PUSHF\t# saw NaN, set CF\n\t"
9595             "AND     [rsp], #0xffffff2b\n\t"
9596             "POPF\n"
9597     "exit:" %}
9598   ins_encode %{
9599     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9600     emit_cmpfp_fixup(_masm);
9601   %}
9602   ins_pipe( pipe_slow );
9603 %}
9604 
9605 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9606   predicate(UseSSE>=2);
9607   match(Set cr (CmpD src1 (LoadD src2)));
9608   ins_cost(100);
9609   format %{ "UCOMISD $src1,$src2" %}
9610   ins_encode %{
9611     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9612   %}
9613   ins_pipe( pipe_slow );
9614 %}
9615 
9616 // Compare into -1,0,1 in XMM
9617 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9618   predicate(UseSSE>=2);
9619   match(Set dst (CmpD3 src1 src2));
9620   effect(KILL cr);
9621   ins_cost(255);
9622   format %{ "UCOMISD $src1, $src2\n\t"
9623             "MOV     $dst, #-1\n\t"
9624             "JP,s    done\n\t"
9625             "JB,s    done\n\t"
9626             "SETNE   $dst\n\t"
9627             "MOVZB   $dst, $dst\n"
9628     "done:" %}
9629   ins_encode %{
9630     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9631     emit_cmpfp3(_masm, $dst$$Register);
9632   %}
9633   ins_pipe( pipe_slow );
9634 %}
9635 
9636 // Compare into -1,0,1 in XMM and memory
9637 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9638   predicate(UseSSE>=2);
9639   match(Set dst (CmpD3 src1 (LoadD src2)));
9640   effect(KILL cr);
9641   ins_cost(275);
9642   format %{ "UCOMISD $src1, $src2\n\t"
9643             "MOV     $dst, #-1\n\t"
9644             "JP,s    done\n\t"
9645             "JB,s    done\n\t"
9646             "SETNE   $dst\n\t"
9647             "MOVZB   $dst, $dst\n"
9648     "done:" %}
9649   ins_encode %{
9650     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9651     emit_cmpfp3(_masm, $dst$$Register);
9652   %}
9653   ins_pipe( pipe_slow );
9654 %}
9655 
9656 
9657 instruct subDPR_reg(regDPR dst, regDPR src) %{
9658   predicate (UseSSE <=1);
9659   match(Set dst (SubD dst src));
9660 
9661   format %{ "FLD    $src\n\t"
9662             "DSUBp  $dst,ST" %}
9663   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9664   ins_cost(150);
9665   ins_encode( Push_Reg_DPR(src),
9666               OpcP, RegOpc(dst) );
9667   ins_pipe( fpu_reg_reg );
9668 %}
9669 
9670 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9671   predicate (UseSSE <=1);
9672   match(Set dst (RoundDouble (SubD src1 src2)));
9673   ins_cost(250);
9674 
9675   format %{ "FLD    $src2\n\t"
9676             "DSUB   ST,$src1\n\t"
9677             "FSTP_D $dst\t# D-round" %}
9678   opcode(0xD8, 0x5);
9679   ins_encode( Push_Reg_DPR(src2),
9680               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9681   ins_pipe( fpu_mem_reg_reg );
9682 %}
9683 
9684 
9685 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9686   predicate (UseSSE <=1);
9687   match(Set dst (SubD dst (LoadD src)));
9688   ins_cost(150);
9689 
9690   format %{ "FLD    $src\n\t"
9691             "DSUBp  $dst,ST" %}
9692   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9693   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9694               OpcP, RegOpc(dst) );
9695   ins_pipe( fpu_reg_mem );
9696 %}
9697 
9698 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9699   predicate (UseSSE<=1);
9700   match(Set dst (AbsD src));
9701   ins_cost(100);
9702   format %{ "FABS" %}
9703   opcode(0xE1, 0xD9);
9704   ins_encode( OpcS, OpcP );
9705   ins_pipe( fpu_reg_reg );
9706 %}
9707 
9708 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9709   predicate(UseSSE<=1);
9710   match(Set dst (NegD src));
9711   ins_cost(100);
9712   format %{ "FCHS" %}
9713   opcode(0xE0, 0xD9);
9714   ins_encode( OpcS, OpcP );
9715   ins_pipe( fpu_reg_reg );
9716 %}
9717 
9718 instruct addDPR_reg(regDPR dst, regDPR src) %{
9719   predicate(UseSSE<=1);
9720   match(Set dst (AddD dst src));
9721   format %{ "FLD    $src\n\t"
9722             "DADD   $dst,ST" %}
9723   size(4);
9724   ins_cost(150);
9725   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9726   ins_encode( Push_Reg_DPR(src),
9727               OpcP, RegOpc(dst) );
9728   ins_pipe( fpu_reg_reg );
9729 %}
9730 
9731 
9732 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9733   predicate(UseSSE<=1);
9734   match(Set dst (RoundDouble (AddD src1 src2)));
9735   ins_cost(250);
9736 
9737   format %{ "FLD    $src2\n\t"
9738             "DADD   ST,$src1\n\t"
9739             "FSTP_D $dst\t# D-round" %}
9740   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9741   ins_encode( Push_Reg_DPR(src2),
9742               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9743   ins_pipe( fpu_mem_reg_reg );
9744 %}
9745 
9746 
9747 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9748   predicate(UseSSE<=1);
9749   match(Set dst (AddD dst (LoadD src)));
9750   ins_cost(150);
9751 
9752   format %{ "FLD    $src\n\t"
9753             "DADDp  $dst,ST" %}
9754   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9755   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9756               OpcP, RegOpc(dst) );
9757   ins_pipe( fpu_reg_mem );
9758 %}
9759 
9760 // add-to-memory
9761 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9762   predicate(UseSSE<=1);
9763   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9764   ins_cost(150);
9765 
9766   format %{ "FLD_D  $dst\n\t"
9767             "DADD   ST,$src\n\t"
9768             "FST_D  $dst" %}
9769   opcode(0xDD, 0x0);
9770   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9771               Opcode(0xD8), RegOpc(src),
9772               set_instruction_start,
9773               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9774   ins_pipe( fpu_reg_mem );
9775 %}
9776 
9777 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9778   predicate(UseSSE<=1);
9779   match(Set dst (AddD dst con));
9780   ins_cost(125);
9781   format %{ "FLD1\n\t"
9782             "DADDp  $dst,ST" %}
9783   ins_encode %{
9784     __ fld1();
9785     __ faddp($dst$$reg);
9786   %}
9787   ins_pipe(fpu_reg);
9788 %}
9789 
9790 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9791   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9792   match(Set dst (AddD dst con));
9793   ins_cost(200);
9794   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9795             "DADDp  $dst,ST" %}
9796   ins_encode %{
9797     __ fld_d($constantaddress($con));
9798     __ faddp($dst$$reg);
9799   %}
9800   ins_pipe(fpu_reg_mem);
9801 %}
9802 
9803 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9804   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9805   match(Set dst (RoundDouble (AddD src con)));
9806   ins_cost(200);
9807   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9808             "DADD   ST,$src\n\t"
9809             "FSTP_D $dst\t# D-round" %}
9810   ins_encode %{
9811     __ fld_d($constantaddress($con));
9812     __ fadd($src$$reg);
9813     __ fstp_d(Address(rsp, $dst$$disp));
9814   %}
9815   ins_pipe(fpu_mem_reg_con);
9816 %}
9817 
9818 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9819   predicate(UseSSE<=1);
9820   match(Set dst (MulD dst src));
9821   format %{ "FLD    $src\n\t"
9822             "DMULp  $dst,ST" %}
9823   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9824   ins_cost(150);
9825   ins_encode( Push_Reg_DPR(src),
9826               OpcP, RegOpc(dst) );
9827   ins_pipe( fpu_reg_reg );
9828 %}
9829 
9830 // Strict FP instruction biases argument before multiply then
9831 // biases result to avoid double rounding of subnormals.
9832 //
9833 // scale arg1 by multiplying arg1 by 2^(-15360)
9834 // load arg2
9835 // multiply scaled arg1 by arg2
9836 // rescale product by 2^(15360)
9837 //
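// (15360 = 16383 - 1023, the difference between the x87 extended-precision and
// IEEE double exponent biases.  Scaling arg1 down by 2^(-15360) makes any
// product that would be a double subnormal underflow in the x87 as well, so
// the hardware denormalizes it with a single rounding; the final rescale by
// 2^(15360) is an exact power-of-two multiply that restores the magnitude.)
//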
9838 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9839   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9840   match(Set dst (MulD dst src));
9841   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9842 
9843   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9844             "DMULp  $dst,ST\n\t"
9845             "FLD    $src\n\t"
9846             "DMULp  $dst,ST\n\t"
9847             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9848             "DMULp  $dst,ST\n\t" %}
9849   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9850   ins_encode( strictfp_bias1(dst),
9851               Push_Reg_DPR(src),
9852               OpcP, RegOpc(dst),
9853               strictfp_bias2(dst) );
9854   ins_pipe( fpu_reg_reg );
9855 %}
9856 
9857 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9858   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9859   match(Set dst (MulD dst con));
9860   ins_cost(200);
9861   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9862             "DMULp  $dst,ST" %}
9863   ins_encode %{
9864     __ fld_d($constantaddress($con));
9865     __ fmulp($dst$$reg);
9866   %}
9867   ins_pipe(fpu_reg_mem);
9868 %}
9869 
9870 
9871 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9872   predicate( UseSSE<=1 );
9873   match(Set dst (MulD dst (LoadD src)));
9874   ins_cost(200);
9875   format %{ "FLD_D  $src\n\t"
9876             "DMULp  $dst,ST" %}
9877   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9878   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9879               OpcP, RegOpc(dst) );
9880   ins_pipe( fpu_reg_mem );
9881 %}
9882 
9883 //
9884 // Cisc-alternate to reg-reg multiply
9885 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9886   predicate( UseSSE<=1 );
9887   match(Set dst (MulD src (LoadD mem)));
9888   ins_cost(250);
9889   format %{ "FLD_D  $mem\n\t"
9890             "DMUL   ST,$src\n\t"
9891             "FSTP_D $dst" %}
9892   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9893   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9894               OpcReg_FPR(src),
9895               Pop_Reg_DPR(dst) );
9896   ins_pipe( fpu_reg_reg_mem );
9897 %}
9898 
9899 
9900 // MACRO3 -- addDPR a mulDPR
9901 // This instruction is a '2-address' instruction in that the result goes
9902 // back to src2.  This eliminates a move from the macro; possibly the
9903 // register allocator will have to add it back (and maybe not).
9904 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9905   predicate( UseSSE<=1 );
9906   match(Set src2 (AddD (MulD src0 src1) src2));
9907   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9908             "DMUL   ST,$src1\n\t"
9909             "DADDp  $src2,ST" %}
9910   ins_cost(250);
9911   opcode(0xDD); /* LoadD DD /0 */
9912   ins_encode( Push_Reg_FPR(src0),
9913               FMul_ST_reg(src1),
9914               FAddP_reg_ST(src2) );
9915   ins_pipe( fpu_reg_reg_reg );
9916 %}
9917 
9918 
9919 // MACRO3 -- subDPR a mulDPR
9920 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9921   predicate( UseSSE<=1 );
9922   match(Set src2 (SubD (MulD src0 src1) src2));
9923   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9924             "DMUL   ST,$src1\n\t"
9925             "DSUBRp $src2,ST" %}
9926   ins_cost(250);
9927   ins_encode( Push_Reg_FPR(src0),
9928               FMul_ST_reg(src1),
9929               Opcode(0xDE), Opc_plus(0xE0,src2));
9930   ins_pipe( fpu_reg_reg_reg );
9931 %}
9932 
9933 
9934 instruct divDPR_reg(regDPR dst, regDPR src) %{
9935   predicate( UseSSE<=1 );
9936   match(Set dst (DivD dst src));
9937 
9938   format %{ "FLD    $src\n\t"
9939             "FDIVp  $dst,ST" %}
9940   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9941   ins_cost(150);
9942   ins_encode( Push_Reg_DPR(src),
9943               OpcP, RegOpc(dst) );
9944   ins_pipe( fpu_reg_reg );
9945 %}
9946 
9947 // Strict FP instruction biases argument before division then
9948 // biases result, to avoid double rounding of subnormals.
9949 //
9950 // scale dividend by multiplying dividend by 2^(-15360)
9951 // load divisor
9952 // divide scaled dividend by divisor
9953 // rescale quotient by 2^(15360)
9954 //
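// (Same 2^(+/-15360) exponent-bias scaling as the strict FP multiply above.)
//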
9955 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9956   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9957   match(Set dst (DivD dst src));
9958   ins_cost(1);   // Select this instruction for all strict FP double divides
9960 
9961   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9962             "DMULp  $dst,ST\n\t"
9963             "FLD    $src\n\t"
9964             "FDIVp  $dst,ST\n\t"
9965             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9966             "DMULp  $dst,ST\n\t" %}
9967   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9968   ins_encode( strictfp_bias1(dst),
9969               Push_Reg_DPR(src),
9970               OpcP, RegOpc(dst),
9971               strictfp_bias2(dst) );
9972   ins_pipe( fpu_reg_reg );
9973 %}
9974 
9975 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9976   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9977   match(Set dst (RoundDouble (DivD src1 src2)));
9978 
9979   format %{ "FLD    $src1\n\t"
9980             "FDIV   ST,$src2\n\t"
9981             "FSTP_D $dst\t# D-round" %}
9982   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9983   ins_encode( Push_Reg_DPR(src1),
9984               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9985   ins_pipe( fpu_mem_reg_reg );
9986 %}
9987 
9988 
9989 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9990   predicate(UseSSE<=1);
9991   match(Set dst (ModD dst src));
9992   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9993 
9994   format %{ "DMOD   $dst,$src" %}
9995   ins_cost(250);
9996   ins_encode(Push_Reg_Mod_DPR(dst, src),
9997               emitModDPR(),
9998               Push_Result_Mod_DPR(src),
9999               Pop_Reg_DPR(dst));
10000   ins_pipe( pipe_slow );
10001 %}
10002 
10003 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10004   predicate(UseSSE>=2);
10005   match(Set dst (ModD src0 src1));
10006   effect(KILL rax, KILL cr);
10007 
10008   format %{ "SUB    ESP,8\t # DMOD\n"
10009           "\tMOVSD  [ESP+0],$src1\n"
10010           "\tFLD_D  [ESP+0]\n"
10011           "\tMOVSD  [ESP+0],$src0\n"
10012           "\tFLD_D  [ESP+0]\n"
10013      "loop:\tFPREM\n"
10014           "\tFWAIT\n"
10015           "\tFNSTSW AX\n"
10016           "\tSAHF\n"
10017           "\tJP     loop\n"
10018           "\tFSTP_D [ESP+0]\n"
10019           "\tMOVSD  $dst,[ESP+0]\n"
10020           "\tADD    ESP,8\n"
10021           "\tFSTP   ST0\t # Restore FPU Stack"
10022     %}
10023   ins_cost(250);
10024   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10025   ins_pipe( pipe_slow );
10026 %}
10027 
10028 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10029   predicate (UseSSE<=1);
10030   match(Set dst (AtanD dst src));
10031   format %{ "DATA   $dst,$src" %}
10032   opcode(0xD9, 0xF3);
10033   ins_encode( Push_Reg_DPR(src),
10034               OpcP, OpcS, RegOpc(dst) );
10035   ins_pipe( pipe_slow );
10036 %}
10037 
10038 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10039   predicate (UseSSE>=2);
10040   match(Set dst (AtanD dst src));
10041   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10042   format %{ "DATA   $dst,$src" %}
10043   opcode(0xD9, 0xF3);
10044   ins_encode( Push_SrcD(src),
10045               OpcP, OpcS, Push_ResultD(dst) );
10046   ins_pipe( pipe_slow );
10047 %}
10048 
10049 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10050   predicate (UseSSE<=1);
10051   match(Set dst (SqrtD src));
10052   format %{ "DSQRT  $dst,$src" %}
10053   opcode(0xFA, 0xD9);
10054   ins_encode( Push_Reg_DPR(src),
10055               OpcS, OpcP, Pop_Reg_DPR(dst) );
10056   ins_pipe( pipe_slow );
10057 %}
10058 
10059 //-------------Float Instructions-------------------------------
10060 // Float Math
10061 
10062 // Code for float compare:
10063 //     fcompp();
10064 //     fwait(); fnstsw_ax();
10065 //     sahf();
10066 //     movl(dst, unordered_result);
10067 //     jcc(Assembler::parity, exit);
10068 //     movl(dst, less_result);
10069 //     jcc(Assembler::below, exit);
10070 //     movl(dst, equal_result);
10071 //     jcc(Assembler::equal, exit);
10072 //     movl(dst, greater_result);
10073 //   exit:
10074 
10075 // P6 version of float compare, sets condition codes in EFLAGS
10076 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10077   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10078   match(Set cr (CmpF src1 src2));
10079   effect(KILL rax);
10080   ins_cost(150);
10081   format %{ "FLD    $src1\n\t"
10082             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10083             "JNP    exit\n\t"
10084             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10085             "SAHF\n"
10086      "exit:\tNOP               // avoid branch to branch" %}
10087   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10088   ins_encode( Push_Reg_DPR(src1),
10089               OpcP, RegOpc(src2),
10090               cmpF_P6_fixup );
10091   ins_pipe( pipe_slow );
10092 %}
10093 
10094 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10095   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10096   match(Set cr (CmpF src1 src2));
10097   ins_cost(100);
10098   format %{ "FLD    $src1\n\t"
10099             "FUCOMIP ST,$src2  // P6 instruction" %}
10100   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10101   ins_encode( Push_Reg_DPR(src1),
10102               OpcP, RegOpc(src2));
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 
10107 // Compare & branch
10108 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10109   predicate(UseSSE == 0);
10110   match(Set cr (CmpF src1 src2));
10111   effect(KILL rax);
10112   ins_cost(200);
10113   format %{ "FLD    $src1\n\t"
10114             "FCOMp  $src2\n\t"
10115             "FNSTSW AX\n\t"
10116             "TEST   AX,0x400\n\t"
10117             "JZ,s   flags\n\t"
10118             "MOV    AH,1\t# unordered treat as LT\n"
10119     "flags:\tSAHF" %}
10120   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10121   ins_encode( Push_Reg_DPR(src1),
10122               OpcP, RegOpc(src2),
10123               fpu_flags);
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 // Compare vs zero into -1,0,1
10128 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10129   predicate(UseSSE == 0);
10130   match(Set dst (CmpF3 src1 zero));
10131   effect(KILL cr, KILL rax);
10132   ins_cost(280);
10133   format %{ "FTSTF  $dst,$src1" %}
10134   opcode(0xE4, 0xD9);
10135   ins_encode( Push_Reg_DPR(src1),
10136               OpcS, OpcP, PopFPU,
10137               CmpF_Result(dst));
10138   ins_pipe( pipe_slow );
10139 %}
10140 
10141 // Compare into -1,0,1
10142 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10143   predicate(UseSSE == 0);
10144   match(Set dst (CmpF3 src1 src2));
10145   effect(KILL cr, KILL rax);
10146   ins_cost(300);
10147   format %{ "FCMPF  $dst,$src1,$src2" %}
10148   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10149   ins_encode( Push_Reg_DPR(src1),
10150               OpcP, RegOpc(src2),
10151               CmpF_Result(dst));
10152   ins_pipe( pipe_slow );
10153 %}
10154 
10155 // float compare and set condition codes in EFLAGS by XMM regs
10156 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10157   predicate(UseSSE>=1);
10158   match(Set cr (CmpF src1 src2));
10159   ins_cost(145);
10160   format %{ "UCOMISS $src1,$src2\n\t"
10161             "JNP,s   exit\n\t"
10162             "PUSHF\t# saw NaN, set CF\n\t"
10163             "AND     [rsp], #0xffffff2b\n\t"
10164             "POPF\n"
10165     "exit:" %}
10166   ins_encode %{
10167     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10168     emit_cmpfp_fixup(_masm);
10169   %}
10170   ins_pipe( pipe_slow );
10171 %}
10172 
10173 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10174   predicate(UseSSE>=1);
10175   match(Set cr (CmpF src1 src2));
10176   ins_cost(100);
10177   format %{ "UCOMISS $src1,$src2" %}
10178   ins_encode %{
10179     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10180   %}
10181   ins_pipe( pipe_slow );
10182 %}
10183 
10184 // float compare and set condition codes in EFLAGS by XMM regs
10185 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10186   predicate(UseSSE>=1);
10187   match(Set cr (CmpF src1 (LoadF src2)));
10188   ins_cost(165);
10189   format %{ "UCOMISS $src1,$src2\n\t"
10190             "JNP,s   exit\n\t"
10191             "PUSHF\t# saw NaN, set CF\n\t"
10192             "AND     [rsp], #0xffffff2b\n\t"
10193             "POPF\n"
10194     "exit:" %}
10195   ins_encode %{
10196     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10197     emit_cmpfp_fixup(_masm);
10198   %}
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10203   predicate(UseSSE>=1);
10204   match(Set cr (CmpF src1 (LoadF src2)));
10205   ins_cost(100);
10206   format %{ "UCOMISS $src1,$src2" %}
10207   ins_encode %{
10208     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10209   %}
10210   ins_pipe( pipe_slow );
10211 %}
10212 
10213 // Compare into -1,0,1 in XMM
10214 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10215   predicate(UseSSE>=1);
10216   match(Set dst (CmpF3 src1 src2));
10217   effect(KILL cr);
10218   ins_cost(255);
10219   format %{ "UCOMISS $src1, $src2\n\t"
10220             "MOV     $dst, #-1\n\t"
10221             "JP,s    done\n\t"
10222             "JB,s    done\n\t"
10223             "SETNE   $dst\n\t"
10224             "MOVZB   $dst, $dst\n"
10225     "done:" %}
10226   ins_encode %{
10227     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10228     emit_cmpfp3(_masm, $dst$$Register);
10229   %}
10230   ins_pipe( pipe_slow );
10231 %}
10232 
10233 // Compare into -1,0,1 in XMM and memory
10234 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10235   predicate(UseSSE>=1);
10236   match(Set dst (CmpF3 src1 (LoadF src2)));
10237   effect(KILL cr);
10238   ins_cost(275);
10239   format %{ "UCOMISS $src1, $src2\n\t"
10240             "MOV     $dst, #-1\n\t"
10241             "JP,s    done\n\t"
10242             "JB,s    done\n\t"
10243             "SETNE   $dst\n\t"
10244             "MOVZB   $dst, $dst\n"
10245     "done:" %}
10246   ins_encode %{
10247     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10248     emit_cmpfp3(_masm, $dst$$Register);
10249   %}
10250   ins_pipe( pipe_slow );
10251 %}
10252 
10253 // Spill to obtain 24-bit precision
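// (Storing the 80-bit x87 result to a 32-bit stack slot forces it to be
// rounded to single precision; that is what the spill buys here.)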
10254 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10255   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10256   match(Set dst (SubF src1 src2));
10257 
10258   format %{ "FSUB   $dst,$src1 - $src2" %}
10259   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10260   ins_encode( Push_Reg_FPR(src1),
10261               OpcReg_FPR(src2),
10262               Pop_Mem_FPR(dst) );
10263   ins_pipe( fpu_mem_reg_reg );
10264 %}
10265 //
10266 // This instruction does not round to 24-bits
10267 instruct subFPR_reg(regFPR dst, regFPR src) %{
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (SubF dst src));
10270 
10271   format %{ "FSUB   $dst,$src" %}
10272   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10273   ins_encode( Push_Reg_FPR(src),
10274               OpcP, RegOpc(dst) );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 // Spill to obtain 24-bit precision
10279 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10280   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10281   match(Set dst (AddF src1 src2));
10282 
10283   format %{ "FADD   $dst,$src1,$src2" %}
10284   opcode(0xD8, 0x0); /* D8 C0+i */
10285   ins_encode( Push_Reg_FPR(src2),
10286               OpcReg_FPR(src1),
10287               Pop_Mem_FPR(dst) );
10288   ins_pipe( fpu_mem_reg_reg );
10289 %}
10290 //
10291 // This instruction does not round to 24-bits
10292 instruct addFPR_reg(regFPR dst, regFPR src) %{
10293   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10294   match(Set dst (AddF dst src));
10295 
10296   format %{ "FLD    $src\n\t"
10297             "FADDp  $dst,ST" %}
10298   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10299   ins_encode( Push_Reg_FPR(src),
10300               OpcP, RegOpc(dst) );
10301   ins_pipe( fpu_reg_reg );
10302 %}
10303 
10304 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10305   predicate(UseSSE==0);
10306   match(Set dst (AbsF src));
10307   ins_cost(100);
10308   format %{ "FABS" %}
10309   opcode(0xE1, 0xD9);
10310   ins_encode( OpcS, OpcP );
10311   ins_pipe( fpu_reg_reg );
10312 %}
10313 
10314 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10315   predicate(UseSSE==0);
10316   match(Set dst (NegF src));
10317   ins_cost(100);
10318   format %{ "FCHS" %}
10319   opcode(0xE0, 0xD9);
10320   ins_encode( OpcS, OpcP );
10321   ins_pipe( fpu_reg_reg );
10322 %}
10323 
10324 // Cisc-alternate to addFPR_reg
10325 // Spill to obtain 24-bit precision
10326 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10327   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10328   match(Set dst (AddF src1 (LoadF src2)));
10329 
10330   format %{ "FLD    $src2\n\t"
10331             "FADD   ST,$src1\n\t"
10332             "FSTP_S $dst" %}
10333   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10334   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10335               OpcReg_FPR(src1),
10336               Pop_Mem_FPR(dst) );
10337   ins_pipe( fpu_mem_reg_mem );
10338 %}
10339 //
10340 // Cisc-alternate to addFPR_reg
10341 // This instruction does not round to 24-bits
10342 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10343   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10344   match(Set dst (AddF dst (LoadF src)));
10345 
10346   format %{ "FADD   $dst,$src" %}
10347   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10348   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10349               OpcP, RegOpc(dst) );
10350   ins_pipe( fpu_reg_mem );
10351 %}
10352 
10353 // Following two instructions for _222_mpegaudio
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src1 src2));
10358 
10359   format %{ "FADD   $dst,$src1,$src2" %}
10360   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10361   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10362               OpcReg_FPR(src2),
10363               Pop_Mem_FPR(dst) );
10364   ins_pipe( fpu_mem_reg_mem );
10365 %}
10366 
10367 // Cisc-spill variant
10368 // Spill to obtain 24-bit precision
10369 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10370   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10371   match(Set dst (AddF src1 (LoadF src2)));
10372 
10373   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10374   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10375   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10376               set_instruction_start,
10377               OpcP, RMopc_Mem(secondary,src1),
10378               Pop_Mem_FPR(dst) );
10379   ins_pipe( fpu_mem_mem_mem );
10380 %}
10381 
10382 // Spill to obtain 24-bit precision
10383 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (AddF src1 src2));
10386 
10387   format %{ "FADD   $dst,$src1,$src2" %}
10388   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10389   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10390               set_instruction_start,
10391               OpcP, RMopc_Mem(secondary,src1),
10392               Pop_Mem_FPR(dst) );
10393   ins_pipe( fpu_mem_mem_mem );
10394 %}
10395 
10396 
10397 // Spill to obtain 24-bit precision
10398 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10399   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10400   match(Set dst (AddF src con));
10401   format %{ "FLD    $src\n\t"
10402             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10403             "FSTP_S $dst"  %}
10404   ins_encode %{
10405     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10406     __ fadd_s($constantaddress($con));
10407     __ fstp_s(Address(rsp, $dst$$disp));
10408   %}
10409   ins_pipe(fpu_mem_reg_con);
10410 %}
10411 //
10412 // This instruction does not round to 24-bits
10413 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10414   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10415   match(Set dst (AddF src con));
10416   format %{ "FLD    $src\n\t"
10417             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10418             "FSTP   $dst"  %}
10419   ins_encode %{
10420     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10421     __ fadd_s($constantaddress($con));
10422     __ fstp_d($dst$$reg);
10423   %}
10424   ins_pipe(fpu_reg_reg_con);
10425 %}
10426 
10427 // Spill to obtain 24-bit precision
10428 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10429   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10430   match(Set dst (MulF src1 src2));
10431 
10432   format %{ "FLD    $src1\n\t"
10433             "FMUL   $src2\n\t"
10434             "FSTP_S $dst"  %}
10435   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10436   ins_encode( Push_Reg_FPR(src1),
10437               OpcReg_FPR(src2),
10438               Pop_Mem_FPR(dst) );
10439   ins_pipe( fpu_mem_reg_reg );
10440 %}
10441 //
10442 // This instruction does not round to 24-bits
10443 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10444   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10445   match(Set dst (MulF src1 src2));
10446 
10447   format %{ "FLD    $src1\n\t"
10448             "FMUL   $src2\n\t"
10449             "FSTP_S $dst"  %}
10450   opcode(0xD8, 0x1); /* D8 C8+i */
10451   ins_encode( Push_Reg_FPR(src2),
10452               OpcReg_FPR(src1),
10453               Pop_Reg_FPR(dst) );
10454   ins_pipe( fpu_reg_reg_reg );
10455 %}
10456 
10457 
10458 // Spill to obtain 24-bit precision
10459 // Cisc-alternate to reg-reg multiply
10460 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10461   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10462   match(Set dst (MulF src1 (LoadF src2)));
10463 
10464   format %{ "FLD_S  $src2\n\t"
10465             "FMUL   $src1\n\t"
10466             "FSTP_S $dst"  %}
10467   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10468   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10469               OpcReg_FPR(src1),
10470               Pop_Mem_FPR(dst) );
10471   ins_pipe( fpu_mem_reg_mem );
10472 %}
10473 //
10474 // This instruction does not round to 24-bits
10475 // Cisc-alternate to reg-reg multiply
10476 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10477   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10478   match(Set dst (MulF src1 (LoadF src2)));
10479 
10480   format %{ "FMUL   $dst,$src1,$src2" %}
10481   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10482   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10483               OpcReg_FPR(src1),
10484               Pop_Reg_FPR(dst) );
10485   ins_pipe( fpu_reg_reg_mem );
10486 %}
10487 
10488 // Spill to obtain 24-bit precision
10489 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10490   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF src1 src2));
10492 
10493   format %{ "FMUL   $dst,$src1,$src2" %}
10494   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10495   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10496               set_instruction_start,
10497               OpcP, RMopc_Mem(secondary,src1),
10498               Pop_Mem_FPR(dst) );
10499   ins_pipe( fpu_mem_mem_mem );
10500 %}
10501 
10502 // Spill to obtain 24-bit precision
10503 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10504   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10505   match(Set dst (MulF src con));
10506 
10507   format %{ "FLD    $src\n\t"
10508             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10509             "FSTP_S $dst"  %}
10510   ins_encode %{
10511     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10512     __ fmul_s($constantaddress($con));
10513     __ fstp_s(Address(rsp, $dst$$disp));
10514   %}
10515   ins_pipe(fpu_mem_reg_con);
10516 %}
10517 //
10518 // This instruction does not round to 24-bits
10519 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10520   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10521   match(Set dst (MulF src con));
10522 
10523   format %{ "FLD    $src\n\t"
10524             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10525             "FSTP   $dst"  %}
10526   ins_encode %{
10527     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10528     __ fmul_s($constantaddress($con));
10529     __ fstp_d($dst$$reg);
10530   %}
10531   ins_pipe(fpu_reg_reg_con);
10532 %}
10533 
10534 
10535 //
10536 // MACRO1 -- subsume unshared load into mulFPR
10537 // This instruction does not round to 24-bits
10538 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10539   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10540   match(Set dst (MulF (LoadF mem1) src));
10541 
10542   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10543             "FMUL   ST,$src\n\t"
10544             "FSTP   $dst" %}
10545   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10546   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10547               OpcReg_FPR(src),
10548               Pop_Reg_FPR(dst) );
10549   ins_pipe( fpu_reg_reg_mem );
10550 %}
10551 //
10552 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10553 // This instruction does not round to 24-bits
10554 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10555   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10557   ins_cost(95);
10558 
10559   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10560             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10561             "FADD   ST,$src2\n\t"
10562             "FSTP   $dst" %}
10563   opcode(0xD9); /* LoadF D9 /0 */
10564   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10565               FMul_ST_reg(src1),
10566               FAdd_ST_reg(src2),
10567               Pop_Reg_FPR(dst) );
10568   ins_pipe( fpu_reg_mem_reg_reg );
10569 %}
10570 
10571 // MACRO3 -- addFPR a mulFPR
10572 // This instruction does not round to 24-bits.  It is a '2-address'
10573 // instruction in that the result goes back to src2.  This eliminates
10574 // a move from the macro; possibly the register allocator will have
10575 // to add it back (and maybe not).
10576 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set src2 (AddF (MulF src0 src1) src2));
10579 
10580   format %{ "FLD    $src0     ===MACRO3===\n\t"
10581             "FMUL   ST,$src1\n\t"
10582             "FADDP  $src2,ST" %}
10583   opcode(0xD9); /* LoadF D9 /0 */
10584   ins_encode( Push_Reg_FPR(src0),
10585               FMul_ST_reg(src1),
10586               FAddP_reg_ST(src2) );
10587   ins_pipe( fpu_reg_reg_reg );
10588 %}
10589 
10590 // MACRO4 -- divFPR subFPR
10591 // This instruction does not round to 24-bits
10592 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10593   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10594   match(Set dst (DivF (SubF src2 src1) src3));
10595 
10596   format %{ "FLD    $src2   ===MACRO4===\n\t"
10597             "FSUB   ST,$src1\n\t"
10598             "FDIV   ST,$src3\n\t"
10599             "FSTP  $dst" %}
10600   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10601   ins_encode( Push_Reg_FPR(src2),
10602               subFPR_divFPR_encode(src1,src3),
10603               Pop_Reg_FPR(dst) );
10604   ins_pipe( fpu_reg_reg_reg_reg );
10605 %}
10606 
10607 // Spill to obtain 24-bit precision
10608 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10609   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10610   match(Set dst (DivF src1 src2));
10611 
10612   format %{ "FDIV   $dst,$src1,$src2" %}
10613   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10614   ins_encode( Push_Reg_FPR(src1),
10615               OpcReg_FPR(src2),
10616               Pop_Mem_FPR(dst) );
10617   ins_pipe( fpu_mem_reg_reg );
10618 %}
10619 //
10620 // This instruction does not round to 24-bits
10621 instruct divFPR_reg(regFPR dst, regFPR src) %{
10622   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10623   match(Set dst (DivF dst src));
10624 
10625   format %{ "FDIV   $dst,$src" %}
10626   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10627   ins_encode( Push_Reg_FPR(src),
10628               OpcP, RegOpc(dst) );
10629   ins_pipe( fpu_reg_reg );
10630 %}
10631 
10632 
10633 // Spill to obtain 24-bit precision
10634 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10635   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10636   match(Set dst (ModF src1 src2));
10637   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10638 
10639   format %{ "FMOD   $dst,$src1,$src2" %}
10640   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10641               emitModDPR(),
10642               Push_Result_Mod_DPR(src2),
10643               Pop_Mem_FPR(dst));
10644   ins_pipe( pipe_slow );
10645 %}
10646 //
10647 // This instruction does not round to 24-bits
10648 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10649   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10650   match(Set dst (ModF dst src));
10651   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10652 
10653   format %{ "FMOD   $dst,$src" %}
10654   ins_encode(Push_Reg_Mod_DPR(dst, src),
10655               emitModDPR(),
10656               Push_Result_Mod_DPR(src),
10657               Pop_Reg_FPR(dst));
10658   ins_pipe( pipe_slow );
10659 %}
10660 
10661 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10662   predicate(UseSSE>=1);
10663   match(Set dst (ModF src0 src1));
10664   effect(KILL rax, KILL cr);
10665   format %{ "SUB    ESP,4\t # FMOD\n"
10666           "\tMOVSS  [ESP+0],$src1\n"
10667           "\tFLD_S  [ESP+0]\n"
10668           "\tMOVSS  [ESP+0],$src0\n"
10669           "\tFLD_S  [ESP+0]\n"
10670      "loop:\tFPREM\n"
10671           "\tFWAIT\n"
10672           "\tFNSTSW AX\n"
10673           "\tSAHF\n"
10674           "\tJP     loop\n"
10675           "\tFSTP_S [ESP+0]\n"
10676           "\tMOVSS  $dst,[ESP+0]\n"
10677           "\tADD    ESP,4\n"
10678           "\tFSTP   ST0\t # Restore FPU Stack"
10679     %}
10680   ins_cost(250);
10681   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10682   ins_pipe( pipe_slow );
10683 %}
10684 
10685 
10686 //----------Arithmetic Conversion Instructions---------------------------------
10687 // The conversion operations are all alpha-sorted.  Please keep it that way!
10688 
10689 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10690   predicate(UseSSE==0);
10691   match(Set dst (RoundFloat src));
10692   ins_cost(125);
10693   format %{ "FST_S  $dst,$src\t# F-round" %}
10694   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10695   ins_pipe( fpu_mem_reg );
10696 %}
10697 
10698 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10699   predicate(UseSSE<=1);
10700   match(Set dst (RoundDouble src));
10701   ins_cost(125);
10702   format %{ "FST_D  $dst,$src\t# D-round" %}
10703   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10704   ins_pipe( fpu_mem_reg );
10705 %}
10706 
10707 // Force rounding to 24-bit precision and 8-bit exponent
10708 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10709   predicate(UseSSE==0);
10710   match(Set dst (ConvD2F src));
10711   format %{ "FST_S  $dst,$src\t# F-round" %}
10712   expand %{
10713     roundFloat_mem_reg(dst,src);
10714   %}
10715 %}
10716 
10717 // Force rounding to 24-bit precision and 8-bit exponent
10718 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10719   predicate(UseSSE==1);
10720   match(Set dst (ConvD2F src));
10721   effect( KILL cr );
10722   format %{ "SUB    ESP,4\n\t"
10723             "FST_S  [ESP],$src\t# F-round\n\t"
10724             "MOVSS  $dst,[ESP]\n\t"
10725             "ADD ESP,4" %}
10726   ins_encode %{
10727     __ subptr(rsp, 4);
10728     if ($src$$reg != FPR1L_enc) {
10729       __ fld_s($src$$reg-1);
10730       __ fstp_s(Address(rsp, 0));
10731     } else {
10732       __ fst_s(Address(rsp, 0));
10733     }
10734     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10735     __ addptr(rsp, 4);
10736   %}
10737   ins_pipe( pipe_slow );
10738 %}
10739 
10740 // Force rounding double precision to single precision
10741 instruct convD2F_reg(regF dst, regD src) %{
10742   predicate(UseSSE>=2);
10743   match(Set dst (ConvD2F src));
10744   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10745   ins_encode %{
10746     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10747   %}
10748   ins_pipe( pipe_slow );
10749 %}
10750 
10751 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10752   predicate(UseSSE==0);
10753   match(Set dst (ConvF2D src));
10754   format %{ "FST_S  $dst,$src\t# D-round" %}
10755   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10756   ins_pipe( fpu_reg_reg );
10757 %}
10758 
10759 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10760   predicate(UseSSE==1);
10761   match(Set dst (ConvF2D src));
10762   format %{ "FST_D  $dst,$src\t# D-round" %}
10763   expand %{
10764     roundDouble_mem_reg(dst,src);
10765   %}
10766 %}
10767 
10768 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10769   predicate(UseSSE==1);
10770   match(Set dst (ConvF2D src));
10771   effect( KILL cr );
10772   format %{ "SUB    ESP,4\n\t"
10773             "MOVSS  [ESP] $src\n\t"
10774             "FLD_S  [ESP]\n\t"
10775             "ADD    ESP,4\n\t"
10776             "FSTP   $dst\t# D-round" %}
10777   ins_encode %{
10778     __ subptr(rsp, 4);
10779     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10780     __ fld_s(Address(rsp, 0));
10781     __ addptr(rsp, 4);
10782     __ fstp_d($dst$$reg);
10783   %}
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 instruct convF2D_reg(regD dst, regF src) %{
10788   predicate(UseSSE>=2);
10789   match(Set dst (ConvF2D src));
10790   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10791   ins_encode %{
10792     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10793   %}
10794   ins_pipe( pipe_slow );
10795 %}
10796 
10797 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10798 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10799   predicate(UseSSE<=1);
10800   match(Set dst (ConvD2I src));
10801   effect( KILL tmp, KILL cr );
10802   format %{ "FLD    $src\t# Convert double to int \n\t"
10803             "FLDCW  trunc mode\n\t"
10804             "SUB    ESP,4\n\t"
10805             "FISTp  [ESP + #0]\n\t"
10806             "FLDCW  std/24-bit mode\n\t"
10807             "POP    EAX\n\t"
10808             "CMP    EAX,0x80000000\n\t"
10809             "JNE,s  fast\n\t"
10810             "FLD_D  $src\n\t"
10811             "CALL   d2i_wrapper\n"
10812       "fast:" %}
10813   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10814   ins_pipe( pipe_slow );
10815 %}
10816 
10817 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10818 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10819   predicate(UseSSE>=2);
10820   match(Set dst (ConvD2I src));
10821   effect( KILL tmp, KILL cr );
10822   format %{ "CVTTSD2SI $dst, $src\n\t"
10823             "CMP    $dst,0x80000000\n\t"
10824             "JNE,s  fast\n\t"
10825             "SUB    ESP, 8\n\t"
10826             "MOVSD  [ESP], $src\n\t"
10827             "FLD_D  [ESP]\n\t"
10828             "ADD    ESP, 8\n\t"
10829             "CALL   d2i_wrapper\n"
10830       "fast:" %}
10831   ins_encode %{
10832     Label fast;
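    // cvttsd2si yields the integer-indefinite value 0x80000000 for NaN or
    // out-of-range input, so that pattern routes us to the d2i_wrapper stub.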
10833     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10834     __ cmpl($dst$$Register, 0x80000000);
10835     __ jccb(Assembler::notEqual, fast);
10836     __ subptr(rsp, 8);
10837     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10838     __ fld_d(Address(rsp, 0));
10839     __ addptr(rsp, 8);
10840     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10841     __ bind(fast);
10842   %}
10843   ins_pipe( pipe_slow );
10844 %}
10845 
10846 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10847   predicate(UseSSE<=1);
10848   match(Set dst (ConvD2L src));
10849   effect( KILL cr );
10850   format %{ "FLD    $src\t# Convert double to long\n\t"
10851             "FLDCW  trunc mode\n\t"
10852             "SUB    ESP,8\n\t"
10853             "FISTp  [ESP + #0]\n\t"
10854             "FLDCW  std/24-bit mode\n\t"
10855             "POP    EAX\n\t"
10856             "POP    EDX\n\t"
10857             "CMP    EDX,0x80000000\n\t"
10858             "JNE,s  fast\n\t"
10859             "TEST   EAX,EAX\n\t"
10860             "JNE,s  fast\n\t"
10861             "FLD    $src\n\t"
10862             "CALL   d2l_wrapper\n"
10863       "fast:" %}
10864   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10865   ins_pipe( pipe_slow );
10866 %}
10867 
10868 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10869 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10870   predicate (UseSSE>=2);
10871   match(Set dst (ConvD2L src));
10872   effect( KILL cr );
10873   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10874             "MOVSD  [ESP],$src\n\t"
10875             "FLD_D  [ESP]\n\t"
10876             "FLDCW  trunc mode\n\t"
10877             "FISTp  [ESP + #0]\n\t"
10878             "FLDCW  std/24-bit mode\n\t"
10879             "POP    EAX\n\t"
10880             "POP    EDX\n\t"
10881             "CMP    EDX,0x80000000\n\t"
10882             "JNE,s  fast\n\t"
10883             "TEST   EAX,EAX\n\t"
10884             "JNE,s  fast\n\t"
10885             "SUB    ESP,8\n\t"
10886             "MOVSD  [ESP],$src\n\t"
10887             "FLD_D  [ESP]\n\t"
10888             "ADD    ESP,8\n\t"
10889             "CALL   d2l_wrapper\n"
10890       "fast:" %}
10891   ins_encode %{
10892     Label fast;
10893     __ subptr(rsp, 8);
10894     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10895     __ fld_d(Address(rsp, 0));
10896     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10897     __ fistp_d(Address(rsp, 0));
10898     // Restore the rounding mode, mask the exception
10899     if (Compile::current()->in_24_bit_fp_mode()) {
10900       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10901     } else {
10902       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10903     }
10904     // Load the converted long, adjust CPU stack
10905     __ pop(rax);
10906     __ pop(rdx);
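    // fistp stores 0x8000000000000000 (the 64-bit integer-indefinite value) on
    // NaN or overflow; if EDX:EAX holds that pattern, take the slow path
    // through d2l_wrapper.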
10907     __ cmpl(rdx, 0x80000000);
10908     __ jccb(Assembler::notEqual, fast);
10909     __ testl(rax, rax);
10910     __ jccb(Assembler::notEqual, fast);
10911     __ subptr(rsp, 8);
10912     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10913     __ fld_d(Address(rsp, 0));
10914     __ addptr(rsp, 8);
10915     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10916     __ bind(fast);
10917   %}
10918   ins_pipe( pipe_slow );
10919 %}
10920 
10921 // Convert a double to an int.  Java semantics require we handle the corner
10922 // cases specially.  So we set the rounding mode to 'zero', store the darned
10923 // double down as an int, and reset the rounding mode to 'nearest'.  The
10924 // hardware stores the integer-indefinite flag value (0x80000000) if we would
10925 // overflow or converted a NaN; we check for this and go down the slow path
10926 // if needed.
10927 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10928   predicate(UseSSE==0);
10929   match(Set dst (ConvF2I src));
10930   effect( KILL tmp, KILL cr );
10931   format %{ "FLD    $src\t# Convert float to int \n\t"
10932             "FLDCW  trunc mode\n\t"
10933             "SUB    ESP,4\n\t"
10934             "FISTp  [ESP + #0]\n\t"
10935             "FLDCW  std/24-bit mode\n\t"
10936             "POP    EAX\n\t"
10937             "CMP    EAX,0x80000000\n\t"
10938             "JNE,s  fast\n\t"
10939             "FLD    $src\n\t"
10940             "CALL   d2i_wrapper\n"
10941       "fast:" %}
10942   // DPR2I_encoding works for FPR2I
10943   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10944   ins_pipe( pipe_slow );
10945 %}
10946 
10947 // Convert a float in xmm to an int reg.
10948 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10949   predicate(UseSSE>=1);
10950   match(Set dst (ConvF2I src));
10951   effect( KILL tmp, KILL cr );
10952   format %{ "CVTTSS2SI $dst, $src\n\t"
10953             "CMP    $dst,0x80000000\n\t"
10954             "JNE,s  fast\n\t"
10955             "SUB    ESP, 4\n\t"
10956             "MOVSS  [ESP], $src\n\t"
10957             "FLD    [ESP]\n\t"
10958             "ADD    ESP, 4\n\t"
10959             "CALL   d2i_wrapper\n"
10960       "fast:" %}
10961   ins_encode %{
10962     Label fast;
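    // cvttss2si likewise yields 0x80000000 for NaN or out-of-range input;
    // fall into the wrapper call below in that case.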
10963     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10964     __ cmpl($dst$$Register, 0x80000000);
10965     __ jccb(Assembler::notEqual, fast);
10966     __ subptr(rsp, 4);
10967     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10968     __ fld_s(Address(rsp, 0));
10969     __ addptr(rsp, 4);
10970     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10971     __ bind(fast);
10972   %}
10973   ins_pipe( pipe_slow );
10974 %}
10975 
10976 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10977   predicate(UseSSE==0);
10978   match(Set dst (ConvF2L src));
10979   effect( KILL cr );
10980   format %{ "FLD    $src\t# Convert float to long\n\t"
10981             "FLDCW  trunc mode\n\t"
10982             "SUB    ESP,8\n\t"
10983             "FISTp  [ESP + #0]\n\t"
10984             "FLDCW  std/24-bit mode\n\t"
10985             "POP    EAX\n\t"
10986             "POP    EDX\n\t"
10987             "CMP    EDX,0x80000000\n\t"
10988             "JNE,s  fast\n\t"
10989             "TEST   EAX,EAX\n\t"
10990             "JNE,s  fast\n\t"
10991             "FLD    $src\n\t"
10992             "CALL   d2l_wrapper\n"
10993       "fast:" %}
10994   // DPR2L_encoding works for FPR2L
10995   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10996   ins_pipe( pipe_slow );
10997 %}
10998 
10999 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11000 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11001   predicate (UseSSE>=1);
11002   match(Set dst (ConvF2L src));
11003   effect( KILL cr );
11004   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11005             "MOVSS  [ESP],$src\n\t"
11006             "FLD_S  [ESP]\n\t"
11007             "FLDCW  trunc mode\n\t"
11008             "FISTp  [ESP + #0]\n\t"
11009             "FLDCW  std/24-bit mode\n\t"
11010             "POP    EAX\n\t"
11011             "POP    EDX\n\t"
11012             "CMP    EDX,0x80000000\n\t"
11013             "JNE,s  fast\n\t"
11014             "TEST   EAX,EAX\n\t"
11015             "JNE,s  fast\n\t"
11016             "SUB    ESP,4\t# Convert float to long\n\t"
11017             "MOVSS  [ESP],$src\n\t"
11018             "FLD_S  [ESP]\n\t"
11019             "ADD    ESP,4\n\t"
11020             "CALL   d2l_wrapper\n"
11021       "fast:" %}
11022   ins_encode %{
11023     Label fast;
11024     __ subptr(rsp, 8);
11025     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11026     __ fld_s(Address(rsp, 0));
11027     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11028     __ fistp_d(Address(rsp, 0));
11029     // Restore the rounding mode, mask the exception
11030     if (Compile::current()->in_24_bit_fp_mode()) {
11031       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11032     } else {
11033       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11034     }
11035     // Load the converted long, adjust CPU stack
11036     __ pop(rax);
11037     __ pop(rdx);
11038     __ cmpl(rdx, 0x80000000);
11039     __ jccb(Assembler::notEqual, fast);
11040     __ testl(rax, rax);
11041     __ jccb(Assembler::notEqual, fast);
11042     __ subptr(rsp, 4);
11043     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11044     __ fld_s(Address(rsp, 0));
11045     __ addptr(rsp, 4);
11046     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11047     __ bind(fast);
11048   %}
11049   ins_pipe( pipe_slow );
11050 %}
11051 
11052 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11053   predicate( UseSSE<=1 );
11054   match(Set dst (ConvI2D src));
11055   format %{ "FILD   $src\n\t"
11056             "FSTP   $dst" %}
11057   opcode(0xDB, 0x0);  /* DB /0 */
11058   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11059   ins_pipe( fpu_reg_mem );
11060 %}
11061 
11062 instruct convI2D_reg(regD dst, rRegI src) %{
11063   predicate( UseSSE>=2 && !UseXmmI2D );
11064   match(Set dst (ConvI2D src));
11065   format %{ "CVTSI2SD $dst,$src" %}
11066   ins_encode %{
11067     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11068   %}
11069   ins_pipe( pipe_slow );
11070 %}
11071 
11072 instruct convI2D_mem(regD dst, memory mem) %{
11073   predicate( UseSSE>=2 );
11074   match(Set dst (ConvI2D (LoadI mem)));
11075   format %{ "CVTSI2SD $dst,$mem" %}
11076   ins_encode %{
11077     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11078   %}
11079   ins_pipe( pipe_slow );
11080 %}
11081 
11082 instruct convXI2D_reg(regD dst, rRegI src)
11083 %{
11084   predicate( UseSSE>=2 && UseXmmI2D );
11085   match(Set dst (ConvI2D src));
11086 
11087   format %{ "MOVD  $dst,$src\n\t"
11088             "CVTDQ2PD $dst,$dst\t# i2d" %}
11089   ins_encode %{
11090     __ movdl($dst$$XMMRegister, $src$$Register);
11091     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11092   %}
11093   ins_pipe(pipe_slow); // XXX
11094 %}
11095 
11096 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11097   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11098   match(Set dst (ConvI2D (LoadI mem)));
11099   format %{ "FILD   $mem\n\t"
11100             "FSTP   $dst" %}
11101   opcode(0xDB);      /* DB /0 */
11102   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11103               Pop_Reg_DPR(dst));
11104   ins_pipe( fpu_reg_mem );
11105 %}
11106 
11107 // Convert a byte to a float; no rounding step needed.
11108 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11109   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11110   match(Set dst (ConvI2F src));
11111   format %{ "FILD   $src\n\t"
11112             "FSTP   $dst" %}
11113 
11114   opcode(0xDB, 0x0);  /* DB /0 */
11115   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11116   ins_pipe( fpu_reg_mem );
11117 %}
11118 
11119 // In 24-bit mode, force exponent rounding by storing back out
11120 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   ins_cost(200);
11124   format %{ "FILD   $src\n\t"
11125             "FSTP_S $dst" %}
11126   opcode(0xDB, 0x0);  /* DB /0 */
11127   ins_encode( Push_Mem_I(src),
11128               Pop_Mem_FPR(dst));
11129   ins_pipe( fpu_mem_mem );
11130 %}
11131 
11132 // In 24-bit mode, force exponent rounding by storing back out
11133 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11134   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11135   match(Set dst (ConvI2F (LoadI mem)));
11136   ins_cost(200);
11137   format %{ "FILD   $mem\n\t"
11138             "FSTP_S $dst" %}
11139   opcode(0xDB);  /* DB /0 */
11140   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11141               Pop_Mem_FPR(dst));
11142   ins_pipe( fpu_mem_mem );
11143 %}
11144 
11145 // This instruction does not round to 24-bits
11146 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11147   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11148   match(Set dst (ConvI2F src));
11149   format %{ "FILD   $src\n\t"
11150             "FSTP   $dst" %}
11151   opcode(0xDB, 0x0);  /* DB /0 */
11152   ins_encode( Push_Mem_I(src),
11153               Pop_Reg_FPR(dst));
11154   ins_pipe( fpu_reg_mem );
11155 %}
11156 
11157 // This instruction does not round to 24-bits
11158 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11159   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11160   match(Set dst (ConvI2F (LoadI mem)));
11161   format %{ "FILD   $mem\n\t"
11162             "FSTP   $dst" %}
11163   opcode(0xDB);      /* DB /0 */
11164   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11165               Pop_Reg_FPR(dst));
11166   ins_pipe( fpu_reg_mem );
11167 %}
11168 
11169 // Convert an int to a float in xmm; no rounding step needed.
11170 instruct convI2F_reg(regF dst, rRegI src) %{
11171   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11172   match(Set dst (ConvI2F src));
11173   format %{ "CVTSI2SS $dst, $src" %}
11174   ins_encode %{
11175     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11176   %}
11177   ins_pipe( pipe_slow );
11178 %}
11179 
instruct convXI2F_reg(regF dst, rRegI src)
11181 %{
11182   predicate( UseSSE>=2 && UseXmmI2F );
11183   match(Set dst (ConvI2F src));
11184 
11185   format %{ "MOVD  $dst,$src\n\t"
11186             "CVTDQ2PS $dst,$dst\t# i2f" %}
11187   ins_encode %{
11188     __ movdl($dst$$XMMRegister, $src$$Register);
11189     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11190   %}
11191   ins_pipe(pipe_slow); // XXX
11192 %}
11193 
11194 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11195   match(Set dst (ConvI2L src));
11196   effect(KILL cr);
11197   ins_cost(375);
11198   format %{ "MOV    $dst.lo,$src\n\t"
11199             "MOV    $dst.hi,$src\n\t"
11200             "SAR    $dst.hi,31" %}
11201   ins_encode(convert_int_long(dst,src));
11202   ins_pipe( ialu_reg_reg_long );
11203 %}
11204 
11205 // Zero-extend convert int to long
11206 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11207   match(Set dst (AndL (ConvI2L src) mask) );
11208   effect( KILL flags );
11209   ins_cost(250);
11210   format %{ "MOV    $dst.lo,$src\n\t"
11211             "XOR    $dst.hi,$dst.hi" %}
11212   opcode(0x33); // XOR
11213   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11214   ins_pipe( ialu_reg_reg_long );
11215 %}
11216 
11217 // Zero-extend long
11218 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11219   match(Set dst (AndL src mask) );
11220   effect( KILL flags );
11221   ins_cost(250);
11222   format %{ "MOV    $dst.lo,$src.lo\n\t"
11223             "XOR    $dst.hi,$dst.hi\n\t" %}
11224   opcode(0x33); // XOR
11225   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11226   ins_pipe( ialu_reg_reg_long );
11227 %}
11228 
11229 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11230   predicate (UseSSE<=1);
11231   match(Set dst (ConvL2D src));
11232   effect( KILL cr );
11233   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11234             "PUSH   $src.lo\n\t"
11235             "FILD   ST,[ESP + #0]\n\t"
11236             "ADD    ESP,8\n\t"
11237             "FSTP_D $dst\t# D-round" %}
11238   opcode(0xDF, 0x5);  /* DF /5 */
11239   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11240   ins_pipe( pipe_slow );
11241 %}
11242 
11243 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11244   predicate (UseSSE>=2);
11245   match(Set dst (ConvL2D src));
11246   effect( KILL cr );
11247   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11248             "PUSH   $src.lo\n\t"
11249             "FILD_D [ESP]\n\t"
11250             "FSTP_D [ESP]\n\t"
11251             "MOVSD  $dst,[ESP]\n\t"
11252             "ADD    ESP,8" %}
11253   opcode(0xDF, 0x5);  /* DF /5 */
11254   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11255   ins_pipe( pipe_slow );
11256 %}
11257 
11258 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11259   predicate (UseSSE>=1);
11260   match(Set dst (ConvL2F src));
11261   effect( KILL cr );
11262   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11263             "PUSH   $src.lo\n\t"
11264             "FILD_D [ESP]\n\t"
11265             "FSTP_S [ESP]\n\t"
11266             "MOVSS  $dst,[ESP]\n\t"
11267             "ADD    ESP,8" %}
11268   opcode(0xDF, 0x5);  /* DF /5 */
11269   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11270   ins_pipe( pipe_slow );
11271 %}
11272 
11273 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11274   match(Set dst (ConvL2F src));
11275   effect( KILL cr );
11276   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11277             "PUSH   $src.lo\n\t"
11278             "FILD   ST,[ESP + #0]\n\t"
11279             "ADD    ESP,8\n\t"
11280             "FSTP_S $dst\t# F-round" %}
11281   opcode(0xDF, 0x5);  /* DF /5 */
11282   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11283   ins_pipe( pipe_slow );
11284 %}
11285 
11286 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11287   match(Set dst (ConvL2I src));
11288   effect( DEF dst, USE src );
11289   format %{ "MOV    $dst,$src.lo" %}
11290   ins_encode(enc_CopyL_Lo(dst,src));
11291   ins_pipe( ialu_reg_reg );
11292 %}
11293 
11294 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297   ins_cost(100);
11298   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11299   ins_encode %{
11300     __ movl($dst$$Register, Address(rsp, $src$$disp));
11301   %}
11302   ins_pipe( ialu_reg_mem );
11303 %}
11304 
11305 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11306   predicate(UseSSE==0);
11307   match(Set dst (MoveF2I src));
11308   effect( DEF dst, USE src );
11309 
11310   ins_cost(125);
11311   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11312   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11313   ins_pipe( fpu_mem_reg );
11314 %}
11315 
11316 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11317   predicate(UseSSE>=1);
11318   match(Set dst (MoveF2I src));
11319   effect( DEF dst, USE src );
11320 
11321   ins_cost(95);
11322   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11323   ins_encode %{
11324     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11325   %}
11326   ins_pipe( pipe_slow );
11327 %}
11328 
11329 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11330   predicate(UseSSE>=2);
11331   match(Set dst (MoveF2I src));
11332   effect( DEF dst, USE src );
11333   ins_cost(85);
11334   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11335   ins_encode %{
11336     __ movdl($dst$$Register, $src$$XMMRegister);
11337   %}
11338   ins_pipe( pipe_slow );
11339 %}
11340 
11341 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11342   match(Set dst (MoveI2F src));
11343   effect( DEF dst, USE src );
11344 
11345   ins_cost(100);
11346   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11347   ins_encode %{
11348     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11349   %}
11350   ins_pipe( ialu_mem_reg );
11351 %}
11352 
11353 
11354 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11355   predicate(UseSSE==0);
11356   match(Set dst (MoveI2F src));
11357   effect(DEF dst, USE src);
11358 
11359   ins_cost(125);
11360   format %{ "FLD_S  $src\n\t"
11361             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11362   opcode(0xD9);               /* D9 /0, FLD m32real */
11363   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11364               Pop_Reg_FPR(dst) );
11365   ins_pipe( fpu_reg_mem );
11366 %}
11367 
11368 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11369   predicate(UseSSE>=1);
11370   match(Set dst (MoveI2F src));
11371   effect( DEF dst, USE src );
11372 
11373   ins_cost(95);
11374   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11375   ins_encode %{
11376     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11377   %}
11378   ins_pipe( pipe_slow );
11379 %}
11380 
11381 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11382   predicate(UseSSE>=2);
11383   match(Set dst (MoveI2F src));
11384   effect( DEF dst, USE src );
11385 
11386   ins_cost(85);
11387   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11388   ins_encode %{
11389     __ movdl($dst$$XMMRegister, $src$$Register);
11390   %}
11391   ins_pipe( pipe_slow );
11392 %}
11393 
11394 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11395   match(Set dst (MoveD2L src));
11396   effect(DEF dst, USE src);
11397 
11398   ins_cost(250);
11399   format %{ "MOV    $dst.lo,$src\n\t"
11400             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11401   opcode(0x8B, 0x8B);
11402   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11403   ins_pipe( ialu_mem_long_reg );
11404 %}
11405 
11406 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11407   predicate(UseSSE<=1);
11408   match(Set dst (MoveD2L src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(125);
11412   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11413   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11414   ins_pipe( fpu_mem_reg );
11415 %}
11416 
11417 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11418   predicate(UseSSE>=2);
11419   match(Set dst (MoveD2L src));
11420   effect(DEF dst, USE src);
11421   ins_cost(95);
11422   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11423   ins_encode %{
11424     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11425   %}
11426   ins_pipe( pipe_slow );
11427 %}
11428 
11429 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11430   predicate(UseSSE>=2);
11431   match(Set dst (MoveD2L src));
11432   effect(DEF dst, USE src, TEMP tmp);
11433   ins_cost(85);
11434   format %{ "MOVD   $dst.lo,$src\n\t"
11435             "PSHUFLW $tmp,$src,0x4E\n\t"
11436             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11437   ins_encode %{
11438     __ movdl($dst$$Register, $src$$XMMRegister);
11439     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11440     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11441   %}
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11446   match(Set dst (MoveL2D src));
11447   effect(DEF dst, USE src);
11448 
11449   ins_cost(200);
11450   format %{ "MOV    $dst,$src.lo\n\t"
11451             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11452   opcode(0x89, 0x89);
11453   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11454   ins_pipe( ialu_mem_long_reg );
11455 %}
11456 
11457 
11458 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11459   predicate(UseSSE<=1);
11460   match(Set dst (MoveL2D src));
11461   effect(DEF dst, USE src);
11462   ins_cost(125);
11463 
11464   format %{ "FLD_D  $src\n\t"
11465             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11466   opcode(0xDD);               /* DD /0, FLD m64real */
11467   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11468               Pop_Reg_DPR(dst) );
11469   ins_pipe( fpu_reg_mem );
11470 %}
11471 
11472 
11473 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11474   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11475   match(Set dst (MoveL2D src));
11476   effect(DEF dst, USE src);
11477 
11478   ins_cost(95);
11479   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11480   ins_encode %{
11481     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11482   %}
11483   ins_pipe( pipe_slow );
11484 %}
11485 
11486 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11487   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11488   match(Set dst (MoveL2D src));
11489   effect(DEF dst, USE src);
11490 
11491   ins_cost(95);
11492   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11493   ins_encode %{
11494     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11495   %}
11496   ins_pipe( pipe_slow );
11497 %}
11498 
11499 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11500   predicate(UseSSE>=2);
11501   match(Set dst (MoveL2D src));
11502   effect(TEMP dst, USE src, TEMP tmp);
11503   ins_cost(85);
11504   format %{ "MOVD   $dst,$src.lo\n\t"
11505             "MOVD   $tmp,$src.hi\n\t"
11506             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11507   ins_encode %{
11508     __ movdl($dst$$XMMRegister, $src$$Register);
11509     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11510     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11511   %}
11512   ins_pipe( pipe_slow );
11513 %}
11514 
11515 
11516 // =======================================================================
11517 // fast clearing of an array
11518 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11519   predicate(!((ClearArrayNode*)n)->is_large());
11520   match(Set dummy (ClearArray cnt base));
11521   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11522 
11523   format %{ $$template
11524     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11525     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11526     $$emit$$"JG     LARGE\n\t"
11527     $$emit$$"SHL    ECX, 1\n\t"
11528     $$emit$$"DEC    ECX\n\t"
11529     $$emit$$"JS     DONE\t# Zero length\n\t"
11530     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11531     $$emit$$"DEC    ECX\n\t"
11532     $$emit$$"JGE    LOOP\n\t"
11533     $$emit$$"JMP    DONE\n\t"
11534     $$emit$$"# LARGE:\n\t"
11535     if (UseFastStosb) {
11536        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11537        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11538     } else if (UseXMMForObjInit) {
11539        $$emit$$"MOV     RDI,RAX\n\t"
11540        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11541        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11542        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11543        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11544        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11545        $$emit$$"ADD     0x40,RAX\n\t"
11546        $$emit$$"# L_zero_64_bytes:\n\t"
11547        $$emit$$"SUB     0x8,RCX\n\t"
11548        $$emit$$"JGE     L_loop\n\t"
11549        $$emit$$"ADD     0x4,RCX\n\t"
11550        $$emit$$"JL      L_tail\n\t"
11551        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11552        $$emit$$"ADD     0x20,RAX\n\t"
11553        $$emit$$"SUB     0x4,RCX\n\t"
11554        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11555        $$emit$$"ADD     0x4,RCX\n\t"
11556        $$emit$$"JLE     L_end\n\t"
11557        $$emit$$"DEC     RCX\n\t"
11558        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11559        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11560        $$emit$$"ADD     0x8,RAX\n\t"
11561        $$emit$$"DEC     RCX\n\t"
11562        $$emit$$"JGE     L_sloop\n\t"
11563        $$emit$$"# L_end:\n\t"
11564     } else {
11565        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11566        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11567     }
11568     $$emit$$"# DONE"
11569   %}
11570   ins_encode %{
11571     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11572                  $tmp$$XMMRegister, false);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11578   predicate(((ClearArrayNode*)n)->is_large());
11579   match(Set dummy (ClearArray cnt base));
11580   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11581   format %{ $$template
11582     if (UseFastStosb) {
11583        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11584        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11585        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11586     } else if (UseXMMForObjInit) {
11587        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11588        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11589        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11590        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11591        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11592        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11593        $$emit$$"ADD     0x40,RAX\n\t"
11594        $$emit$$"# L_zero_64_bytes:\n\t"
11595        $$emit$$"SUB     0x8,RCX\n\t"
11596        $$emit$$"JGE     L_loop\n\t"
11597        $$emit$$"ADD     0x4,RCX\n\t"
11598        $$emit$$"JL      L_tail\n\t"
11599        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11600        $$emit$$"ADD     0x20,RAX\n\t"
11601        $$emit$$"SUB     0x4,RCX\n\t"
11602        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11603        $$emit$$"ADD     0x4,RCX\n\t"
11604        $$emit$$"JLE     L_end\n\t"
11605        $$emit$$"DEC     RCX\n\t"
11606        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11607        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11608        $$emit$$"ADD     0x8,RAX\n\t"
11609        $$emit$$"DEC     RCX\n\t"
11610        $$emit$$"JGE     L_sloop\n\t"
11611        $$emit$$"# L_end:\n\t"
11612     } else {
11613        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11614        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11615        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11616     }
11617     $$emit$$"# DONE"
11618   %}
11619   ins_encode %{
11620     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11621                  $tmp$$XMMRegister, true);
11622   %}
11623   ins_pipe( pipe_slow );
11624 %}
11625 
11626 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11627                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11628   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11629   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11630   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11631 
11632   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11633   ins_encode %{
11634     __ string_compare($str1$$Register, $str2$$Register,
11635                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11636                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11642                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11643   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11644   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11645   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11646 
11647   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11648   ins_encode %{
11649     __ string_compare($str1$$Register, $str2$$Register,
11650                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11651                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11652   %}
11653   ins_pipe( pipe_slow );
11654 %}
11655 
11656 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11657                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11658   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11659   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11660   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11661 
11662   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11663   ins_encode %{
11664     __ string_compare($str1$$Register, $str2$$Register,
11665                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11666                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11667   %}
11668   ins_pipe( pipe_slow );
11669 %}
11670 
11671 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11672                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11673   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11674   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11675   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11676 
11677   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11678   ins_encode %{
11679     __ string_compare($str2$$Register, $str1$$Register,
11680                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11681                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11682   %}
11683   ins_pipe( pipe_slow );
11684 %}
11685 
11686 // fast string equals
11687 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11688                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11689   match(Set result (StrEquals (Binary str1 str2) cnt));
11690   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11691 
11692   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11693   ins_encode %{
11694     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11695                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11696                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11697   %}
11698 
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 // fast search of substring with known size.
11703 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11704                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11705   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11706   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11707   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11708 
11709   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11710   ins_encode %{
11711     int icnt2 = (int)$int_cnt2$$constant;
11712     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements,
      // which don't need to be loaded through the stack.
11715       __ string_indexofC8($str1$$Register, $str2$$Register,
11716                           $cnt1$$Register, $cnt2$$Register,
11717                           icnt2, $result$$Register,
11718                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11719     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11721       __ string_indexof($str1$$Register, $str2$$Register,
11722                         $cnt1$$Register, $cnt2$$Register,
11723                         icnt2, $result$$Register,
11724                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11725     }
11726   %}
11727   ins_pipe( pipe_slow );
11728 %}
11729 
11730 // fast search of substring with known size.
11731 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11732                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11733   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11734   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11735   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11736 
11737   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11738   ins_encode %{
11739     int icnt2 = (int)$int_cnt2$$constant;
11740     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11743       __ string_indexofC8($str1$$Register, $str2$$Register,
11744                           $cnt1$$Register, $cnt2$$Register,
11745                           icnt2, $result$$Register,
11746                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11747     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11749       __ string_indexof($str1$$Register, $str2$$Register,
11750                         $cnt1$$Register, $cnt2$$Register,
11751                         icnt2, $result$$Register,
11752                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11753     }
11754   %}
11755   ins_pipe( pipe_slow );
11756 %}
11757 
11758 // fast search of substring with known size.
11759 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11760                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11761   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11762   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11763   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11764 
11765   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11766   ins_encode %{
11767     int icnt2 = (int)$int_cnt2$$constant;
11768     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11771       __ string_indexofC8($str1$$Register, $str2$$Register,
11772                           $cnt1$$Register, $cnt2$$Register,
11773                           icnt2, $result$$Register,
11774                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11775     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11777       __ string_indexof($str1$$Register, $str2$$Register,
11778                         $cnt1$$Register, $cnt2$$Register,
11779                         icnt2, $result$$Register,
11780                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11781     }
11782   %}
11783   ins_pipe( pipe_slow );
11784 %}
11785 
11786 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11787                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11788   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11789   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11790   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11791 
11792   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11793   ins_encode %{
11794     __ string_indexof($str1$$Register, $str2$$Register,
11795                       $cnt1$$Register, $cnt2$$Register,
11796                       (-1), $result$$Register,
11797                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11798   %}
11799   ins_pipe( pipe_slow );
11800 %}
11801 
11802 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11803                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11804   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11805   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11806   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11807 
11808   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11809   ins_encode %{
11810     __ string_indexof($str1$$Register, $str2$$Register,
11811                       $cnt1$$Register, $cnt2$$Register,
11812                       (-1), $result$$Register,
11813                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11819                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11820   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11821   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11822   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11823 
11824   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11825   ins_encode %{
11826     __ string_indexof($str1$$Register, $str2$$Register,
11827                       $cnt1$$Register, $cnt2$$Register,
11828                       (-1), $result$$Register,
11829                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11830   %}
11831   ins_pipe( pipe_slow );
11832 %}
11833 
11834 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11835                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11836   predicate(UseSSE42Intrinsics);
11837   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11838   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11839   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11840   ins_encode %{
11841     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11842                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 // fast array equals
11848 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11849                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11850 %{
11851   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11852   match(Set result (AryEq ary1 ary2));
11853   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11854   //ins_cost(300);
11855 
11856   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11857   ins_encode %{
11858     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11859                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11860                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11861   %}
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11866                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11867 %{
11868   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11869   match(Set result (AryEq ary1 ary2));
11870   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11871   //ins_cost(300);
11872 
11873   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11874   ins_encode %{
11875     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11876                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11877                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11878   %}
11879   ins_pipe( pipe_slow );
11880 %}
11881 
11882 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11883                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11884 %{
11885   match(Set result (HasNegatives ary1 len));
11886   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11887 
11888   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11889   ins_encode %{
11890     __ has_negatives($ary1$$Register, $len$$Register,
11891                      $result$$Register, $tmp3$$Register,
11892                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11893   %}
11894   ins_pipe( pipe_slow );
11895 %}
11896 
11897 // fast char[] to byte[] compression
11898 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11899                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11900   match(Set result (StrCompressedCopy src (Binary dst len)));
11901   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11902 
11903   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11904   ins_encode %{
11905     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11906                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11907                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11908   %}
11909   ins_pipe( pipe_slow );
11910 %}
11911 
11912 // fast byte[] to char[] inflation
11913 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11914                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11915   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11916   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11917 
11918   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11919   ins_encode %{
11920     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11921                           $tmp1$$XMMRegister, $tmp2$$Register);
11922   %}
11923   ins_pipe( pipe_slow );
11924 %}
11925 
11926 // encode char[] to byte[] in ISO_8859_1
11927 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11928                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11929                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11930   match(Set result (EncodeISOArray src (Binary dst len)));
11931   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11932 
11933   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11934   ins_encode %{
11935     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11936                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11937                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11938   %}
11939   ins_pipe( pipe_slow );
11940 %}
11941 
11942 
11943 //----------Control Flow Instructions------------------------------------------
11944 // Signed compare Instructions
11945 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11946   match(Set cr (CmpI op1 op2));
11947   effect( DEF cr, USE op1, USE op2 );
11948   format %{ "CMP    $op1,$op2" %}
11949   opcode(0x3B);  /* Opcode 3B /r */
11950   ins_encode( OpcP, RegReg( op1, op2) );
11951   ins_pipe( ialu_cr_reg_reg );
11952 %}
11953 
11954 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11955   match(Set cr (CmpI op1 op2));
11956   effect( DEF cr, USE op1 );
11957   format %{ "CMP    $op1,$op2" %}
11958   opcode(0x81,0x07);  /* Opcode 81 /7 */
11959   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11960   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11961   ins_pipe( ialu_cr_reg_imm );
11962 %}
11963 
11964 // Cisc-spilled version of cmpI_eReg
11965 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11966   match(Set cr (CmpI op1 (LoadI op2)));
11967 
11968   format %{ "CMP    $op1,$op2" %}
11969   ins_cost(500);
11970   opcode(0x3B);  /* Opcode 3B /r */
11971   ins_encode( OpcP, RegMem( op1, op2) );
11972   ins_pipe( ialu_cr_reg_mem );
11973 %}
11974 
11975 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11976   match(Set cr (CmpI src zero));
11977   effect( DEF cr, USE src );
11978 
11979   format %{ "TEST   $src,$src" %}
11980   opcode(0x85);
11981   ins_encode( OpcP, RegReg( src, src ) );
11982   ins_pipe( ialu_cr_reg_imm );
11983 %}
11984 
11985 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11986   match(Set cr (CmpI (AndI src con) zero));
11987 
11988   format %{ "TEST   $src,$con" %}
11989   opcode(0xF7,0x00);
11990   ins_encode( OpcP, RegOpc(src), Con32(con) );
11991   ins_pipe( ialu_cr_reg_imm );
11992 %}
11993 
11994 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11995   match(Set cr (CmpI (AndI src mem) zero));
11996 
11997   format %{ "TEST   $src,$mem" %}
11998   opcode(0x85);
11999   ins_encode( OpcP, RegMem( src, mem ) );
12000   ins_pipe( ialu_cr_reg_mem );
12001 %}
12002 
12003 // Unsigned compare Instructions; really, same as signed except they
12004 // produce an eFlagsRegU instead of eFlagsReg.
12005 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12006   match(Set cr (CmpU op1 op2));
12007 
12008   format %{ "CMPu   $op1,$op2" %}
12009   opcode(0x3B);  /* Opcode 3B /r */
12010   ins_encode( OpcP, RegReg( op1, op2) );
12011   ins_pipe( ialu_cr_reg_reg );
12012 %}
12013 
12014 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12015   match(Set cr (CmpU op1 op2));
12016 
12017   format %{ "CMPu   $op1,$op2" %}
12018   opcode(0x81,0x07);  /* Opcode 81 /7 */
12019   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12020   ins_pipe( ialu_cr_reg_imm );
12021 %}
12022 
// Cisc-spilled version of cmpU_eReg
12024 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12025   match(Set cr (CmpU op1 (LoadI op2)));
12026 
12027   format %{ "CMPu   $op1,$op2" %}
12028   ins_cost(500);
12029   opcode(0x3B);  /* Opcode 3B /r */
12030   ins_encode( OpcP, RegMem( op1, op2) );
12031   ins_pipe( ialu_cr_reg_mem );
12032 %}
12033 
12034 // // Cisc-spilled version of cmpU_eReg
12035 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12036 //  match(Set cr (CmpU (LoadI op1) op2));
12037 //
12038 //  format %{ "CMPu   $op1,$op2" %}
12039 //  ins_cost(500);
12040 //  opcode(0x39);  /* Opcode 39 /r */
12041 //  ins_encode( OpcP, RegMem( op1, op2) );
12042 //%}
12043 
12044 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12045   match(Set cr (CmpU src zero));
12046 
12047   format %{ "TESTu  $src,$src" %}
12048   opcode(0x85);
12049   ins_encode( OpcP, RegReg( src, src ) );
12050   ins_pipe( ialu_cr_reg_imm );
12051 %}
12052 
12053 // Unsigned pointer compare Instructions
12054 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12055   match(Set cr (CmpP op1 op2));
12056 
12057   format %{ "CMPu   $op1,$op2" %}
12058   opcode(0x3B);  /* Opcode 3B /r */
12059   ins_encode( OpcP, RegReg( op1, op2) );
12060   ins_pipe( ialu_cr_reg_reg );
12061 %}
12062 
12063 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12064   match(Set cr (CmpP op1 op2));
12065 
12066   format %{ "CMPu   $op1,$op2" %}
12067   opcode(0x81,0x07);  /* Opcode 81 /7 */
12068   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12069   ins_pipe( ialu_cr_reg_imm );
12070 %}
12071 
// Cisc-spilled version of cmpP_eReg
12073 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12074   match(Set cr (CmpP op1 (LoadP op2)));
12075 
12076   format %{ "CMPu   $op1,$op2" %}
12077   ins_cost(500);
12078   opcode(0x3B);  /* Opcode 3B /r */
12079   ins_encode( OpcP, RegMem( op1, op2) );
12080   ins_pipe( ialu_cr_reg_mem );
12081 %}
12082 
12083 // // Cisc-spilled version of cmpP_eReg
12084 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12085 //  match(Set cr (CmpP (LoadP op1) op2));
12086 //
12087 //  format %{ "CMPu   $op1,$op2" %}
12088 //  ins_cost(500);
12089 //  opcode(0x39);  /* Opcode 39 /r */
12090 //  ins_encode( OpcP, RegMem( op1, op2) );
12091 //%}
12092 
12093 // Compare raw pointer (used in out-of-heap check).
12094 // Only works because non-oop pointers must be raw pointers
12095 // and raw pointers have no anti-dependencies.
12096 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12097   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12098   match(Set cr (CmpP op1 (LoadP op2)));
12099 
12100   format %{ "CMPu   $op1,$op2" %}
12101   opcode(0x3B);  /* Opcode 3B /r */
12102   ins_encode( OpcP, RegMem( op1, op2) );
12103   ins_pipe( ialu_cr_reg_mem );
12104 %}
12105 
12106 //
// This will generate a signed flags result, which should be fine
// since any compare against zero should be eq/neq.
12109 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12110   match(Set cr (CmpP src zero));
12111 
12112   format %{ "TEST   $src,$src" %}
12113   opcode(0x85);
12114   ins_encode( OpcP, RegReg( src, src ) );
12115   ins_pipe( ialu_cr_reg_imm );
12116 %}
12117 
12118 // Cisc-spilled version of testP_reg
// This will generate a signed flags result, which should be fine
// since any compare against zero should be eq/neq.
12121 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12122   match(Set cr (CmpP (LoadP op) zero));
12123 
12124   format %{ "TEST   $op,0xFFFFFFFF" %}
12125   ins_cost(500);
12126   opcode(0xF7);               /* Opcode F7 /0 */
12127   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12128   ins_pipe( ialu_cr_reg_imm );
12129 %}
12130 
12131 // Yanked all unsigned pointer compare operations.
12132 // Pointer compares are done with CmpP which is already unsigned.
12133 
12134 //----------Max and Min--------------------------------------------------------
12135 // Min Instructions
12136 ////
12137 //   *** Min and Max using the conditional move are slower than the
12138 //   *** branch version on a Pentium III.
12139 // // Conditional move for min
12140 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12141 //  effect( USE_DEF op2, USE op1, USE cr );
12142 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12143 //  opcode(0x4C,0x0F);
12144 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12145 //  ins_pipe( pipe_cmov_reg );
12146 //%}
12147 //
12148 //// Min Register with Register (P6 version)
12149 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12150 //  predicate(VM_Version::supports_cmov() );
12151 //  match(Set op2 (MinI op1 op2));
12152 //  ins_cost(200);
12153 //  expand %{
12154 //    eFlagsReg cr;
12155 //    compI_eReg(cr,op1,op2);
12156 //    cmovI_reg_lt(op2,op1,cr);
12157 //  %}
12158 //%}
12159 
12160 // Min Register with Register (generic version)
12161 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12162   match(Set dst (MinI dst src));
12163   effect(KILL flags);
12164   ins_cost(300);
12165 
12166   format %{ "MIN    $dst,$src" %}
12167   opcode(0xCC);
12168   ins_encode( min_enc(dst,src) );
12169   ins_pipe( pipe_slow );
12170 %}
12171 
12172 // Max Register with Register
12173 //   *** Min and Max using the conditional move are slower than the
12174 //   *** branch version on a Pentium III.
12175 // // Conditional move for max
12176 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12177 //  effect( USE_DEF op2, USE op1, USE cr );
12178 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12179 //  opcode(0x4F,0x0F);
12180 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12181 //  ins_pipe( pipe_cmov_reg );
12182 //%}
12183 //
12184 // // Max Register with Register (P6 version)
12185 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12186 //  predicate(VM_Version::supports_cmov() );
12187 //  match(Set op2 (MaxI op1 op2));
12188 //  ins_cost(200);
12189 //  expand %{
12190 //    eFlagsReg cr;
12191 //    compI_eReg(cr,op1,op2);
12192 //    cmovI_reg_gt(op2,op1,cr);
12193 //  %}
12194 //%}
12195 
12196 // Max Register with Register (generic version)
12197 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12198   match(Set dst (MaxI dst src));
12199   effect(KILL flags);
12200   ins_cost(300);
12201 
12202   format %{ "MAX    $dst,$src" %}
12203   opcode(0xCC);
12204   ins_encode( max_enc(dst,src) );
12205   ins_pipe( pipe_slow );
12206 %}
12207 
12208 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range, since
// counted loops have a limit check for overflow.
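// Illustrative example (not from the original source): with init = 0,
// limit = 10 and stride = 3 the exact final iterator value is
// 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12.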
12212 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12213   match(Set limit (LoopLimit (Binary init limit) stride));
12214   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12215   ins_cost(300);
12216 
12217   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12218   ins_encode %{
12219     int strd = (int)$stride$$constant;
12220     assert(strd != 1 && strd != -1, "sanity");
12221     int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EDX:EAX)
12223     __ cdql();
12224     // Convert init to long (init:tmp)
12225     __ movl($tmp$$Register, $init$$Register);
12226     __ sarl($tmp$$Register, 31);
12227     // $limit - $init
12228     __ subl($limit$$Register, $init$$Register);
12229     __ sbbl($limit_hi$$Register, $tmp$$Register);
12230     // + ($stride - 1)
12231     if (strd > 0) {
12232       __ addl($limit$$Register, (strd - 1));
12233       __ adcl($limit_hi$$Register, 0);
12234       __ movl($tmp$$Register, strd);
12235     } else {
12236       __ addl($limit$$Register, (strd + 1));
12237       __ adcl($limit_hi$$Register, -1);
12238       __ lneg($limit_hi$$Register, $limit$$Register);
12239       __ movl($tmp$$Register, -strd);
12240     }
    // signed division: (EDX:EAX) / pos_stride
12242     __ idivl($tmp$$Register);
12243     if (strd < 0) {
12244       // restore sign
12245       __ negl($tmp$$Register);
12246     }
12247     // (EAX) * stride
12248     __ mull($tmp$$Register);
12249     // + init (ignore upper bits)
12250     __ addl($limit$$Register, $init$$Register);
12251   %}
12252   ins_pipe( pipe_slow );
12253 %}
12254 
12255 // ============================================================================
12256 // Branch Instructions
12257 // Jump Table
12258 instruct jumpXtnd(rRegI switch_val) %{
12259   match(Jump switch_val);
12260   ins_cost(350);
12261   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12262   ins_encode %{
12263     // Jump to Address(table_base + switch_reg)
12264     Address index(noreg, $switch_val$$Register, Address::times_1);
12265     __ jump(ArrayAddress($constantaddress, index));
12266   %}
12267   ins_pipe(pipe_jmp);
12268 %}
12269 
12270 // Jump Direct - Label defines a relative address from JMP+1
12271 instruct jmpDir(label labl) %{
12272   match(Goto);
12273   effect(USE labl);
12274 
12275   ins_cost(300);
12276   format %{ "JMP    $labl" %}
12277   size(5);
12278   ins_encode %{
12279     Label* L = $labl$$label;
12280     __ jmp(*L, false); // Always long jump
12281   %}
12282   ins_pipe( pipe_jmp );
12283 %}
12284 
12285 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12286 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12287   match(If cop cr);
12288   effect(USE labl);
12289 
12290   ins_cost(300);
12291   format %{ "J$cop    $labl" %}
12292   size(6);
12293   ins_encode %{
12294     Label* L = $labl$$label;
12295     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12296   %}
12297   ins_pipe( pipe_jcc );
12298 %}
12299 
12300 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12301 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12302   predicate(!n->has_vector_mask_set());
12303   match(CountedLoopEnd cop cr);
12304   effect(USE labl);
12305 
12306   ins_cost(300);
12307   format %{ "J$cop    $labl\t# Loop end" %}
12308   size(6);
12309   ins_encode %{
12310     Label* L = $labl$$label;
12311     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12312   %}
12313   ins_pipe( pipe_jcc );
12314 %}
12315 
12316 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12317 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12318   predicate(!n->has_vector_mask_set());
12319   match(CountedLoopEnd cop cmp);
12320   effect(USE labl);
12321 
12322   ins_cost(300);
12323   format %{ "J$cop,u  $labl\t# Loop end" %}
12324   size(6);
12325   ins_encode %{
12326     Label* L = $labl$$label;
12327     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12328   %}
12329   ins_pipe( pipe_jcc );
12330 %}
12331 
12332 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12333   predicate(!n->has_vector_mask_set());
12334   match(CountedLoopEnd cop cmp);
12335   effect(USE labl);
12336 
12337   ins_cost(200);
12338   format %{ "J$cop,u  $labl\t# Loop end" %}
12339   size(6);
12340   ins_encode %{
12341     Label* L = $labl$$label;
12342     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12343   %}
12344   ins_pipe( pipe_jcc );
12345 %}
12346 
12347 // mask version
12348 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12349 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12350   predicate(n->has_vector_mask_set());
12351   match(CountedLoopEnd cop cr);
12352   effect(USE labl);
12353 
12354   ins_cost(400);
12355   format %{ "J$cop    $labl\t# Loop end\n\t"
12356             "restorevectmask \t# vector mask restore for loops" %}
12357   size(10);
12358   ins_encode %{
12359     Label* L = $labl$$label;
12360     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12361     __ restorevectmask();
12362   %}
12363   ins_pipe( pipe_jcc );
12364 %}
12365 
12366 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12367 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12368   predicate(n->has_vector_mask_set());
12369   match(CountedLoopEnd cop cmp);
12370   effect(USE labl);
12371 
12372   ins_cost(400);
12373   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12374             "restorevectmask \t# vector mask restore for loops" %}
12375   size(10);
12376   ins_encode %{
12377     Label* L = $labl$$label;
12378     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12379     __ restorevectmask();
12380   %}
12381   ins_pipe( pipe_jcc );
12382 %}
12383 
12384 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12385   predicate(n->has_vector_mask_set());
12386   match(CountedLoopEnd cop cmp);
12387   effect(USE labl);
12388 
12389   ins_cost(300);
12390   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12391             "restorevectmask \t# vector mask restore for loops" %}
12392   size(10);
12393   ins_encode %{
12394     Label* L = $labl$$label;
12395     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12396     __ restorevectmask();
12397   %}
12398   ins_pipe( pipe_jcc );
12399 %}
12400 
12401 // Jump Direct Conditional - using unsigned comparison
12402 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12403   match(If cop cmp);
12404   effect(USE labl);
12405 
12406   ins_cost(300);
12407   format %{ "J$cop,u  $labl" %}
12408   size(6);
12409   ins_encode %{
12410     Label* L = $labl$$label;
12411     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12412   %}
12413   ins_pipe(pipe_jcc);
12414 %}
12415 
12416 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12417   match(If cop cmp);
12418   effect(USE labl);
12419 
12420   ins_cost(200);
12421   format %{ "J$cop,u  $labl" %}
12422   size(6);
12423   ins_encode %{
12424     Label* L = $labl$$label;
12425     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12426   %}
12427   ins_pipe(pipe_jcc);
12428 %}
12429 
12430 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12431   match(If cop cmp);
12432   effect(USE labl);
12433 
12434   ins_cost(200);
12435   format %{ $$template
12436     if ($cop$$cmpcode == Assembler::notEqual) {
12437       $$emit$$"JP,u   $labl\n\t"
12438       $$emit$$"J$cop,u   $labl"
12439     } else {
12440       $$emit$$"JP,u   done\n\t"
12441       $$emit$$"J$cop,u   $labl\n\t"
12442       $$emit$$"done:"
12443     }
12444   %}
12445   ins_encode %{
12446     Label* l = $labl$$label;
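    // PF is set when the preceding FP compare was unordered (NaN).  For !=
    // an unordered result counts as not-equal, so also jump on parity; for
    // == it must not count as a hit, so the parity case skips the jump.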
12447     if ($cop$$cmpcode == Assembler::notEqual) {
12448       __ jcc(Assembler::parity, *l, false);
12449       __ jcc(Assembler::notEqual, *l, false);
12450     } else if ($cop$$cmpcode == Assembler::equal) {
12451       Label done;
12452       __ jccb(Assembler::parity, done);
12453       __ jcc(Assembler::equal, *l, false);
12454       __ bind(done);
12455     } else {
12456        ShouldNotReachHere();
12457     }
12458   %}
12459   ins_pipe(pipe_jcc);
12460 %}
12461 
12462 // ============================================================================
12463 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12464 // array for an instance of the superklass.  Set a hidden internal cache on a
12465 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12466 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
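// Roughly, the scan is equivalent to the following C-style loop (illustrative
// sketch only, not the emitted code):
//
//   // for (int i = 0; i < sub->secondary_supers()->length(); i++) {
//   //   if (sub->secondary_supers()->at(i) == super) {   // REPNE SCASD hit
//   //     sub->set_secondary_super_cache(super);         // update hidden cache
//   //     return 0;                                      // flags Z, result zero
//   //   }
//   // }
//   // return non_zero;                                   // flags NZ (miss)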
12467 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12468   match(Set result (PartialSubtypeCheck sub super));
12469   effect( KILL rcx, KILL cr );
12470 
12471   ins_cost(1100);  // slightly larger than the next version
12472   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12473             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12474             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12475             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12476             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12477             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12478             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12479      "miss:\t" %}
12480 
12481   opcode(0x1); // Force a XOR of EDI
12482   ins_encode( enc_PartialSubtypeCheck() );
12483   ins_pipe( pipe_slow );
12484 %}
12485 
12486 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12487   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12488   effect( KILL rcx, KILL result );
12489 
12490   ins_cost(1000);
12491   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12492             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12493             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12494             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12495             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12496             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12497      "miss:\t" %}
12498 
12499   opcode(0x0);  // No need to XOR EDI
12500   ins_encode( enc_PartialSubtypeCheck() );
12501   ins_pipe( pipe_slow );
12502 %}
12503 
12504 // ============================================================================
12505 // Branch Instructions -- short offset versions
12506 //
12507 // These instructions are used to replace jumps of a long offset (the default
12508 // match) with jumps of a shorter offset.  These instructions are all tagged
12509 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12510 // match rules in general matching.  Instead, the ADLC generates a conversion
12511 // method in the MachNode which can be used to do in-place replacement of the
12512 // long variant with the shorter variant.  The compiler determines whether the
12513 // short variant can be used via the is_short_branch_offset() predicate in the
12514 // machine specific code section of the file.
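// For reference, the size(2) used by these rules reflects the two-byte short
// encodings (JMP rel8 is EB cb, Jcc rel8 is 7x cb), as opposed to the
// five-byte JMP rel32 (E9 cd) and six-byte Jcc rel32 (0F 8x cd) forms emitted
// by the default long-offset rules above.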
12515 
12516 // Jump Direct - Label defines a relative address from JMP+1
12517 instruct jmpDir_short(label labl) %{
12518   match(Goto);
12519   effect(USE labl);
12520 
12521   ins_cost(300);
12522   format %{ "JMP,s  $labl" %}
12523   size(2);
12524   ins_encode %{
12525     Label* L = $labl$$label;
12526     __ jmpb(*L);
12527   %}
12528   ins_pipe( pipe_jmp );
12529   ins_short_branch(1);
12530 %}
12531 
12532 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12533 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12534   match(If cop cr);
12535   effect(USE labl);
12536 
12537   ins_cost(300);
12538   format %{ "J$cop,s  $labl" %}
12539   size(2);
12540   ins_encode %{
12541     Label* L = $labl$$label;
12542     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12543   %}
12544   ins_pipe( pipe_jcc );
12545   ins_short_branch(1);
12546 %}
12547 
12548 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12549 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12550   match(CountedLoopEnd cop cr);
12551   effect(USE labl);
12552 
12553   ins_cost(300);
12554   format %{ "J$cop,s  $labl\t# Loop end" %}
12555   size(2);
12556   ins_encode %{
12557     Label* L = $labl$$label;
12558     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12559   %}
12560   ins_pipe( pipe_jcc );
12561   ins_short_branch(1);
12562 %}
12563 
12564 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12565 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12566   match(CountedLoopEnd cop cmp);
12567   effect(USE labl);
12568 
12569   ins_cost(300);
12570   format %{ "J$cop,us $labl\t# Loop end" %}
12571   size(2);
12572   ins_encode %{
12573     Label* L = $labl$$label;
12574     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12575   %}
12576   ins_pipe( pipe_jcc );
12577   ins_short_branch(1);
12578 %}
12579 
12580 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12581   match(CountedLoopEnd cop cmp);
12582   effect(USE labl);
12583 
12584   ins_cost(300);
12585   format %{ "J$cop,us $labl\t# Loop end" %}
12586   size(2);
12587   ins_encode %{
12588     Label* L = $labl$$label;
12589     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12590   %}
12591   ins_pipe( pipe_jcc );
12592   ins_short_branch(1);
12593 %}
12594 
12595 // Jump Direct Conditional - using unsigned comparison
12596 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12597   match(If cop cmp);
12598   effect(USE labl);
12599 
12600   ins_cost(300);
12601   format %{ "J$cop,us $labl" %}
12602   size(2);
12603   ins_encode %{
12604     Label* L = $labl$$label;
12605     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12606   %}
12607   ins_pipe( pipe_jcc );
12608   ins_short_branch(1);
12609 %}
12610 
12611 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12612   match(If cop cmp);
12613   effect(USE labl);
12614 
12615   ins_cost(300);
12616   format %{ "J$cop,us $labl" %}
12617   size(2);
12618   ins_encode %{
12619     Label* L = $labl$$label;
12620     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12621   %}
12622   ins_pipe( pipe_jcc );
12623   ins_short_branch(1);
12624 %}
12625 
12626 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12627   match(If cop cmp);
12628   effect(USE labl);
12629 
12630   ins_cost(300);
12631   format %{ $$template
12632     if ($cop$$cmpcode == Assembler::notEqual) {
12633       $$emit$$"JP,u,s   $labl\n\t"
12634       $$emit$$"J$cop,u,s   $labl"
12635     } else {
12636       $$emit$$"JP,u,s   done\n\t"
12637       $$emit$$"J$cop,u,s  $labl\n\t"
12638       $$emit$$"done:"
12639     }
12640   %}
12641   size(4);
12642   ins_encode %{
12643     Label* l = $labl$$label;
12644     if ($cop$$cmpcode == Assembler::notEqual) {
12645       __ jccb(Assembler::parity, *l);
12646       __ jccb(Assembler::notEqual, *l);
12647     } else if ($cop$$cmpcode == Assembler::equal) {
12648       Label done;
12649       __ jccb(Assembler::parity, done);
12650       __ jccb(Assembler::equal, *l);
12651       __ bind(done);
12652     } else {
12653        ShouldNotReachHere();
12654     }
12655   %}
12656   ins_pipe(pipe_jcc);
12657   ins_short_branch(1);
12658 %}
12659 
12660 // ============================================================================
12661 // Long Compare
12662 //
12663 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12664 // is tricky.  The flavor of compare used depends on whether we are testing
12665 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12666 // The GE test is the negated LT test.  The LE test can be had by commuting
12667 // the operands and using the GE test (a <= b exactly when b >= a); negating
12668 // that gives the GT test.  The EQ test is done by ORcc'ing the high and low
12669 // halves, and the NE test is negated from that.
12670 
12671 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12672 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12673 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12674 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12675 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12676 // foo match ends up with the wrong leaf.  One fix is to not match both
12677 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12678 // both forms beat the trinary form of long-compare and both are very useful
12679 // on Intel which has so few registers.
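// For the reg-reg LTGE/LEGT rules below, the CMP-lo / SBB-hi idiom performs a
// full 64-bit subtract while keeping only the borrow into the high word, so
// the normal signed flags end up describing the whole long comparison.  An
// illustrative C-style sketch (not the emitted code):
//
//   // bool long_lt(jlong a, jlong b) {
//   //   cmp(a_lo, b_lo);          // carry = borrow out of the low words
//   //   tmp = a_hi;
//   //   sbb(tmp, b_hi);           // a_hi - b_hi - borrow; SF/OF now valid
//   //   return signed_less();     // i.e. JL / JGE test the usual flags
//   // }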
12680 
12681 // Manifest a CmpL result in an integer register.  Very painful.
12682 // This is the test to avoid.
12683 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12684   match(Set dst (CmpL3 src1 src2));
12685   effect( KILL flags );
12686   ins_cost(1000);
12687   format %{ "XOR    $dst,$dst\n\t"
12688             "CMP    $src1.hi,$src2.hi\n\t"
12689             "JLT,s  m_one\n\t"
12690             "JGT,s  p_one\n\t"
12691             "CMP    $src1.lo,$src2.lo\n\t"
12692             "JB,s   m_one\n\t"
12693             "JEQ,s  done\n"
12694     "p_one:\tINC    $dst\n\t"
12695             "JMP,s  done\n"
12696     "m_one:\tDEC    $dst\n"
12697      "done:" %}
12698   ins_encode %{
12699     Label p_one, m_one, done;
12700     __ xorptr($dst$$Register, $dst$$Register);
12701     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12702     __ jccb(Assembler::less,    m_one);
12703     __ jccb(Assembler::greater, p_one);
12704     __ cmpl($src1$$Register, $src2$$Register);
12705     __ jccb(Assembler::below,   m_one);
12706     __ jccb(Assembler::equal,   done);
12707     __ bind(p_one);
12708     __ incrementl($dst$$Register);
12709     __ jmpb(done);
12710     __ bind(m_one);
12711     __ decrementl($dst$$Register);
12712     __ bind(done);
12713   %}
12714   ins_pipe( pipe_slow );
12715 %}
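// Note: CmpL3 is the three-way long compare (the lcmp bytecode), producing
// -1, 0 or +1.  The sequence above computes, roughly (illustrative sketch):
//
//   // int lcmp(jlong a, jlong b) {
//   //   if (a_hi < b_hi) return -1;       // signed compare of the high words
//   //   if (a_hi > b_hi) return  1;
//   //   if (a_lo < b_lo) return -1;       // unsigned compare of the low words
//   //   if (a_lo == b_lo) return  0;
//   //   return 1;
//   // }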
12716 
12717 //======
12718 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12719 // compares.  Can be used for LE or GT compares by reversing arguments.
12720 // NOT GOOD FOR EQ/NE tests.
12721 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12722   match( Set flags (CmpL src zero ));
12723   ins_cost(100);
12724   format %{ "TEST   $src.hi,$src.hi" %}
12725   opcode(0x85);
12726   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12727   ins_pipe( ialu_cr_reg_reg );
12728 %}
12729 
12730 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12731 // compares.  Can be used for LE or GT compares by reversing arguments.
12732 // NOT GOOD FOR EQ/NE tests.
12733 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12734   match( Set flags (CmpL src1 src2 ));
12735   effect( TEMP tmp );
12736   ins_cost(300);
12737   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12738             "MOV    $tmp,$src1.hi\n\t"
12739             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12740   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12741   ins_pipe( ialu_cr_reg_reg );
12742 %}
12743 
12744 // Long compares reg < zero/reg OR reg >= zero/reg.
12745 // Just a wrapper for a normal branch, plus the predicate test.
12746 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12747   match(If cmp flags);
12748   effect(USE labl);
12749   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12750   expand %{
12751     jmpCon(cmp,flags,labl);    // JLT or JGE...
12752   %}
12753 %}
12754 
12755 //======
12756 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12757 // compares.  Can be used for LE or GT compares by reversing arguments.
12758 // NOT GOOD FOR EQ/NE tests.
12759 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12760   match(Set flags (CmpUL src zero));
12761   ins_cost(100);
12762   format %{ "TEST   $src.hi,$src.hi" %}
12763   opcode(0x85);
12764   ins_encode(OpcP, RegReg_Hi2(src, src));
12765   ins_pipe(ialu_cr_reg_reg);
12766 %}
12767 
12768 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12769 // compares.  Can be used for LE or GT compares by reversing arguments.
12770 // NOT GOOD FOR EQ/NE tests.
12771 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12772   match(Set flags (CmpUL src1 src2));
12773   effect(TEMP tmp);
12774   ins_cost(300);
12775   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12776             "MOV    $tmp,$src1.hi\n\t"
12777             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12778   ins_encode(long_cmp_flags2(src1, src2, tmp));
12779   ins_pipe(ialu_cr_reg_reg);
12780 %}
12781 
12782 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12783 // Just a wrapper for a normal branch, plus the predicate test.
12784 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12785   match(If cmp flags);
12786   effect(USE labl);
12787   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12788   expand %{
12789     jmpCon(cmp, flags, labl);    // JLT or JGE...
12790   %}
12791 %}
12792 
12793 // Compare 2 longs and CMOVE longs.
12794 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12795   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12796   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12797   ins_cost(400);
12798   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12799             "CMOV$cmp $dst.hi,$src.hi" %}
12800   opcode(0x0F,0x40);
12801   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12802   ins_pipe( pipe_cmov_reg_long );
12803 %}
12804 
12805 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12806   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12807   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12808   ins_cost(500);
12809   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12810             "CMOV$cmp $dst.hi,$src.hi" %}
12811   opcode(0x0F,0x40);
12812   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12813   ins_pipe( pipe_cmov_reg_long );
12814 %}
12815 
12816 // Compare 2 longs and CMOVE ints.
12817 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12818   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12819   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12820   ins_cost(200);
12821   format %{ "CMOV$cmp $dst,$src" %}
12822   opcode(0x0F,0x40);
12823   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12824   ins_pipe( pipe_cmov_reg );
12825 %}
12826 
12827 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12828   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12829   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12830   ins_cost(250);
12831   format %{ "CMOV$cmp $dst,$src" %}
12832   opcode(0x0F,0x40);
12833   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12834   ins_pipe( pipe_cmov_mem );
12835 %}
12836 
12837 // Compare 2 longs and CMOVE ptrs.
12838 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12839   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12840   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12841   ins_cost(200);
12842   format %{ "CMOV$cmp $dst,$src" %}
12843   opcode(0x0F,0x40);
12844   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12845   ins_pipe( pipe_cmov_reg );
12846 %}
12847 
12848 // Compare 2 longs and CMOVE doubles
12849 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12850   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12851   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12852   ins_cost(200);
12853   expand %{
12854     fcmovDPR_regS(cmp,flags,dst,src);
12855   %}
12856 %}
12857 
12858 // Compare 2 longs and CMOVE doubles
12859 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12860   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12861   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12862   ins_cost(200);
12863   expand %{
12864     fcmovD_regS(cmp,flags,dst,src);
12865   %}
12866 %}
12867 
12868 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12869   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12870   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12871   ins_cost(200);
12872   expand %{
12873     fcmovFPR_regS(cmp,flags,dst,src);
12874   %}
12875 %}
12876 
12877 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12878   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12879   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12880   ins_cost(200);
12881   expand %{
12882     fcmovF_regS(cmp,flags,dst,src);
12883   %}
12884 %}
12885 
12886 //======
12887 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12888 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12889   match( Set flags (CmpL src zero ));
12890   effect(TEMP tmp);
12891   ins_cost(200);
12892   format %{ "MOV    $tmp,$src.lo\n\t"
12893             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12894   ins_encode( long_cmp_flags0( src, tmp ) );
12895   ins_pipe( ialu_reg_reg_long );
12896 %}
12897 
12898 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12899 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12900   match( Set flags (CmpL src1 src2 ));
12901   ins_cost(200+300);
12902   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12903             "JNE,s  skip\n\t"
12904             "CMP    $src1.hi,$src2.hi\n\t"
12905      "skip:\t" %}
12906   ins_encode( long_cmp_flags1( src1, src2 ) );
12907   ins_pipe( ialu_cr_reg_reg );
12908 %}
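// Note: the two EQ/NE flag rules above rely on the facts that a long is zero
// exactly when the OR of its halves is zero, and that two longs are equal
// exactly when both halves are equal; the reg-reg form therefore only compares
// the high words once the low words have already matched (the JNE,s skip).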
12909 
12910 // Long compare reg == zero/reg OR reg != zero/reg
12911 // Just a wrapper for a normal branch, plus the predicate test.
12912 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12913   match(If cmp flags);
12914   effect(USE labl);
12915   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12916   expand %{
12917     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12918   %}
12919 %}
12920 
12921 //======
12922 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12923 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12924   match(Set flags (CmpUL src zero));
12925   effect(TEMP tmp);
12926   ins_cost(200);
12927   format %{ "MOV    $tmp,$src.lo\n\t"
12928             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12929   ins_encode(long_cmp_flags0(src, tmp));
12930   ins_pipe(ialu_reg_reg_long);
12931 %}
12932 
12933 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12934 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12935   match(Set flags (CmpUL src1 src2));
12936   ins_cost(200+300);
12937   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12938             "JNE,s  skip\n\t"
12939             "CMP    $src1.hi,$src2.hi\n\t"
12940      "skip:\t" %}
12941   ins_encode(long_cmp_flags1(src1, src2));
12942   ins_pipe(ialu_cr_reg_reg);
12943 %}
12944 
12945 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12946 // Just a wrapper for a normal branch, plus the predicate test.
12947 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12948   match(If cmp flags);
12949   effect(USE labl);
12950   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12951   expand %{
12952     jmpCon(cmp, flags, labl);    // JEQ or JNE...
12953   %}
12954 %}
12955 
12956 // Compare 2 longs and CMOVE longs.
12957 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12958   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12959   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12960   ins_cost(400);
12961   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12962             "CMOV$cmp $dst.hi,$src.hi" %}
12963   opcode(0x0F,0x40);
12964   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12965   ins_pipe( pipe_cmov_reg_long );
12966 %}
12967 
12968 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12969   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12970   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12971   ins_cost(500);
12972   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12973             "CMOV$cmp $dst.hi,$src.hi" %}
12974   opcode(0x0F,0x40);
12975   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12976   ins_pipe( pipe_cmov_reg_long );
12977 %}
12978 
12979 // Compare 2 longs and CMOVE ints.
12980 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12981   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12982   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12983   ins_cost(200);
12984   format %{ "CMOV$cmp $dst,$src" %}
12985   opcode(0x0F,0x40);
12986   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12987   ins_pipe( pipe_cmov_reg );
12988 %}
12989 
12990 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12991   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12992   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12993   ins_cost(250);
12994   format %{ "CMOV$cmp $dst,$src" %}
12995   opcode(0x0F,0x40);
12996   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12997   ins_pipe( pipe_cmov_mem );
12998 %}
12999 
13000 // Compare 2 longs and CMOVE ptrs.
13001 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13002   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13003   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13004   ins_cost(200);
13005   format %{ "CMOV$cmp $dst,$src" %}
13006   opcode(0x0F,0x40);
13007   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13008   ins_pipe( pipe_cmov_reg );
13009 %}
13010 
13011 // Compare 2 longs and CMOVE doubles
13012 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13013   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13014   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13015   ins_cost(200);
13016   expand %{
13017     fcmovDPR_regS(cmp,flags,dst,src);
13018   %}
13019 %}
13020 
13021 // Compare 2 longs and CMOVE doubles
13022 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13023   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13024   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13025   ins_cost(200);
13026   expand %{
13027     fcmovD_regS(cmp,flags,dst,src);
13028   %}
13029 %}
13030 
13031 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13032   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13033   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13034   ins_cost(200);
13035   expand %{
13036     fcmovFPR_regS(cmp,flags,dst,src);
13037   %}
13038 %}
13039 
13040 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13041   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13042   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13043   ins_cost(200);
13044   expand %{
13045     fcmovF_regS(cmp,flags,dst,src);
13046   %}
13047 %}
13048 
13049 //======
13050 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13051 // Same as cmpL_reg_flags_LEGT except must negate src
13052 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13053   match( Set flags (CmpL src zero ));
13054   effect( TEMP tmp );
13055   ins_cost(300);
13056   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13057             "CMP    $tmp,$src.lo\n\t"
13058             "SBB    $tmp,$src.hi\n\t" %}
13059   ins_encode( long_cmp_flags3(src, tmp) );
13060   ins_pipe( ialu_reg_reg_long );
13061 %}
13062 
13063 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13064 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13065 // requires a commuted test to get the same result.
13066 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13067   match( Set flags (CmpL src1 src2 ));
13068   effect( TEMP tmp );
13069   ins_cost(300);
13070   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13071             "MOV    $tmp,$src2.hi\n\t"
13072             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13073   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13074   ins_pipe( ialu_cr_reg_reg );
13075 %}
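// Note: swapping the operands turns an LE/GT question about (src1,src2) into a
// GE/LT question about (src2,src1), since a <= b exactly when b >= a and
// a > b exactly when b < a.  The cmpOp_commute operand used by cmpL_LEGT below
// is what maps the original LE/GT condition onto the flags computed for the
// swapped operands.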
13076 
13077 // Long compares reg < zero/reg OR reg >= zero/reg.
13078 // Just a wrapper for a normal branch, plus the predicate test
13079 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13080   match(If cmp flags);
13081   effect(USE labl);
13082   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13083   ins_cost(300);
13084   expand %{
13085     jmpCon(cmp,flags,labl);    // JGT or JLE...
13086   %}
13087 %}
13088 
13089 //======
13090 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13091 // Same as cmpUL_reg_flags_LEGT except must negate src
13092 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13093   match(Set flags (CmpUL src zero));
13094   effect(TEMP tmp);
13095   ins_cost(300);
13096   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13097             "CMP    $tmp,$src.lo\n\t"
13098             "SBB    $tmp,$src.hi\n\t" %}
13099   ins_encode(long_cmp_flags3(src, tmp));
13100   ins_pipe(ialu_reg_reg_long);
13101 %}
13102 
13103 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13104 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13105 // requires a commuted test to get the same result.
13106 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13107   match(Set flags (CmpUL src1 src2));
13108   effect(TEMP tmp);
13109   ins_cost(300);
13110   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13111             "MOV    $tmp,$src2.hi\n\t"
13112             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13113   ins_encode(long_cmp_flags2( src2, src1, tmp));
13114   ins_pipe(ialu_cr_reg_reg);
13115 %}
13116 
13117 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13118 // Just a wrapper for a normal branch, plus the predicate test
13119 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13120   match(If cmp flags);
13121   effect(USE labl);
13122   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13123   ins_cost(300);
13124   expand %{
13125     jmpCon(cmp, flags, labl);    // JGT or JLE...
13126   %}
13127 %}
13128 
13129 // Compare 2 longs and CMOVE longs.
13130 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13131   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13133   ins_cost(400);
13134   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13135             "CMOV$cmp $dst.hi,$src.hi" %}
13136   opcode(0x0F,0x40);
13137   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13138   ins_pipe( pipe_cmov_reg_long );
13139 %}
13140 
13141 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13142   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13143   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13144   ins_cost(500);
13145   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13146             "CMOV$cmp $dst.hi,$src.hi+4" %}
13147   opcode(0x0F,0x40);
13148   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13149   ins_pipe( pipe_cmov_reg_long );
13150 %}
13151 
13152 // Compare 2 longs and CMOVE ints.
13153 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13154   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13155   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13156   ins_cost(200);
13157   format %{ "CMOV$cmp $dst,$src" %}
13158   opcode(0x0F,0x40);
13159   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13160   ins_pipe( pipe_cmov_reg );
13161 %}
13162 
13163 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13164   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13165   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13166   ins_cost(250);
13167   format %{ "CMOV$cmp $dst,$src" %}
13168   opcode(0x0F,0x40);
13169   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13170   ins_pipe( pipe_cmov_mem );
13171 %}
13172 
13173 // Compare 2 longs and CMOVE ptrs.
13174 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13175   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13176   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13177   ins_cost(200);
13178   format %{ "CMOV$cmp $dst,$src" %}
13179   opcode(0x0F,0x40);
13180   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13181   ins_pipe( pipe_cmov_reg );
13182 %}
13183 
13184 // Compare 2 longs and CMOVE doubles
13185 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13186   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13187   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13188   ins_cost(200);
13189   expand %{
13190     fcmovDPR_regS(cmp,flags,dst,src);
13191   %}
13192 %}
13193 
13194 // Compare 2 longs and CMOVE doubles
13195 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13196   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13197   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13198   ins_cost(200);
13199   expand %{
13200     fcmovD_regS(cmp,flags,dst,src);
13201   %}
13202 %}
13203 
13204 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13205   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13206   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13207   ins_cost(200);
13208   expand %{
13209     fcmovFPR_regS(cmp,flags,dst,src);
13210   %}
13211 %}
13212 
13213 
13214 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13215   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13216   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13217   ins_cost(200);
13218   expand %{
13219     fcmovF_regS(cmp,flags,dst,src);
13220   %}
13221 %}
13222 
13223 
13224 // ============================================================================
13225 // Procedure Call/Return Instructions
13226 // Call Java Static Instruction
13227 // Note: If this code changes, the corresponding ret_addr_offset() and
13228 //       compute_padding() functions will have to be adjusted.
13229 instruct CallStaticJavaDirect(method meth) %{
13230   match(CallStaticJava);
13231   effect(USE meth);
13232 
13233   ins_cost(300);
13234   format %{ "CALL,static " %}
13235   opcode(0xE8); /* E8 cd */
13236   ins_encode( pre_call_resets,
13237               Java_Static_Call( meth ),
13238               call_epilog,
13239               post_call_FPU );
13240   ins_pipe( pipe_slow );
13241   ins_alignment(4);
13242 %}
13243 
13244 // Call Java Dynamic Instruction
13245 // Note: If this code changes, the corresponding ret_addr_offset() and
13246 //       compute_padding() functions will have to be adjusted.
13247 instruct CallDynamicJavaDirect(method meth) %{
13248   match(CallDynamicJava);
13249   effect(USE meth);
13250 
13251   ins_cost(300);
13252   format %{ "MOV    EAX,(oop)-1\n\t"
13253             "CALL,dynamic" %}
13254   opcode(0xE8); /* E8 cd */
13255   ins_encode( pre_call_resets,
13256               Java_Dynamic_Call( meth ),
13257               call_epilog,
13258               post_call_FPU );
13259   ins_pipe( pipe_slow );
13260   ins_alignment(4);
13261 %}
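// Note: both Java call forms above use ins_alignment(4), presumably so that
// the 32-bit call displacement remains atomically patchable at runtime
// (static-call and inline-cache patching).  ret_addr_offset() and
// compute_padding() must describe exactly the bytes these encodings emit,
// which is what the warning above is about.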
13262 
13263 // Call Runtime Instruction
13264 instruct CallRuntimeDirect(method meth) %{
13265   match(CallRuntime );
13266   effect(USE meth);
13267 
13268   ins_cost(300);
13269   format %{ "CALL,runtime " %}
13270   opcode(0xE8); /* E8 cd */
13271   // Use FFREEs to clear entries in float stack
13272   ins_encode( pre_call_resets,
13273               FFree_Float_Stack_All,
13274               Java_To_Runtime( meth ),
13275               post_call_FPU );
13276   ins_pipe( pipe_slow );
13277 %}
13278 
13279 // Call runtime without safepoint
13280 instruct CallLeafDirect(method meth) %{
13281   match(CallLeaf);
13282   effect(USE meth);
13283 
13284   ins_cost(300);
13285   format %{ "CALL_LEAF,runtime " %}
13286   opcode(0xE8); /* E8 cd */
13287   ins_encode( pre_call_resets,
13288               FFree_Float_Stack_All,
13289               Java_To_Runtime( meth ),
13290               Verify_FPU_For_Leaf, post_call_FPU );
13291   ins_pipe( pipe_slow );
13292 %}
13293 
13294 instruct CallLeafNoFPDirect(method meth) %{
13295   match(CallLeafNoFP);
13296   effect(USE meth);
13297 
13298   ins_cost(300);
13299   format %{ "CALL_LEAF_NOFP,runtime " %}
13300   opcode(0xE8); /* E8 cd */
13301   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13302   ins_pipe( pipe_slow );
13303 %}
13304 
13305 
13306 // Return Instruction
13307 // Remove the return address & jump to it.
13308 instruct Ret() %{
13309   match(Return);
13310   format %{ "RET" %}
13311   opcode(0xC3);
13312   ins_encode(OpcP);
13313   ins_pipe( pipe_jmp );
13314 %}
13315 
13316 // Tail Call; Jump from runtime stub to Java code.
13317 // Also known as an 'interprocedural jump'.
13318 // Target of jump will eventually return to caller.
13319 // TailJump below removes the return address.
13320 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13321   match(TailCall jump_target method_oop );
13322   ins_cost(300);
13323   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13324   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13325   ins_encode( OpcP, RegOpc(jump_target) );
13326   ins_pipe( pipe_jmp );
13327 %}
13328 
13329 
13330 // Tail Jump; remove the return address; jump to target.
13331 // TailCall above leaves the return address around.
13332 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13333   match( TailJump jump_target ex_oop );
13334   ins_cost(300);
13335   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13336             "JMP    $jump_target " %}
13337   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13338   ins_encode( enc_pop_rdx,
13339               OpcP, RegOpc(jump_target) );
13340   ins_pipe( pipe_jmp );
13341 %}
13342 
13343 // Create exception oop: created by stack-crawling runtime code.
13344 // Created exception is now available to this handler, and is setup
13345 // just prior to jumping to this handler.  No code emitted.
13346 instruct CreateException( eAXRegP ex_oop )
13347 %{
13348   match(Set ex_oop (CreateEx));
13349 
13350   size(0);
13351   // use the following format syntax
13352   format %{ "# exception oop is in EAX; no code emitted" %}
13353   ins_encode();
13354   ins_pipe( empty );
13355 %}
13356 
13357 
13358 // Rethrow exception:
13359 // The exception oop will come in the first argument position.
13360 // Then JUMP (not call) to the rethrow stub code.
13361 instruct RethrowException()
13362 %{
13363   match(Rethrow);
13364 
13365   // use the following format syntax
13366   format %{ "JMP    rethrow_stub" %}
13367   ins_encode(enc_rethrow);
13368   ins_pipe( pipe_jmp );
13369 %}
13370 
13371 // inlined locking and unlocking
13372 
13373 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13374   predicate(Compile::current()->use_rtm());
13375   match(Set cr (FastLock object box));
13376   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13377   ins_cost(300);
13378   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13379   ins_encode %{
13380     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13381                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13382                  _counters, _rtm_counters, _stack_rtm_counters,
13383                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13384                  true, ra_->C->profile_rtm());
13385   %}
13386   ins_pipe(pipe_slow);
13387 %}
13388 
13389 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13390   predicate(!Compile::current()->use_rtm());
13391   match(Set cr (FastLock object box));
13392   effect(TEMP tmp, TEMP scr, USE_KILL box);
13393   ins_cost(300);
13394   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13395   ins_encode %{
13396     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13397                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13398   %}
13399   ins_pipe(pipe_slow);
13400 %}
13401 
13402 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13403   match(Set cr (FastUnlock object box));
13404   effect(TEMP tmp, USE_KILL box);
13405   ins_cost(300);
13406   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13407   ins_encode %{
13408     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13409   %}
13410   ins_pipe(pipe_slow);
13411 %}
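// Note: FastLock/FastUnlock expand to the inlined locking fast path in
// MacroAssembler::fast_lock()/fast_unlock() (a stack-lock CAS on the object
// header, plus biased locking and, in the RTM variant, transactional lock
// elision when those features are enabled).  The resulting condition flags
// tell the compiled code whether the fast path succeeded; only on failure does
// it branch to the runtime slow path.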
13412 
13413 
13414 
13415 // ============================================================================
13416 // Safepoint Instruction
13417 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13418   match(SafePoint poll);
13419   effect(KILL cr, USE poll);
13420 
13421   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13422   ins_cost(125);
13423   // EBP would need size(3)
13424   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13425   ins_encode %{
13426     __ relocate(relocInfo::poll_type);
13427     address pre_pc = __ pc();
13428     __ testl(rax, Address($poll$$Register, 0));
13429     address post_pc = __ pc();
13430     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13431   %}
13432   ins_pipe(ialu_reg_mem);
13433 %}
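// Note: the poll above is a dummy load from the thread-local polling page
// ("TEST EAX,[$poll]" reads memory and discards the result).  When a safepoint
// or handshake is armed, the polling address is switched to a protected page,
// so the next poll faults and the signal handler, recognizing the relocated
// polling PC, brings the thread to a stop.  The guarantee on the 0x85 opcode
// byte keeps the emitted form in sync with that recognition.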
13434 
13435 
13436 // ============================================================================
13437 // This name is KNOWN by the ADLC and cannot be changed.
13438 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13439 // for this guy.
13440 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13441   match(Set dst (ThreadLocal));
13442   effect(DEF dst, KILL cr);
13443 
13444   format %{ "MOV    $dst, Thread::current()" %}
13445   ins_encode %{
13446     Register dstReg = as_Register($dst$$reg);
13447     __ get_thread(dstReg);
13448   %}
13449   ins_pipe( ialu_reg_fat );
13450 %}
13451 
13452 
13453 
13454 //----------PEEPHOLE RULES-----------------------------------------------------
13455 // These must follow all instruction definitions as they use the names
13456 // defined in the instructions definitions.
13457 //
13458 // peepmatch ( root_instr_name [preceding_instruction]* );
13459 //
13460 // peepconstraint %{
13461 // (instruction_number.operand_name relational_op instruction_number.operand_name
13462 //  [, ...] );
13463 // // instruction numbers are zero-based using left to right order in peepmatch
13464 //
13465 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13466 // // provide an instruction_number.operand_name for each operand that appears
13467 // // in the replacement instruction's match rule
13468 //
13469 // ---------VM FLAGS---------------------------------------------------------
13470 //
13471 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13472 //
13473 // Each peephole rule is given an identifying number starting with zero and
13474 // increasing by one in the order seen by the parser.  An individual peephole
13475 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13476 // on the command-line.
13477 //
13478 // ---------CURRENT LIMITATIONS----------------------------------------------
13479 //
13480 // Only match adjacent instructions in same basic block
13481 // Only equality constraints
13482 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13483 // Only one replacement instruction
13484 //
13485 // ---------EXAMPLE----------------------------------------------------------
13486 //
13487 // // pertinent parts of existing instructions in architecture description
13488 // instruct movI(rRegI dst, rRegI src) %{
13489 //   match(Set dst (CopyI src));
13490 // %}
13491 //
13492 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13493 //   match(Set dst (AddI dst src));
13494 //   effect(KILL cr);
13495 // %}
13496 //
13497 // // Change (inc mov) to lea
13498 // peephole %{
13499 //   // increment preceded by register-register move
13500 //   peepmatch ( incI_eReg movI );
13501 //   // require that the destination register of the increment
13502 //   // match the destination register of the move
13503 //   peepconstraint ( 0.dst == 1.dst );
13504 //   // construct a replacement instruction that sets
13505 //   // the destination to ( move's source register + one )
13506 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13507 // %}
13508 //
13509 // Implementation no longer uses movX instructions since
13510 // machine-independent system no longer uses CopyX nodes.
13511 //
13512 // peephole %{
13513 //   peepmatch ( incI_eReg movI );
13514 //   peepconstraint ( 0.dst == 1.dst );
13515 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13516 // %}
13517 //
13518 // peephole %{
13519 //   peepmatch ( decI_eReg movI );
13520 //   peepconstraint ( 0.dst == 1.dst );
13521 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13522 // %}
13523 //
13524 // peephole %{
13525 //   peepmatch ( addI_eReg_imm movI );
13526 //   peepconstraint ( 0.dst == 1.dst );
13527 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13528 // %}
13529 //
13530 // peephole %{
13531 //   peepmatch ( addP_eReg_imm movP );
13532 //   peepconstraint ( 0.dst == 1.dst );
13533 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13534 // %}
13535 
13536 // // Change load of spilled value to only a spill
13537 // instruct storeI(memory mem, rRegI src) %{
13538 //   match(Set mem (StoreI mem src));
13539 // %}
13540 //
13541 // instruct loadI(rRegI dst, memory mem) %{
13542 //   match(Set dst (LoadI mem));
13543 // %}
13544 //
13545 peephole %{
13546   peepmatch ( loadI storeI );
13547   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13548   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13549 %}
13550 
13551 //----------SMARTSPILL RULES---------------------------------------------------
13552 // These must follow all instruction definitions as they use the names
13553 // defined in the instructions definitions.