1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // archtecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
  64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Ok so here's the trick FPR1 is really st(0) except in the midst
  82 // of emission of assembly for a machnode. During the emission the fpu stack
  83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
  84 // the stack will not have this element so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering causes
  86 // instruction encoding to have to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
  88 // where it does flt->flt moves to see an example
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is not ever allocated; we use clever encodings to fake
 229 // a 2-address instructions out of Intels FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 269   // of 128-bits operands for SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bits operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bits masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all type of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users might be
 628     // emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp,
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp,
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 725   // Remove two words for return addr and rbp,
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp,
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, 
 799     //                          it maps more cases to single byte displacement
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
 851     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
 863     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
 891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
 909     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1045                 "popq    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
1047                 "popq    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
1350 
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1381   /* WARNING these NOPs are critical so that verified entry point is properly
1382      aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
1390 
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
// This hook is UltraSparc-specific in origin; returning true just means we have fast l2f conversion.
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1415   offset -= br_size;
1416 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1426   return false;
1427 }
1428 
1429 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1430 const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
1438 // Does the CPU require late expand (see block.cpp for description of late expand)?
1439 const bool Matcher::require_postalloc_expand = false;
1440 
1441 // Do we need to mask the count passed to shift instructions or does
1442 // the cpu only look at the lower 5/6 bits anyway?
1443 const bool Matcher::need_masked_shift_count = false;
1444 
1445 bool Matcher::narrow_oop_use_complex_address() {
1446   ShouldNotCallThis();
1447   return true;
1448 }
1449 
1450 bool Matcher::narrow_klass_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 bool Matcher::const_oop_prefer_decode() {
1456   ShouldNotCallThis();
1457   return true;
1458 }
1459 
1460 bool Matcher::const_klass_prefer_decode() {
1461   ShouldNotCallThis();
1462   return true;
1463 }
1464 
1465 // Is it better to copy float constants, or load them directly from memory?
1466 // Intel can load a float constant from a direct address, requiring no
1467 // extra registers.  Most RISCs will have to materialize an address into a
1468 // register first, so they would do better to copy the constant from stack.
1469 const bool Matcher::rematerialize_float_constants = true;
1470 
1471 // If CPU can load and store mis-aligned doubles directly then no fixup is
1472 // needed.  Else we split the double into 2 integer pieces and move it
1473 // piece-by-piece.  Only happens when passing doubles into C code as the
1474 // Java calling convention forces doubles to be aligned.
1475 const bool Matcher::misaligned_doubles_ok = true;
1476 
1477 
1478 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1479   // Get the memory operand from the node
1480   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1481   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1482   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1483   uint opcnt     = 1;                 // First operand
1484   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1485   while( idx >= skipped+num_edges ) {
1486     skipped += num_edges;
1487     opcnt++;                          // Bump operand count
1488     assert( opcnt < numopnds, "Accessing non-existent operand" );
1489     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1490   }
1491 
1492   MachOper *memory = node->_opnds[opcnt];
1493   MachOper *new_memory = NULL;
1494   switch (memory->opcode()) {
1495   case DIRECT:
1496   case INDOFFSET32X:
1497     // No transformation necessary.
1498     return;
1499   case INDIRECT:
1500     new_memory = new indirect_win95_safeOper( );
1501     break;
1502   case INDOFFSET8:
1503     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1504     break;
1505   case INDOFFSET32:
1506     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1507     break;
1508   case INDINDEXOFFSET:
1509     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1510     break;
1511   case INDINDEXSCALE:
1512     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1513     break;
1514   case INDINDEXSCALEOFFSET:
1515     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1516     break;
1517   case LOAD_LONG_INDIRECT:
1518   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1520     return;
1521   default:
1522     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1523     return;
1524   }
1525   node->_opnds[opcnt] = new_memory;
1526 }
1527 
1528 // Advertise here if the CPU requires explicit rounding operations
1529 // to implement the UseStrictFP mode.
1530 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1531 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86-32 the value is stored with conversion only when the FPU is used for floats.
1534 bool Matcher::float_in_double() { return (UseSSE == 0); }
1535 
1536 // Do ints take an entire long register or just half?
1537 const bool Matcher::int_in_long = false;
1538 
1539 // Return whether or not this register is ever used as an argument.  This
1540 // function is used on startup to build the trampoline stubs in generateOptoStub.
1541 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1543 bool Matcher::can_be_java_arg( int reg ) {
1544   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1545   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1546   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1547   return false;
1548 }
1549 
1550 bool Matcher::is_spillable_arg( int reg ) {
1551   return can_be_java_arg(reg);
1552 }
1553 
1554 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because it cannot be negated
  // to a correct positive 32-bit value).
1560   return VM_Version::has_fast_idiv() &&
1561          (divisor == (int)divisor && divisor != min_jint);
1562 }
1563 
1564 // Register for DIVI projection of divmodI
1565 RegMask Matcher::divI_proj_mask() {
1566   return EAX_REG_mask();
1567 }
1568 
1569 // Register for MODI projection of divmodI
1570 RegMask Matcher::modI_proj_mask() {
1571   return EDX_REG_mask();
1572 }
1573 
1574 // Register for DIVL projection of divmodL
1575 RegMask Matcher::divL_proj_mask() {
1576   ShouldNotReachHere();
1577   return RegMask();
1578 }
1579 
1580 // Register for MODL projection of divmodL
1581 RegMask Matcher::modL_proj_mask() {
1582   ShouldNotReachHere();
1583   return RegMask();
1584 }
1585 
1586 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1587   return NO_REG_mask();
1588 }
1589 
// Returns true if the high 32 bits of the value are known to be zero.
1591 bool is_operand_hi32_zero(Node* n) {
1592   int opc = n->Opcode();
1593   if (opc == Op_AndL) {
1594     Node* o2 = n->in(2);
1595     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1596       return true;
1597     }
1598   }
1599   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1600     return true;
1601   }
1602   return false;
1603 }
1604 
1605 %}
1606 
1607 //----------ENCODING BLOCK-----------------------------------------------------
1608 // This block specifies the encoding classes used by the compiler to output
1609 // byte streams.  Encoding classes generate functions which are called by
1610 // Machine Instruction Nodes in order to generate the bit encoding of the
1611 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces,
1613 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1614 // operand to generate a function which returns its register number when
1615 // queried.   CONST_INTER causes an operand to generate a function which
1616 // returns the value of the constant when queried.  MEMORY_INTER causes an
1617 // operand to generate four functions which return the Base Register, the
1618 // Index Register, the Scale Value, and the Offset Value of the operand when
1619 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1621 // associated with each basic boolean condition for a conditional instruction.
1622 // Instructions specify two basic values for encoding.  They use the
1623 // ins_encode keyword to specify their encoding class (which must be one of
1624 // the class names specified in the encoding block), and they use the
1625 // opcode keyword to specify, in order, their primary, secondary, and
1626 // tertiary opcode.  Only the opcode sections which a particular instruction
1627 // needs for encoding need to be specified.
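// As an abridged sketch of how an instruction ties these pieces together
// (illustrative only; the complete rules appear later in this file):
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                        // primary opcode: ADD r32, r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit opcode byte, then ModRM byte
//   %}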
1628 encode %{
1629   // Build emit functions for each basic byte or larger field in the intel
1630   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1631   // code in the enc_class source block.  Emit functions will live in the
1632   // main source block for now.  In future, we can generalize this by
1633   // adding a syntax that specifies the sizes of fields in an order,
1634   // so that the adlc can build the emit functions automagically
1635 
1636   // Emit primary opcode
1637   enc_class OpcP %{
1638     emit_opcode(cbuf, $primary);
1639   %}
1640 
1641   // Emit secondary opcode
1642   enc_class OpcS %{
1643     emit_opcode(cbuf, $secondary);
1644   %}
1645 
1646   // Emit opcode directly
1647   enc_class Opcode(immI d8) %{
1648     emit_opcode(cbuf, $d8$$constant);
1649   %}
1650 
1651   enc_class SizePrefix %{
1652     emit_opcode(cbuf,0x66);
1653   %}
1654 
1655   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1656     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1657   %}
1658 
1659   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1660     emit_opcode(cbuf,$opcode$$constant);
1661     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1662   %}
1663 
1664   enc_class mov_r32_imm0( rRegI dst ) %{
1665     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1666     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1667   %}
1668 
1669   enc_class cdq_enc %{
1670     // Full implementation of Java idiv and irem; checks for
1671     // special case as described in JVM spec., p.243 & p.271.
1672     //
1673     //         normal case                           special case
1674     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
1680     //
    //  Code sequence:
1682     //
1683     //  81 F8 00 00 00 80    cmp         rax,80000000h
1684     //  0F 85 0B 00 00 00    jne         normal_case
1685     //  33 D2                xor         rdx,edx
1686     //  83 F9 FF             cmp         rcx,0FFh
1687     //  0F 84 03 00 00 00    je          done
1688     //                  normal_case:
1689     //  99                   cdq
1690     //  F7 F9                idiv        rax,ecx
1691     //                  done:
1692     //
1693     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1695     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1696     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1697     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1698     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1699     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1700     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1701     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1702     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1703     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1704     // normal_case:
1705     emit_opcode(cbuf,0x99);                                         // cdq
1706     // idiv (note: must be emitted by the user of this rule)
1707     // normal:
1708   %}
1709 
1710   // Dense encoding for older common ops
1711   enc_class Opc_plus(immI opcode, rRegI reg) %{
1712     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1713   %}
1714 
1715 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1717   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1718     // Check for 8-bit immediate, and set sign extend bit in opcode
1719     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1720       emit_opcode(cbuf, $primary | 0x02);
1721     }
1722     else {                          // If 32-bit immediate
1723       emit_opcode(cbuf, $primary);
1724     }
1725   %}
1726 
1727   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1728     // Emit primary opcode and set sign-extend bit
1729     // Check for 8-bit immediate, and set sign extend bit in opcode
1730     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1732     else {                          // If 32-bit immediate
1733       emit_opcode(cbuf, $primary);
1734     }
1735     // Emit r/m byte with secondary opcode, after primary opcode.
1736     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1737   %}
1738 
1739   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1740     // Check for 8-bit immediate, and set sign extend bit in opcode
1741     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1742       $$$emit8$imm$$constant;
1743     }
1744     else {                          // If 32-bit immediate
1745       // Output immediate
1746       $$$emit32$imm$$constant;
1747     }
1748   %}
1749 
1750   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1751     // Emit primary opcode and set sign-extend bit
1752     // Check for 8-bit immediate, and set sign extend bit in opcode
1753     int con = (int)$imm$$constant; // Throw away top bits
1754     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1755     // Emit r/m byte with secondary opcode, after primary opcode.
1756     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1757     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1758     else                               emit_d32(cbuf,con);
1759   %}
1760 
1761   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1762     // Emit primary opcode and set sign-extend bit
1763     // Check for 8-bit immediate, and set sign extend bit in opcode
1764     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1765     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1766     // Emit r/m byte with tertiary opcode, after primary opcode.
1767     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1768     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1769     else                               emit_d32(cbuf,con);
1770   %}
1771 
1772   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1773     emit_cc(cbuf, $secondary, $dst$$reg );
1774   %}
1775 
1776   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1777     int destlo = $dst$$reg;
1778     int desthi = HIGH_FROM_LOW(destlo);
1779     // bswap lo
1780     emit_opcode(cbuf, 0x0F);
1781     emit_cc(cbuf, 0xC8, destlo);
1782     // bswap hi
1783     emit_opcode(cbuf, 0x0F);
1784     emit_cc(cbuf, 0xC8, desthi);
1785     // xchg lo and hi
1786     emit_opcode(cbuf, 0x87);
1787     emit_rm(cbuf, 0x3, destlo, desthi);
1788   %}
1789 
1790   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1791     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1792   %}
1793 
1794   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1795     $$$emit8$primary;
1796     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1797   %}
1798 
1799   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1800     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1801     emit_d8(cbuf, op >> 8 );
1802     emit_d8(cbuf, op & 255);
1803   %}
1804 
1805   // emulate a CMOV with a conditional branch around a MOV
1806   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1807     // Invert sense of branch from sense of CMOV
1808     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1809     emit_d8( cbuf, $brOffs$$constant );
1810   %}
1811 
1812   enc_class enc_PartialSubtypeCheck( ) %{
1813     Register Redi = as_Register(EDI_enc); // result register
1814     Register Reax = as_Register(EAX_enc); // super class
1815     Register Recx = as_Register(ECX_enc); // killed
1816     Register Resi = as_Register(ESI_enc); // sub class
1817     Label miss;
1818 
1819     MacroAssembler _masm(&cbuf);
1820     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1821                                      NULL, &miss,
1822                                      /*set_cond_codes:*/ true);
1823     if ($primary) {
1824       __ xorptr(Redi, Redi);
1825     }
1826     __ bind(miss);
1827   %}
1828 
1829   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1830     MacroAssembler masm(&cbuf);
1831     int start = masm.offset();
1832     if (UseSSE >= 2) {
1833       if (VerifyFPU) {
1834         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1835       }
1836     } else {
1837       // External c_calling_convention expects the FPU stack to be 'clean'.
1838       // Compiled code leaves it dirty.  Do cleanup now.
1839       masm.empty_FPU_stack();
1840     }
1841     if (sizeof_FFree_Float_Stack_All == -1) {
1842       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1843     } else {
1844       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1845     }
1846   %}
1847 
1848   enc_class Verify_FPU_For_Leaf %{
1849     if( VerifyFPU ) {
1850       MacroAssembler masm(&cbuf);
1851       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1852     }
1853   %}
1854 
1855   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1856     // This is the instruction starting address for relocation info.
1857     cbuf.set_insts_mark();
1858     $$$emit8$primary;
1859     // CALL directly to the runtime
1860     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1861                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1862 
1863     if (UseSSE >= 2) {
1864       MacroAssembler _masm(&cbuf);
1865       BasicType rt = tf()->return_type();
1866 
1867       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1868         // A C runtime call where the return value is unused.  In SSE2+
1869         // mode the result needs to be removed from the FPU stack.  It's
1870         // likely that this function call could be removed by the
1871         // optimizer if the C function is a pure function.
1872         __ ffree(0);
1873       } else if (rt == T_FLOAT) {
1874         __ lea(rsp, Address(rsp, -4));
1875         __ fstp_s(Address(rsp, 0));
1876         __ movflt(xmm0, Address(rsp, 0));
1877         __ lea(rsp, Address(rsp,  4));
1878       } else if (rt == T_DOUBLE) {
1879         __ lea(rsp, Address(rsp, -8));
1880         __ fstp_d(Address(rsp, 0));
1881         __ movdbl(xmm0, Address(rsp, 0));
1882         __ lea(rsp, Address(rsp,  8));
1883       }
1884     }
1885   %}
1886 
1887 
1888   enc_class pre_call_resets %{
1889     // If method sets FPU control word restore it here
1890     debug_only(int off0 = cbuf.insts_size());
1891     if (ra_->C->in_24_bit_fp_mode()) {
1892       MacroAssembler _masm(&cbuf);
1893       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1894     }
1895     if (ra_->C->max_vector_size() > 16) {
1896       // Clear upper bits of YMM registers when current compiled code uses
1897       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1898       MacroAssembler _masm(&cbuf);
1899       __ vzeroupper();
1900     }
1901     debug_only(int off1 = cbuf.insts_size());
1902     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1903   %}
1904 
1905   enc_class post_call_FPU %{
1906     // If method sets FPU control word do it here also
1907     if (Compile::current()->in_24_bit_fp_mode()) {
1908       MacroAssembler masm(&cbuf);
1909       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1910     }
1911   %}
1912 
1913   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1914     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1915     // who we intended to call.
1916     cbuf.set_insts_mark();
1917     $$$emit8$primary;
1918 
1919     if (!_method) {
1920       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1921                      runtime_call_Relocation::spec(),
1922                      RELOC_IMM32);
1923     } else {
1924       int method_index = resolved_method_index(cbuf);
1925       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1926                                                   : static_call_Relocation::spec(method_index);
1927       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1928                      rspec, RELOC_DISP32);
1929       // Emit stubs for static call.
1930       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1931       if (stub == NULL) {
1932         ciEnv::current()->record_failure("CodeCache is full");
1933         return;
1934       }
1935     }
1936   %}
1937 
1938   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1939     MacroAssembler _masm(&cbuf);
1940     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1941   %}
1942 
1943   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1944     int disp = in_bytes(Method::from_compiled_offset());
1945     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1946 
1947     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1948     cbuf.set_insts_mark();
1949     $$$emit8$primary;
1950     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1951     emit_d8(cbuf, disp);             // Displacement
1952 
1953   %}
1954 
1955 //   Following encoding is no longer used, but may be restored if calling
1956 //   convention changes significantly.
1957 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1958 //
1959 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1960 //     // int ic_reg     = Matcher::inline_cache_reg();
1961 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1962 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1963 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1964 //
1965 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1966 //     // // so we load it immediately before the call
1967 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1968 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1969 //
1970 //     // xor rbp,ebp
1971 //     emit_opcode(cbuf, 0x33);
1972 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1973 //
1974 //     // CALL to interpreter.
1975 //     cbuf.set_insts_mark();
1976 //     $$$emit8$primary;
1977 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1978 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1979 //   %}
1980 
1981   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1982     $$$emit8$primary;
1983     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1984     $$$emit8$shift$$constant;
1985   %}
1986 
1987   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1988     // Load immediate does not have a zero or sign extended version
1989     // for 8-bit immediates
1990     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1991     $$$emit32$src$$constant;
1992   %}
1993 
1994   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1995     // Load immediate does not have a zero or sign extended version
1996     // for 8-bit immediates
1997     emit_opcode(cbuf, $primary + $dst$$reg);
1998     $$$emit32$src$$constant;
1999   %}
2000 
2001   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
2002     // Load immediate does not have a zero or sign extended version
2003     // for 8-bit immediates
2004     int dst_enc = $dst$$reg;
2005     int src_con = $src$$constant & 0x0FFFFFFFFL;
2006     if (src_con == 0) {
2007       // xor dst, dst
2008       emit_opcode(cbuf, 0x33);
2009       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2010     } else {
2011       emit_opcode(cbuf, $primary + dst_enc);
2012       emit_d32(cbuf, src_con);
2013     }
2014   %}
2015 
2016   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2017     // Load immediate does not have a zero or sign extended version
2018     // for 8-bit immediates
2019     int dst_enc = $dst$$reg + 2;
2020     int src_con = ((julong)($src$$constant)) >> 32;
2021     if (src_con == 0) {
2022       // xor dst, dst
2023       emit_opcode(cbuf, 0x33);
2024       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2025     } else {
2026       emit_opcode(cbuf, $primary + dst_enc);
2027       emit_d32(cbuf, src_con);
2028     }
2029   %}
2030 
2031 
2032   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2033   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2034     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2035   %}
2036 
2037   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2038     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2039   %}
2040 
2041   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2042     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2043   %}
2044 
2045   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2046     $$$emit8$primary;
2047     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2048   %}
2049 
2050   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2051     $$$emit8$secondary;
2052     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2056     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2057   %}
2058 
2059   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2060     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2061   %}
2062 
2063   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2064     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2065   %}
2066 
2067   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2068     // Output immediate
2069     $$$emit32$src$$constant;
2070   %}
2071 
2072   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2073     // Output Float immediate bits
2074     jfloat jf = $src$$constant;
2075     int    jf_as_bits = jint_cast( jf );
2076     emit_d32(cbuf, jf_as_bits);
2077   %}
2078 
2079   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2080     // Output Float immediate bits
2081     jfloat jf = $src$$constant;
2082     int    jf_as_bits = jint_cast( jf );
2083     emit_d32(cbuf, jf_as_bits);
2084   %}
2085 
2086   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2087     // Output immediate
2088     $$$emit16$src$$constant;
2089   %}
2090 
2091   enc_class Con_d32(immI src) %{
2092     emit_d32(cbuf,$src$$constant);
2093   %}
2094 
2095   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2096     // Output immediate memory reference
2097     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2098     emit_d32(cbuf, 0x00);
2099   %}
2100 
2101   enc_class lock_prefix( ) %{
2102     if( os::is_MP() )
2103       emit_opcode(cbuf,0xF0);         // [Lock]
2104   %}
2105 
2106   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high-order word of the new value to store but
  //       our register encoding uses rbx.
2111   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2112 
2113     // XCHG  rbx,ecx
2114     emit_opcode(cbuf,0x87);
2115     emit_opcode(cbuf,0xD9);
2116     // [Lock]
2117     if( os::is_MP() )
2118       emit_opcode(cbuf,0xF0);
2119     // CMPXCHG8 [Eptr]
2120     emit_opcode(cbuf,0x0F);
2121     emit_opcode(cbuf,0xC7);
2122     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2123     // XCHG  rbx,ecx
2124     emit_opcode(cbuf,0x87);
2125     emit_opcode(cbuf,0xD9);
2126   %}
2127 
2128   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2129     // [Lock]
2130     if( os::is_MP() )
2131       emit_opcode(cbuf,0xF0);
2132 
2133     // CMPXCHG [Eptr]
2134     emit_opcode(cbuf,0x0F);
2135     emit_opcode(cbuf,0xB1);
2136     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2137   %}
2138 
2139   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2140     // [Lock]
2141     if( os::is_MP() )
2142       emit_opcode(cbuf,0xF0);
2143 
2144     // CMPXCHGB [Eptr]
2145     emit_opcode(cbuf,0x0F);
2146     emit_opcode(cbuf,0xB0);
2147     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2148   %}
2149 
2150   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2151     // [Lock]
2152     if( os::is_MP() )
2153       emit_opcode(cbuf,0xF0);
2154 
2155     // 16-bit mode
2156     emit_opcode(cbuf, 0x66);
2157 
2158     // CMPXCHGW [Eptr]
2159     emit_opcode(cbuf,0x0F);
2160     emit_opcode(cbuf,0xB1);
2161     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2162   %}
2163 
2164   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2165     int res_encoding = $res$$reg;
2166 
2167     // MOV  res,0
2168     emit_opcode( cbuf, 0xB8 + res_encoding);
2169     emit_d32( cbuf, 0 );
2170     // JNE,s  fail
2171     emit_opcode(cbuf,0x75);
2172     emit_d8(cbuf, 5 );
2173     // MOV  res,1
2174     emit_opcode( cbuf, 0xB8 + res_encoding);
2175     emit_d32( cbuf, 1 );
2176     // fail:
2177   %}
2178 
2179   enc_class set_instruction_start( ) %{
2180     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2181   %}
2182 
2183   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2184     int reg_encoding = $ereg$$reg;
2185     int base  = $mem$$base;
2186     int index = $mem$$index;
2187     int scale = $mem$$scale;
2188     int displace = $mem$$disp;
2189     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2190     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2191   %}
2192 
2193   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2194     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2195     int base  = $mem$$base;
2196     int index = $mem$$index;
2197     int scale = $mem$$scale;
2198     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2199     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2200     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2201   %}
2202 
2203   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2204     int r1, r2;
2205     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2206     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2207     emit_opcode(cbuf,0x0F);
2208     emit_opcode(cbuf,$tertiary);
2209     emit_rm(cbuf, 0x3, r1, r2);
2210     emit_d8(cbuf,$cnt$$constant);
2211     emit_d8(cbuf,$primary);
2212     emit_rm(cbuf, 0x3, $secondary, r1);
2213     emit_d8(cbuf,$cnt$$constant);
2214   %}
2215 
2216   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2217     emit_opcode( cbuf, 0x8B ); // Move
2218     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_d8(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_d8(cbuf,$primary);
2225     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2226     emit_d8(cbuf,31);
2227   %}
2228 
2229   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2230     int r1, r2;
2231     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2232     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2233 
2234     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2235     emit_rm(cbuf, 0x3, r1, r2);
2236     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2237       emit_opcode(cbuf,$primary);
2238       emit_rm(cbuf, 0x3, $secondary, r1);
2239       emit_d8(cbuf,$cnt$$constant-32);
2240     }
2241     emit_opcode(cbuf,0x33);  // XOR r2,r2
2242     emit_rm(cbuf, 0x3, r2, r2);
2243   %}
2244 
2245   // Clone of RegMem but accepts an extra parameter to access each
2246   // half of a double in memory; it never needs relocation info.
2247   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2248     emit_opcode(cbuf,$opcode$$constant);
2249     int reg_encoding = $rm_reg$$reg;
2250     int base     = $mem$$base;
2251     int index    = $mem$$index;
2252     int scale    = $mem$$scale;
2253     int displace = $mem$$disp + $disp_for_half$$constant;
2254     relocInfo::relocType disp_reloc = relocInfo::none;
2255     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2256   %}
2257 
2258   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2259   //
2260   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2261   // and it never needs relocation information.
2262   // Frequently used to move data between FPU's Stack Top and memory.
2263   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2264     int rm_byte_opcode = $rm_opcode$$constant;
2265     int base     = $mem$$base;
2266     int index    = $mem$$index;
2267     int scale    = $mem$$scale;
2268     int displace = $mem$$disp;
2269     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2270     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2271   %}
2272 
2273   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2274     int rm_byte_opcode = $rm_opcode$$constant;
2275     int base     = $mem$$base;
2276     int index    = $mem$$index;
2277     int scale    = $mem$$scale;
2278     int displace = $mem$$disp;
2279     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2280     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2281   %}
2282 
2283   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2284     int reg_encoding = $dst$$reg;
2285     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2286     int index        = 0x04;            // 0x04 indicates no index
2287     int scale        = 0x00;            // 0x00 indicates no scale
2288     int displace     = $src1$$constant; // 0x00 indicates no displacement
2289     relocInfo::relocType disp_reloc = relocInfo::none;
2290     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2291   %}
2292 
2293   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2294     // Compare dst,src
2295     emit_opcode(cbuf,0x3B);
2296     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2297     // jmp dst < src around move
2298     emit_opcode(cbuf,0x7C);
2299     emit_d8(cbuf,2);
2300     // move dst,src
2301     emit_opcode(cbuf,0x8B);
2302     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2303   %}
2304 
2305   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2306     // Compare dst,src
2307     emit_opcode(cbuf,0x3B);
2308     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2309     // jmp dst > src around move
2310     emit_opcode(cbuf,0x7F);
2311     emit_d8(cbuf,2);
2312     // move dst,src
2313     emit_opcode(cbuf,0x8B);
2314     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2315   %}
2316 
2317   enc_class enc_FPR_store(memory mem, regDPR src) %{
2318     // If src is FPR1, we can just FST to store it.
2319     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2320     int reg_encoding = 0x2; // Just store
2321     int base  = $mem$$base;
2322     int index = $mem$$index;
2323     int scale = $mem$$scale;
2324     int displace = $mem$$disp;
2325     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2326     if( $src$$reg != FPR1L_enc ) {
2327       reg_encoding = 0x3;  // Store & pop
2328       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2329       emit_d8( cbuf, 0xC0-1+$src$$reg );
2330     }
2331     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2332     emit_opcode(cbuf,$primary);
2333     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2334   %}
2335 
2336   enc_class neg_reg(rRegI dst) %{
2337     // NEG $dst
2338     emit_opcode(cbuf,0xF7);
2339     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2340   %}
2341 
2342   enc_class setLT_reg(eCXRegI dst) %{
2343     // SETLT $dst
2344     emit_opcode(cbuf,0x0F);
2345     emit_opcode(cbuf,0x9C);
2346     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2347   %}
2348 
2349   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2350     int tmpReg = $tmp$$reg;
2351 
2352     // SUB $p,$q
2353     emit_opcode(cbuf,0x2B);
2354     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2355     // SBB $tmp,$tmp
2356     emit_opcode(cbuf,0x1B);
2357     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2358     // AND $tmp,$y
2359     emit_opcode(cbuf,0x23);
2360     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2361     // ADD $p,$tmp
2362     emit_opcode(cbuf,0x03);
2363     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2364   %}
2365 
2366   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2367     // TEST shift,32
2368     emit_opcode(cbuf,0xF7);
2369     emit_rm(cbuf, 0x3, 0, ECX_enc);
2370     emit_d32(cbuf,0x20);
2371     // JEQ,s small
2372     emit_opcode(cbuf, 0x74);
2373     emit_d8(cbuf, 0x04);
2374     // MOV    $dst.hi,$dst.lo
2375     emit_opcode( cbuf, 0x8B );
2376     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2377     // CLR    $dst.lo
2378     emit_opcode(cbuf, 0x33);
2379     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2380 // small:
2381     // SHLD   $dst.hi,$dst.lo,$shift
2382     emit_opcode(cbuf,0x0F);
2383     emit_opcode(cbuf,0xA5);
2384     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2385     // SHL    $dst.lo,$shift"
2386     emit_opcode(cbuf,0xD3);
2387     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2388   %}
2389 
2390   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2391     // TEST shift,32
2392     emit_opcode(cbuf,0xF7);
2393     emit_rm(cbuf, 0x3, 0, ECX_enc);
2394     emit_d32(cbuf,0x20);
2395     // JEQ,s small
2396     emit_opcode(cbuf, 0x74);
2397     emit_d8(cbuf, 0x04);
2398     // MOV    $dst.lo,$dst.hi
2399     emit_opcode( cbuf, 0x8B );
2400     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2401     // CLR    $dst.hi
2402     emit_opcode(cbuf, 0x33);
2403     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2404 // small:
2405     // SHRD   $dst.lo,$dst.hi,$shift
2406     emit_opcode(cbuf,0x0F);
2407     emit_opcode(cbuf,0xAD);
2408     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2409     // SHR    $dst.hi,$shift"
2410     emit_opcode(cbuf,0xD3);
2411     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2412   %}
2413 
2414   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2415     // TEST shift,32
2416     emit_opcode(cbuf,0xF7);
2417     emit_rm(cbuf, 0x3, 0, ECX_enc);
2418     emit_d32(cbuf,0x20);
2419     // JEQ,s small
2420     emit_opcode(cbuf, 0x74);
2421     emit_d8(cbuf, 0x05);
2422     // MOV    $dst.lo,$dst.hi
2423     emit_opcode( cbuf, 0x8B );
2424     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2425     // SAR    $dst.hi,31
2426     emit_opcode(cbuf, 0xC1);
2427     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2428     emit_d8(cbuf, 0x1F );
2429 // small:
2430     // SHRD   $dst.lo,$dst.hi,$shift
2431     emit_opcode(cbuf,0x0F);
2432     emit_opcode(cbuf,0xAD);
2433     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2434     // SAR    $dst.hi,$shift"
2435     emit_opcode(cbuf,0xD3);
2436     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2437   %}
2438 
2439 
2440   // ----------------- Encodings for floating point unit -----------------
2441   // May leave result in FPU-TOS or FPU reg depending on opcodes
2442   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2443     $$$emit8$primary;
2444     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2445   %}
2446 
2447   // Pop argument in FPR0 with FSTP ST(0)
2448   enc_class PopFPU() %{
2449     emit_opcode( cbuf, 0xDD );
2450     emit_d8( cbuf, 0xD8 );
2451   %}
2452 
2453   // !!!!! equivalent to Pop_Reg_F
2454   enc_class Pop_Reg_DPR( regDPR dst ) %{
2455     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2456     emit_d8( cbuf, 0xD8+$dst$$reg );
2457   %}
2458 
2459   enc_class Push_Reg_DPR( regDPR dst ) %{
2460     emit_opcode( cbuf, 0xD9 );
2461     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2462   %}
2463 
2464   enc_class strictfp_bias1( regDPR dst ) %{
2465     emit_opcode( cbuf, 0xDB );           // FLD m80real
2466     emit_opcode( cbuf, 0x2D );
2467     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2468     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2469     emit_opcode( cbuf, 0xC8+$dst$$reg );
2470   %}
2471 
2472   enc_class strictfp_bias2( regDPR dst ) %{
2473     emit_opcode( cbuf, 0xDB );           // FLD m80real
2474     emit_opcode( cbuf, 0x2D );
2475     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2476     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2477     emit_opcode( cbuf, 0xC8+$dst$$reg );
2478   %}
2479 
2480   // Special case for moving an integer register to a stack slot.
2481   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2482     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2483   %}
2484 
2485   // Special case for moving a register to a stack slot.
2486   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2487     // Opcode already emitted
2488     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2489     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2490     emit_d32(cbuf, $dst$$disp);   // Displacement
2491   %}
2492 
2493   // Push the integer in stackSlot 'src' onto FP-stack
2494   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2495     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2496   %}
2497 
2498   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2499   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2500     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2501   %}
2502 
2503   // Same as Pop_Mem_F except for opcode
2504   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2505   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2506     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2507   %}
2508 
2509   enc_class Pop_Reg_FPR( regFPR dst ) %{
2510     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2511     emit_d8( cbuf, 0xD8+$dst$$reg );
2512   %}
2513 
2514   enc_class Push_Reg_FPR( regFPR dst ) %{
2515     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2516     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2517   %}
2518 
2519   // Push FPU's float to a stack-slot, and pop FPU-stack
2520   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2521     int pop = 0x02;
2522     if ($src$$reg != FPR1L_enc) {
2523       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2524       emit_d8( cbuf, 0xC0-1+$src$$reg );
2525       pop = 0x03;
2526     }
2527     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2528   %}
2529 
2530   // Push FPU's double to a stack-slot, and pop FPU-stack
2531   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2532     int pop = 0x02;
2533     if ($src$$reg != FPR1L_enc) {
2534       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2535       emit_d8( cbuf, 0xC0-1+$src$$reg );
2536       pop = 0x03;
2537     }
2538     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2539   %}
2540 
2541   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2542   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2543     int pop = 0xD0 - 1; // -1 since we skip FLD
2544     if ($src$$reg != FPR1L_enc) {
2545       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2546       emit_d8( cbuf, 0xC0-1+$src$$reg );
2547       pop = 0xD8;
2548     }
2549     emit_opcode( cbuf, 0xDD );
2550     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2551   %}
2552 
2553 
2554   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2555     // load dst in FPR0
2556     emit_opcode( cbuf, 0xD9 );
2557     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2558     if ($src$$reg != FPR1L_enc) {
2559       // fincstp
2560       emit_opcode (cbuf, 0xD9);
2561       emit_opcode (cbuf, 0xF7);
2562       // swap src with FPR1:
2563       // FXCH FPR1 with src
2564       emit_opcode(cbuf, 0xD9);
2565       emit_d8(cbuf, 0xC8-1+$src$$reg );
2566       // fdecstp
2567       emit_opcode (cbuf, 0xD9);
2568       emit_opcode (cbuf, 0xF6);
2569     }
2570   %}
2571 
2572   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2573     MacroAssembler _masm(&cbuf);
2574     __ subptr(rsp, 8);
2575     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2576     __ fld_d(Address(rsp, 0));
2577     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2578     __ fld_d(Address(rsp, 0));
2579   %}
2580 
2581   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2582     MacroAssembler _masm(&cbuf);
2583     __ subptr(rsp, 4);
2584     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2585     __ fld_s(Address(rsp, 0));
2586     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2587     __ fld_s(Address(rsp, 0));
2588   %}
2589 
2590   enc_class Push_ResultD(regD dst) %{
2591     MacroAssembler _masm(&cbuf);
2592     __ fstp_d(Address(rsp, 0));
2593     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2594     __ addptr(rsp, 8);
2595   %}
2596 
2597   enc_class Push_ResultF(regF dst, immI d8) %{
2598     MacroAssembler _masm(&cbuf);
2599     __ fstp_s(Address(rsp, 0));
2600     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2601     __ addptr(rsp, $d8$$constant);
2602   %}
2603 
2604   enc_class Push_SrcD(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ subptr(rsp, 8);
2607     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2608     __ fld_d(Address(rsp, 0));
2609   %}
2610 
2611   enc_class push_stack_temp_qword() %{
2612     MacroAssembler _masm(&cbuf);
2613     __ subptr(rsp, 8);
2614   %}
2615 
2616   enc_class pop_stack_temp_qword() %{
2617     MacroAssembler _masm(&cbuf);
2618     __ addptr(rsp, 8);
2619   %}
2620 
2621   enc_class push_xmm_to_fpr1(regD src) %{
2622     MacroAssembler _masm(&cbuf);
2623     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2624     __ fld_d(Address(rsp, 0));
2625   %}
2626 
2627   enc_class Push_Result_Mod_DPR( regDPR src) %{
2628     if ($src$$reg != FPR1L_enc) {
2629       // fincstp
2630       emit_opcode (cbuf, 0xD9);
2631       emit_opcode (cbuf, 0xF7);
2632       // FXCH FPR1 with src
2633       emit_opcode(cbuf, 0xD9);
2634       emit_d8(cbuf, 0xC8-1+$src$$reg );
2635       // fdecstp
2636       emit_opcode (cbuf, 0xD9);
2637       emit_opcode (cbuf, 0xF6);
2638     }
2639     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2640     // // FSTP   FPR$dst$$reg
2641     // emit_opcode( cbuf, 0xDD );
2642     // emit_d8( cbuf, 0xD8+$dst$$reg );
2643   %}
2644 
2645   enc_class fnstsw_sahf_skip_parity() %{
2646     // fnstsw ax
2647     emit_opcode( cbuf, 0xDF );
2648     emit_opcode( cbuf, 0xE0 );
2649     // sahf
2650     emit_opcode( cbuf, 0x9E );
2651     // jnp  ::skip
2652     emit_opcode( cbuf, 0x7B );
2653     emit_opcode( cbuf, 0x05 );
2654   %}
2655 
2656   enc_class emitModDPR() %{
2657     // fprem must be iterative
2658     // :: loop
2659     // fprem
2660     emit_opcode( cbuf, 0xD9 );
2661     emit_opcode( cbuf, 0xF8 );
2662     // wait
2663     emit_opcode( cbuf, 0x9b );
2664     // fnstsw ax
2665     emit_opcode( cbuf, 0xDF );
2666     emit_opcode( cbuf, 0xE0 );
2667     // sahf
2668     emit_opcode( cbuf, 0x9E );
2669     // jp  ::loop
2670     emit_opcode( cbuf, 0x0F );
2671     emit_opcode( cbuf, 0x8A );
2672     emit_opcode( cbuf, 0xF4 );
2673     emit_opcode( cbuf, 0xFF );
2674     emit_opcode( cbuf, 0xFF );
2675     emit_opcode( cbuf, 0xFF );
2676   %}
2677 
2678   enc_class fpu_flags() %{
2679     // fnstsw_ax
2680     emit_opcode( cbuf, 0xDF);
2681     emit_opcode( cbuf, 0xE0);
2682     // test ax,0x0400
2683     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2684     emit_opcode( cbuf, 0xA9 );
2685     emit_d16   ( cbuf, 0x0400 );
2686     // // // This sequence works, but stalls for 12-16 cycles on PPro
2687     // // test rax,0x0400
2688     // emit_opcode( cbuf, 0xA9 );
2689     // emit_d32   ( cbuf, 0x00000400 );
2690     //
2691     // jz exit (no unordered comparison)
2692     emit_opcode( cbuf, 0x74 );
2693     emit_d8    ( cbuf, 0x02 );
2694     // mov ah,1 - treat as LT case (set carry flag)
2695     emit_opcode( cbuf, 0xB4 );
2696     emit_d8    ( cbuf, 0x01 );
2697     // sahf
2698     emit_opcode( cbuf, 0x9E);
2699   %}
2700 
2701   enc_class cmpF_P6_fixup() %{
2702     // Fixup the integer flags in case comparison involved a NaN
2703     //
2704     // JNP exit (no unordered comparison, P-flag is set by NaN)
2705     emit_opcode( cbuf, 0x7B );
2706     emit_d8    ( cbuf, 0x03 );
2707     // MOV AH,1 - treat as LT case (set carry flag)
2708     emit_opcode( cbuf, 0xB4 );
2709     emit_d8    ( cbuf, 0x01 );
2710     // SAHF
2711     emit_opcode( cbuf, 0x9E);
2712     // NOP     // target for branch to avoid branch to branch
2713     emit_opcode( cbuf, 0x90);
2714   %}
2715 
2716 //     fnstsw_ax();
2717 //     sahf();
2718 //     movl(dst, nan_result);
2719 //     jcc(Assembler::parity, exit);
2720 //     movl(dst, less_result);
2721 //     jcc(Assembler::below, exit);
2722 //     movl(dst, equal_result);
2723 //     jcc(Assembler::equal, exit);
2724 //     movl(dst, greater_result);
2725 
2726 // less_result     =  1;
2727 // greater_result  = -1;
2728 // equal_result    = 0;
2729 // nan_result      = -1;
2730 
2731   enc_class CmpF_Result(rRegI dst) %{
2732     // fnstsw_ax();
2733     emit_opcode( cbuf, 0xDF);
2734     emit_opcode( cbuf, 0xE0);
2735     // sahf
2736     emit_opcode( cbuf, 0x9E);
2737     // movl(dst, nan_result);
2738     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2739     emit_d32( cbuf, -1 );
2740     // jcc(Assembler::parity, exit);
2741     emit_opcode( cbuf, 0x7A );
2742     emit_d8    ( cbuf, 0x13 );
2743     // movl(dst, less_result);
2744     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2745     emit_d32( cbuf, -1 );
2746     // jcc(Assembler::below, exit);
2747     emit_opcode( cbuf, 0x72 );
2748     emit_d8    ( cbuf, 0x0C );
2749     // movl(dst, equal_result);
2750     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2751     emit_d32( cbuf, 0 );
2752     // jcc(Assembler::equal, exit);
2753     emit_opcode( cbuf, 0x74 );
2754     emit_d8    ( cbuf, 0x05 );
2755     // movl(dst, greater_result);
2756     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2757     emit_d32( cbuf, 1 );
2758   %}
2759 
2760 
2761   // Compare the longs and set flags
2762   // BROKEN!  Do Not use as-is
2763   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2764     // CMP    $src1.hi,$src2.hi
2765     emit_opcode( cbuf, 0x3B );
2766     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2767     // JNE,s  done
2768     emit_opcode(cbuf,0x75);
2769     emit_d8(cbuf, 2 );
2770     // CMP    $src1.lo,$src2.lo
2771     emit_opcode( cbuf, 0x3B );
2772     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2773 // done:
2774   %}
2775 
2776   enc_class convert_int_long( regL dst, rRegI src ) %{
2777     // mov $dst.lo,$src
2778     int dst_encoding = $dst$$reg;
2779     int src_encoding = $src$$reg;
2780     encode_Copy( cbuf, dst_encoding  , src_encoding );
2781     // mov $dst.hi,$src
2782     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2783     // sar $dst.hi,31
2784     emit_opcode( cbuf, 0xC1 );
2785     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2786     emit_d8(cbuf, 0x1F );
2787   %}
2788 
2789   enc_class convert_long_double( eRegL src ) %{
2790     // push $src.hi
2791     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2792     // push $src.lo
2793     emit_opcode(cbuf, 0x50+$src$$reg  );
2794     // fild 64-bits at [SP]
2795     emit_opcode(cbuf,0xdf);
2796     emit_d8(cbuf, 0x6C);
2797     emit_d8(cbuf, 0x24);
2798     emit_d8(cbuf, 0x00);
2799     // pop stack
2800     emit_opcode(cbuf, 0x83); // add  SP, #8
2801     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2802     emit_d8(cbuf, 0x8);
2803   %}
2804 
2805   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2806     // IMUL   EDX:EAX,$src1
2807     emit_opcode( cbuf, 0xF7 );
2808     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2809     // SAR    EDX,$cnt-32
2810     int shift_count = ((int)$cnt$$constant) - 32;
2811     if (shift_count > 0) {
2812       emit_opcode(cbuf, 0xC1);
2813       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2814       emit_d8(cbuf, shift_count);
2815     }
2816   %}
2817 
  // Same as convert_long_double, but without the trailing ADD ESP,8 to restore the stack
2819   enc_class convert_long_double2( eRegL src ) %{
2820     // push $src.hi
2821     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2822     // push $src.lo
2823     emit_opcode(cbuf, 0x50+$src$$reg  );
2824     // fild 64-bits at [SP]
2825     emit_opcode(cbuf,0xdf);
2826     emit_d8(cbuf, 0x6C);
2827     emit_d8(cbuf, 0x24);
2828     emit_d8(cbuf, 0x00);
2829   %}
2830 
2831   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2832     // Basic idea: long = (long)int * (long)int
2833     // IMUL EDX:EAX, src
2834     emit_opcode( cbuf, 0xF7 );
2835     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2836   %}
2837 
2838   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2839     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2840     // MUL EDX:EAX, src
2841     emit_opcode( cbuf, 0xF7 );
2842     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2843   %}
2844 
2845   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2846     // Basic idea: lo(result) = lo(x_lo * y_lo)
2847     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2848     // MOV    $tmp,$src.lo
2849     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2850     // IMUL   $tmp,EDX
2851     emit_opcode( cbuf, 0x0F );
2852     emit_opcode( cbuf, 0xAF );
2853     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2854     // MOV    EDX,$src.hi
2855     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2856     // IMUL   EDX,EAX
2857     emit_opcode( cbuf, 0x0F );
2858     emit_opcode( cbuf, 0xAF );
2859     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2860     // ADD    $tmp,EDX
2861     emit_opcode( cbuf, 0x03 );
2862     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2863     // MUL   EDX:EAX,$src.lo
2864     emit_opcode( cbuf, 0xF7 );
2865     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2866     // ADD    EDX,ESI
2867     emit_opcode( cbuf, 0x03 );
2868     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2869   %}
2870 
2871   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2872     // Basic idea: lo(result) = lo(src * y_lo)
2873     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2874     // IMUL   $tmp,EDX,$src
2875     emit_opcode( cbuf, 0x6B );
2876     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2877     emit_d8( cbuf, (int)$src$$constant );
2878     // MOV    EDX,$src
2879     emit_opcode(cbuf, 0xB8 + EDX_enc);
2880     emit_d32( cbuf, (int)$src$$constant );
2881     // MUL   EDX:EAX,EDX
2882     emit_opcode( cbuf, 0xF7 );
2883     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2884     // ADD    EDX,ESI
2885     emit_opcode( cbuf, 0x03 );
2886     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2887   %}
2888 
2889   enc_class long_div( eRegL src1, eRegL src2 ) %{
2890     // PUSH src1.hi
2891     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2892     // PUSH src1.lo
2893     emit_opcode(cbuf,               0x50+$src1$$reg  );
2894     // PUSH src2.hi
2895     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2896     // PUSH src2.lo
2897     emit_opcode(cbuf,               0x50+$src2$$reg  );
2898     // CALL directly to the runtime
2899     cbuf.set_insts_mark();
2900     emit_opcode(cbuf,0xE8);       // Call into runtime
2901     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2902     // Restore stack
2903     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2904     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2905     emit_d8(cbuf, 4*4);
2906   %}
2907 
2908   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2909     // PUSH src1.hi
2910     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2911     // PUSH src1.lo
2912     emit_opcode(cbuf,               0x50+$src1$$reg  );
2913     // PUSH src2.hi
2914     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2915     // PUSH src2.lo
2916     emit_opcode(cbuf,               0x50+$src2$$reg  );
2917     // CALL directly to the runtime
2918     cbuf.set_insts_mark();
2919     emit_opcode(cbuf,0xE8);       // Call into runtime
2920     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2921     // Restore stack
2922     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2923     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2924     emit_d8(cbuf, 4*4);
2925   %}
2926 
2927   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2928     // MOV   $tmp,$src.lo
2929     emit_opcode(cbuf, 0x8B);
2930     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2931     // OR    $tmp,$src.hi
2932     emit_opcode(cbuf, 0x0B);
2933     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2934   %}
2935 
2936   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2937     // CMP    $src1.lo,$src2.lo
2938     emit_opcode( cbuf, 0x3B );
2939     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2940     // JNE,s  skip
2941     emit_cc(cbuf, 0x70, 0x5);
2942     emit_d8(cbuf,2);
2943     // CMP    $src1.hi,$src2.hi
2944     emit_opcode( cbuf, 0x3B );
2945     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2946   %}
2947 
2948   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2949     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2950     emit_opcode( cbuf, 0x3B );
2951     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2952     // MOV    $tmp,$src1.hi
2953     emit_opcode( cbuf, 0x8B );
2954     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2955     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2956     emit_opcode( cbuf, 0x1B );
2957     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2958   %}
2959 
2960   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2961     // XOR    $tmp,$tmp
2962     emit_opcode(cbuf,0x33);  // XOR
2963     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2964     // CMP    $tmp,$src.lo
2965     emit_opcode( cbuf, 0x3B );
2966     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2967     // SBB    $tmp,$src.hi
2968     emit_opcode( cbuf, 0x1B );
2969     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2970   %}
2971 
2972  // Sniff, sniff... smells like Gnu Superoptimizer
2973   enc_class neg_long( eRegL dst ) %{
2974     emit_opcode(cbuf,0xF7);    // NEG hi
2975     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2976     emit_opcode(cbuf,0xF7);    // NEG lo
2977     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2978     emit_opcode(cbuf,0x83);    // SBB hi,0
2979     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2980     emit_d8    (cbuf,0 );
2981   %}
2982 
2983   enc_class enc_pop_rdx() %{
2984     emit_opcode(cbuf,0x5A);
2985   %}
2986 
2987   enc_class enc_rethrow() %{
2988     cbuf.set_insts_mark();
2989     emit_opcode(cbuf, 0xE9);        // jmp    entry
2990     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2991                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2992   %}
2993 
2994 
2995   // Convert a double to an int.  Java semantics require we do complex
2996   // manglelations in the corner cases.  So we set the rounding mode to
2997   // 'zero', store the darned double down as an int, and reset the
2998   // rounding mode to 'nearest'.  The hardware throws an exception which
2999   // patches up the correct value directly to the stack.
3000   enc_class DPR2I_encoding( regDPR src ) %{
3001     // Flip to round-to-zero mode.  We attempted to allow invalid-op
3002     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
3004     // However, I2C adapters and other float-stack manglers leave pending
3005     // invalid-op exceptions hanging.  We would have to clear them before
3006     // enabling them and that is more expensive than just testing for the
3007     // invalid value Intel stores down in the corner cases.
3008     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3009     emit_opcode(cbuf,0x2D);
3010     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3011     // Allocate a word
3012     emit_opcode(cbuf,0x83);            // SUB ESP,4
3013     emit_opcode(cbuf,0xEC);
3014     emit_d8(cbuf,0x04);
3015     // Encoding assumes a double has been pushed into FPR0.
3016     // Store down the double as an int, popping the FPU stack
3017     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3018     emit_opcode(cbuf,0x1C);
3019     emit_d8(cbuf,0x24);
3020     // Restore the rounding mode; mask the exception
3021     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3022     emit_opcode(cbuf,0x2D);
3023     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3024         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3025         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3026 
3027     // Load the converted int; adjust CPU stack
3028     emit_opcode(cbuf,0x58);       // POP EAX
3029     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3030     emit_d32   (cbuf,0x80000000); //         0x80000000
3031     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3032     emit_d8    (cbuf,0x07);       // Size of slow_call
3033     // Push src onto stack slow-path
3034     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3035     emit_d8    (cbuf,0xC0-1+$src$$reg );
3036     // CALL directly to the runtime
3037     cbuf.set_insts_mark();
3038     emit_opcode(cbuf,0xE8);       // Call into runtime
3039     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3040     // Carry on here...
3041   %}
3042 
3043   enc_class DPR2L_encoding( regDPR src ) %{
3044     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3045     emit_opcode(cbuf,0x2D);
3046     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the 64-bit result
3048     emit_opcode(cbuf,0x83);            // SUB ESP,8
3049     emit_opcode(cbuf,0xEC);
3050     emit_d8(cbuf,0x08);
3051     // Encoding assumes a double has been pushed into FPR0.
3052     // Store down the double as a long, popping the FPU stack
3053     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3054     emit_opcode(cbuf,0x3C);
3055     emit_d8(cbuf,0x24);
3056     // Restore the rounding mode; mask the exception
3057     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3058     emit_opcode(cbuf,0x2D);
3059     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3060         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3061         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3062 
    // Load the converted long; adjust CPU stack
3064     emit_opcode(cbuf,0x58);       // POP EAX
3065     emit_opcode(cbuf,0x5A);       // POP EDX
3066     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3067     emit_d8    (cbuf,0xFA);       // rdx
3068     emit_d32   (cbuf,0x80000000); //         0x80000000
3069     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3070     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3071     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3073     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3074     emit_d8    (cbuf,0x07);       // Size of slow_call
3075     // Push src onto stack slow-path
3076     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3077     emit_d8    (cbuf,0xC0-1+$src$$reg );
3078     // CALL directly to the runtime
3079     cbuf.set_insts_mark();
3080     emit_opcode(cbuf,0xE8);       // Call into runtime
3081     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3082     // Carry on here...
3083   %}
3084 
3085   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3086     // Operand was loaded from memory into fp ST (stack top)
3087     // FMUL   ST,$src  /* D8 C8+i */
3088     emit_opcode(cbuf, 0xD8);
3089     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3090   %}
3091 
3092   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3093     // FADDP  ST,src2  /* D8 C0+i */
3094     emit_opcode(cbuf, 0xD8);
3095     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3096     //could use FADDP  src2,fpST  /* DE C0+i */
3097   %}
3098 
3099   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3100     // FADDP  src2,ST  /* DE C0+i */
3101     emit_opcode(cbuf, 0xDE);
3102     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3103   %}
3104 
3105   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3106     // Operand has been loaded into fp ST (stack top)
3107       // FSUB   ST,$src1
3108       emit_opcode(cbuf, 0xD8);
3109       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3110 
3111       // FDIV
3112       emit_opcode(cbuf, 0xD8);
3113       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3114   %}
3115 
3116   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3117     // Operand was loaded from memory into fp ST (stack top)
3118     // FADD   ST,$src  /* D8 C0+i */
3119     emit_opcode(cbuf, 0xD8);
3120     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3121 
3122     // FMUL  ST,src2  /* D8 C*+i */
3123     emit_opcode(cbuf, 0xD8);
3124     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3125   %}
3126 
3127 
3128   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3129     // Operand was loaded from memory into fp ST (stack top)
3130     // FADD   ST,$src  /* D8 C0+i */
3131     emit_opcode(cbuf, 0xD8);
3132     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3133 
3134     // FMULP  src2,ST  /* DE C8+i */
3135     emit_opcode(cbuf, 0xDE);
3136     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3137   %}
3138 
3139   // Atomically load the volatile long
3140   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3141     emit_opcode(cbuf,0xDF);
3142     int rm_byte_opcode = 0x05;
3143     int base     = $mem$$base;
3144     int index    = $mem$$index;
3145     int scale    = $mem$$scale;
3146     int displace = $mem$$disp;
3147     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3148     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3149     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3150   %}
3151 
3152   // Volatile Store Long.  Must be atomic, so move it into
3153   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3154   // target address before the store (for null-ptr checks)
3155   // so the memory operand is used twice in the encoding.
3156   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3157     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3158     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3159     emit_opcode(cbuf,0xDF);
3160     int rm_byte_opcode = 0x07;
3161     int base     = $mem$$base;
3162     int index    = $mem$$index;
3163     int scale    = $mem$$scale;
3164     int displace = $mem$$disp;
3165     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3166     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3167   %}
3168 
3169   // Safepoint Poll.  This polls the safepoint page, and causes an
3170   // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI.
3173   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3174 
3175   enc_class Safepoint_Poll() %{
3176     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3177     emit_opcode(cbuf,0x85);
3178     emit_rm (cbuf, 0x0, 0x7, 0x5);
3179     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3180   %}
3181 %}
3182 
3183 
3184 //----------FRAME--------------------------------------------------------------
3185 // Definition of frame structure and management information.
3186 //
3187 //  S T A C K   L A Y O U T    Allocators stack-slot number
3188 //                             |   (to get allocators register number
3189 //  G  Owned by    |        |  v    add OptoReg::stack0())
3190 //  r   CALLER     |        |
3191 //  o     |        +--------+      pad to even-align allocators stack-slot
3192 //  w     V        |  pad0  |        numbers; owned by CALLER
3193 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3194 //  h     ^        |   in   |  5
3195 //        |        |  args  |  4   Holes in incoming args owned by SELF
3196 //  |     |        |        |  3
3197 //  |     |        +--------+
3198 //  V     |        | old out|      Empty on Intel, window on Sparc
3199 //        |    old |preserve|      Must be even aligned.
3200 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3201 //        |        |   in   |  3   area for Intel ret address
3202 //     Owned by    |preserve|      Empty on Sparc.
3203 //       SELF      +--------+
3204 //        |        |  pad2  |  2   pad to align old SP
3205 //        |        +--------+  1
3206 //        |        | locks  |  0
3207 //        |        +--------+----> OptoReg::stack0(), even aligned
3208 //        |        |  pad1  | 11   pad to align new SP
3209 //        |        +--------+
3210 //        |        |        | 10
3211 //        |        | spills |  9   spills
3212 //        V        |        |  8   (pad0 slot for callee)
3213 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3214 //        ^        |  out   |  7
3215 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3216 //     Owned by    +--------+
3217 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3218 //        |    new |preserve|      Must be even-aligned.
3219 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3220 //        |        |        |
3221 //
3222 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3223 //         known from SELF's arguments and the Java calling convention.
3224 //         Region 6-7 is determined per call site.
3225 // Note 2: If the calling convention leaves holes in the incoming argument
3226 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3228 //         incoming area, as the Java calling convention is completely under
3229 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3231 //         varargs C calling conventions.
3232 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3233 //         even aligned with pad0 as needed.
3234 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3235 //         region 6-11 is even aligned; it may be padded out more so that
3236 //         the region from SP to FP meets the minimum stack alignment.
3237 
3238 frame %{
3239   // What direction does stack grow in (assumed to be same for C & Java)
3240   stack_direction(TOWARDS_LOW);
3241 
3242   // These three registers define part of the calling convention
3243   // between compiled code and the interpreter.
3244   inline_cache_reg(EAX);                // Inline Cache Register
3245   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3246 
3247   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3248   cisc_spilling_operand_name(indOffset32);
3249 
3250   // Number of stack slots consumed by locking an object
3251   sync_stack_slots(1);
3252 
3253   // Compiled code's Frame Pointer
3254   frame_pointer(ESP);
3255   // Interpreter stores its frame pointer in a register which is
3256   // stored to the stack by I2CAdaptors.
3257   // I2CAdaptors convert from interpreted java to compiled java.
3258   interpreter_frame_pointer(EBP);
3259 
3260   // Stack alignment requirement
3261   // Alignment size in bytes (128-bit -> 16 bytes)
3262   stack_alignment(StackAlignmentInBytes);
3263 
3264   // Number of stack slots between incoming argument block and the start of
3265   // a new frame.  The PROLOG must add this many slots to the stack.  The
3266   // EPILOG must remove this many slots.  Intel needs one slot for
3267   // return address and one for rbp, (must save rbp)
3268   in_preserve_stack_slots(2+VerifyStackAtCalls);
3269 
3270   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3271   // for calls to C.  Supports the var-args backing area for register parms.
3272   varargs_C_out_slots_killed(0);
3273 
3274   // The after-PROLOG location of the return address.  Location of
3275   // return address specifies a type (REG or STACK) and a number
3276   // representing the register number (i.e. - use a register name) or
3277   // stack slot.
3278   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3279   // Otherwise, it is above the locks and verification slot and alignment word
3280   return_addr(STACK - 1 +
3281               round_to((Compile::current()->in_preserve_stack_slots() +
3282                         Compile::current()->fixed_slots()),
3283                        stack_alignment_in_slots()));
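  // Worked example (a sketch only, assuming VerifyStackAtCalls is off, no
  // extra fixed slots, and StackAlignmentInBytes == 16, i.e. 4 slots per
  // alignment unit): round_to(2 + 0, 4) == 4, so the return address sits at
  // STACK - 1 + 4 == STACK + 3.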
3284 
3285   // Body of function which returns an integer array locating
3286   // arguments either in registers or in stack slots.  Passed an array
3287   // of ideal registers called "sig" and a "length" count.  Stack-slot
3288   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3289   // arguments for a CALLEE.  Incoming stack arguments are
3290   // automatically biased by the preserve_stack_slots field above.
3291   calling_convention %{
    // No difference between incoming and outgoing; just pass false
3293     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3294   %}
3295 
3296 
3297   // Body of function which returns an integer array locating
3298   // arguments either in registers or in stack slots.  Passed an array
3299   // of ideal registers called "sig" and a "length" count.  Stack-slot
3300   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3301   // arguments for a CALLEE.  Incoming stack arguments are
3302   // automatically biased by the preserve_stack_slots field above.
3303   c_calling_convention %{
3304     // This is obviously always outgoing
3305     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3306   %}
3307 
3308   // Location of C & interpreter return values
3309   c_return_value %{
3310     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3311     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3312     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3313 
    // In SSE2+ mode we want to keep the FPU stack clean, so pretend
3315     // that C functions return float and double results in XMM0.
3316     if( ideal_reg == Op_RegD && UseSSE>=2 )
3317       return OptoRegPair(XMM0b_num,XMM0_num);
3318     if( ideal_reg == Op_RegF && UseSSE>=2 )
3319       return OptoRegPair(OptoReg::Bad,XMM0_num);
3320 
3321     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3322   %}
3323 
3324   // Location of return values
3325   return_value %{
3326     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3327     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3328     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3329     if( ideal_reg == Op_RegD && UseSSE>=2 )
3330       return OptoRegPair(XMM0b_num,XMM0_num);
3331     if( ideal_reg == Op_RegF && UseSSE>=1 )
3332       return OptoRegPair(OptoReg::Bad,XMM0_num);
3333     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3334   %}
3335 
3336 %}
3337 
3338 //----------ATTRIBUTES---------------------------------------------------------
3339 //----------Operand Attributes-------------------------------------------------
3340 op_attrib op_cost(0);        // Required cost attribute
3341 
3342 //----------Instruction Attributes---------------------------------------------
3343 ins_attrib ins_cost(100);       // Required cost attribute
3344 ins_attrib ins_size(8);         // Required size attribute (in bits)
3345 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3346                                 // non-matching short branch variant of some
                                // long branch?
3348 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3349                                 // specifies the alignment that some part of the instruction (not
3350                                 // necessarily the start) requires.  If > 1, a compute_padding()
3351                                 // function must be provided for the instruction
3352 
3353 //----------OPERANDS-----------------------------------------------------------
3354 // Operand definitions must precede instruction definitions for correct parsing
3355 // in the ADLC because operands constitute user defined types which are used in
3356 // instruction definitions.
3357 
3358 //----------Simple Operands----------------------------------------------------
3359 // Immediate Operands
3360 // Integer Immediate
3361 operand immI() %{
3362   match(ConI);
3363 
3364   op_cost(10);
3365   format %{ %}
3366   interface(CONST_INTER);
3367 %}
3368 
3369 // Constant for test vs zero
3370 operand immI0() %{
3371   predicate(n->get_int() == 0);
3372   match(ConI);
3373 
3374   op_cost(0);
3375   format %{ %}
3376   interface(CONST_INTER);
3377 %}
3378 
3379 // Constant for increment
3380 operand immI1() %{
3381   predicate(n->get_int() == 1);
3382   match(ConI);
3383 
3384   op_cost(0);
3385   format %{ %}
3386   interface(CONST_INTER);
3387 %}
3388 
3389 // Constant for decrement
3390 operand immI_M1() %{
3391   predicate(n->get_int() == -1);
3392   match(ConI);
3393 
3394   op_cost(0);
3395   format %{ %}
3396   interface(CONST_INTER);
3397 %}
3398 
3399 // Valid scale values for addressing modes
3400 operand immI2() %{
3401   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3402   match(ConI);
3403 
3404   format %{ %}
3405   interface(CONST_INTER);
3406 %}
3407 
3408 operand immI8() %{
3409   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3410   match(ConI);
3411 
3412   op_cost(5);
3413   format %{ %}
3414   interface(CONST_INTER);
3415 %}
3416 
3417 operand immI16() %{
3418   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3419   match(ConI);
3420 
3421   op_cost(10);
3422   format %{ %}
3423   interface(CONST_INTER);
3424 %}
3425 
3426 // Int Immediate non-negative
3427 operand immU31()
3428 %{
3429   predicate(n->get_int() >= 0);
3430   match(ConI);
3431 
3432   op_cost(0);
3433   format %{ %}
3434   interface(CONST_INTER);
3435 %}
3436 
3437 // Constant for long shifts
3438 operand immI_32() %{
3439   predicate( n->get_int() == 32 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 operand immI_1_31() %{
3448   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3449   match(ConI);
3450 
3451   op_cost(0);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 operand immI_32_63() %{
3457   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3458   match(ConI);
3459   op_cost(0);
3460 
3461   format %{ %}
3462   interface(CONST_INTER);
3463 %}
3464 
3465 operand immI_1() %{
3466   predicate( n->get_int() == 1 );
3467   match(ConI);
3468 
3469   op_cost(0);
3470   format %{ %}
3471   interface(CONST_INTER);
3472 %}
3473 
3474 operand immI_2() %{
3475   predicate( n->get_int() == 2 );
3476   match(ConI);
3477 
3478   op_cost(0);
3479   format %{ %}
3480   interface(CONST_INTER);
3481 %}
3482 
3483 operand immI_3() %{
3484   predicate( n->get_int() == 3 );
3485   match(ConI);
3486 
3487   op_cost(0);
3488   format %{ %}
3489   interface(CONST_INTER);
3490 %}
3491 
3492 // Pointer Immediate
3493 operand immP() %{
3494   match(ConP);
3495 
3496   op_cost(10);
3497   format %{ %}
3498   interface(CONST_INTER);
3499 %}
3500 
3501 // NULL Pointer Immediate
3502 operand immP0() %{
3503   predicate( n->get_ptr() == 0 );
3504   match(ConP);
3505   op_cost(0);
3506 
3507   format %{ %}
3508   interface(CONST_INTER);
3509 %}
3510 
3511 // Long Immediate
3512 operand immL() %{
3513   match(ConL);
3514 
3515   op_cost(20);
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
3520 // Long Immediate zero
3521 operand immL0() %{
3522   predicate( n->get_long() == 0L );
3523   match(ConL);
3524   op_cost(0);
3525 
3526   format %{ %}
3527   interface(CONST_INTER);
3528 %}
3529 
// Long Immediate minus one
3531 operand immL_M1() %{
3532   predicate( n->get_long() == -1L );
3533   match(ConL);
3534   op_cost(0);
3535 
3536   format %{ %}
3537   interface(CONST_INTER);
3538 %}
3539 
3540 // Long immediate from 0 to 127.
3541 // Used for a shorter form of long mul by 10.
3542 operand immL_127() %{
3543   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3544   match(ConL);
3545   op_cost(0);
3546 
3547   format %{ %}
3548   interface(CONST_INTER);
3549 %}
3550 
3551 // Long Immediate: low 32-bit mask
3552 operand immL_32bits() %{
3553   predicate(n->get_long() == 0xFFFFFFFFL);
3554   match(ConL);
3555   op_cost(0);
3556 
3557   format %{ %}
3558   interface(CONST_INTER);
3559 %}
3560 
// Long Immediate: fits in a signed 32-bit int
3562 operand immL32() %{
3563   predicate(n->get_long() == (int)(n->get_long()));
3564   match(ConL);
3565   op_cost(20);
3566 
3567   format %{ %}
3568   interface(CONST_INTER);
3569 %}
3570 
// Double Immediate zero
3572 operand immDPR0() %{
3573   // Do additional (and counter-intuitive) test against NaN to work around VC++
3574   // bug that generates code such that NaNs compare equal to 0.0
3575   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3576   match(ConD);
3577 
3578   op_cost(5);
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 // Double Immediate one
3584 operand immDPR1() %{
3585   predicate( UseSSE<=1 && n->getd() == 1.0 );
3586   match(ConD);
3587 
3588   op_cost(5);
3589   format %{ %}
3590   interface(CONST_INTER);
3591 %}
3592 
3593 // Double Immediate
3594 operand immDPR() %{
3595   predicate(UseSSE<=1);
3596   match(ConD);
3597 
3598   op_cost(5);
3599   format %{ %}
3600   interface(CONST_INTER);
3601 %}
3602 
3603 operand immD() %{
3604   predicate(UseSSE>=2);
3605   match(ConD);
3606 
3607   op_cost(5);
3608   format %{ %}
3609   interface(CONST_INTER);
3610 %}
3611 
3612 // Double Immediate zero
3613 operand immD0() %{
3614   // Do additional (and counter-intuitive) test against NaN to work around VC++
3615   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3616   // compare equal to -0.0.
3617   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3618   match(ConD);
3619 
3620   format %{ %}
3621   interface(CONST_INTER);
3622 %}
3623 
3624 // Float Immediate zero
3625 operand immFPR0() %{
3626   predicate(UseSSE == 0 && n->getf() == 0.0F);
3627   match(ConF);
3628 
3629   op_cost(5);
3630   format %{ %}
3631   interface(CONST_INTER);
3632 %}
3633 
3634 // Float Immediate one
3635 operand immFPR1() %{
3636   predicate(UseSSE == 0 && n->getf() == 1.0F);
3637   match(ConF);
3638 
3639   op_cost(5);
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 // Float Immediate
3645 operand immFPR() %{
3646   predicate( UseSSE == 0 );
3647   match(ConF);
3648 
3649   op_cost(5);
3650   format %{ %}
3651   interface(CONST_INTER);
3652 %}
3653 
3654 // Float Immediate
3655 operand immF() %{
3656   predicate(UseSSE >= 1);
3657   match(ConF);
3658 
3659   op_cost(5);
3660   format %{ %}
3661   interface(CONST_INTER);
3662 %}
3663 
3664 // Float Immediate zero.  Zero and not -0.0
3665 operand immF0() %{
3666   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3667   match(ConF);
3668 
3669   op_cost(5);
3670   format %{ %}
3671   interface(CONST_INTER);
3672 %}
3673 
3674 // Immediates for special shifts (sign extend)
3675 
3676 // Constants for increment
3677 operand immI_16() %{
3678   predicate( n->get_int() == 16 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 operand immI_24() %{
3686   predicate( n->get_int() == 24 );
3687   match(ConI);
3688 
3689   format %{ %}
3690   interface(CONST_INTER);
3691 %}
3692 
3693 // Constant for byte-wide masking
3694 operand immI_255() %{
3695   predicate( n->get_int() == 255 );
3696   match(ConI);
3697 
3698   format %{ %}
3699   interface(CONST_INTER);
3700 %}
3701 
3702 // Constant for short-wide masking
3703 operand immI_65535() %{
3704   predicate(n->get_int() == 65535);
3705   match(ConI);
3706 
3707   format %{ %}
3708   interface(CONST_INTER);
3709 %}
3710 
3711 // Register Operands
3712 // Integer Register
3713 operand rRegI() %{
3714   constraint(ALLOC_IN_RC(int_reg));
3715   match(RegI);
3716   match(xRegI);
3717   match(eAXRegI);
3718   match(eBXRegI);
3719   match(eCXRegI);
3720   match(eDXRegI);
3721   match(eDIRegI);
3722   match(eSIRegI);
3723 
3724   format %{ %}
3725   interface(REG_INTER);
3726 %}
3727 
3728 // Subset of Integer Register
3729 operand xRegI(rRegI reg) %{
3730   constraint(ALLOC_IN_RC(int_x_reg));
3731   match(reg);
3732   match(eAXRegI);
3733   match(eBXRegI);
3734   match(eCXRegI);
3735   match(eDXRegI);
3736 
3737   format %{ %}
3738   interface(REG_INTER);
3739 %}
3740 
3741 // Special Registers
3742 operand eAXRegI(xRegI reg) %{
3743   constraint(ALLOC_IN_RC(eax_reg));
3744   match(reg);
3745   match(rRegI);
3746 
3747   format %{ "EAX" %}
3748   interface(REG_INTER);
3749 %}
3750 
3751 // Special Registers
3752 operand eBXRegI(xRegI reg) %{
3753   constraint(ALLOC_IN_RC(ebx_reg));
3754   match(reg);
3755   match(rRegI);
3756 
3757   format %{ "EBX" %}
3758   interface(REG_INTER);
3759 %}
3760 
3761 operand eCXRegI(xRegI reg) %{
3762   constraint(ALLOC_IN_RC(ecx_reg));
3763   match(reg);
3764   match(rRegI);
3765 
3766   format %{ "ECX" %}
3767   interface(REG_INTER);
3768 %}
3769 
3770 operand eDXRegI(xRegI reg) %{
3771   constraint(ALLOC_IN_RC(edx_reg));
3772   match(reg);
3773   match(rRegI);
3774 
3775   format %{ "EDX" %}
3776   interface(REG_INTER);
3777 %}
3778 
3779 operand eDIRegI(xRegI reg) %{
3780   constraint(ALLOC_IN_RC(edi_reg));
3781   match(reg);
3782   match(rRegI);
3783 
3784   format %{ "EDI" %}
3785   interface(REG_INTER);
3786 %}
3787 
3788 operand naxRegI() %{
3789   constraint(ALLOC_IN_RC(nax_reg));
3790   match(RegI);
3791   match(eCXRegI);
3792   match(eDXRegI);
3793   match(eSIRegI);
3794   match(eDIRegI);
3795 
3796   format %{ %}
3797   interface(REG_INTER);
3798 %}
3799 
3800 operand nadxRegI() %{
3801   constraint(ALLOC_IN_RC(nadx_reg));
3802   match(RegI);
3803   match(eBXRegI);
3804   match(eCXRegI);
3805   match(eSIRegI);
3806   match(eDIRegI);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
3812 operand ncxRegI() %{
3813   constraint(ALLOC_IN_RC(ncx_reg));
3814   match(RegI);
3815   match(eAXRegI);
3816   match(eDXRegI);
3817   match(eSIRegI);
3818   match(eDIRegI);
3819 
3820   format %{ %}
3821   interface(REG_INTER);
3822 %}
3823 
3824 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3825 // //
3826 operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
3833 %}
3834 
3835 // Pointer Register
3836 operand anyRegP() %{
3837   constraint(ALLOC_IN_RC(any_reg));
3838   match(RegP);
3839   match(eAXRegP);
3840   match(eBXRegP);
3841   match(eCXRegP);
3842   match(eDIRegP);
3843   match(eRegP);
3844 
3845   format %{ %}
3846   interface(REG_INTER);
3847 %}
3848 
3849 operand eRegP() %{
3850   constraint(ALLOC_IN_RC(int_reg));
3851   match(RegP);
3852   match(eAXRegP);
3853   match(eBXRegP);
3854   match(eCXRegP);
3855   match(eDIRegP);
3856 
3857   format %{ %}
3858   interface(REG_INTER);
3859 %}
3860 
3861 // On windows95, EBP is not safe to use for implicit null tests.
3862 operand eRegP_no_EBP() %{
3863   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3864   match(RegP);
3865   match(eAXRegP);
3866   match(eBXRegP);
3867   match(eCXRegP);
3868   match(eDIRegP);
3869 
3870   op_cost(100);
3871   format %{ %}
3872   interface(REG_INTER);
3873 %}
3874 
3875 operand naxRegP() %{
3876   constraint(ALLOC_IN_RC(nax_reg));
3877   match(RegP);
3878   match(eBXRegP);
3879   match(eDXRegP);
3880   match(eCXRegP);
3881   match(eSIRegP);
3882   match(eDIRegP);
3883 
3884   format %{ %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 operand nabxRegP() %{
3889   constraint(ALLOC_IN_RC(nabx_reg));
3890   match(RegP);
3891   match(eCXRegP);
3892   match(eDXRegP);
3893   match(eSIRegP);
3894   match(eDIRegP);
3895 
3896   format %{ %}
3897   interface(REG_INTER);
3898 %}
3899 
3900 operand pRegP() %{
3901   constraint(ALLOC_IN_RC(p_reg));
3902   match(RegP);
3903   match(eBXRegP);
3904   match(eDXRegP);
3905   match(eSIRegP);
3906   match(eDIRegP);
3907 
3908   format %{ %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Special Registers
3913 // Return a pointer value
3914 operand eAXRegP(eRegP reg) %{
3915   constraint(ALLOC_IN_RC(eax_reg));
3916   match(reg);
3917   format %{ "EAX" %}
3918   interface(REG_INTER);
3919 %}
3920 
3921 // Used in AtomicAdd
3922 operand eBXRegP(eRegP reg) %{
3923   constraint(ALLOC_IN_RC(ebx_reg));
3924   match(reg);
3925   format %{ "EBX" %}
3926   interface(REG_INTER);
3927 %}
3928 
3929 // Tail-call (interprocedural jump) to interpreter
3930 operand eCXRegP(eRegP reg) %{
3931   constraint(ALLOC_IN_RC(ecx_reg));
3932   match(reg);
3933   format %{ "ECX" %}
3934   interface(REG_INTER);
3935 %}
3936 
3937 operand eSIRegP(eRegP reg) %{
3938   constraint(ALLOC_IN_RC(esi_reg));
3939   match(reg);
3940   format %{ "ESI" %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 // Used in rep stosw
3945 operand eDIRegP(eRegP reg) %{
3946   constraint(ALLOC_IN_RC(edi_reg));
3947   match(reg);
3948   format %{ "EDI" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eRegL() %{
3953   constraint(ALLOC_IN_RC(long_reg));
3954   match(RegL);
3955   match(eADXRegL);
3956 
3957   format %{ %}
3958   interface(REG_INTER);
3959 %}
3960 
3961 operand eADXRegL( eRegL reg ) %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(reg);
3964 
3965   format %{ "EDX:EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 operand eBCXRegL( eRegL reg ) %{
3970   constraint(ALLOC_IN_RC(ebcx_reg));
3971   match(reg);
3972 
3973   format %{ "EBX:ECX" %}
3974   interface(REG_INTER);
3975 %}
3976 
3977 // Special case for integer high multiply
3978 operand eADXRegL_low_only() %{
3979   constraint(ALLOC_IN_RC(eadx_reg));
3980   match(RegL);
3981 
3982   format %{ "EAX" %}
3983   interface(REG_INTER);
3984 %}
3985 
3986 // Flags register, used as output of compare instructions
3987 operand eFlagsReg() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990 
3991   format %{ "EFLAGS" %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 // Flags register, used as output of FLOATING POINT compare instructions
3996 operand eFlagsRegU() %{
3997   constraint(ALLOC_IN_RC(int_flags));
3998   match(RegFlags);
3999 
4000   format %{ "EFLAGS_U" %}
4001   interface(REG_INTER);
4002 %}
4003 
4004 operand eFlagsRegUCF() %{
4005   constraint(ALLOC_IN_RC(int_flags));
4006   match(RegFlags);
4007   predicate(false);
4008 
4009   format %{ "EFLAGS_U_CF" %}
4010   interface(REG_INTER);
4011 %}
4012 
4013 // Condition Code Register used by long compare
4014 operand flagsReg_long_LTGE() %{
4015   constraint(ALLOC_IN_RC(int_flags));
4016   match(RegFlags);
4017   format %{ "FLAGS_LTGE" %}
4018   interface(REG_INTER);
4019 %}
4020 operand flagsReg_long_EQNE() %{
4021   constraint(ALLOC_IN_RC(int_flags));
4022   match(RegFlags);
4023   format %{ "FLAGS_EQNE" %}
4024   interface(REG_INTER);
4025 %}
4026 operand flagsReg_long_LEGT() %{
4027   constraint(ALLOC_IN_RC(int_flags));
4028   match(RegFlags);
4029   format %{ "FLAGS_LEGT" %}
4030   interface(REG_INTER);
4031 %}
4032 
4033 // Float register operands
4034 operand regDPR() %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_dbl_reg));
4037   match(RegD);
4038   match(regDPR1);
4039   match(regDPR2);
4040   format %{ %}
4041   interface(REG_INTER);
4042 %}
4043 
4044 operand regDPR1(regDPR reg) %{
4045   predicate( UseSSE < 2 );
4046   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4047   match(reg);
4048   format %{ "FPR1" %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 operand regDPR2(regDPR reg) %{
4053   predicate( UseSSE < 2 );
4054   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4055   match(reg);
4056   format %{ "FPR2" %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 operand regnotDPR1(regDPR reg) %{
4061   predicate( UseSSE < 2 );
4062   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4063   match(reg);
4064   format %{ %}
4065   interface(REG_INTER);
4066 %}
4067 
4068 // Float register operands
4069 operand regFPR() %{
4070   predicate( UseSSE < 2 );
4071   constraint(ALLOC_IN_RC(fp_flt_reg));
4072   match(RegF);
4073   match(regFPR1);
4074   format %{ %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 // Float register operands
4079 operand regFPR1(regFPR reg) %{
4080   predicate( UseSSE < 2 );
4081   constraint(ALLOC_IN_RC(fp_flt_reg0));
4082   match(reg);
4083   format %{ "FPR1" %}
4084   interface(REG_INTER);
4085 %}
4086 
4087 // XMM Float register operands
4088 operand regF() %{
4089   predicate( UseSSE>=1 );
4090   constraint(ALLOC_IN_RC(float_reg_legacy));
4091   match(RegF);
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 // XMM Double register operands
4097 operand regD() %{
4098   predicate( UseSSE>=2 );
4099   constraint(ALLOC_IN_RC(double_reg_legacy));
4100   match(RegD);
4101   format %{ %}
4102   interface(REG_INTER);
4103 %}
4104 
4105 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4106 // runtime code generation via reg_class_dynamic.
4107 operand vecS() %{
4108   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4109   match(VecS);
4110 
4111   format %{ %}
4112   interface(REG_INTER);
4113 %}
4114 
4115 operand vecD() %{
4116   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4117   match(VecD);
4118 
4119   format %{ %}
4120   interface(REG_INTER);
4121 %}
4122 
4123 operand vecX() %{
4124   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4125   match(VecX);
4126 
4127   format %{ %}
4128   interface(REG_INTER);
4129 %}
4130 
4131 operand vecY() %{
4132   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4133   match(VecY);
4134 
4135   format %{ %}
4136   interface(REG_INTER);
4137 %}
4138 
4139 //----------Memory Operands----------------------------------------------------
4140 // Direct Memory Operand
4141 operand direct(immP addr) %{
4142   match(addr);
4143 
4144   format %{ "[$addr]" %}
4145   interface(MEMORY_INTER) %{
4146     base(0xFFFFFFFF);
4147     index(0x4);
4148     scale(0x0);
4149     disp($addr);
4150   %}
4151 %}
4152 
4153 // Indirect Memory Operand
4154 operand indirect(eRegP reg) %{
4155   constraint(ALLOC_IN_RC(int_reg));
4156   match(reg);
4157 
4158   format %{ "[$reg]" %}
4159   interface(MEMORY_INTER) %{
4160     base($reg);
4161     index(0x4);
4162     scale(0x0);
4163     disp(0x0);
4164   %}
4165 %}
4166 
4167 // Indirect Memory Plus Short Offset Operand
4168 operand indOffset8(eRegP reg, immI8 off) %{
4169   match(AddP reg off);
4170 
4171   format %{ "[$reg + $off]" %}
4172   interface(MEMORY_INTER) %{
4173     base($reg);
4174     index(0x4);
4175     scale(0x0);
4176     disp($off);
4177   %}
4178 %}
4179 
4180 // Indirect Memory Plus Long Offset Operand
4181 operand indOffset32(eRegP reg, immI off) %{
4182   match(AddP reg off);
4183 
4184   format %{ "[$reg + $off]" %}
4185   interface(MEMORY_INTER) %{
4186     base($reg);
4187     index(0x4);
4188     scale(0x0);
4189     disp($off);
4190   %}
4191 %}
4192 
4193 // Indirect Memory Plus Long Offset Operand
4194 operand indOffset32X(rRegI reg, immP off) %{
4195   match(AddP off reg);
4196 
4197   format %{ "[$reg + $off]" %}
4198   interface(MEMORY_INTER) %{
4199     base($reg);
4200     index(0x4);
4201     scale(0x0);
4202     disp($off);
4203   %}
4204 %}
4205 
4206 // Indirect Memory Plus Index Register Plus Offset Operand
4207 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4208   match(AddP (AddP reg ireg) off);
4209 
4210   op_cost(10);
4211   format %{"[$reg + $off + $ireg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index($ireg);
4215     scale(0x0);
4216     disp($off);
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Index Register Plus Offset Operand
4221 operand indIndex(eRegP reg, rRegI ireg) %{
4222   match(AddP reg ireg);
4223 
4224   op_cost(10);
4225   format %{"[$reg + $ireg]" %}
4226   interface(MEMORY_INTER) %{
4227     base($reg);
4228     index($ireg);
4229     scale(0x0);
4230     disp(0x0);
4231   %}
4232 %}
4233 
4234 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4236 // // -------------------------------------------------------------------------
4237 // // Scaled Memory Operands
4238 // // Indirect Memory Times Scale Plus Offset Operand
4239 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4240 //   match(AddP off (LShiftI ireg scale));
4241 //
4242 //   op_cost(10);
4243 //   format %{"[$off + $ireg << $scale]" %}
4244 //   interface(MEMORY_INTER) %{
4245 //     base(0x4);
4246 //     index($ireg);
4247 //     scale($scale);
4248 //     disp($off);
4249 //   %}
4250 // %}
4251 
4252 // Indirect Memory Times Scale Plus Index Register
4253 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4254   match(AddP reg (LShiftI ireg scale));
4255 
4256   op_cost(10);
4257   format %{"[$reg + $ireg << $scale]" %}
4258   interface(MEMORY_INTER) %{
4259     base($reg);
4260     index($ireg);
4261     scale($scale);
4262     disp(0x0);
4263   %}
4264 %}
4265 
4266 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4267 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4268   match(AddP (AddP reg (LShiftI ireg scale)) off);
4269 
4270   op_cost(10);
4271   format %{"[$reg + $off + $ireg << $scale]" %}
4272   interface(MEMORY_INTER) %{
4273     base($reg);
4274     index($ireg);
4275     scale($scale);
4276     disp($off);
4277   %}
4278 %}
4279 
4280 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4282 // the first word of the long.  If the load-long destination overlaps with
4283 // registers used in the addressing expression, the 2nd half will be loaded
4284 // from a clobbered address.  Fix this by requiring that load-long use
4285 // address registers that do not overlap with the load-long target.
4286 
4287 // load-long support
4288 operand load_long_RegP() %{
4289   constraint(ALLOC_IN_RC(esi_reg));
4290   match(RegP);
4291   match(eSIRegP);
4292   op_cost(100);
4293   format %{  %}
4294   interface(REG_INTER);
4295 %}
4296 
4297 // Indirect Memory Operand Long
4298 operand load_long_indirect(load_long_RegP reg) %{
4299   constraint(ALLOC_IN_RC(esi_reg));
4300   match(reg);
4301 
4302   format %{ "[$reg]" %}
4303   interface(MEMORY_INTER) %{
4304     base($reg);
4305     index(0x4);
4306     scale(0x0);
4307     disp(0x0);
4308   %}
4309 %}
4310 
4311 // Indirect Memory Plus Long Offset Operand
4312 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4313   match(AddP reg off);
4314 
4315   format %{ "[$reg + $off]" %}
4316   interface(MEMORY_INTER) %{
4317     base($reg);
4318     index(0x4);
4319     scale(0x0);
4320     disp($off);
4321   %}
4322 %}
4323 
4324 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4325 
4326 
4327 //----------Special Memory Operands--------------------------------------------
4328 // Stack Slot Operand - This operand is used for loading and storing temporary
4329 //                      values on the stack where a match requires a value to
4330 //                      flow through memory.
4331 operand stackSlotP(sRegP reg) %{
4332   constraint(ALLOC_IN_RC(stack_slots));
4333   // No match rule because this operand is only generated in matching
4334   format %{ "[$reg]" %}
4335   interface(MEMORY_INTER) %{
4336     base(0x4);   // ESP
4337     index(0x4);  // No Index
4338     scale(0x0);  // No Scale
4339     disp($reg);  // Stack Offset
4340   %}
4341 %}
4342 
4343 operand stackSlotI(sRegI reg) %{
4344   constraint(ALLOC_IN_RC(stack_slots));
4345   // No match rule because this operand is only generated in matching
4346   format %{ "[$reg]" %}
4347   interface(MEMORY_INTER) %{
4348     base(0x4);   // ESP
4349     index(0x4);  // No Index
4350     scale(0x0);  // No Scale
4351     disp($reg);  // Stack Offset
4352   %}
4353 %}
4354 
4355 operand stackSlotF(sRegF reg) %{
4356   constraint(ALLOC_IN_RC(stack_slots));
4357   // No match rule because this operand is only generated in matching
4358   format %{ "[$reg]" %}
4359   interface(MEMORY_INTER) %{
4360     base(0x4);   // ESP
4361     index(0x4);  // No Index
4362     scale(0x0);  // No Scale
4363     disp($reg);  // Stack Offset
4364   %}
4365 %}
4366 
4367 operand stackSlotD(sRegD reg) %{
4368   constraint(ALLOC_IN_RC(stack_slots));
4369   // No match rule because this operand is only generated in matching
4370   format %{ "[$reg]" %}
4371   interface(MEMORY_INTER) %{
4372     base(0x4);   // ESP
4373     index(0x4);  // No Index
4374     scale(0x0);  // No Scale
4375     disp($reg);  // Stack Offset
4376   %}
4377 %}
4378 
4379 operand stackSlotL(sRegL reg) %{
4380   constraint(ALLOC_IN_RC(stack_slots));
4381   // No match rule because this operand is only generated in matching
4382   format %{ "[$reg]" %}
4383   interface(MEMORY_INTER) %{
4384     base(0x4);   // ESP
4385     index(0x4);  // No Index
4386     scale(0x0);  // No Scale
4387     disp($reg);  // Stack Offset
4388   %}
4389 %}
4390 
4391 //----------Memory Operands - Win95 Implicit Null Variants----------------
4392 // Indirect Memory Operand
4393 operand indirect_win95_safe(eRegP_no_EBP reg)
4394 %{
4395   constraint(ALLOC_IN_RC(int_reg));
4396   match(reg);
4397 
4398   op_cost(100);
4399   format %{ "[$reg]" %}
4400   interface(MEMORY_INTER) %{
4401     base($reg);
4402     index(0x4);
4403     scale(0x0);
4404     disp(0x0);
4405   %}
4406 %}
4407 
4408 // Indirect Memory Plus Short Offset Operand
4409 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4410 %{
4411   match(AddP reg off);
4412 
4413   op_cost(100);
4414   format %{ "[$reg + $off]" %}
4415   interface(MEMORY_INTER) %{
4416     base($reg);
4417     index(0x4);
4418     scale(0x0);
4419     disp($off);
4420   %}
4421 %}
4422 
4423 // Indirect Memory Plus Long Offset Operand
4424 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4425 %{
4426   match(AddP reg off);
4427 
4428   op_cost(100);
4429   format %{ "[$reg + $off]" %}
4430   interface(MEMORY_INTER) %{
4431     base($reg);
4432     index(0x4);
4433     scale(0x0);
4434     disp($off);
4435   %}
4436 %}
4437 
4438 // Indirect Memory Plus Index Register Plus Offset Operand
4439 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4440 %{
4441   match(AddP (AddP reg ireg) off);
4442 
4443   op_cost(100);
4444   format %{"[$reg + $off + $ireg]" %}
4445   interface(MEMORY_INTER) %{
4446     base($reg);
4447     index($ireg);
4448     scale(0x0);
4449     disp($off);
4450   %}
4451 %}
4452 
4453 // Indirect Memory Times Scale Plus Index Register
4454 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4455 %{
4456   match(AddP reg (LShiftI ireg scale));
4457 
4458   op_cost(100);
4459   format %{"[$reg + $ireg << $scale]" %}
4460   interface(MEMORY_INTER) %{
4461     base($reg);
4462     index($ireg);
4463     scale($scale);
4464     disp(0x0);
4465   %}
4466 %}
4467 
4468 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4469 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4470 %{
4471   match(AddP (AddP reg (LShiftI ireg scale)) off);
4472 
4473   op_cost(100);
4474   format %{"[$reg + $off + $ireg << $scale]" %}
4475   interface(MEMORY_INTER) %{
4476     base($reg);
4477     index($ireg);
4478     scale($scale);
4479     disp($off);
4480   %}
4481 %}
4482 
4483 //----------Conditional Branch Operands----------------------------------------
4484 // Comparison Op  - This is the operation of the comparison, and is limited to
4485 //                  the following set of codes:
4486 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4487 //
4488 // Other attributes of the comparison, such as unsignedness, are specified
4489 // by the comparison instruction that sets a condition code flags register.
4490 // That result is represented by a flags operand whose subtype is appropriate
4491 // to the unsignedness (etc.) of the comparison.
4492 //
4493 // Later, the instruction which matches both the Comparison Op (a Bool) and
4494 // the flags (produced by the Cmp) specifies the coding of the comparison op
4495 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4496 
// Comparison Code
4498 operand cmpOp() %{
4499   match(Bool);
4500 
4501   format %{ "" %}
4502   interface(COND_INTER) %{
4503     equal(0x4, "e");
4504     not_equal(0x5, "ne");
4505     less(0xC, "l");
4506     greater_equal(0xD, "ge");
4507     less_equal(0xE, "le");
4508     greater(0xF, "g");
4509     overflow(0x0, "o");
4510     no_overflow(0x1, "no");
4511   %}
4512 %}
4513 
4514 // Comparison Code, unsigned compare.  Used by FP also, with
4515 // C2 (unordered) turned into GT or LT already.  The other bits
4516 // C0 and C3 are turned into Carry & Zero flags.
4517 operand cmpOpU() %{
4518   match(Bool);
4519 
4520   format %{ "" %}
4521   interface(COND_INTER) %{
4522     equal(0x4, "e");
4523     not_equal(0x5, "ne");
4524     less(0x2, "b");
4525     greater_equal(0x3, "nb");
4526     less_equal(0x6, "be");
4527     greater(0x7, "nbe");
4528     overflow(0x0, "o");
4529     no_overflow(0x1, "no");
4530   %}
4531 %}
4532 
4533 // Floating comparisons that don't require any fixup for the unordered case
4534 operand cmpOpUCF() %{
4535   match(Bool);
4536   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4537             n->as_Bool()->_test._test == BoolTest::ge ||
4538             n->as_Bool()->_test._test == BoolTest::le ||
4539             n->as_Bool()->_test._test == BoolTest::gt);
4540   format %{ "" %}
4541   interface(COND_INTER) %{
4542     equal(0x4, "e");
4543     not_equal(0x5, "ne");
4544     less(0x2, "b");
4545     greater_equal(0x3, "nb");
4546     less_equal(0x6, "be");
4547     greater(0x7, "nbe");
4548     overflow(0x0, "o");
4549     no_overflow(0x1, "no");
4550   %}
4551 %}
4552 
4553 
4554 // Floating comparisons that can be fixed up with extra conditional jumps
4555 operand cmpOpUCF2() %{
4556   match(Bool);
4557   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4558             n->as_Bool()->_test._test == BoolTest::eq);
4559   format %{ "" %}
4560   interface(COND_INTER) %{
4561     equal(0x4, "e");
4562     not_equal(0x5, "ne");
4563     less(0x2, "b");
4564     greater_equal(0x3, "nb");
4565     less_equal(0x6, "be");
4566     greater(0x7, "nbe");
4567     overflow(0x0, "o");
4568     no_overflow(0x1, "no");
4569   %}
4570 %}
4571 
4572 // Comparison Code for FP conditional move
4573 operand cmpOp_fcmov() %{
4574   match(Bool);
4575 
4576   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4577             n->as_Bool()->_test._test != BoolTest::no_overflow);
4578   format %{ "" %}
4579   interface(COND_INTER) %{
4580     equal        (0x0C8);
4581     not_equal    (0x1C8);
4582     less         (0x0C0);
4583     greater_equal(0x1C0);
4584     less_equal   (0x0D0);
4585     greater      (0x1D0);
4586     overflow(0x0, "o"); // not really supported by the instruction
4587     no_overflow(0x1, "no"); // not really supported by the instruction
4588   %}
4589 %}
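     // The values above appear to pack the x87 FCMOVcc encoding: bit 8 selects
     // the escape byte (0 -> 0xDA, 1 -> 0xDB) and the low byte is the ModRM base
     // (0xC0 = FCMOVB/FCMOVNB, 0xC8 = FCMOVE/FCMOVNE, 0xD0 = FCMOVBE/FCMOVNBE),
     // e.g. 0x0C8 encodes DA C8 (FCMOVE) and 0x1C8 encodes DB C8 (FCMOVNE).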
4590 
4591 // Comparison Code used in long compares; the relations are encoded with their operands commuted (l <-> g, le <-> ge)
4592 operand cmpOp_commute() %{
4593   match(Bool);
4594 
4595   format %{ "" %}
4596   interface(COND_INTER) %{
4597     equal(0x4, "e");
4598     not_equal(0x5, "ne");
4599     less(0xF, "g");
4600     greater_equal(0xE, "le");
4601     less_equal(0xD, "ge");
4602     greater(0xC, "l");
4603     overflow(0x0, "o");
4604     no_overflow(0x1, "no");
4605   %}
4606 %}
4607 
4608 //----------OPERAND CLASSES----------------------------------------------------
4609 // Operand Classes are groups of operands that are used to simplify
4610 // instruction definitions by not requiring the AD writer to specify separate
4611 // instructions for every form of operand when the instruction accepts
4612 // multiple operand types with the same basic encoding and format.  The classic
4613 // case of this is memory operands.
4614 
4615 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4616                indIndex, indIndexScale, indIndexScaleOffset);
4617 
4618 // Long memory operations are encoded as 2 instructions, the second at a +4
4619 // offset.  This means some kind of offset is always required, and you cannot
4620 // use an oop as the offset (as is done when working on static globals).
4621 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4622                     indIndex, indIndexScale, indIndexScaleOffset);
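     // An instruct that declares a "memory" operand matches any of the forms
     // above with a single definition; the encoding reads the operand back
     // either through an encode class (e.g. RegMem(dst,mem)) or, inside an
     // ins_encode block, as a complete Address via $mem$$Address -- see loadI
     // further below for an example of the latter.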
4623 
4624 
4625 //----------PIPELINE-----------------------------------------------------------
4626 // Rules which define the behavior of the target architecture's pipeline.
4627 pipeline %{
4628 
4629 //----------ATTRIBUTES---------------------------------------------------------
4630 attributes %{
4631   variable_size_instructions;        // Instructions are of variable size
4632   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4633   instruction_unit_size = 1;         // An instruction is 1 byte long
4634   instruction_fetch_unit_size = 16;  // The processor fetches one line
4635   instruction_fetch_units = 1;       // of 16 bytes
4636 
4637   // List of nop instructions
4638   nops( MachNop );
4639 %}
4640 
4641 //----------RESOURCES----------------------------------------------------------
4642 // Resources are the functional units available to the machine
4643 
4644 // Generic P2/P3 pipeline
4645 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4646 // 3 instructions decoded per cycle.
4647 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4648 // 2 ALU ops; only ALU0 handles mul/div instructions.
4649 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4650            MS0, MS1, MEM = MS0 | MS1,
4651            BR, FPU,
4652            ALU0, ALU1, ALU = ALU0 | ALU1 );
4653 
4654 //----------PIPELINE DESCRIPTION-----------------------------------------------
4655 // Pipeline Description specifies the stages in the machine's pipeline
4656 
4657 // Generic P2/P3 pipeline
4658 pipe_desc(S0, S1, S2, S3, S4, S5);
4659 
4660 //----------PIPELINE CLASSES---------------------------------------------------
4661 // Pipeline Classes describe the stages in which input and output are
4662 // referenced by the hardware pipeline.
4663 
4664 // Naming convention: ialu or fpu
4665 // Then: _reg
4666 // Then: _reg if there is a 2nd register
4667 // Then: _long if it's a pair of instructions implementing a long operation
4668 // Then: _fat if it requires the big decoder
4669 //   Or: _mem if it requires the big decoder and a memory unit.
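     //
     // For example, ialu_reg_long_fat below is an integer ALU operation whose
     // destination is a long register pair and which needs the big decoder; and
     // fpu_reg_mem is an FPU operation taking a register and a memory operand.
     // Within each class, "resource : stage(count)" lines say when an operand
     // is read or written (e.g. "dst : S4(write)") and which functional units
     // are claimed in which stage (e.g. "DECODE : S0(2)" claims two decode
     // slots in stage S0).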
4670 
4671 // Integer ALU reg operation
4672 pipe_class ialu_reg(rRegI dst) %{
4673     single_instruction;
4674     dst    : S4(write);
4675     dst    : S3(read);
4676     DECODE : S0;        // any decoder
4677     ALU    : S3;        // any alu
4678 %}
4679 
4680 // Long ALU reg operation
4681 pipe_class ialu_reg_long(eRegL dst) %{
4682     instruction_count(2);
4683     dst    : S4(write);
4684     dst    : S3(read);
4685     DECODE : S0(2);     // any 2 decoders
4686     ALU    : S3(2);     // both alus
4687 %}
4688 
4689 // Integer ALU reg operation using big decoder
4690 pipe_class ialu_reg_fat(rRegI dst) %{
4691     single_instruction;
4692     dst    : S4(write);
4693     dst    : S3(read);
4694     D0     : S0;        // big decoder only
4695     ALU    : S3;        // any alu
4696 %}
4697 
4698 // Long ALU reg operation using big decoder
4699 pipe_class ialu_reg_long_fat(eRegL dst) %{
4700     instruction_count(2);
4701     dst    : S4(write);
4702     dst    : S3(read);
4703     D0     : S0(2);     // big decoder only; twice
4704     ALU    : S3(2);     // any 2 alus
4705 %}
4706 
4707 // Integer ALU reg-reg operation
4708 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4709     single_instruction;
4710     dst    : S4(write);
4711     src    : S3(read);
4712     DECODE : S0;        // any decoder
4713     ALU    : S3;        // any alu
4714 %}
4715 
4716 // Long ALU reg-reg operation
4717 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4718     instruction_count(2);
4719     dst    : S4(write);
4720     src    : S3(read);
4721     DECODE : S0(2);     // any 2 decoders
4722     ALU    : S3(2);     // both alus
4723 %}
4724 
4725 // Integer ALU reg-reg operation using big decoder
4726 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4727     single_instruction;
4728     dst    : S4(write);
4729     src    : S3(read);
4730     D0     : S0;        // big decoder only
4731     ALU    : S3;        // any alu
4732 %}
4733 
4734 // Long ALU reg-reg operation using big decoder
4735 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4736     instruction_count(2);
4737     dst    : S4(write);
4738     src    : S3(read);
4739     D0     : S0(2);     // big decoder only; twice
4740     ALU    : S3(2);     // both alus
4741 %}
4742 
4743 // Integer ALU reg-mem operation
4744 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4745     single_instruction;
4746     dst    : S5(write);
4747     mem    : S3(read);
4748     D0     : S0;        // big decoder only
4749     ALU    : S4;        // any alu
4750     MEM    : S3;        // any mem
4751 %}
4752 
4753 // Long ALU reg-mem operation
4754 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4755     instruction_count(2);
4756     dst    : S5(write);
4757     mem    : S3(read);
4758     D0     : S0(2);     // big decoder only; twice
4759     ALU    : S4(2);     // any 2 alus
4760     MEM    : S3(2);     // both mems
4761 %}
4762 
4763 // Integer mem operation (prefetch)
4764 pipe_class ialu_mem(memory mem)
4765 %{
4766     single_instruction;
4767     mem    : S3(read);
4768     D0     : S0;        // big decoder only
4769     MEM    : S3;        // any mem
4770 %}
4771 
4772 // Integer Store to Memory
4773 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4774     single_instruction;
4775     mem    : S3(read);
4776     src    : S5(read);
4777     D0     : S0;        // big decoder only
4778     ALU    : S4;        // any alu
4779     MEM    : S3;
4780 %}
4781 
4782 // Long Store to Memory
4783 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4784     instruction_count(2);
4785     mem    : S3(read);
4786     src    : S5(read);
4787     D0     : S0(2);     // big decoder only; twice
4788     ALU    : S4(2);     // any 2 alus
4789     MEM    : S3(2);     // Both mems
4790 %}
4791 
4792 // Integer Store to Memory
4793 pipe_class ialu_mem_imm(memory mem) %{
4794     single_instruction;
4795     mem    : S3(read);
4796     D0     : S0;        // big decoder only
4797     ALU    : S4;        // any alu
4798     MEM    : S3;
4799 %}
4800 
4801 // Integer ALU0 reg-reg operation
4802 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4803     single_instruction;
4804     dst    : S4(write);
4805     src    : S3(read);
4806     D0     : S0;        // Big decoder only
4807     ALU0   : S3;        // only alu0
4808 %}
4809 
4810 // Integer ALU0 reg-mem operation
4811 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4812     single_instruction;
4813     dst    : S5(write);
4814     mem    : S3(read);
4815     D0     : S0;        // big decoder only
4816     ALU0   : S4;        // ALU0 only
4817     MEM    : S3;        // any mem
4818 %}
4819 
4820 // Integer ALU reg-reg operation
4821 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4822     single_instruction;
4823     cr     : S4(write);
4824     src1   : S3(read);
4825     src2   : S3(read);
4826     DECODE : S0;        // any decoder
4827     ALU    : S3;        // any alu
4828 %}
4829 
4830 // Integer ALU reg-imm operation
4831 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4832     single_instruction;
4833     cr     : S4(write);
4834     src1   : S3(read);
4835     DECODE : S0;        // any decoder
4836     ALU    : S3;        // any alu
4837 %}
4838 
4839 // Integer ALU reg-mem operation
4840 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4841     single_instruction;
4842     cr     : S4(write);
4843     src1   : S3(read);
4844     src2   : S3(read);
4845     D0     : S0;        // big decoder only
4846     ALU    : S4;        // any alu
4847     MEM    : S3;
4848 %}
4849 
4850 // Conditional move reg-reg
4851 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4852     instruction_count(4);
4853     y      : S4(read);
4854     q      : S3(read);
4855     p      : S3(read);
4856     DECODE : S0(4);     // any decoder
4857 %}
4858 
4859 // Conditional move reg-reg
4860 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4861     single_instruction;
4862     dst    : S4(write);
4863     src    : S3(read);
4864     cr     : S3(read);
4865     DECODE : S0;        // any decoder
4866 %}
4867 
4868 // Conditional move reg-mem
4869 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4870     single_instruction;
4871     dst    : S4(write);
4872     src    : S3(read);
4873     cr     : S3(read);
4874     DECODE : S0;        // any decoder
4875     MEM    : S3;
4876 %}
4877 
4878 // Conditional move reg-reg long
4879 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4880     single_instruction;
4881     dst    : S4(write);
4882     src    : S3(read);
4883     cr     : S3(read);
4884     DECODE : S0(2);     // any 2 decoders
4885 %}
4886 
4887 // Conditional move double reg-reg
4888 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4889     single_instruction;
4890     dst    : S4(write);
4891     src    : S3(read);
4892     cr     : S3(read);
4893     DECODE : S0;        // any decoder
4894 %}
4895 
4896 // Float reg-reg operation
4897 pipe_class fpu_reg(regDPR dst) %{
4898     instruction_count(2);
4899     dst    : S3(read);
4900     DECODE : S0(2);     // any 2 decoders
4901     FPU    : S3;
4902 %}
4903 
4904 // Float reg-reg operation
4905 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4906     instruction_count(2);
4907     dst    : S4(write);
4908     src    : S3(read);
4909     DECODE : S0(2);     // any 2 decoders
4910     FPU    : S3;
4911 %}
4912 
4913 // Float reg-reg operation
4914 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4915     instruction_count(3);
4916     dst    : S4(write);
4917     src1   : S3(read);
4918     src2   : S3(read);
4919     DECODE : S0(3);     // any 3 decoders
4920     FPU    : S3(2);
4921 %}
4922 
4923 // Float reg-reg operation
4924 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4925     instruction_count(4);
4926     dst    : S4(write);
4927     src1   : S3(read);
4928     src2   : S3(read);
4929     src3   : S3(read);
4930     DECODE : S0(4);     // any 4 decoders
4931     FPU    : S3(2);
4932 %}
4933 
4934 // Float reg-reg operation
4935 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4936     instruction_count(4);
4937     dst    : S4(write);
4938     src1   : S3(read);
4939     src2   : S3(read);
4940     src3   : S3(read);
4941     DECODE : S1(3);     // any 3 decoders
4942     D0     : S0;        // Big decoder only
4943     FPU    : S3(2);
4944     MEM    : S3;
4945 %}
4946 
4947 // Float reg-mem operation
4948 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4949     instruction_count(2);
4950     dst    : S5(write);
4951     mem    : S3(read);
4952     D0     : S0;        // big decoder only
4953     DECODE : S1;        // any decoder for FPU POP
4954     FPU    : S4;
4955     MEM    : S3;        // any mem
4956 %}
4957 
4958 // Float reg-mem operation
4959 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4960     instruction_count(3);
4961     dst    : S5(write);
4962     src1   : S3(read);
4963     mem    : S3(read);
4964     D0     : S0;        // big decoder only
4965     DECODE : S1(2);     // any decoder for FPU POP
4966     FPU    : S4;
4967     MEM    : S3;        // any mem
4968 %}
4969 
4970 // Float mem-reg operation
4971 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4972     instruction_count(2);
4973     src    : S5(read);
4974     mem    : S3(read);
4975     DECODE : S0;        // any decoder for FPU PUSH
4976     D0     : S1;        // big decoder only
4977     FPU    : S4;
4978     MEM    : S3;        // any mem
4979 %}
4980 
4981 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4982     instruction_count(3);
4983     src1   : S3(read);
4984     src2   : S3(read);
4985     mem    : S3(read);
4986     DECODE : S0(2);     // any decoder for FPU PUSH
4987     D0     : S1;        // big decoder only
4988     FPU    : S4;
4989     MEM    : S3;        // any mem
4990 %}
4991 
4992 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4993     instruction_count(3);
4994     src1   : S3(read);
4995     src2   : S3(read);
4996     mem    : S4(read);
4997     DECODE : S0;        // any decoder for FPU PUSH
4998     D0     : S0(2);     // big decoder only
4999     FPU    : S4;
5000     MEM    : S3(2);     // any mem
5001 %}
5002 
5003 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5004     instruction_count(2);
5005     src1   : S3(read);
5006     dst    : S4(read);
5007     D0     : S0(2);     // big decoder only
5008     MEM    : S3(2);     // any mem
5009 %}
5010 
5011 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5012     instruction_count(3);
5013     src1   : S3(read);
5014     src2   : S3(read);
5015     dst    : S4(read);
5016     D0     : S0(3);     // big decoder only
5017     FPU    : S4;
5018     MEM    : S3(3);     // any mem
5019 %}
5020 
5021 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5022     instruction_count(3);
5023     src1   : S4(read);
5024     mem    : S4(read);
5025     DECODE : S0;        // any decoder for FPU PUSH
5026     D0     : S0(2);     // big decoder only
5027     FPU    : S4;
5028     MEM    : S3(2);     // any mem
5029 %}
5030 
5031 // Float load constant
5032 pipe_class fpu_reg_con(regDPR dst) %{
5033     instruction_count(2);
5034     dst    : S5(write);
5035     D0     : S0;        // big decoder only for the load
5036     DECODE : S1;        // any decoder for FPU POP
5037     FPU    : S4;
5038     MEM    : S3;        // any mem
5039 %}
5040 
5041 // Float load constant
5042 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5043     instruction_count(3);
5044     dst    : S5(write);
5045     src    : S3(read);
5046     D0     : S0;        // big decoder only for the load
5047     DECODE : S1(2);     // any decoder for FPU POP
5048     FPU    : S4;
5049     MEM    : S3;        // any mem
5050 %}
5051 
5052 // Unconditional branch
5053 pipe_class pipe_jmp( label labl ) %{
5054     single_instruction;
5055     BR   : S3;
5056 %}
5057 
5058 // Conditional branch
5059 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5060     single_instruction;
5061     cr    : S1(read);
5062     BR    : S3;
5063 %}
5064 
5065 // Allocation idiom
5066 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5067     instruction_count(1); force_serialization;
5068     fixed_latency(6);
5069     heap_ptr : S3(read);
5070     DECODE   : S0(3);
5071     D0       : S2;
5072     MEM      : S3;
5073     ALU      : S3(2);
5074     dst      : S5(write);
5075     BR       : S5;
5076 %}
5077 
5078 // Generic big/slow expanded idiom
5079 pipe_class pipe_slow(  ) %{
5080     instruction_count(10); multiple_bundles; force_serialization;
5081     fixed_latency(100);
5082     D0  : S0(2);
5083     MEM : S3(2);
5084 %}
5085 
5086 // The real do-nothing guy
5087 pipe_class empty( ) %{
5088     instruction_count(0);
5089 %}
5090 
5091 // Define the class for the Nop node
5092 define %{
5093    MachNop = empty;
5094 %}
5095 
5096 %}
5097 
5098 //----------INSTRUCTIONS-------------------------------------------------------
5099 //
5100 // match      -- States which machine-independent subtree may be replaced
5101 //               by this instruction.
5102 // ins_cost   -- The estimated cost of this instruction is used by instruction
5103 //               selection to identify a minimum cost tree of machine
5104 //               instructions that matches a tree of machine-independent
5105 //               instructions.
5106 // format     -- A string providing the disassembly for this instruction.
5107 //               The value of an instruction's operand may be inserted
5108 //               by referring to it with a '$' prefix.
5109 // opcode     -- Three instruction opcodes may be provided.  These are referred
5110 //               to within an encode class as $primary, $secondary, and $tertiary
5111 //               respectively.  The primary opcode is commonly used to
5112 //               indicate the type of machine instruction, while secondary
5113 //               and tertiary are often used for prefix options or addressing
5114 //               modes.
5115 // ins_encode -- A list of encode classes with parameters. The encode class
5116 //               name must have been defined in an 'enc_class' specification
5117 //               in the encode section of the architecture description.
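     //
     // For example, bytes_reverse_int below supplies opcode(0x0F, 0xC8), making
     // $primary = 0x0F and $secondary = 0xC8; its ins_encode list (OpcP,
     // OpcSReg(dst)) then emits the primary byte followed by the secondary byte
     // combined with the destination register number, i.e. the BSWAP r32
     // encoding 0x0F, 0xC8+rd.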
5118 
5119 //----------BSWAP-Instruction--------------------------------------------------
5120 instruct bytes_reverse_int(rRegI dst) %{
5121   match(Set dst (ReverseBytesI dst));
5122 
5123   format %{ "BSWAP  $dst" %}
5124   opcode(0x0F, 0xC8);
5125   ins_encode( OpcP, OpcSReg(dst) );
5126   ins_pipe( ialu_reg );
5127 %}
5128 
5129 instruct bytes_reverse_long(eRegL dst) %{
5130   match(Set dst (ReverseBytesL dst));
5131 
5132   format %{ "BSWAP  $dst.lo\n\t"
5133             "BSWAP  $dst.hi\n\t"
5134             "XCHG   $dst.lo $dst.hi" %}
5135 
5136   ins_cost(125);
5137   ins_encode( bswap_long_bytes(dst) );
5138   ins_pipe( ialu_reg_reg);
5139 %}
5140 
5141 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5142   match(Set dst (ReverseBytesUS dst));
5143   effect(KILL cr);
5144 
5145   format %{ "BSWAP  $dst\n\t"
5146             "SHR    $dst,16\n\t" %}
5147   ins_encode %{
5148     __ bswapl($dst$$Register);
5149     __ shrl($dst$$Register, 16);
5150   %}
5151   ins_pipe( ialu_reg );
5152 %}
5153 
5154 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5155   match(Set dst (ReverseBytesS dst));
5156   effect(KILL cr);
5157 
5158   format %{ "BSWAP  $dst\n\t"
5159             "SAR    $dst,16\n\t" %}
5160   ins_encode %{
5161     __ bswapl($dst$$Register);
5162     __ sarl($dst$$Register, 16);
5163   %}
5164   ins_pipe( ialu_reg );
5165 %}
5166 
5167 
5168 //---------- Zeros Count Instructions ------------------------------------------
5169 
5170 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5171   predicate(UseCountLeadingZerosInstruction);
5172   match(Set dst (CountLeadingZerosI src));
5173   effect(KILL cr);
5174 
5175   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5176   ins_encode %{
5177     __ lzcntl($dst$$Register, $src$$Register);
5178   %}
5179   ins_pipe(ialu_reg);
5180 %}
5181 
5182 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5183   predicate(!UseCountLeadingZerosInstruction);
5184   match(Set dst (CountLeadingZerosI src));
5185   effect(KILL cr);
5186 
5187   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5188             "JNZ    skip\n\t"
5189             "MOV    $dst, -1\n"
5190       "skip:\n\t"
5191             "NEG    $dst\n\t"
5192             "ADD    $dst, 31" %}
5193   ins_encode %{
5194     Register Rdst = $dst$$Register;
5195     Register Rsrc = $src$$Register;
5196     Label skip;
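         // BSR returns the index of the highest set bit and sets ZF (leaving the
         // destination undefined) when the source is zero.  For a non-zero source,
         // nlz(x) = 31 - index = -index + 31; for zero we force -1 so that the
         // NEG/ADD 31 sequence below yields the required result of 32.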
5197     __ bsrl(Rdst, Rsrc);
5198     __ jccb(Assembler::notZero, skip);
5199     __ movl(Rdst, -1);
5200     __ bind(skip);
5201     __ negl(Rdst);
5202     __ addl(Rdst, BitsPerInt - 1);
5203   %}
5204   ins_pipe(ialu_reg);
5205 %}
5206 
5207 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5208   predicate(UseCountLeadingZerosInstruction);
5209   match(Set dst (CountLeadingZerosL src));
5210   effect(TEMP dst, KILL cr);
5211 
5212   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5213             "JNC    done\n\t"
5214             "LZCNT  $dst, $src.lo\n\t"
5215             "ADD    $dst, 32\n"
5216       "done:" %}
5217   ins_encode %{
5218     Register Rdst = $dst$$Register;
5219     Register Rsrc = $src$$Register;
5220     Label done;
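         // LZCNT sets CF when its source is all zeros, so carry-clear means the
         // high word is non-zero and its count is the final answer; otherwise
         // count the low word and add 32.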
5221     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5222     __ jccb(Assembler::carryClear, done);
5223     __ lzcntl(Rdst, Rsrc);
5224     __ addl(Rdst, BitsPerInt);
5225     __ bind(done);
5226   %}
5227   ins_pipe(ialu_reg);
5228 %}
5229 
5230 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5231   predicate(!UseCountLeadingZerosInstruction);
5232   match(Set dst (CountLeadingZerosL src));
5233   effect(TEMP dst, KILL cr);
5234 
5235   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5236             "JZ     msw_is_zero\n\t"
5237             "ADD    $dst, 32\n\t"
5238             "JMP    not_zero\n"
5239       "msw_is_zero:\n\t"
5240             "BSR    $dst, $src.lo\n\t"
5241             "JNZ    not_zero\n\t"
5242             "MOV    $dst, -1\n"
5243       "not_zero:\n\t"
5244             "NEG    $dst\n\t"
5245             "ADD    $dst, 63\n" %}
5246  ins_encode %{
5247     Register Rdst = $dst$$Register;
5248     Register Rsrc = $src$$Register;
5249     Label msw_is_zero;
5250     Label not_zero;
5251     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5252     __ jccb(Assembler::zero, msw_is_zero);
5253     __ addl(Rdst, BitsPerInt);
5254     __ jmpb(not_zero);
5255     __ bind(msw_is_zero);
5256     __ bsrl(Rdst, Rsrc);
5257     __ jccb(Assembler::notZero, not_zero);
5258     __ movl(Rdst, -1);
5259     __ bind(not_zero);
5260     __ negl(Rdst);
5261     __ addl(Rdst, BitsPerLong - 1);
5262   %}
5263   ins_pipe(ialu_reg);
5264 %}
5265 
5266 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5267   predicate(UseCountTrailingZerosInstruction);
5268   match(Set dst (CountTrailingZerosI src));
5269   effect(KILL cr);
5270 
5271   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5272   ins_encode %{
5273     __ tzcntl($dst$$Register, $src$$Register);
5274   %}
5275   ins_pipe(ialu_reg);
5276 %}
5277 
5278 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5279   predicate(!UseCountTrailingZerosInstruction);
5280   match(Set dst (CountTrailingZerosI src));
5281   effect(KILL cr);
5282 
5283   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5284             "JNZ    done\n\t"
5285             "MOV    $dst, 32\n"
5286       "done:" %}
5287   ins_encode %{
5288     Register Rdst = $dst$$Register;
5289     Label done;
5290     __ bsfl(Rdst, $src$$Register);
5291     __ jccb(Assembler::notZero, done);
5292     __ movl(Rdst, BitsPerInt);
5293     __ bind(done);
5294   %}
5295   ins_pipe(ialu_reg);
5296 %}
5297 
5298 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5299   predicate(UseCountTrailingZerosInstruction);
5300   match(Set dst (CountTrailingZerosL src));
5301   effect(TEMP dst, KILL cr);
5302 
5303   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5304             "JNC    done\n\t"
5305             "TZCNT  $dst, $src.hi\n\t"
5306             "ADD    $dst, 32\n"
5307             "done:" %}
5308   ins_encode %{
5309     Register Rdst = $dst$$Register;
5310     Register Rsrc = $src$$Register;
5311     Label done;
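         // TZCNT sets CF when its source is all zeros, so carry-clear means the
         // low word is non-zero and its count is the final answer; otherwise
         // count the high word and add 32.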
5312     __ tzcntl(Rdst, Rsrc);
5313     __ jccb(Assembler::carryClear, done);
5314     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5315     __ addl(Rdst, BitsPerInt);
5316     __ bind(done);
5317   %}
5318   ins_pipe(ialu_reg);
5319 %}
5320 
5321 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5322   predicate(!UseCountTrailingZerosInstruction);
5323   match(Set dst (CountTrailingZerosL src));
5324   effect(TEMP dst, KILL cr);
5325 
5326   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5327             "JNZ    done\n\t"
5328             "BSF    $dst, $src.hi\n\t"
5329             "JNZ    msw_not_zero\n\t"
5330             "MOV    $dst, 32\n"
5331       "msw_not_zero:\n\t"
5332             "ADD    $dst, 32\n"
5333       "done:" %}
5334   ins_encode %{
5335     Register Rdst = $dst$$Register;
5336     Register Rsrc = $src$$Register;
5337     Label msw_not_zero;
5338     Label done;
5339     __ bsfl(Rdst, Rsrc);
5340     __ jccb(Assembler::notZero, done);
5341     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5342     __ jccb(Assembler::notZero, msw_not_zero);
5343     __ movl(Rdst, BitsPerInt);
5344     __ bind(msw_not_zero);
5345     __ addl(Rdst, BitsPerInt);
5346     __ bind(done);
5347   %}
5348   ins_pipe(ialu_reg);
5349 %}
5350 
5351 
5352 //---------- Population Count Instructions -------------------------------------
5353 
5354 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5355   predicate(UsePopCountInstruction);
5356   match(Set dst (PopCountI src));
5357   effect(KILL cr);
5358 
5359   format %{ "POPCNT $dst, $src" %}
5360   ins_encode %{
5361     __ popcntl($dst$$Register, $src$$Register);
5362   %}
5363   ins_pipe(ialu_reg);
5364 %}
5365 
5366 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5367   predicate(UsePopCountInstruction);
5368   match(Set dst (PopCountI (LoadI mem)));
5369   effect(KILL cr);
5370 
5371   format %{ "POPCNT $dst, $mem" %}
5372   ins_encode %{
5373     __ popcntl($dst$$Register, $mem$$Address);
5374   %}
5375   ins_pipe(ialu_reg);
5376 %}
5377 
5378 // Note: Long.bitCount(long) returns an int.
5379 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5380   predicate(UsePopCountInstruction);
5381   match(Set dst (PopCountL src));
5382   effect(KILL cr, TEMP tmp, TEMP dst);
5383 
5384   format %{ "POPCNT $dst, $src.lo\n\t"
5385             "POPCNT $tmp, $src.hi\n\t"
5386             "ADD    $dst, $tmp" %}
5387   ins_encode %{
5388     __ popcntl($dst$$Register, $src$$Register);
5389     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5390     __ addl($dst$$Register, $tmp$$Register);
5391   %}
5392   ins_pipe(ialu_reg);
5393 %}
5394 
5395 // Note: Long.bitCount(long) returns an int.
5396 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5397   predicate(UsePopCountInstruction);
5398   match(Set dst (PopCountL (LoadL mem)));
5399   effect(KILL cr, TEMP tmp, TEMP dst);
5400 
5401   format %{ "POPCNT $dst, $mem\n\t"
5402             "POPCNT $tmp, $mem+4\n\t"
5403             "ADD    $dst, $tmp" %}
5404   ins_encode %{
5405     //__ popcntl($dst$$Register, $mem$$Address$$first);
5406     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5407     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5408     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5409     __ addl($dst$$Register, $tmp$$Register);
5410   %}
5411   ins_pipe(ialu_reg);
5412 %}
5413 
5414 
5415 //----------Load/Store/Move Instructions---------------------------------------
5416 //----------Load Instructions--------------------------------------------------
5417 // Load Byte (8bit signed)
5418 instruct loadB(xRegI dst, memory mem) %{
5419   match(Set dst (LoadB mem));
5420 
5421   ins_cost(125);
5422   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5423 
5424   ins_encode %{
5425     __ movsbl($dst$$Register, $mem$$Address);
5426   %}
5427 
5428   ins_pipe(ialu_reg_mem);
5429 %}
5430 
5431 // Load Byte (8bit signed) into Long Register
5432 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5433   match(Set dst (ConvI2L (LoadB mem)));
5434   effect(KILL cr);
5435 
5436   ins_cost(375);
5437   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5438             "MOV    $dst.hi,$dst.lo\n\t"
5439             "SAR    $dst.hi,7" %}
5440 
5441   ins_encode %{
5442     __ movsbl($dst$$Register, $mem$$Address);
5443     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5444     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // Bits 7..31 already equal the sign bit, so SAR by 7 fills the register with the sign.
5445   %}
5446 
5447   ins_pipe(ialu_reg_mem);
5448 %}
5449 
5450 // Load Unsigned Byte (8bit UNsigned)
5451 instruct loadUB(xRegI dst, memory mem) %{
5452   match(Set dst (LoadUB mem));
5453 
5454   ins_cost(125);
5455   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5456 
5457   ins_encode %{
5458     __ movzbl($dst$$Register, $mem$$Address);
5459   %}
5460 
5461   ins_pipe(ialu_reg_mem);
5462 %}
5463 
5464 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5465 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5466   match(Set dst (ConvI2L (LoadUB mem)));
5467   effect(KILL cr);
5468 
5469   ins_cost(250);
5470   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5471             "XOR    $dst.hi,$dst.hi" %}
5472 
5473   ins_encode %{
5474     Register Rdst = $dst$$Register;
5475     __ movzbl(Rdst, $mem$$Address);
5476     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5477   %}
5478 
5479   ins_pipe(ialu_reg_mem);
5480 %}
5481 
5482 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5483 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5484   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5485   effect(KILL cr);
5486 
5487   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5488             "XOR    $dst.hi,$dst.hi\n\t"
5489             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5490   ins_encode %{
5491     Register Rdst = $dst$$Register;
5492     __ movzbl(Rdst, $mem$$Address);
5493     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5494     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5495   %}
5496   ins_pipe(ialu_reg_mem);
5497 %}
5498 
5499 // Load Short (16bit signed)
5500 instruct loadS(rRegI dst, memory mem) %{
5501   match(Set dst (LoadS mem));
5502 
5503   ins_cost(125);
5504   format %{ "MOVSX  $dst,$mem\t# short" %}
5505 
5506   ins_encode %{
5507     __ movswl($dst$$Register, $mem$$Address);
5508   %}
5509 
5510   ins_pipe(ialu_reg_mem);
5511 %}
5512 
5513 // Load Short (16 bit signed) to Byte (8 bit signed)
5514 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5515   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5516 
5517   ins_cost(125);
5518   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5519   ins_encode %{
5520     __ movsbl($dst$$Register, $mem$$Address);
5521   %}
5522   ins_pipe(ialu_reg_mem);
5523 %}
5524 
5525 // Load Short (16bit signed) into Long Register
5526 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5527   match(Set dst (ConvI2L (LoadS mem)));
5528   effect(KILL cr);
5529 
5530   ins_cost(375);
5531   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5532             "MOV    $dst.hi,$dst.lo\n\t"
5533             "SAR    $dst.hi,15" %}
5534 
5535   ins_encode %{
5536     __ movswl($dst$$Register, $mem$$Address);
5537     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5538     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // Bits 15..31 already equal the sign bit, so SAR by 15 fills the register with the sign.
5539   %}
5540 
5541   ins_pipe(ialu_reg_mem);
5542 %}
5543 
5544 // Load Unsigned Short/Char (16bit unsigned)
5545 instruct loadUS(rRegI dst, memory mem) %{
5546   match(Set dst (LoadUS mem));
5547 
5548   ins_cost(125);
5549   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5550 
5551   ins_encode %{
5552     __ movzwl($dst$$Register, $mem$$Address);
5553   %}
5554 
5555   ins_pipe(ialu_reg_mem);
5556 %}
5557 
5558 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5559 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5560   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5561 
5562   ins_cost(125);
5563   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5564   ins_encode %{
5565     __ movsbl($dst$$Register, $mem$$Address);
5566   %}
5567   ins_pipe(ialu_reg_mem);
5568 %}
5569 
5570 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5571 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5572   match(Set dst (ConvI2L (LoadUS mem)));
5573   effect(KILL cr);
5574 
5575   ins_cost(250);
5576   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5577             "XOR    $dst.hi,$dst.hi" %}
5578 
5579   ins_encode %{
5580     __ movzwl($dst$$Register, $mem$$Address);
5581     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5582   %}
5583 
5584   ins_pipe(ialu_reg_mem);
5585 %}
5586 
5587 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5588 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5589   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5590   effect(KILL cr);
5591 
5592   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5593             "XOR    $dst.hi,$dst.hi" %}
5594   ins_encode %{
5595     Register Rdst = $dst$$Register;
5596     __ movzbl(Rdst, $mem$$Address);
5597     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5598   %}
5599   ins_pipe(ialu_reg_mem);
5600 %}
5601 
5602 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5603 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5604   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5605   effect(KILL cr);
5606 
5607   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5608             "XOR    $dst.hi,$dst.hi\n\t"
5609             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5610   ins_encode %{
5611     Register Rdst = $dst$$Register;
5612     __ movzwl(Rdst, $mem$$Address);
5613     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5614     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5615   %}
5616   ins_pipe(ialu_reg_mem);
5617 %}
5618 
5619 // Load Integer
5620 instruct loadI(rRegI dst, memory mem) %{
5621   match(Set dst (LoadI mem));
5622 
5623   ins_cost(125);
5624   format %{ "MOV    $dst,$mem\t# int" %}
5625 
5626   ins_encode %{
5627     __ movl($dst$$Register, $mem$$Address);
5628   %}
5629 
5630   ins_pipe(ialu_reg_mem);
5631 %}
5632 
5633 // Load Integer (32 bit signed) to Byte (8 bit signed)
5634 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5635   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5636 
5637   ins_cost(125);
5638   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5639   ins_encode %{
5640     __ movsbl($dst$$Register, $mem$$Address);
5641   %}
5642   ins_pipe(ialu_reg_mem);
5643 %}
5644 
5645 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5646 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5647   match(Set dst (AndI (LoadI mem) mask));
5648 
5649   ins_cost(125);
5650   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5651   ins_encode %{
5652     __ movzbl($dst$$Register, $mem$$Address);
5653   %}
5654   ins_pipe(ialu_reg_mem);
5655 %}
5656 
5657 // Load Integer (32 bit signed) to Short (16 bit signed)
5658 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5659   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5660 
5661   ins_cost(125);
5662   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5663   ins_encode %{
5664     __ movswl($dst$$Register, $mem$$Address);
5665   %}
5666   ins_pipe(ialu_reg_mem);
5667 %}
5668 
5669 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5670 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5671   match(Set dst (AndI (LoadI mem) mask));
5672 
5673   ins_cost(125);
5674   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5675   ins_encode %{
5676     __ movzwl($dst$$Register, $mem$$Address);
5677   %}
5678   ins_pipe(ialu_reg_mem);
5679 %}
5680 
5681 // Load Integer into Long Register
5682 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5683   match(Set dst (ConvI2L (LoadI mem)));
5684   effect(KILL cr);
5685 
5686   ins_cost(375);
5687   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5688             "MOV    $dst.hi,$dst.lo\n\t"
5689             "SAR    $dst.hi,31" %}
5690 
5691   ins_encode %{
5692     __ movl($dst$$Register, $mem$$Address);
5693     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5694     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5695   %}
5696 
5697   ins_pipe(ialu_reg_mem);
5698 %}
5699 
5700 // Load Integer with mask 0xFF into Long Register
5701 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5702   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5703   effect(KILL cr);
5704 
5705   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5706             "XOR    $dst.hi,$dst.hi" %}
5707   ins_encode %{
5708     Register Rdst = $dst$$Register;
5709     __ movzbl(Rdst, $mem$$Address);
5710     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5711   %}
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Integer with mask 0xFFFF into Long Register
5716 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5717   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5718   effect(KILL cr);
5719 
5720   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5721             "XOR    $dst.hi,$dst.hi" %}
5722   ins_encode %{
5723     Register Rdst = $dst$$Register;
5724     __ movzwl(Rdst, $mem$$Address);
5725     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5726   %}
5727   ins_pipe(ialu_reg_mem);
5728 %}
5729 
5730 // Load Integer with 31-bit mask into Long Register
5731 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5732   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5733   effect(KILL cr);
5734 
5735   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5736             "XOR    $dst.hi,$dst.hi\n\t"
5737             "AND    $dst.lo,$mask" %}
5738   ins_encode %{
5739     Register Rdst = $dst$$Register;
5740     __ movl(Rdst, $mem$$Address);
5741     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5742     __ andl(Rdst, $mask$$constant);
5743   %}
5744   ins_pipe(ialu_reg_mem);
5745 %}
5746 
5747 // Load Unsigned Integer into Long Register
5748 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5749   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5750   effect(KILL cr);
5751 
5752   ins_cost(250);
5753   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5754             "XOR    $dst.hi,$dst.hi" %}
5755 
5756   ins_encode %{
5757     __ movl($dst$$Register, $mem$$Address);
5758     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5759   %}
5760 
5761   ins_pipe(ialu_reg_mem);
5762 %}
5763 
5764 // Load Long.  Cannot clobber address while loading, so restrict address
5765 // register to ESI
5766 instruct loadL(eRegL dst, load_long_memory mem) %{
5767   predicate(!((LoadLNode*)n)->require_atomic_access());
5768   match(Set dst (LoadL mem));
5769 
5770   ins_cost(250);
5771   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5772             "MOV    $dst.hi,$mem+4" %}
5773 
5774   ins_encode %{
5775     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5776     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5777     __ movl($dst$$Register, Amemlo);
5778     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5779   %}
5780 
5781   ins_pipe(ialu_reg_long_mem);
5782 %}
5783 
5784 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5785 // then store it down to the stack and reload on the int
5786 // side.
5787 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5788   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5789   match(Set dst (LoadL mem));
5790 
5791   ins_cost(200);
5792   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5793             "FISTp  $dst" %}
5794   ins_encode(enc_loadL_volatile(mem,dst));
5795   ins_pipe( fpu_reg_mem );
5796 %}
5797 
5798 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5799   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5800   match(Set dst (LoadL mem));
5801   effect(TEMP tmp);
5802   ins_cost(180);
5803   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5804             "MOVSD  $dst,$tmp" %}
5805   ins_encode %{
5806     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5807     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5808   %}
5809   ins_pipe( pipe_slow );
5810 %}
5811 
5812 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5813   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5814   match(Set dst (LoadL mem));
5815   effect(TEMP tmp);
5816   ins_cost(160);
5817   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5818             "MOVD   $dst.lo,$tmp\n\t"
5819             "PSRLQ  $tmp,32\n\t"
5820             "MOVD   $dst.hi,$tmp" %}
5821   ins_encode %{
5822     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5823     __ movdl($dst$$Register, $tmp$$XMMRegister);
5824     __ psrlq($tmp$$XMMRegister, 32);
5825     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5826   %}
5827   ins_pipe( pipe_slow );
5828 %}
5829 
5830 // Load Range
5831 instruct loadRange(rRegI dst, memory mem) %{
5832   match(Set dst (LoadRange mem));
5833 
5834   ins_cost(125);
5835   format %{ "MOV    $dst,$mem" %}
5836   opcode(0x8B);
5837   ins_encode( OpcP, RegMem(dst,mem));
5838   ins_pipe( ialu_reg_mem );
5839 %}
5840 
5841 
5842 // Load Pointer
5843 instruct loadP(eRegP dst, memory mem) %{
5844   match(Set dst (LoadP mem));
5845 
5846   ins_cost(125);
5847   format %{ "MOV    $dst,$mem" %}
5848   opcode(0x8B);
5849   ins_encode( OpcP, RegMem(dst,mem));
5850   ins_pipe( ialu_reg_mem );
5851 %}
5852 
5853 // Load Klass Pointer
5854 instruct loadKlass(eRegP dst, memory mem) %{
5855   match(Set dst (LoadKlass mem));
5856 
5857   ins_cost(125);
5858   format %{ "MOV    $dst,$mem" %}
5859   opcode(0x8B);
5860   ins_encode( OpcP, RegMem(dst,mem));
5861   ins_pipe( ialu_reg_mem );
5862 %}
5863 
5864 // Load Double
5865 instruct loadDPR(regDPR dst, memory mem) %{
5866   predicate(UseSSE<=1);
5867   match(Set dst (LoadD mem));
5868 
5869   ins_cost(150);
5870   format %{ "FLD_D  ST,$mem\n\t"
5871             "FSTP   $dst" %}
5872   opcode(0xDD);               /* DD /0 */
5873   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5874               Pop_Reg_DPR(dst) );
5875   ins_pipe( fpu_reg_mem );
5876 %}
5877 
5878 // Load Double to XMM
5879 instruct loadD(regD dst, memory mem) %{
5880   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5881   match(Set dst (LoadD mem));
5882   ins_cost(145);
5883   format %{ "MOVSD  $dst,$mem" %}
5884   ins_encode %{
5885     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5886   %}
5887   ins_pipe( pipe_slow );
5888 %}
5889 
5890 instruct loadD_partial(regD dst, memory mem) %{
5891   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5892   match(Set dst (LoadD mem));
5893   ins_cost(145);
5894   format %{ "MOVLPD $dst,$mem" %}
5895   ins_encode %{
5896     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5897   %}
5898   ins_pipe( pipe_slow );
5899 %}
5900 
5901 // Load to XMM register (single-precision floating point)
5902 // MOVSS instruction
5903 instruct loadF(regF dst, memory mem) %{
5904   predicate(UseSSE>=1);
5905   match(Set dst (LoadF mem));
5906   ins_cost(145);
5907   format %{ "MOVSS  $dst,$mem" %}
5908   ins_encode %{
5909     __ movflt ($dst$$XMMRegister, $mem$$Address);
5910   %}
5911   ins_pipe( pipe_slow );
5912 %}
5913 
5914 // Load Float
5915 instruct loadFPR(regFPR dst, memory mem) %{
5916   predicate(UseSSE==0);
5917   match(Set dst (LoadF mem));
5918 
5919   ins_cost(150);
5920   format %{ "FLD_S  ST,$mem\n\t"
5921             "FSTP   $dst" %}
5922   opcode(0xD9);               /* D9 /0 */
5923   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5924               Pop_Reg_FPR(dst) );
5925   ins_pipe( fpu_reg_mem );
5926 %}
5927 
5928 // Load Effective Address
5929 instruct leaP8(eRegP dst, indOffset8 mem) %{
5930   match(Set dst mem);
5931 
5932   ins_cost(110);
5933   format %{ "LEA    $dst,$mem" %}
5934   opcode(0x8D);
5935   ins_encode( OpcP, RegMem(dst,mem));
5936   ins_pipe( ialu_reg_reg_fat );
5937 %}
5938 
5939 instruct leaP32(eRegP dst, indOffset32 mem) %{
5940   match(Set dst mem);
5941 
5942   ins_cost(110);
5943   format %{ "LEA    $dst,$mem" %}
5944   opcode(0x8D);
5945   ins_encode( OpcP, RegMem(dst,mem));
5946   ins_pipe( ialu_reg_reg_fat );
5947 %}
5948 
5949 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5950   match(Set dst mem);
5951 
5952   ins_cost(110);
5953   format %{ "LEA    $dst,$mem" %}
5954   opcode(0x8D);
5955   ins_encode( OpcP, RegMem(dst,mem));
5956   ins_pipe( ialu_reg_reg_fat );
5957 %}
5958 
5959 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5960   match(Set dst mem);
5961 
5962   ins_cost(110);
5963   format %{ "LEA    $dst,$mem" %}
5964   opcode(0x8D);
5965   ins_encode( OpcP, RegMem(dst,mem));
5966   ins_pipe( ialu_reg_reg_fat );
5967 %}
5968 
5969 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5970   match(Set dst mem);
5971 
5972   ins_cost(110);
5973   format %{ "LEA    $dst,$mem" %}
5974   opcode(0x8D);
5975   ins_encode( OpcP, RegMem(dst,mem));
5976   ins_pipe( ialu_reg_reg_fat );
5977 %}
5978 
5979 // Load Constant
5980 instruct loadConI(rRegI dst, immI src) %{
5981   match(Set dst src);
5982 
5983   format %{ "MOV    $dst,$src" %}
5984   ins_encode( LdImmI(dst, src) );
5985   ins_pipe( ialu_reg_fat );
5986 %}
5987 
5988 // Load Constant zero
5989 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5990   match(Set dst src);
5991   effect(KILL cr);
5992 
5993   ins_cost(50);
5994   format %{ "XOR    $dst,$dst" %}
5995   opcode(0x33);  /* + rd */
5996   ins_encode( OpcP, RegReg( dst, dst ) );
5997   ins_pipe( ialu_reg );
5998 %}
5999 
6000 instruct loadConP(eRegP dst, immP src) %{
6001   match(Set dst src);
6002 
6003   format %{ "MOV    $dst,$src" %}
6004   opcode(0xB8);  /* + rd */
6005   ins_encode( LdImmP(dst, src) );
6006   ins_pipe( ialu_reg_fat );
6007 %}
6008 
6009 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6010   match(Set dst src);
6011   effect(KILL cr);
6012   ins_cost(200);
6013   format %{ "MOV    $dst.lo,$src.lo\n\t"
6014             "MOV    $dst.hi,$src.hi" %}
6015   opcode(0xB8);
6016   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6017   ins_pipe( ialu_reg_long_fat );
6018 %}
6019 
6020 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6021   match(Set dst src);
6022   effect(KILL cr);
6023   ins_cost(150);
6024   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6025             "XOR    $dst.hi,$dst.hi" %}
6026   opcode(0x33,0x33);
6027   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6028   ins_pipe( ialu_reg_long );
6029 %}
6030 
6031 // The instruction usage is guarded by predicate in operand immFPR().
6032 instruct loadConFPR(regFPR dst, immFPR con) %{
6033   match(Set dst con);
6034   ins_cost(125);
6035   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6036             "FSTP   $dst" %}
6037   ins_encode %{
6038     __ fld_s($constantaddress($con));
6039     __ fstp_d($dst$$reg);
6040   %}
6041   ins_pipe(fpu_reg_con);
6042 %}
6043 
6044 // The instruction usage is guarded by predicate in operand immFPR0().
6045 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6046   match(Set dst con);
6047   ins_cost(125);
6048   format %{ "FLDZ   ST\n\t"
6049             "FSTP   $dst" %}
6050   ins_encode %{
6051     __ fldz();
6052     __ fstp_d($dst$$reg);
6053   %}
6054   ins_pipe(fpu_reg_con);
6055 %}
6056 
6057 // The instruction usage is guarded by predicate in operand immFPR1().
6058 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6059   match(Set dst con);
6060   ins_cost(125);
6061   format %{ "FLD1   ST\n\t"
6062             "FSTP   $dst" %}
6063   ins_encode %{
6064     __ fld1();
6065     __ fstp_d($dst$$reg);
6066   %}
6067   ins_pipe(fpu_reg_con);
6068 %}
6069 
6070 // The instruction usage is guarded by predicate in operand immF().
6071 instruct loadConF(regF dst, immF con) %{
6072   match(Set dst con);
6073   ins_cost(125);
6074   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6075   ins_encode %{
6076     __ movflt($dst$$XMMRegister, $constantaddress($con));
6077   %}
6078   ins_pipe(pipe_slow);
6079 %}
6080 
6081 // The instruction usage is guarded by predicate in operand immF0().
6082 instruct loadConF0(regF dst, immF0 src) %{
6083   match(Set dst src);
6084   ins_cost(100);
6085   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6086   ins_encode %{
6087     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6088   %}
6089   ins_pipe(pipe_slow);
6090 %}
6091 
6092 // The instruction usage is guarded by predicate in operand immDPR().
6093 instruct loadConDPR(regDPR dst, immDPR con) %{
6094   match(Set dst con);
6095   ins_cost(125);
6096 
6097   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6098             "FSTP   $dst" %}
6099   ins_encode %{
6100     __ fld_d($constantaddress($con));
6101     __ fstp_d($dst$$reg);
6102   %}
6103   ins_pipe(fpu_reg_con);
6104 %}
6105 
6106 // The instruction usage is guarded by predicate in operand immDPR0().
6107 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6108   match(Set dst con);
6109   ins_cost(125);
6110 
6111   format %{ "FLDZ   ST\n\t"
6112             "FSTP   $dst" %}
6113   ins_encode %{
6114     __ fldz();
6115     __ fstp_d($dst$$reg);
6116   %}
6117   ins_pipe(fpu_reg_con);
6118 %}
6119 
6120 // The instruction usage is guarded by predicate in operand immDPR1().
6121 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6122   match(Set dst con);
6123   ins_cost(125);
6124 
6125   format %{ "FLD1   ST\n\t"
6126             "FSTP   $dst" %}
6127   ins_encode %{
6128     __ fld1();
6129     __ fstp_d($dst$$reg);
6130   %}
6131   ins_pipe(fpu_reg_con);
6132 %}
6133 
6134 // The instruction usage is guarded by predicate in operand immD().
6135 instruct loadConD(regD dst, immD con) %{
6136   match(Set dst con);
6137   ins_cost(125);
6138   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6139   ins_encode %{
6140     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6141   %}
6142   ins_pipe(pipe_slow);
6143 %}
6144 
6145 // The instruction usage is guarded by predicate in operand immD0().
6146 instruct loadConD0(regD dst, immD0 src) %{
6147   match(Set dst src);
6148   ins_cost(100);
6149   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6150   ins_encode %{
6151     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6152   %}
6153   ins_pipe( pipe_slow );
6154 %}
6155 
6156 // Load Stack Slot
6157 instruct loadSSI(rRegI dst, stackSlotI src) %{
6158   match(Set dst src);
6159   ins_cost(125);
6160 
6161   format %{ "MOV    $dst,$src" %}
6162   opcode(0x8B);
6163   ins_encode( OpcP, RegMem(dst,src));
6164   ins_pipe( ialu_reg_mem );
6165 %}
6166 
6167 instruct loadSSL(eRegL dst, stackSlotL src) %{
6168   match(Set dst src);
6169 
6170   ins_cost(200);
6171   format %{ "MOV    $dst,$src.lo\n\t"
6172             "MOV    $dst+4,$src.hi" %}
6173   opcode(0x8B, 0x8B);
6174   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6175   ins_pipe( ialu_mem_long_reg );
6176 %}
6177 
6178 // Load Stack Slot
6179 instruct loadSSP(eRegP dst, stackSlotP src) %{
6180   match(Set dst src);
6181   ins_cost(125);
6182 
6183   format %{ "MOV    $dst,$src" %}
6184   opcode(0x8B);
6185   ins_encode( OpcP, RegMem(dst,src));
6186   ins_pipe( ialu_reg_mem );
6187 %}
6188 
6189 // Load Stack Slot
6190 instruct loadSSF(regFPR dst, stackSlotF src) %{
6191   match(Set dst src);
6192   ins_cost(125);
6193 
6194   format %{ "FLD_S  $src\n\t"
6195             "FSTP   $dst" %}
6196   opcode(0xD9);               /* D9 /0, FLD m32real */
6197   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6198               Pop_Reg_FPR(dst) );
6199   ins_pipe( fpu_reg_mem );
6200 %}
6201 
6202 // Load Stack Slot
6203 instruct loadSSD(regDPR dst, stackSlotD src) %{
6204   match(Set dst src);
6205   ins_cost(125);
6206 
6207   format %{ "FLD_D  $src\n\t"
6208             "FSTP   $dst" %}
6209   opcode(0xDD);               /* DD /0, FLD m64real */
6210   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6211               Pop_Reg_DPR(dst) );
6212   ins_pipe( fpu_reg_mem );
6213 %}
6214 
6215 // Prefetch instructions for allocation.
6216 // Must be safe to execute with invalid address (cannot fault).
6217 
6218 instruct prefetchAlloc0( memory mem ) %{
6219   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6220   match(PrefetchAllocation mem);
6221   ins_cost(0);
6222   size(0);
6223   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6224   ins_encode();
6225   ins_pipe(empty);
6226 %}
6227 
6228 instruct prefetchAlloc( memory mem ) %{
6229   predicate(AllocatePrefetchInstr==3);
6230   match( PrefetchAllocation mem );
6231   ins_cost(100);
6232 
6233   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6234   ins_encode %{
6235     __ prefetchw($mem$$Address);
6236   %}
6237   ins_pipe(ialu_mem);
6238 %}
6239 
6240 instruct prefetchAllocNTA( memory mem ) %{
6241   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6242   match(PrefetchAllocation mem);
6243   ins_cost(100);
6244 
6245   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6246   ins_encode %{
6247     __ prefetchnta($mem$$Address);
6248   %}
6249   ins_pipe(ialu_mem);
6250 %}
6251 
6252 instruct prefetchAllocT0( memory mem ) %{
6253   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6254   match(PrefetchAllocation mem);
6255   ins_cost(100);
6256 
6257   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6258   ins_encode %{
6259     __ prefetcht0($mem$$Address);
6260   %}
6261   ins_pipe(ialu_mem);
6262 %}
6263 
6264 instruct prefetchAllocT2( memory mem ) %{
6265   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6266   match(PrefetchAllocation mem);
6267   ins_cost(100);
6268 
6269   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6270   ins_encode %{
6271     __ prefetcht2($mem$$Address);
6272   %}
6273   ins_pipe(ialu_mem);
6274 %}
6275 
6276 //----------Store Instructions-------------------------------------------------
6277 
6278 // Store Byte
6279 instruct storeB(memory mem, xRegI src) %{
6280   match(Set mem (StoreB mem src));
6281 
6282   ins_cost(125);
6283   format %{ "MOV8   $mem,$src" %}
6284   opcode(0x88);
6285   ins_encode( OpcP, RegMem( src, mem ) );
6286   ins_pipe( ialu_mem_reg );
6287 %}
6288 
6289 // Store Char/Short
6290 instruct storeC(memory mem, rRegI src) %{
6291   match(Set mem (StoreC mem src));
6292 
6293   ins_cost(125);
6294   format %{ "MOV16  $mem,$src" %}
6295   opcode(0x89, 0x66);
6296   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6297   ins_pipe( ialu_mem_reg );
6298 %}
6299 
6300 // Store Integer
6301 instruct storeI(memory mem, rRegI src) %{
6302   match(Set mem (StoreI mem src));
6303 
6304   ins_cost(125);
6305   format %{ "MOV    $mem,$src" %}
6306   opcode(0x89);
6307   ins_encode( OpcP, RegMem( src, mem ) );
6308   ins_pipe( ialu_mem_reg );
6309 %}
6310 
6311 // Store Long
6312 instruct storeL(long_memory mem, eRegL src) %{
6313   predicate(!((StoreLNode*)n)->require_atomic_access());
6314   match(Set mem (StoreL mem src));
6315 
6316   ins_cost(200);
6317   format %{ "MOV    $mem,$src.lo\n\t"
6318             "MOV    $mem+4,$src.hi" %}
6319   opcode(0x89, 0x89);
6320   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6321   ins_pipe( ialu_mem_long_reg );
6322 %}
6323 
6324 // Store Long to Integer
6325 instruct storeL2I(memory mem, eRegL src) %{
6326   match(Set mem (StoreI mem (ConvL2I src)));
6327 
6328   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6329   ins_encode %{
6330     __ movl($mem$$Address, $src$$Register);
6331   %}
6332   ins_pipe(ialu_mem_reg);
6333 %}
6334 
6335 // Volatile Store Long.  Must be atomic, so move it into
6336 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6337 // target address before the store (for null-ptr checks)
6338 // so the memory operand is used twice in the encoding.
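// A hedged Java-level example (illustrative only, not part of this file): a
// volatile long field store is typically a StoreL with require_atomic_access(),
// so on 32-bit x86 it must not be split into two 32-bit MOVs; the rules below
// route it through the x87 stack (FIST) or an XMM register (MOVSD) instead.
//
//   class Holder {                         // hypothetical example class
//     volatile long value;
//     void set(long v) { value = v; }      // needs a single atomic 64-bit store
//   }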
6339 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6340   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6341   match(Set mem (StoreL mem src));
6342   effect( KILL cr );
6343   ins_cost(400);
6344   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6345             "FILD   $src\n\t"
6346             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6347   opcode(0x3B);
6348   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6349   ins_pipe( fpu_reg_mem );
6350 %}
6351 
6352 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6353   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6354   match(Set mem (StoreL mem src));
6355   effect( TEMP tmp, KILL cr );
6356   ins_cost(380);
6357   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6358             "MOVSD  $tmp,$src\n\t"
6359             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6360   ins_encode %{
6361     __ cmpl(rax, $mem$$Address);
6362     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6363     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6364   %}
6365   ins_pipe( pipe_slow );
6366 %}
6367 
6368 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6369   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6370   match(Set mem (StoreL mem src));
6371   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6372   ins_cost(360);
6373   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6374             "MOVD   $tmp,$src.lo\n\t"
6375             "MOVD   $tmp2,$src.hi\n\t"
6376             "PUNPCKLDQ $tmp,$tmp2\n\t"
6377             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6378   ins_encode %{
6379     __ cmpl(rax, $mem$$Address);
6380     __ movdl($tmp$$XMMRegister, $src$$Register);
6381     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6382     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6383     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6384   %}
6385   ins_pipe( pipe_slow );
6386 %}
6387 
6388 // Store Pointer; for storing unknown oops and raw pointers
6389 instruct storeP(memory mem, anyRegP src) %{
6390   match(Set mem (StoreP mem src));
6391 
6392   ins_cost(125);
6393   format %{ "MOV    $mem,$src" %}
6394   opcode(0x89);
6395   ins_encode( OpcP, RegMem( src, mem ) );
6396   ins_pipe( ialu_mem_reg );
6397 %}
6398 
6399 // Store Integer Immediate
6400 instruct storeImmI(memory mem, immI src) %{
6401   match(Set mem (StoreI mem src));
6402 
6403   ins_cost(150);
6404   format %{ "MOV    $mem,$src" %}
6405   opcode(0xC7);               /* C7 /0 */
6406   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6407   ins_pipe( ialu_mem_imm );
6408 %}
6409 
6410 // Store Short/Char Immediate
6411 instruct storeImmI16(memory mem, immI16 src) %{
6412   predicate(UseStoreImmI16);
6413   match(Set mem (StoreC mem src));
6414 
6415   ins_cost(150);
6416   format %{ "MOV16  $mem,$src" %}
6417   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6418   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6419   ins_pipe( ialu_mem_imm );
6420 %}
6421 
6422 // Store Pointer Immediate; null pointers or constant oops that do not
6423 // need card-mark barriers.
6424 instruct storeImmP(memory mem, immP src) %{
6425   match(Set mem (StoreP mem src));
6426 
6427   ins_cost(150);
6428   format %{ "MOV    $mem,$src" %}
6429   opcode(0xC7);               /* C7 /0 */
6430   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6431   ins_pipe( ialu_mem_imm );
6432 %}
6433 
6434 // Store Byte Immediate
6435 instruct storeImmB(memory mem, immI8 src) %{
6436   match(Set mem (StoreB mem src));
6437 
6438   ins_cost(150);
6439   format %{ "MOV8   $mem,$src" %}
6440   opcode(0xC6);               /* C6 /0 */
6441   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6442   ins_pipe( ialu_mem_imm );
6443 %}
6444 
6445 // Store CMS card-mark Immediate
6446 instruct storeImmCM(memory mem, immI8 src) %{
6447   match(Set mem (StoreCM mem src));
6448 
6449   ins_cost(150);
6450   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6451   opcode(0xC6);               /* C6 /0 */
6452   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6453   ins_pipe( ialu_mem_imm );
6454 %}
6455 
6456 // Store Double
6457 instruct storeDPR( memory mem, regDPR1 src) %{
6458   predicate(UseSSE<=1);
6459   match(Set mem (StoreD mem src));
6460 
6461   ins_cost(100);
6462   format %{ "FST_D  $mem,$src" %}
6463   opcode(0xDD);       /* DD /2 */
6464   ins_encode( enc_FPR_store(mem,src) );
6465   ins_pipe( fpu_mem_reg );
6466 %}
6467 
6468 // Store double does rounding on x86
6469 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6470   predicate(UseSSE<=1);
6471   match(Set mem (StoreD mem (RoundDouble src)));
6472 
6473   ins_cost(100);
6474   format %{ "FST_D  $mem,$src\t# round" %}
6475   opcode(0xDD);       /* DD /2 */
6476   ins_encode( enc_FPR_store(mem,src) );
6477   ins_pipe( fpu_mem_reg );
6478 %}
6479 
6480 // Store XMM register to memory (double-precision floating point)
6481 // MOVSD instruction
6482 instruct storeD(memory mem, regD src) %{
6483   predicate(UseSSE>=2);
6484   match(Set mem (StoreD mem src));
6485   ins_cost(95);
6486   format %{ "MOVSD  $mem,$src" %}
6487   ins_encode %{
6488     __ movdbl($mem$$Address, $src$$XMMRegister);
6489   %}
6490   ins_pipe( pipe_slow );
6491 %}
6492 
6493 // Store XMM register to memory (single-precision floating point)
6494 // MOVSS instruction
6495 instruct storeF(memory mem, regF src) %{
6496   predicate(UseSSE>=1);
6497   match(Set mem (StoreF mem src));
6498   ins_cost(95);
6499   format %{ "MOVSS  $mem,$src" %}
6500   ins_encode %{
6501     __ movflt($mem$$Address, $src$$XMMRegister);
6502   %}
6503   ins_pipe( pipe_slow );
6504 %}
6505 
6506 // Store Float
6507 instruct storeFPR( memory mem, regFPR1 src) %{
6508   predicate(UseSSE==0);
6509   match(Set mem (StoreF mem src));
6510 
6511   ins_cost(100);
6512   format %{ "FST_S  $mem,$src" %}
6513   opcode(0xD9);       /* D9 /2 */
6514   ins_encode( enc_FPR_store(mem,src) );
6515   ins_pipe( fpu_mem_reg );
6516 %}
6517 
6518 // Store Float does rounding on x86
6519 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6520   predicate(UseSSE==0);
6521   match(Set mem (StoreF mem (RoundFloat src)));
6522 
6523   ins_cost(100);
6524   format %{ "FST_S  $mem,$src\t# round" %}
6525   opcode(0xD9);       /* D9 /2 */
6526   ins_encode( enc_FPR_store(mem,src) );
6527   ins_pipe( fpu_mem_reg );
6528 %}
6529 
6530 // Store Double rounded to Float on x86 (StoreF of a ConvD2F result)
6531 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6532   predicate(UseSSE<=1);
6533   match(Set mem (StoreF mem (ConvD2F src)));
6534 
6535   ins_cost(100);
6536   format %{ "FST_S  $mem,$src\t# D-round" %}
6537   opcode(0xD9);       /* D9 /2 */
6538   ins_encode( enc_FPR_store(mem,src) );
6539   ins_pipe( fpu_mem_reg );
6540 %}
6541 
6542 // Store immediate Float value (it is faster than store from FPU register)
6543 // The instruction usage is guarded by predicate in operand immFPR().
6544 instruct storeFPR_imm( memory mem, immFPR src) %{
6545   match(Set mem (StoreF mem src));
6546 
6547   ins_cost(50);
6548   format %{ "MOV    $mem,$src\t# store float" %}
6549   opcode(0xC7);               /* C7 /0 */
6550   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6551   ins_pipe( ialu_mem_imm );
6552 %}
6553 
6554 // Store immediate Float value (it is faster than store from XMM register)
6555 // The instruction usage is guarded by predicate in operand immF().
6556 instruct storeF_imm( memory mem, immF src) %{
6557   match(Set mem (StoreF mem src));
6558 
6559   ins_cost(50);
6560   format %{ "MOV    $mem,$src\t# store float" %}
6561   opcode(0xC7);               /* C7 /0 */
6562   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6563   ins_pipe( ialu_mem_imm );
6564 %}
6565 
6566 // Store Integer to stack slot
6567 instruct storeSSI(stackSlotI dst, rRegI src) %{
6568   match(Set dst src);
6569 
6570   ins_cost(100);
6571   format %{ "MOV    $dst,$src" %}
6572   opcode(0x89);
6573   ins_encode( OpcPRegSS( dst, src ) );
6574   ins_pipe( ialu_mem_reg );
6575 %}
6576 
6577 // Store Pointer to stack slot
6578 instruct storeSSP(stackSlotP dst, eRegP src) %{
6579   match(Set dst src);
6580 
6581   ins_cost(100);
6582   format %{ "MOV    $dst,$src" %}
6583   opcode(0x89);
6584   ins_encode( OpcPRegSS( dst, src ) );
6585   ins_pipe( ialu_mem_reg );
6586 %}
6587 
6588 // Store Long to stack slot
6589 instruct storeSSL(stackSlotL dst, eRegL src) %{
6590   match(Set dst src);
6591 
6592   ins_cost(200);
6593   format %{ "MOV    $dst,$src.lo\n\t"
6594             "MOV    $dst+4,$src.hi" %}
6595   opcode(0x89, 0x89);
6596   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6597   ins_pipe( ialu_mem_long_reg );
6598 %}
6599 
6600 //----------MemBar Instructions-----------------------------------------------
6601 // Memory barrier flavors
6602 
6603 instruct membar_acquire() %{
6604   match(MemBarAcquire);
6605   match(LoadFence);
6606   ins_cost(400);
6607 
6608   size(0);
6609   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6610   ins_encode();
6611   ins_pipe(empty);
6612 %}
6613 
6614 instruct membar_acquire_lock() %{
6615   match(MemBarAcquireLock);
6616   ins_cost(0);
6617 
6618   size(0);
6619   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6620   ins_encode( );
6621   ins_pipe(empty);
6622 %}
6623 
6624 instruct membar_release() %{
6625   match(MemBarRelease);
6626   match(StoreFence);
6627   ins_cost(400);
6628 
6629   size(0);
6630   format %{ "MEMBAR-release ! (empty encoding)" %}
6631   ins_encode( );
6632   ins_pipe(empty);
6633 %}
6634 
6635 instruct membar_release_lock() %{
6636   match(MemBarReleaseLock);
6637   ins_cost(0);
6638 
6639   size(0);
6640   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6641   ins_encode( );
6642   ins_pipe(empty);
6643 %}
6644 
6645 instruct membar_volatile(eFlagsReg cr) %{
6646   match(MemBarVolatile);
6647   effect(KILL cr);
6648   ins_cost(400);
6649 
6650   format %{
6651     $$template
6652     if (os::is_MP()) {
6653       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6654     } else {
6655       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6656     }
6657   %}
6658   ins_encode %{
6659     __ membar(Assembler::StoreLoad);
6660   %}
6661   ins_pipe(pipe_slow);
6662 %}
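// A hedged Java-level example (illustrative only, not part of this file): a
// volatile store is followed by a MemBarVolatile, which on MP machines is the
// StoreLoad barrier emitted above as LOCK ADDL [ESP + #0], 0.
//
//   class Flag {                           // hypothetical example class
//     int data;
//     volatile boolean ready;
//     void publish(int v) { data = v; ready = true; }  // barrier after the volatile store
//   }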
6663 
6664 instruct unnecessary_membar_volatile() %{
6665   match(MemBarVolatile);
6666   predicate(Matcher::post_store_load_barrier(n));
6667   ins_cost(0);
6668 
6669   size(0);
6670   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6671   ins_encode( );
6672   ins_pipe(empty);
6673 %}
6674 
6675 instruct membar_storestore() %{
6676   match(MemBarStoreStore);
6677   ins_cost(0);
6678 
6679   size(0);
6680   format %{ "MEMBAR-storestore (empty encoding)" %}
6681   ins_encode( );
6682   ins_pipe(empty);
6683 %}
6684 
6685 //----------Move Instructions--------------------------------------------------
6686 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6687   match(Set dst (CastX2P src));
6688   format %{ "# X2P  $dst, $src" %}
6689   ins_encode( /*empty encoding*/ );
6690   ins_cost(0);
6691   ins_pipe(empty);
6692 %}
6693 
6694 instruct castP2X(rRegI dst, eRegP src ) %{
6695   match(Set dst (CastP2X src));
6696   ins_cost(50);
6697   format %{ "MOV    $dst, $src\t# CastP2X" %}
6698   ins_encode( enc_Copy( dst, src) );
6699   ins_pipe( ialu_reg_reg );
6700 %}
6701 
6702 //----------Conditional Move---------------------------------------------------
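// A hedged Java-level example (illustrative only, not part of this file): a
// simple conditional expression may be if-converted into a CMoveI, which the
// rules in this section match either with CMOVcc or, on pre-CMOV hardware,
// with the branch-over-MOV sequence shown just below.
//
//   static int pick(boolean c, int x, int y) {   // hypothetical example
//     return c ? x : y;                          // may become a CMoveI node
//   }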
6703 // Conditional move
6704 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6705   predicate(!VM_Version::supports_cmov() );
6706   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6707   ins_cost(200);
6708   format %{ "J$cop,us skip\t# signed cmove\n\t"
6709             "MOV    $dst,$src\n"
6710       "skip:" %}
6711   ins_encode %{
6712     Label Lskip;
6713     // Invert sense of branch from sense of CMOV
6714     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6715     __ movl($dst$$Register, $src$$Register);
6716     __ bind(Lskip);
6717   %}
6718   ins_pipe( pipe_cmov_reg );
6719 %}
6720 
6721 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6722   predicate(!VM_Version::supports_cmov() );
6723   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6724   ins_cost(200);
6725   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6726             "MOV    $dst,$src\n"
6727       "skip:" %}
6728   ins_encode %{
6729     Label Lskip;
6730     // Invert sense of branch from sense of CMOV
6731     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6732     __ movl($dst$$Register, $src$$Register);
6733     __ bind(Lskip);
6734   %}
6735   ins_pipe( pipe_cmov_reg );
6736 %}
6737 
6738 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6739   predicate(VM_Version::supports_cmov() );
6740   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6741   ins_cost(200);
6742   format %{ "CMOV$cop $dst,$src" %}
6743   opcode(0x0F,0x40);
6744   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6745   ins_pipe( pipe_cmov_reg );
6746 %}
6747 
6748 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6749   predicate(VM_Version::supports_cmov() );
6750   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6751   ins_cost(200);
6752   format %{ "CMOV$cop $dst,$src" %}
6753   opcode(0x0F,0x40);
6754   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6755   ins_pipe( pipe_cmov_reg );
6756 %}
6757 
6758 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6759   predicate(VM_Version::supports_cmov() );
6760   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6761   ins_cost(200);
6762   expand %{
6763     cmovI_regU(cop, cr, dst, src);
6764   %}
6765 %}
6766 
6767 // Conditional move
6768 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6769   predicate(VM_Version::supports_cmov() );
6770   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6771   ins_cost(250);
6772   format %{ "CMOV$cop $dst,$src" %}
6773   opcode(0x0F,0x40);
6774   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6775   ins_pipe( pipe_cmov_mem );
6776 %}
6777 
6778 // Conditional move
6779 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6780   predicate(VM_Version::supports_cmov() );
6781   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6782   ins_cost(250);
6783   format %{ "CMOV$cop $dst,$src" %}
6784   opcode(0x0F,0x40);
6785   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6786   ins_pipe( pipe_cmov_mem );
6787 %}
6788 
6789 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6790   predicate(VM_Version::supports_cmov() );
6791   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6792   ins_cost(250);
6793   expand %{
6794     cmovI_memU(cop, cr, dst, src);
6795   %}
6796 %}
6797 
6798 // Conditional move
6799 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6800   predicate(VM_Version::supports_cmov() );
6801   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6802   ins_cost(200);
6803   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6804   opcode(0x0F,0x40);
6805   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6806   ins_pipe( pipe_cmov_reg );
6807 %}
6808 
6809 // Conditional move (non-P6 version)
6810 // Note:  a CMoveP is generated for  stubs and native wrappers
6811 //        regardless of whether we are on a P6, so we
6812 //        emulate a cmov here
6813 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6814   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6815   ins_cost(300);
6816   format %{ "Jn$cop   skip\n\t"
6817           "MOV    $dst,$src\t# pointer\n"
6818       "skip:" %}
6819   opcode(0x8b);
6820   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6821   ins_pipe( pipe_cmov_reg );
6822 %}
6823 
6824 // Conditional move
6825 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6826   predicate(VM_Version::supports_cmov() );
6827   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6828   ins_cost(200);
6829   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6830   opcode(0x0F,0x40);
6831   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6832   ins_pipe( pipe_cmov_reg );
6833 %}
6834 
6835 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6836   predicate(VM_Version::supports_cmov() );
6837   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6838   ins_cost(200);
6839   expand %{
6840     cmovP_regU(cop, cr, dst, src);
6841   %}
6842 %}
6843 
6844 // DISABLED: Requires the ADLC to emit a bottom_type call that
6845 // correctly meets the two pointer arguments; one is an incoming
6846 // register but the other is a memory operand.  ALSO appears to
6847 // be buggy with implicit null checks.
6848 //
6849 //// Conditional move
6850 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6851 //  predicate(VM_Version::supports_cmov() );
6852 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6853 //  ins_cost(250);
6854 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6855 //  opcode(0x0F,0x40);
6856 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6857 //  ins_pipe( pipe_cmov_mem );
6858 //%}
6859 //
6860 //// Conditional move
6861 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6862 //  predicate(VM_Version::supports_cmov() );
6863 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6864 //  ins_cost(250);
6865 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6866 //  opcode(0x0F,0x40);
6867 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6868 //  ins_pipe( pipe_cmov_mem );
6869 //%}
6870 
6871 // Conditional move
6872 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6873   predicate(UseSSE<=1);
6874   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6875   ins_cost(200);
6876   format %{ "FCMOV$cop $dst,$src\t# double" %}
6877   opcode(0xDA);
6878   ins_encode( enc_cmov_dpr(cop,src) );
6879   ins_pipe( pipe_cmovDPR_reg );
6880 %}
6881 
6882 // Conditional move
6883 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6884   predicate(UseSSE==0);
6885   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6886   ins_cost(200);
6887   format %{ "FCMOV$cop $dst,$src\t# float" %}
6888   opcode(0xDA);
6889   ins_encode( enc_cmov_dpr(cop,src) );
6890   ins_pipe( pipe_cmovDPR_reg );
6891 %}
6892 
6893 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6894 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6895   predicate(UseSSE<=1);
6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6897   ins_cost(200);
6898   format %{ "Jn$cop   skip\n\t"
6899             "MOV    $dst,$src\t# double\n"
6900       "skip:" %}
6901   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6902   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6903   ins_pipe( pipe_cmovDPR_reg );
6904 %}
6905 
6906 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6907 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6908   predicate(UseSSE==0);
6909   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6910   ins_cost(200);
6911   format %{ "Jn$cop    skip\n\t"
6912             "MOV    $dst,$src\t# float\n"
6913       "skip:" %}
6914   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6915   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6916   ins_pipe( pipe_cmovDPR_reg );
6917 %}
6918 
6919 // No CMOV for XMM registers with SSE/SSE2; emulate with a conditional branch
6920 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6921   predicate (UseSSE>=1);
6922   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6923   ins_cost(200);
6924   format %{ "Jn$cop   skip\n\t"
6925             "MOVSS  $dst,$src\t# float\n"
6926       "skip:" %}
6927   ins_encode %{
6928     Label skip;
6929     // Invert sense of branch from sense of CMOV
6930     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6931     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6932     __ bind(skip);
6933   %}
6934   ins_pipe( pipe_slow );
6935 %}
6936 
6937 // No CMOV for XMM registers with SSE/SSE2; emulate with a conditional branch
6938 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6939   predicate (UseSSE>=2);
6940   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6941   ins_cost(200);
6942   format %{ "Jn$cop   skip\n\t"
6943             "MOVSD  $dst,$src\t# double\n"
6944       "skip:" %}
6945   ins_encode %{
6946     Label skip;
6947     // Invert sense of branch from sense of CMOV
6948     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6949     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6950     __ bind(skip);
6951   %}
6952   ins_pipe( pipe_slow );
6953 %}
6954 
6955 // unsigned version
6956 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6957   predicate (UseSSE>=1);
6958   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6959   ins_cost(200);
6960   format %{ "Jn$cop   skip\n\t"
6961             "MOVSS  $dst,$src\t# float\n"
6962       "skip:" %}
6963   ins_encode %{
6964     Label skip;
6965     // Invert sense of branch from sense of CMOV
6966     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6967     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6968     __ bind(skip);
6969   %}
6970   ins_pipe( pipe_slow );
6971 %}
6972 
6973 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6974   predicate (UseSSE>=1);
6975   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6976   ins_cost(200);
6977   expand %{
6978     fcmovF_regU(cop, cr, dst, src);
6979   %}
6980 %}
6981 
6982 // unsigned version
6983 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6984   predicate (UseSSE>=2);
6985   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6986   ins_cost(200);
6987   format %{ "Jn$cop   skip\n\t"
6988             "MOVSD  $dst,$src\t# double\n"
6989       "skip:" %}
6990   ins_encode %{
6991     Label skip;
6992     // Invert sense of branch from sense of CMOV
6993     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6994     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6995     __ bind(skip);
6996   %}
6997   ins_pipe( pipe_slow );
6998 %}
6999 
7000 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7001   predicate (UseSSE>=2);
7002   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   expand %{
7005     fcmovD_regU(cop, cr, dst, src);
7006   %}
7007 %}
7008 
7009 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7010   predicate(VM_Version::supports_cmov() );
7011   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7012   ins_cost(200);
7013   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7014             "CMOV$cop $dst.hi,$src.hi" %}
7015   opcode(0x0F,0x40);
7016   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7017   ins_pipe( pipe_cmov_reg_long );
7018 %}
7019 
7020 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7021   predicate(VM_Version::supports_cmov() );
7022   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7023   ins_cost(200);
7024   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7025             "CMOV$cop $dst.hi,$src.hi" %}
7026   opcode(0x0F,0x40);
7027   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7028   ins_pipe( pipe_cmov_reg_long );
7029 %}
7030 
7031 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7032   predicate(VM_Version::supports_cmov() );
7033   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7034   ins_cost(200);
7035   expand %{
7036     cmovL_regU(cop, cr, dst, src);
7037   %}
7038 %}
7039 
7040 //----------Arithmetic Instructions--------------------------------------------
7041 //----------Addition Instructions----------------------------------------------
7042 
7043 // Integer Addition Instructions
7044 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7045   match(Set dst (AddI dst src));
7046   effect(KILL cr);
7047 
7048   size(2);
7049   format %{ "ADD    $dst,$src" %}
7050   opcode(0x03);
7051   ins_encode( OpcP, RegReg( dst, src) );
7052   ins_pipe( ialu_reg_reg );
7053 %}
7054 
7055 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7056   match(Set dst (AddI dst src));
7057   effect(KILL cr);
7058 
7059   format %{ "ADD    $dst,$src" %}
7060   opcode(0x81, 0x00); /* /0 id */
7061   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7062   ins_pipe( ialu_reg );
7063 %}
7064 
7065 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7066   predicate(UseIncDec);
7067   match(Set dst (AddI dst src));
7068   effect(KILL cr);
7069 
7070   size(1);
7071   format %{ "INC    $dst" %}
7072   opcode(0x40); /* 0x40 + rd => INC r32 */
7073   ins_encode( Opc_plus( primary, dst ) );
7074   ins_pipe( ialu_reg );
7075 %}
7076 
7077 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7078   match(Set dst (AddI src0 src1));
7079   ins_cost(110);
7080 
7081   format %{ "LEA    $dst,[$src0 + $src1]" %}
7082   opcode(0x8D); /* 0x8D /r */
7083   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7084   ins_pipe( ialu_reg_reg );
7085 %}
7086 
7087 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7088   match(Set dst (AddP src0 src1));
7089   ins_cost(110);
7090 
7091   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7092   opcode(0x8D); /* 0x8D /r */
7093   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7094   ins_pipe( ialu_reg_reg );
7095 %}
7096 
7097 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7098   predicate(UseIncDec);
7099   match(Set dst (AddI dst src));
7100   effect(KILL cr);
7101 
7102   size(1);
7103   format %{ "DEC    $dst" %}
7104   opcode(0x48); /* 0x48 + rd => DEC r32 */
7105   ins_encode( Opc_plus( primary, dst ) );
7106   ins_pipe( ialu_reg );
7107 %}
7108 
7109 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7110   match(Set dst (AddP dst src));
7111   effect(KILL cr);
7112 
7113   size(2);
7114   format %{ "ADD    $dst,$src" %}
7115   opcode(0x03);
7116   ins_encode( OpcP, RegReg( dst, src) );
7117   ins_pipe( ialu_reg_reg );
7118 %}
7119 
7120 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7121   match(Set dst (AddP dst src));
7122   effect(KILL cr);
7123 
7124   format %{ "ADD    $dst,$src" %}
7125   opcode(0x81,0x00); /* Opcode 81 /0 id */
7126   // ins_encode( RegImm( dst, src) );
7127   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7128   ins_pipe( ialu_reg );
7129 %}
7130 
7131 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7132   match(Set dst (AddI dst (LoadI src)));
7133   effect(KILL cr);
7134 
7135   ins_cost(125);
7136   format %{ "ADD    $dst,$src" %}
7137   opcode(0x03);
7138   ins_encode( OpcP, RegMem( dst, src) );
7139   ins_pipe( ialu_reg_mem );
7140 %}
7141 
7142 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7143   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7144   effect(KILL cr);
7145 
7146   ins_cost(150);
7147   format %{ "ADD    $dst,$src" %}
7148   opcode(0x01);  /* Opcode 01 /r */
7149   ins_encode( OpcP, RegMem( src, dst ) );
7150   ins_pipe( ialu_mem_reg );
7151 %}
7152 
7153 // Add Memory with Immediate
7154 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7155   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7156   effect(KILL cr);
7157 
7158   ins_cost(125);
7159   format %{ "ADD    $dst,$src" %}
7160   opcode(0x81);               /* Opcode 81 /0 id */
7161   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7162   ins_pipe( ialu_mem_imm );
7163 %}
7164 
7165 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7166   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7167   effect(KILL cr);
7168 
7169   ins_cost(125);
7170   format %{ "INC    $dst" %}
7171   opcode(0xFF);               /* Opcode FF /0 */
7172   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7173   ins_pipe( ialu_mem_imm );
7174 %}
7175 
7176 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7178   effect(KILL cr);
7179 
7180   ins_cost(125);
7181   format %{ "DEC    $dst" %}
7182   opcode(0xFF);               /* Opcode FF /1 */
7183   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7184   ins_pipe( ialu_mem_imm );
7185 %}
7186 
7187 
7188 instruct checkCastPP( eRegP dst ) %{
7189   match(Set dst (CheckCastPP dst));
7190 
7191   size(0);
7192   format %{ "#checkcastPP of $dst" %}
7193   ins_encode( /*empty encoding*/ );
7194   ins_pipe( empty );
7195 %}
7196 
7197 instruct castPP( eRegP dst ) %{
7198   match(Set dst (CastPP dst));
7199   format %{ "#castPP of $dst" %}
7200   ins_encode( /*empty encoding*/ );
7201   ins_pipe( empty );
7202 %}
7203 
7204 instruct castII( rRegI dst ) %{
7205   match(Set dst (CastII dst));
7206   format %{ "#castII of $dst" %}
7207   ins_encode( /*empty encoding*/ );
7208   ins_cost(0);
7209   ins_pipe( empty );
7210 %}
7211 
7212 
7213 // Load-locked - same as a regular pointer load when used with compare-and-swap
7214 instruct loadPLocked(eRegP dst, memory mem) %{
7215   match(Set dst (LoadPLocked mem));
7216 
7217   ins_cost(125);
7218   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7219   opcode(0x8B);
7220   ins_encode( OpcP, RegMem(dst,mem));
7221   ins_pipe( ialu_reg_mem );
7222 %}
7223 
7224 // Conditional-store of the updated heap-top.
7225 // Used during allocation of the shared heap.
7226 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7227 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7228   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7229   // EAX is killed if there is contention, but then it's also unused.
7230   // In the common case of no contention, EAX holds the new oop address.
7231   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7232   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7233   ins_pipe( pipe_cmpxchg );
7234 %}
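// A hedged Java analogy (illustrative only, not part of this file): the pair
// LoadPLocked/StorePConditional behaves like a CAS-based bump-pointer
// allocator; an AtomicLong stands in for the real heap-top word here.
//
//   static long tryAllocate(java.util.concurrent.atomic.AtomicLong top,
//                           long size, long end) {         // hypothetical helper
//     long old = top.get();                                 // the LoadPLocked above
//     if (old + size > end) return 0;                       // no room, take the slow path
//     return top.compareAndSet(old, old + size) ? old : 0;  // CMPXCHG; ZF set on success
//   }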
7235 
7236 // Conditional-store of an int value.
7237 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7238 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7239   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7240   effect(KILL oldval);
7241   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7242   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7243   ins_pipe( pipe_cmpxchg );
7244 %}
7245 
7246 // Conditional-store of a long value.
7247 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7248 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7249   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7250   effect(KILL oldval);
7251   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7252             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7253             "XCHG   EBX,ECX"
7254   %}
7255   ins_encode %{
7256     // Note: we need to swap rbx and rcx before and after the
7257     //       cmpxchg8 instruction because the instruction uses
7258     //       rcx as the high order word of the new value to store but
7259     //       our register encoding uses rbx.
7260     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7261     if( os::is_MP() )
7262       __ lock();
7263     __ cmpxchg8($mem$$Address);
7264     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7265   %}
7266   ins_pipe( pipe_cmpxchg );
7267 %}
7268 
7269 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7270 
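// A hedged Java-level note (illustrative only, not part of this file): these
// rules are, roughly, what compare-and-set/compare-and-exchange calls end up
// using, e.g.
//
//   java.util.concurrent.atomic.AtomicInteger c = new java.util.concurrent.atomic.AtomicInteger();
//   boolean ok = c.compareAndSet(0, 1);   // CompareAndSwapI -> LOCK CMPXCHG
//
// The boolean result is recovered from the flags, which is why the formats
// below show a JNE/MOV sequence after the CMPXCHG.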
7271 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7272   predicate(VM_Version::supports_cx8());
7273   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7274   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7275   effect(KILL cr, KILL oldval);
7276   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7277             "MOV    $res,0\n\t"
7278             "JNE,s  fail\n\t"
7279             "MOV    $res,1\n"
7280           "fail:" %}
7281   ins_encode( enc_cmpxchg8(mem_ptr),
7282               enc_flags_ne_to_boolean(res) );
7283   ins_pipe( pipe_cmpxchg );
7284 %}
7285 
7286 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7287   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7288   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7289   effect(KILL cr, KILL oldval);
7290   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7291             "MOV    $res,0\n\t"
7292             "JNE,s  fail\n\t"
7293             "MOV    $res,1\n"
7294           "fail:" %}
7295   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7296   ins_pipe( pipe_cmpxchg );
7297 %}
7298 
7299 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7300   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7301   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7302   effect(KILL cr, KILL oldval);
7303   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7304             "MOV    $res,0\n\t"
7305             "JNE,s  fail\n\t"
7306             "MOV    $res,1\n"
7307           "fail:" %}
7308   ins_encode( enc_cmpxchgb(mem_ptr),
7309               enc_flags_ne_to_boolean(res) );
7310   ins_pipe( pipe_cmpxchg );
7311 %}
7312 
7313 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7314   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7315   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7316   effect(KILL cr, KILL oldval);
7317   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7318             "MOV    $res,0\n\t"
7319             "JNE,s  fail\n\t"
7320             "MOV    $res,1\n"
7321           "fail:" %}
7322   ins_encode( enc_cmpxchgw(mem_ptr),
7323               enc_flags_ne_to_boolean(res) );
7324   ins_pipe( pipe_cmpxchg );
7325 %}
7326 
7327 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7328   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7329   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7330   effect(KILL cr, KILL oldval);
7331   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7332             "MOV    $res,0\n\t"
7333             "JNE,s  fail\n\t"
7334             "MOV    $res,1\n"
7335           "fail:" %}
7336   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7337   ins_pipe( pipe_cmpxchg );
7338 %}
7339 
7340 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7341   predicate(VM_Version::supports_cx8());
7342   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7343   effect(KILL cr);
7344   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7345   ins_encode( enc_cmpxchg8(mem_ptr) );
7346   ins_pipe( pipe_cmpxchg );
7347 %}
7348 
7349 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7350   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7351   effect(KILL cr);
7352   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7353   ins_encode( enc_cmpxchg(mem_ptr) );
7354   ins_pipe( pipe_cmpxchg );
7355 %}
7356 
7357 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7358   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7359   effect(KILL cr);
7360   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7361   ins_encode( enc_cmpxchgb(mem_ptr) );
7362   ins_pipe( pipe_cmpxchg );
7363 %}
7364 
7365 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7366   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7367   effect(KILL cr);
7368   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7369   ins_encode( enc_cmpxchgw(mem_ptr) );
7370   ins_pipe( pipe_cmpxchg );
7371 %}
7372 
7373 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7374   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7375   effect(KILL cr);
7376   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7377   ins_encode( enc_cmpxchg(mem_ptr) );
7378   ins_pipe( pipe_cmpxchg );
7379 %}
7380 
7381 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7382   predicate(n->as_LoadStore()->result_not_used());
7383   match(Set dummy (GetAndAddB mem add));
7384   effect(KILL cr);
7385   format %{ "ADDB  [$mem],$add" %}
7386   ins_encode %{
7387     if (os::is_MP()) { __ lock(); }
7388     __ addb($mem$$Address, $add$$constant);
7389   %}
7390   ins_pipe( pipe_cmpxchg );
7391 %}
7392 
7393 // Important to match to xRegI: only 8-bit regs.
7394 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7395   match(Set newval (GetAndAddB mem newval));
7396   effect(KILL cr);
7397   format %{ "XADDB  [$mem],$newval" %}
7398   ins_encode %{
7399     if (os::is_MP()) { __ lock(); }
7400     __ xaddb($mem$$Address, $newval$$Register);
7401   %}
7402   ins_pipe( pipe_cmpxchg );
7403 %}
7404 
7405 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7406   predicate(n->as_LoadStore()->result_not_used());
7407   match(Set dummy (GetAndAddS mem add));
7408   effect(KILL cr);
7409   format %{ "ADDS  [$mem],$add" %}
7410   ins_encode %{
7411     if (os::is_MP()) { __ lock(); }
7412     __ addw($mem$$Address, $add$$constant);
7413   %}
7414   ins_pipe( pipe_cmpxchg );
7415 %}
7416 
7417 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7418   match(Set newval (GetAndAddS mem newval));
7419   effect(KILL cr);
7420   format %{ "XADDS  [$mem],$newval" %}
7421   ins_encode %{
7422     if (os::is_MP()) { __ lock(); }
7423     __ xaddw($mem$$Address, $newval$$Register);
7424   %}
7425   ins_pipe( pipe_cmpxchg );
7426 %}
7427 
7428 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7429   predicate(n->as_LoadStore()->result_not_used());
7430   match(Set dummy (GetAndAddI mem add));
7431   effect(KILL cr);
7432   format %{ "ADDL  [$mem],$add" %}
7433   ins_encode %{
7434     if (os::is_MP()) { __ lock(); }
7435     __ addl($mem$$Address, $add$$constant);
7436   %}
7437   ins_pipe( pipe_cmpxchg );
7438 %}
7439 
7440 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7441   match(Set newval (GetAndAddI mem newval));
7442   effect(KILL cr);
7443   format %{ "XADDL  [$mem],$newval" %}
7444   ins_encode %{
7445     if (os::is_MP()) { __ lock(); }
7446     __ xaddl($mem$$Address, $newval$$Register);
7447   %}
7448   ins_pipe( pipe_cmpxchg );
7449 %}
7450 
7451 // Important to match to xRegI: only 8-bit regs.
7452 instruct xchgB( memory mem, xRegI newval) %{
7453   match(Set newval (GetAndSetB mem newval));
7454   format %{ "XCHGB  $newval,[$mem]" %}
7455   ins_encode %{
7456     __ xchgb($newval$$Register, $mem$$Address);
7457   %}
7458   ins_pipe( pipe_cmpxchg );
7459 %}
7460 
7461 instruct xchgS( memory mem, rRegI newval) %{
7462   match(Set newval (GetAndSetS mem newval));
7463   format %{ "XCHGW  $newval,[$mem]" %}
7464   ins_encode %{
7465     __ xchgw($newval$$Register, $mem$$Address);
7466   %}
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
7470 instruct xchgI( memory mem, rRegI newval) %{
7471   match(Set newval (GetAndSetI mem newval));
7472   format %{ "XCHGL  $newval,[$mem]" %}
7473   ins_encode %{
7474     __ xchgl($newval$$Register, $mem$$Address);
7475   %}
7476   ins_pipe( pipe_cmpxchg );
7477 %}
7478 
7479 instruct xchgP( memory mem, pRegP newval) %{
7480   match(Set newval (GetAndSetP mem newval));
7481   format %{ "XCHGL  $newval,[$mem]" %}
7482   ins_encode %{
7483     __ xchgl($newval$$Register, $mem$$Address);
7484   %}
7485   ins_pipe( pipe_cmpxchg );
7486 %}
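// A hedged Java-level note (illustrative only, not part of this file): the
// GetAndAdd*/GetAndSet* rules above are, roughly, what atomic get-and-add and
// get-and-set operations end up using.
//
//   java.util.concurrent.atomic.AtomicInteger c = new java.util.concurrent.atomic.AtomicInteger();
//   int before = c.getAndAdd(5);   // GetAndAddI -> LOCK XADD
//   int old    = c.getAndSet(7);   // GetAndSetI -> XCHG
//
// XCHG with a memory operand is implicitly locked by the hardware, so the xchg
// rules above emit no explicit LOCK prefix.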
7487 
7488 //----------Subtraction Instructions-------------------------------------------
7489 
7490 // Integer Subtraction Instructions
7491 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7492   match(Set dst (SubI dst src));
7493   effect(KILL cr);
7494 
7495   size(2);
7496   format %{ "SUB    $dst,$src" %}
7497   opcode(0x2B);
7498   ins_encode( OpcP, RegReg( dst, src) );
7499   ins_pipe( ialu_reg_reg );
7500 %}
7501 
7502 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7503   match(Set dst (SubI dst src));
7504   effect(KILL cr);
7505 
7506   format %{ "SUB    $dst,$src" %}
7507   opcode(0x81,0x05);  /* Opcode 81 /5 */
7508   // ins_encode( RegImm( dst, src) );
7509   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7510   ins_pipe( ialu_reg );
7511 %}
7512 
7513 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7514   match(Set dst (SubI dst (LoadI src)));
7515   effect(KILL cr);
7516 
7517   ins_cost(125);
7518   format %{ "SUB    $dst,$src" %}
7519   opcode(0x2B);
7520   ins_encode( OpcP, RegMem( dst, src) );
7521   ins_pipe( ialu_reg_mem );
7522 %}
7523 
7524 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7525   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7526   effect(KILL cr);
7527 
7528   ins_cost(150);
7529   format %{ "SUB    $dst,$src" %}
7530   opcode(0x29);  /* Opcode 29 /r */
7531   ins_encode( OpcP, RegMem( src, dst ) );
7532   ins_pipe( ialu_mem_reg );
7533 %}
7534 
7535 // Subtract from a pointer
7536 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7537   match(Set dst (AddP dst (SubI zero src)));
7538   effect(KILL cr);
7539 
7540   size(2);
7541   format %{ "SUB    $dst,$src" %}
7542   opcode(0x2B);
7543   ins_encode( OpcP, RegReg( dst, src) );
7544   ins_pipe( ialu_reg_reg );
7545 %}
7546 
7547 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7548   match(Set dst (SubI zero dst));
7549   effect(KILL cr);
7550 
7551   size(2);
7552   format %{ "NEG    $dst" %}
7553   opcode(0xF7,0x03);  // Opcode F7 /3
7554   ins_encode( OpcP, RegOpc( dst ) );
7555   ins_pipe( ialu_reg );
7556 %}
7557 
7558 //----------Multiplication/Division Instructions-------------------------------
7559 // Integer Multiplication Instructions
7560 // Multiply Register
7561 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7562   match(Set dst (MulI dst src));
7563   effect(KILL cr);
7564 
7565   size(3);
7566   ins_cost(300);
7567   format %{ "IMUL   $dst,$src" %}
7568   opcode(0xAF, 0x0F);
7569   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7570   ins_pipe( ialu_reg_reg_alu0 );
7571 %}
7572 
7573 // Multiply 32-bit Immediate
7574 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7575   match(Set dst (MulI src imm));
7576   effect(KILL cr);
7577 
7578   ins_cost(300);
7579   format %{ "IMUL   $dst,$src,$imm" %}
7580   opcode(0x69);  /* 69 /r id */
7581   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7582   ins_pipe( ialu_reg_reg_alu0 );
7583 %}
7584 
7585 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7586   match(Set dst src);
7587   effect(KILL cr);
7588 
7589   // Note that this is artificially increased to make it more expensive than loadConL
7590   ins_cost(250);
7591   format %{ "MOV    EAX,$src\t// low word only" %}
7592   opcode(0xB8);
7593   ins_encode( LdImmL_Lo(dst, src) );
7594   ins_pipe( ialu_reg_fat );
7595 %}
7596 
7597 // Multiply by 32-bit Immediate, taking the shifted high order results
7598 //  (special case for shift by 32)
7599 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7600   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7601   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7602              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7603              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7604   effect(USE src1, KILL cr);
7605 
7606   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7607   ins_cost(0*100 + 1*400 - 150);
7608   format %{ "IMUL   EDX:EAX,$src1" %}
7609   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7610   ins_pipe( pipe_slow );
7611 %}
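// A hedged Java-level example (illustrative only, not part of this file): the
// "multiply high" shape matched above typically comes from taking the high
// word of a widened product, as in reciprocal-multiply strength reduction.
//
//   static int mulHi(int x) {                        // hypothetical example
//     final long C = 0x55555556L;                    // some 32-bit signed constant
//     return (int) (((long) x * C) >> 32);           // high word of the 64-bit product
//   }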
7612 
7613 // Multiply by 32-bit Immediate, taking the shifted high order results
7614 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7615   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7616   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7617              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7618              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7619   effect(USE src1, KILL cr);
7620 
7621   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7622   ins_cost(1*100 + 1*400 - 150);
7623   format %{ "IMUL   EDX:EAX,$src1\n\t"
7624             "SAR    EDX,$cnt-32" %}
7625   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7626   ins_pipe( pipe_slow );
7627 %}
7628 
7629 // Multiply Memory 32-bit Immediate
7630 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7631   match(Set dst (MulI (LoadI src) imm));
7632   effect(KILL cr);
7633 
7634   ins_cost(300);
7635   format %{ "IMUL   $dst,$src,$imm" %}
7636   opcode(0x69);  /* 69 /r id */
7637   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7638   ins_pipe( ialu_reg_mem_alu0 );
7639 %}
7640 
7641 // Multiply Memory
7642 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7643   match(Set dst (MulI dst (LoadI src)));
7644   effect(KILL cr);
7645 
7646   ins_cost(350);
7647   format %{ "IMUL   $dst,$src" %}
7648   opcode(0xAF, 0x0F);
7649   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7650   ins_pipe( ialu_reg_mem_alu0 );
7651 %}
7652 
7653 // Multiply Register Int to Long
7654 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7655   // Basic Idea: long = (long)int * (long)int
7656   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7657   effect(DEF dst, USE src, USE src1, KILL flags);
7658 
7659   ins_cost(300);
7660   format %{ "IMUL   $dst,$src1" %}
7661 
7662   ins_encode( long_int_multiply( dst, src1 ) );
7663   ins_pipe( ialu_reg_reg_alu0 );
7664 %}
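// A hedged Java-level example (illustrative only, not part of this file): the
// widening 32x32->64 multiply matched above comes from shapes like
//
//   static long widen(int a, int b) { return (long) a * (long) b; }  // one IMUL, EDX:EAX result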
7665 
7666 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7667   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7668   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7669   effect(KILL flags);
7670 
7671   ins_cost(300);
7672   format %{ "MUL    $dst,$src1" %}
7673 
7674   ins_encode( long_uint_multiply(dst, src1) );
7675   ins_pipe( ialu_reg_reg_alu0 );
7676 %}
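// A hedged Java-level example (illustrative only, not part of this file): the
// same shape with both operands masked to 32 bits is the unsigned widening
// multiply matched above, which uses MUL instead of IMUL.
//
//   static long uwiden(int a, int b) {
//     return (a & 0xFFFFFFFFL) * (b & 0xFFFFFFFFL);  // unsigned 32x32->64 product
//   }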
7677 
7678 // Multiply Register Long
7679 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7680   match(Set dst (MulL dst src));
7681   effect(KILL cr, TEMP tmp);
7682   ins_cost(4*100+3*400);
7683 // Basic idea: lo(result) = lo(x_lo * y_lo)
7684 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7685   format %{ "MOV    $tmp,$src.lo\n\t"
7686             "IMUL   $tmp,EDX\n\t"
7687             "MOV    EDX,$src.hi\n\t"
7688             "IMUL   EDX,EAX\n\t"
7689             "ADD    $tmp,EDX\n\t"
7690             "MUL    EDX:EAX,$src.lo\n\t"
7691             "ADD    EDX,$tmp" %}
7692   ins_encode( long_multiply( dst, src, tmp ) );
7693   ins_pipe( pipe_slow );
7694 %}
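// A hedged sketch (illustrative only, not part of this file) of the
// decomposition in the "Basic idea" comment above, written out in Java;
// mul64(x, y) == x * y for all inputs because long arithmetic wraps mod 2^64.
//
//   static long mul64(long x, long y) {               // hypothetical helper
//     long xLo = x & 0xFFFFFFFFL, xHi = x >>> 32;
//     long yLo = y & 0xFFFFFFFFL, yHi = y >>> 32;
//     long lo  = xLo * yLo;                           // full 64-bit x_lo * y_lo
//     long hi  = (lo >>> 32) + xHi * yLo + xLo * yHi; // only its low 32 bits survive
//     return (hi << 32) | (lo & 0xFFFFFFFFL);
//   }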
7695 
7696 // Multiply Register Long where the left operand's high 32 bits are zero
7697 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7698   predicate(is_operand_hi32_zero(n->in(1)));
7699   match(Set dst (MulL dst src));
7700   effect(KILL cr, TEMP tmp);
7701   ins_cost(2*100+2*400);
7702 // Basic idea: lo(result) = lo(x_lo * y_lo)
7703 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7704   format %{ "MOV    $tmp,$src.hi\n\t"
7705             "IMUL   $tmp,EAX\n\t"
7706             "MUL    EDX:EAX,$src.lo\n\t"
7707             "ADD    EDX,$tmp" %}
7708   ins_encode %{
7709     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7710     __ imull($tmp$$Register, rax);
7711     __ mull($src$$Register);
7712     __ addl(rdx, $tmp$$Register);
7713   %}
7714   ins_pipe( pipe_slow );
7715 %}
7716 
7717 // Multiply Register Long where the right operand's high 32 bits are zero
7718 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7719   predicate(is_operand_hi32_zero(n->in(2)));
7720   match(Set dst (MulL dst src));
7721   effect(KILL cr, TEMP tmp);
7722   ins_cost(2*100+2*400);
7723 // Basic idea: lo(result) = lo(x_lo * y_lo)
7724 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7725   format %{ "MOV    $tmp,$src.lo\n\t"
7726             "IMUL   $tmp,EDX\n\t"
7727             "MUL    EDX:EAX,$src.lo\n\t"
7728             "ADD    EDX,$tmp" %}
7729   ins_encode %{
7730     __ movl($tmp$$Register, $src$$Register);
7731     __ imull($tmp$$Register, rdx);
7732     __ mull($src$$Register);
7733     __ addl(rdx, $tmp$$Register);
7734   %}
7735   ins_pipe( pipe_slow );
7736 %}
7737 
7738 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7739 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7740   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7741   match(Set dst (MulL dst src));
7742   effect(KILL cr);
7743   ins_cost(1*400);
7744 // Basic idea: lo(result) = lo(x_lo * y_lo)
7745 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7746   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7747   ins_encode %{
7748     __ mull($src$$Register);
7749   %}
7750   ins_pipe( pipe_slow );
7751 %}
7752 
7753 // Multiply Register Long by small constant
7754 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7755   match(Set dst (MulL dst src));
7756   effect(KILL cr, TEMP tmp);
7757   ins_cost(2*100+2*400);
7758   size(12);
7759 // Basic idea: lo(result) = lo(src * EAX)
7760 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7761   format %{ "IMUL   $tmp,EDX,$src\n\t"
7762             "MOV    EDX,$src\n\t"
7763             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7764             "ADD    EDX,$tmp" %}
7765   ins_encode( long_multiply_con( dst, src, tmp ) );
7766   ins_pipe( pipe_slow );
7767 %}
7768 
7769 // Integer DIV with Register
7770 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7771   match(Set rax (DivI rax div));
7772   effect(KILL rdx, KILL cr);
7773   size(26);
7774   ins_cost(30*100+10*100);
7775   format %{ "CMP    EAX,0x80000000\n\t"
7776             "JNE,s  normal\n\t"
7777             "XOR    EDX,EDX\n\t"
7778             "CMP    ECX,-1\n\t"
7779             "JE,s   done\n"
7780     "normal: CDQ\n\t"
7781             "IDIV   $div\n\t"
7782     "done:"        %}
7783   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7784   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7785   ins_pipe( ialu_reg_reg_alu0 );
7786 %}
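// A hedged Java-level note (illustrative only, not part of this file): the
// compare/branch sequence in the format above exists because Java defines
// Integer.MIN_VALUE / -1 to wrap to Integer.MIN_VALUE, while IDIV would raise
// a hardware divide fault (#DE) for exactly that operand pair.
//
//   static int div(int x, int y) { return x / y; }   // hypothetical example
//   // div(Integer.MIN_VALUE, -1) == Integer.MIN_VALUE, so the encoding tests
//   // for this pair and bypasses IDIV instead of faulting.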
7787 
7788 // Divide Register Long
7789 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7790   match(Set dst (DivL src1 src2));
7791   effect( KILL cr, KILL cx, KILL bx );
7792   ins_cost(10000);
7793   format %{ "PUSH   $src1.hi\n\t"
7794             "PUSH   $src1.lo\n\t"
7795             "PUSH   $src2.hi\n\t"
7796             "PUSH   $src2.lo\n\t"
7797             "CALL   SharedRuntime::ldiv\n\t"
7798             "ADD    ESP,16" %}
7799   ins_encode( long_div(src1,src2) );
7800   ins_pipe( pipe_slow );
7801 %}
7802 
7803 // Integer DIVMOD with Register, both quotient and mod results
7804 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7805   match(DivModI rax div);
7806   effect(KILL cr);
7807   size(26);
7808   ins_cost(30*100+10*100);
7809   format %{ "CMP    EAX,0x80000000\n\t"
7810             "JNE,s  normal\n\t"
7811             "XOR    EDX,EDX\n\t"
7812             "CMP    ECX,-1\n\t"
7813             "JE,s   done\n"
7814     "normal: CDQ\n\t"
7815             "IDIV   $div\n\t"
7816     "done:"        %}
7817   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7818   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7819   ins_pipe( pipe_slow );
7820 %}
7821 
7822 // Integer MOD with Register
7823 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7824   match(Set rdx (ModI rax div));
7825   effect(KILL rax, KILL cr);
7826 
7827   size(26);
7828   ins_cost(300);
7829   format %{ "CDQ\n\t"
7830             "IDIV   $div" %}
7831   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7832   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7833   ins_pipe( ialu_reg_reg_alu0 );
7834 %}
7835 
7836 // Remainder Register Long
7837 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7838   match(Set dst (ModL src1 src2));
7839   effect( KILL cr, KILL cx, KILL bx );
7840   ins_cost(10000);
7841   format %{ "PUSH   $src1.hi\n\t"
7842             "PUSH   $src1.lo\n\t"
7843             "PUSH   $src2.hi\n\t"
7844             "PUSH   $src2.lo\n\t"
7845             "CALL   SharedRuntime::lrem\n\t"
7846             "ADD    ESP,16" %}
7847   ins_encode( long_mod(src1,src2) );
7848   ins_pipe( pipe_slow );
7849 %}
7850 
7851 // Divide Register Long (no special case since divisor != -1)
7852 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7853   match(Set dst (DivL dst imm));
7854   effect( TEMP tmp, TEMP tmp2, KILL cr );
7855   ins_cost(1000);
7856   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7857             "XOR    $tmp2,$tmp2\n\t"
7858             "CMP    $tmp,EDX\n\t"
7859             "JA,s   fast\n\t"
7860             "MOV    $tmp2,EAX\n\t"
7861             "MOV    EAX,EDX\n\t"
7862             "MOV    EDX,0\n\t"
7863             "JLE,s  pos\n\t"
7864             "LNEG   EAX : $tmp2\n\t"
7865             "DIV    $tmp # unsigned division\n\t"
7866             "XCHG   EAX,$tmp2\n\t"
7867             "DIV    $tmp\n\t"
7868             "LNEG   $tmp2 : EAX\n\t"
7869             "JMP,s  done\n"
7870     "pos:\n\t"
7871             "DIV    $tmp\n\t"
7872             "XCHG   EAX,$tmp2\n"
7873     "fast:\n\t"
7874             "DIV    $tmp\n"
7875     "done:\n\t"
7876             "MOV    EDX,$tmp2\n\t"
7877             "NEG    EDX:EAX # if $imm < 0" %}
7878   ins_encode %{
7879     int con = (int)$imm$$constant;
7880     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7881     int pcon = (con > 0) ? con : -con;
7882     Label Lfast, Lpos, Ldone;
7883 
7884     __ movl($tmp$$Register, pcon);
7885     __ xorl($tmp2$$Register,$tmp2$$Register);
7886     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7887     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7888 
7889     __ movl($tmp2$$Register, $dst$$Register); // save
7890     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7891     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7892     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7893 
7894     // Negative dividend.
7895     // convert value to positive to use unsigned division
7896     __ lneg($dst$$Register, $tmp2$$Register);
7897     __ divl($tmp$$Register);
7898     __ xchgl($dst$$Register, $tmp2$$Register);
7899     __ divl($tmp$$Register);
7900     // revert result back to negative
7901     __ lneg($tmp2$$Register, $dst$$Register);
7902     __ jmpb(Ldone);
7903 
7904     __ bind(Lpos);
7905     __ divl($tmp$$Register); // Use unsigned division
7906     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for the final divide; tmp2 has the 32-bit hi result
7908 
7909     __ bind(Lfast);
7910     // fast path: src is positive
7911     __ divl($tmp$$Register); // Use unsigned division
7912 
7913     __ bind(Ldone);
7914     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7915     if (con < 0) {
7916       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7917     }
7918   %}
7919   ins_pipe( pipe_slow );
7920 %}
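// Illustrative sketch only (plain C) of the constant-divisor long division above:
// work on magnitudes, do an unsigned 64/32 divide with at most two 32-bit DIVs
// (each one's quotient is guaranteed to fit in 32 bits), then fix up the sign.
//
//   uint64_t udiv64_by_32(uint32_t hi, uint32_t lo, uint32_t d) {
//     if (hi < d) {                                        // fast path: quotient fits 32 bits
//       return ((uint64_t)hi << 32 | lo) / d;              // single DIV
//     }
//     uint32_t q_hi = hi / d;                              // first DIV
//     uint32_t r    = hi % d;                              //   (quotient and remainder)
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | lo) / d);  // second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;
//   }
//
// The signed wrapper negates a negative dividend before the divide (LNEG), negates
// the quotient back afterwards, and finally negates once more when the divisor
// constant is negative (the trailing "NEG EDX:EAX # if $imm < 0").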
7921 
// Remainder Register Long (remainder fits into 32 bits)
7923 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7924   match(Set dst (ModL dst imm));
7925   effect( TEMP tmp, TEMP tmp2, KILL cr );
7926   ins_cost(1000);
7927   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7928             "CMP    $tmp,EDX\n\t"
7929             "JA,s   fast\n\t"
7930             "MOV    $tmp2,EAX\n\t"
7931             "MOV    EAX,EDX\n\t"
7932             "MOV    EDX,0\n\t"
7933             "JLE,s  pos\n\t"
7934             "LNEG   EAX : $tmp2\n\t"
7935             "DIV    $tmp # unsigned division\n\t"
7936             "MOV    EAX,$tmp2\n\t"
7937             "DIV    $tmp\n\t"
7938             "NEG    EDX\n\t"
7939             "JMP,s  done\n"
7940     "pos:\n\t"
7941             "DIV    $tmp\n\t"
7942             "MOV    EAX,$tmp2\n"
7943     "fast:\n\t"
7944             "DIV    $tmp\n"
7945     "done:\n\t"
7946             "MOV    EAX,EDX\n\t"
7947             "SAR    EDX,31\n\t" %}
7948   ins_encode %{
7949     int con = (int)$imm$$constant;
7950     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7951     int pcon = (con > 0) ? con : -con;
7952     Label  Lfast, Lpos, Ldone;
7953 
7954     __ movl($tmp$$Register, pcon);
7955     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7956     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7957 
7958     __ movl($tmp2$$Register, $dst$$Register); // save
7959     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7960     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7961     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7962 
7963     // Negative dividend.
7964     // convert value to positive to use unsigned division
7965     __ lneg($dst$$Register, $tmp2$$Register);
7966     __ divl($tmp$$Register);
7967     __ movl($dst$$Register, $tmp2$$Register);
7968     __ divl($tmp$$Register);
7969     // revert remainder back to negative
7970     __ negl(HIGH_FROM_LOW($dst$$Register));
7971     __ jmpb(Ldone);
7972 
7973     __ bind(Lpos);
7974     __ divl($tmp$$Register);
7975     __ movl($dst$$Register, $tmp2$$Register);
7976 
7977     __ bind(Lfast);
7978     // fast path: src is positive
7979     __ divl($tmp$$Register);
7980 
7981     __ bind(Ldone);
7982     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7983     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7984 
7985   %}
7986   ins_pipe( pipe_slow );
7987 %}
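// Illustrative sketch only (plain C): the remainder variant reuses the two-DIV
// scheme from divL_eReg_imm32.  Because |divisor| <= 0x7fffffff, the remainder's
// magnitude fits in 32 bits, so the 64-bit result is rebuilt by sign extension.
// 'urem64_by_32' is a hypothetical helper standing in for the DIV sequence.
//
//   int64_t lrem_by_const(int64_t n, int32_t d) {   // d != 0, -1, min_jint (see assert)
//     uint32_t pd  = (d < 0) ? -(uint32_t)d : (uint32_t)d;
//     uint64_t mag = (n < 0) ? 0 - (uint64_t)n : (uint64_t)n;
//     int32_t  r   = (int32_t)urem64_by_32(mag, pd);
//     if (n < 0) r = -r;               // remainder takes the dividend's sign
//     return (int64_t)r;               // EAX = r ; EDX = r >> 31 (the final SAR)
//   }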
7988 
7989 // Integer Shift Instructions
7990 // Shift Left by one
7991 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7992   match(Set dst (LShiftI dst shift));
7993   effect(KILL cr);
7994 
7995   size(2);
7996   format %{ "SHL    $dst,$shift" %}
7997   opcode(0xD1, 0x4);  /* D1 /4 */
7998   ins_encode( OpcP, RegOpc( dst ) );
7999   ins_pipe( ialu_reg );
8000 %}
8001 
8002 // Shift Left by 8-bit immediate
8003 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8004   match(Set dst (LShiftI dst shift));
8005   effect(KILL cr);
8006 
8007   size(3);
8008   format %{ "SHL    $dst,$shift" %}
8009   opcode(0xC1, 0x4);  /* C1 /4 ib */
8010   ins_encode( RegOpcImm( dst, shift) );
8011   ins_pipe( ialu_reg );
8012 %}
8013 
8014 // Shift Left by variable
8015 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8016   match(Set dst (LShiftI dst shift));
8017   effect(KILL cr);
8018 
8019   size(2);
8020   format %{ "SHL    $dst,$shift" %}
8021   opcode(0xD3, 0x4);  /* D3 /4 */
8022   ins_encode( OpcP, RegOpc( dst ) );
8023   ins_pipe( ialu_reg_reg );
8024 %}
8025 
8026 // Arithmetic shift right by one
8027 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8028   match(Set dst (RShiftI dst shift));
8029   effect(KILL cr);
8030 
8031   size(2);
8032   format %{ "SAR    $dst,$shift" %}
8033   opcode(0xD1, 0x7);  /* D1 /7 */
8034   ins_encode( OpcP, RegOpc( dst ) );
8035   ins_pipe( ialu_reg );
8036 %}
8037 
// Arithmetic shift right memory operand by one
8039 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8040   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8041   effect(KILL cr);
8042   format %{ "SAR    $dst,$shift" %}
8043   opcode(0xD1, 0x7);  /* D1 /7 */
8044   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8045   ins_pipe( ialu_mem_imm );
8046 %}
8047 
8048 // Arithmetic Shift Right by 8-bit immediate
8049 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8050   match(Set dst (RShiftI dst shift));
8051   effect(KILL cr);
8052 
8053   size(3);
8054   format %{ "SAR    $dst,$shift" %}
8055   opcode(0xC1, 0x7);  /* C1 /7 ib */
8056   ins_encode( RegOpcImm( dst, shift ) );
8057   ins_pipe( ialu_mem_imm );
8058 %}
8059 
8060 // Arithmetic Shift Right by 8-bit immediate
8061 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8062   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8063   effect(KILL cr);
8064 
8065   format %{ "SAR    $dst,$shift" %}
8066   opcode(0xC1, 0x7);  /* C1 /7 ib */
8067   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8068   ins_pipe( ialu_mem_imm );
8069 %}
8070 
8071 // Arithmetic Shift Right by variable
8072 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8073   match(Set dst (RShiftI dst shift));
8074   effect(KILL cr);
8075 
8076   size(2);
8077   format %{ "SAR    $dst,$shift" %}
8078   opcode(0xD3, 0x7);  /* D3 /7 */
8079   ins_encode( OpcP, RegOpc( dst ) );
8080   ins_pipe( ialu_reg_reg );
8081 %}
8082 
8083 // Logical shift right by one
8084 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8085   match(Set dst (URShiftI dst shift));
8086   effect(KILL cr);
8087 
8088   size(2);
8089   format %{ "SHR    $dst,$shift" %}
8090   opcode(0xD1, 0x5);  /* D1 /5 */
8091   ins_encode( OpcP, RegOpc( dst ) );
8092   ins_pipe( ialu_reg );
8093 %}
8094 
8095 // Logical Shift Right by 8-bit immediate
8096 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8097   match(Set dst (URShiftI dst shift));
8098   effect(KILL cr);
8099 
8100   size(3);
8101   format %{ "SHR    $dst,$shift" %}
8102   opcode(0xC1, 0x5);  /* C1 /5 ib */
8103   ins_encode( RegOpcImm( dst, shift) );
8104   ins_pipe( ialu_reg );
8105 %}
8106 
8107 
8108 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8109 // This idiom is used by the compiler for the i2b bytecode.
8110 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8111   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8112 
8113   size(3);
8114   format %{ "MOVSX  $dst,$src :8" %}
8115   ins_encode %{
8116     __ movsbl($dst$$Register, $src$$Register);
8117   %}
8118   ins_pipe(ialu_reg_reg);
8119 %}
8120 
8121 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8123 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8124   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8125 
8126   size(3);
8127   format %{ "MOVSX  $dst,$src :16" %}
8128   ins_encode %{
8129     __ movswl($dst$$Register, $src$$Register);
8130   %}
8131   ins_pipe(ialu_reg_reg);
8132 %}
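// Illustrative sketch only (Java-style shift semantics): the two shift idioms above
// are exactly narrowing-then-sign-extending casts, so a single MOVSX suffices.
//
//   int i2b(int x) { return (x << 24) >> 24; }   // == (int)(byte)x   -> MOVSX r32,r8
//   int i2s(int x) { return (x << 16) >> 16; }   // == (int)(short)x  -> MOVSX r32,r16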
8133 
8134 
8135 // Logical Shift Right by variable
8136 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8137   match(Set dst (URShiftI dst shift));
8138   effect(KILL cr);
8139 
8140   size(2);
8141   format %{ "SHR    $dst,$shift" %}
8142   opcode(0xD3, 0x5);  /* D3 /5 */
8143   ins_encode( OpcP, RegOpc( dst ) );
8144   ins_pipe( ialu_reg_reg );
8145 %}
8146 
8147 
8148 //----------Logical Instructions-----------------------------------------------
8149 //----------Integer Logical Instructions---------------------------------------
8150 // And Instructions
8151 // And Register with Register
8152 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8153   match(Set dst (AndI dst src));
8154   effect(KILL cr);
8155 
8156   size(2);
8157   format %{ "AND    $dst,$src" %}
8158   opcode(0x23);
8159   ins_encode( OpcP, RegReg( dst, src) );
8160   ins_pipe( ialu_reg_reg );
8161 %}
8162 
8163 // And Register with Immediate
8164 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8165   match(Set dst (AndI dst src));
8166   effect(KILL cr);
8167 
8168   format %{ "AND    $dst,$src" %}
8169   opcode(0x81,0x04);  /* Opcode 81 /4 */
8170   // ins_encode( RegImm( dst, src) );
8171   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8172   ins_pipe( ialu_reg );
8173 %}
8174 
8175 // And Register with Memory
8176 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8177   match(Set dst (AndI dst (LoadI src)));
8178   effect(KILL cr);
8179 
8180   ins_cost(125);
8181   format %{ "AND    $dst,$src" %}
8182   opcode(0x23);
8183   ins_encode( OpcP, RegMem( dst, src) );
8184   ins_pipe( ialu_reg_mem );
8185 %}
8186 
8187 // And Memory with Register
8188 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8189   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8190   effect(KILL cr);
8191 
8192   ins_cost(150);
8193   format %{ "AND    $dst,$src" %}
8194   opcode(0x21);  /* Opcode 21 /r */
8195   ins_encode( OpcP, RegMem( src, dst ) );
8196   ins_pipe( ialu_mem_reg );
8197 %}
8198 
8199 // And Memory with Immediate
8200 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8201   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8202   effect(KILL cr);
8203 
8204   ins_cost(125);
8205   format %{ "AND    $dst,$src" %}
8206   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8207   // ins_encode( MemImm( dst, src) );
8208   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8209   ins_pipe( ialu_mem_imm );
8210 %}
8211 
8212 // BMI1 instructions
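// Illustrative sketch only (plain C): the bit-manipulation identities matched by
// the BMI1 rules below.  C2 hands them in as the generic AndI/XorI/AddI/SubI
// trees shown in the match() clauses; the predicates gate on UseBMI1Instructions.
//
//   uint32_t andn  (uint32_t x, uint32_t y) { return ~x & y;      }  // ANDN
//   uint32_t blsi  (uint32_t x)             { return (0 - x) & x; }  // isolate lowest set bit
//   uint32_t blsmsk(uint32_t x)             { return (x - 1) ^ x; }  // mask up to lowest set bit
//   uint32_t blsr  (uint32_t x)             { return (x - 1) & x; }  // clear lowest set bit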
8213 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8214   match(Set dst (AndI (XorI src1 minus_1) src2));
8215   predicate(UseBMI1Instructions);
8216   effect(KILL cr);
8217 
8218   format %{ "ANDNL  $dst, $src1, $src2" %}
8219 
8220   ins_encode %{
8221     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8222   %}
8223   ins_pipe(ialu_reg);
8224 %}
8225 
8226 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8227   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8228   predicate(UseBMI1Instructions);
8229   effect(KILL cr);
8230 
8231   ins_cost(125);
8232   format %{ "ANDNL  $dst, $src1, $src2" %}
8233 
8234   ins_encode %{
8235     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8236   %}
8237   ins_pipe(ialu_reg_mem);
8238 %}
8239 
8240 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8241   match(Set dst (AndI (SubI imm_zero src) src));
8242   predicate(UseBMI1Instructions);
8243   effect(KILL cr);
8244 
8245   format %{ "BLSIL  $dst, $src" %}
8246 
8247   ins_encode %{
8248     __ blsil($dst$$Register, $src$$Register);
8249   %}
8250   ins_pipe(ialu_reg);
8251 %}
8252 
8253 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8254   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8255   predicate(UseBMI1Instructions);
8256   effect(KILL cr);
8257 
8258   ins_cost(125);
8259   format %{ "BLSIL  $dst, $src" %}
8260 
8261   ins_encode %{
8262     __ blsil($dst$$Register, $src$$Address);
8263   %}
8264   ins_pipe(ialu_reg_mem);
8265 %}
8266 
8267 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8268 %{
8269   match(Set dst (XorI (AddI src minus_1) src));
8270   predicate(UseBMI1Instructions);
8271   effect(KILL cr);
8272 
8273   format %{ "BLSMSKL $dst, $src" %}
8274 
8275   ins_encode %{
8276     __ blsmskl($dst$$Register, $src$$Register);
8277   %}
8278 
8279   ins_pipe(ialu_reg);
8280 %}
8281 
8282 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8283 %{
8284   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8285   predicate(UseBMI1Instructions);
8286   effect(KILL cr);
8287 
8288   ins_cost(125);
8289   format %{ "BLSMSKL $dst, $src" %}
8290 
8291   ins_encode %{
8292     __ blsmskl($dst$$Register, $src$$Address);
8293   %}
8294 
8295   ins_pipe(ialu_reg_mem);
8296 %}
8297 
8298 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8299 %{
8300   match(Set dst (AndI (AddI src minus_1) src) );
8301   predicate(UseBMI1Instructions);
8302   effect(KILL cr);
8303 
8304   format %{ "BLSRL  $dst, $src" %}
8305 
8306   ins_encode %{
8307     __ blsrl($dst$$Register, $src$$Register);
8308   %}
8309 
8310   ins_pipe(ialu_reg);
8311 %}
8312 
8313 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8314 %{
8315   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8316   predicate(UseBMI1Instructions);
8317   effect(KILL cr);
8318 
8319   ins_cost(125);
8320   format %{ "BLSRL  $dst, $src" %}
8321 
8322   ins_encode %{
8323     __ blsrl($dst$$Register, $src$$Address);
8324   %}
8325 
8326   ins_pipe(ialu_reg_mem);
8327 %}
8328 
8329 // Or Instructions
8330 // Or Register with Register
8331 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8332   match(Set dst (OrI dst src));
8333   effect(KILL cr);
8334 
8335   size(2);
8336   format %{ "OR     $dst,$src" %}
8337   opcode(0x0B);
8338   ins_encode( OpcP, RegReg( dst, src) );
8339   ins_pipe( ialu_reg_reg );
8340 %}
8341 
8342 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8343   match(Set dst (OrI dst (CastP2X src)));
8344   effect(KILL cr);
8345 
8346   size(2);
8347   format %{ "OR     $dst,$src" %}
8348   opcode(0x0B);
8349   ins_encode( OpcP, RegReg( dst, src) );
8350   ins_pipe( ialu_reg_reg );
8351 %}
8352 
8353 
8354 // Or Register with Immediate
8355 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8356   match(Set dst (OrI dst src));
8357   effect(KILL cr);
8358 
8359   format %{ "OR     $dst,$src" %}
8360   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8361   // ins_encode( RegImm( dst, src) );
8362   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8363   ins_pipe( ialu_reg );
8364 %}
8365 
8366 // Or Register with Memory
8367 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8368   match(Set dst (OrI dst (LoadI src)));
8369   effect(KILL cr);
8370 
8371   ins_cost(125);
8372   format %{ "OR     $dst,$src" %}
8373   opcode(0x0B);
8374   ins_encode( OpcP, RegMem( dst, src) );
8375   ins_pipe( ialu_reg_mem );
8376 %}
8377 
8378 // Or Memory with Register
8379 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8380   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8381   effect(KILL cr);
8382 
8383   ins_cost(150);
8384   format %{ "OR     $dst,$src" %}
8385   opcode(0x09);  /* Opcode 09 /r */
8386   ins_encode( OpcP, RegMem( src, dst ) );
8387   ins_pipe( ialu_mem_reg );
8388 %}
8389 
8390 // Or Memory with Immediate
8391 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8392   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8393   effect(KILL cr);
8394 
8395   ins_cost(125);
8396   format %{ "OR     $dst,$src" %}
8397   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8398   // ins_encode( MemImm( dst, src) );
8399   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8400   ins_pipe( ialu_mem_imm );
8401 %}
8402 
8403 // ROL/ROR
8404 // ROL expand
8405 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8406   effect(USE_DEF dst, USE shift, KILL cr);
8407 
8408   format %{ "ROL    $dst, $shift" %}
8409   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8410   ins_encode( OpcP, RegOpc( dst ));
8411   ins_pipe( ialu_reg );
8412 %}
8413 
8414 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8415   effect(USE_DEF dst, USE shift, KILL cr);
8416 
8417   format %{ "ROL    $dst, $shift" %}
8418   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8419   ins_encode( RegOpcImm(dst, shift) );
8420   ins_pipe(ialu_reg);
8421 %}
8422 
8423 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8424   effect(USE_DEF dst, USE shift, KILL cr);
8425 
8426   format %{ "ROL    $dst, $shift" %}
8427   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8428   ins_encode(OpcP, RegOpc(dst));
8429   ins_pipe( ialu_reg_reg );
8430 %}
8431 // end of ROL expand
8432 
8433 // ROL 32bit by one once
8434 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8435   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8436 
8437   expand %{
8438     rolI_eReg_imm1(dst, lshift, cr);
8439   %}
8440 %}
8441 
8442 // ROL 32bit var by imm8 once
8443 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8444   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8445   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8446 
8447   expand %{
8448     rolI_eReg_imm8(dst, lshift, cr);
8449   %}
8450 %}
8451 
8452 // ROL 32bit var by var once
8453 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8454   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8455 
8456   expand %{
8457     rolI_eReg_CL(dst, shift, cr);
8458   %}
8459 %}
8460 
8461 // ROL 32bit var by var once
8462 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8463   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8464 
8465   expand %{
8466     rolI_eReg_CL(dst, shift, cr);
8467   %}
8468 %}
8469 
8470 // ROR expand
8471 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8472   effect(USE_DEF dst, USE shift, KILL cr);
8473 
8474   format %{ "ROR    $dst, $shift" %}
8475   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8476   ins_encode( OpcP, RegOpc( dst ) );
8477   ins_pipe( ialu_reg );
8478 %}
8479 
8480 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8481   effect (USE_DEF dst, USE shift, KILL cr);
8482 
8483   format %{ "ROR    $dst, $shift" %}
8484   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8485   ins_encode( RegOpcImm(dst, shift) );
8486   ins_pipe( ialu_reg );
8487 %}
8488 
8489 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8490   effect(USE_DEF dst, USE shift, KILL cr);
8491 
8492   format %{ "ROR    $dst, $shift" %}
8493   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8494   ins_encode(OpcP, RegOpc(dst));
8495   ins_pipe( ialu_reg_reg );
8496 %}
8497 // end of ROR expand
8498 
// ROR 32bit by one once
8500 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8501   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8502 
8503   expand %{
8504     rorI_eReg_imm1(dst, rshift, cr);
8505   %}
8506 %}
8507 
8508 // ROR 32bit by immI8 once
8509 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8510   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8511   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8512 
8513   expand %{
8514     rorI_eReg_imm8(dst, rshift, cr);
8515   %}
8516 %}
8517 
8518 // ROR 32bit var by var once
8519 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8520   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8521 
8522   expand %{
8523     rorI_eReg_CL(dst, shift, cr);
8524   %}
8525 %}
8526 
8527 // ROR 32bit var by var once
8528 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8529   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8530 
8531   expand %{
8532     rorI_eReg_CL(dst, shift, cr);
8533   %}
8534 %}
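// Illustrative sketch only (plain C, for rotate counts 1..31): the rotate idioms
// that the ROL/ROR match rules above recognize.  The imm8 forms additionally
// require the two shift counts to sum to 0 mod 32 (see the predicates).
//
//   uint32_t rotl(uint32_t x, unsigned s) { return (x << s) | (x >> (32 - s)); }  // -> ROL
//   uint32_t rotr(uint32_t x, unsigned s) { return (x >> s) | (x << (32 - s)); }  // -> ROR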
8535 
8536 // Xor Instructions
8537 // Xor Register with Register
8538 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8539   match(Set dst (XorI dst src));
8540   effect(KILL cr);
8541 
8542   size(2);
8543   format %{ "XOR    $dst,$src" %}
8544   opcode(0x33);
8545   ins_encode( OpcP, RegReg( dst, src) );
8546   ins_pipe( ialu_reg_reg );
8547 %}
8548 
8549 // Xor Register with Immediate -1
8550 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8551   match(Set dst (XorI dst imm));
8552 
8553   size(2);
8554   format %{ "NOT    $dst" %}
8555   ins_encode %{
8556      __ notl($dst$$Register);
8557   %}
8558   ins_pipe( ialu_reg );
8559 %}
8560 
8561 // Xor Register with Immediate
8562 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8563   match(Set dst (XorI dst src));
8564   effect(KILL cr);
8565 
8566   format %{ "XOR    $dst,$src" %}
8567   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8568   // ins_encode( RegImm( dst, src) );
8569   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8570   ins_pipe( ialu_reg );
8571 %}
8572 
8573 // Xor Register with Memory
8574 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8575   match(Set dst (XorI dst (LoadI src)));
8576   effect(KILL cr);
8577 
8578   ins_cost(125);
8579   format %{ "XOR    $dst,$src" %}
8580   opcode(0x33);
8581   ins_encode( OpcP, RegMem(dst, src) );
8582   ins_pipe( ialu_reg_mem );
8583 %}
8584 
8585 // Xor Memory with Register
8586 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8587   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8588   effect(KILL cr);
8589 
8590   ins_cost(150);
8591   format %{ "XOR    $dst,$src" %}
8592   opcode(0x31);  /* Opcode 31 /r */
8593   ins_encode( OpcP, RegMem( src, dst ) );
8594   ins_pipe( ialu_mem_reg );
8595 %}
8596 
8597 // Xor Memory with Immediate
8598 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8599   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8600   effect(KILL cr);
8601 
8602   ins_cost(125);
8603   format %{ "XOR    $dst,$src" %}
8604   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8605   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8606   ins_pipe( ialu_mem_imm );
8607 %}
8608 
8609 //----------Convert Int to Boolean---------------------------------------------
8610 
8611 instruct movI_nocopy(rRegI dst, rRegI src) %{
8612   effect( DEF dst, USE src );
8613   format %{ "MOV    $dst,$src" %}
8614   ins_encode( enc_Copy( dst, src) );
8615   ins_pipe( ialu_reg_reg );
8616 %}
8617 
8618 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8619   effect( USE_DEF dst, USE src, KILL cr );
8620 
8621   size(4);
8622   format %{ "NEG    $dst\n\t"
8623             "ADC    $dst,$src" %}
8624   ins_encode( neg_reg(dst),
8625               OpcRegReg(0x13,dst,src) );
8626   ins_pipe( ialu_reg_reg_long );
8627 %}
8628 
8629 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8630   match(Set dst (Conv2B src));
8631 
8632   expand %{
8633     movI_nocopy(dst,src);
8634     ci2b(dst,src,cr);
8635   %}
8636 %}
8637 
8638 instruct movP_nocopy(rRegI dst, eRegP src) %{
8639   effect( DEF dst, USE src );
8640   format %{ "MOV    $dst,$src" %}
8641   ins_encode( enc_Copy( dst, src) );
8642   ins_pipe( ialu_reg_reg );
8643 %}
8644 
8645 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8646   effect( USE_DEF dst, USE src, KILL cr );
8647   format %{ "NEG    $dst\n\t"
8648             "ADC    $dst,$src" %}
8649   ins_encode( neg_reg(dst),
8650               OpcRegReg(0x13,dst,src) );
8651   ins_pipe( ialu_reg_reg_long );
8652 %}
8653 
8654 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8655   match(Set dst (Conv2B src));
8656 
8657   expand %{
8658     movP_nocopy(dst,src);
8659     cp2b(dst,src,cr);
8660   %}
8661 %}
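// Illustrative sketch only (plain C): what the NEG/ADC pair in ci2b/cp2b computes.
// After dst is copied from src, NEG dst sets CF iff src was non-zero and leaves
// -src in dst; ADC dst,src then evaluates (-src) + src + CF, i.e. just CF.
//
//   int conv2b(int src) {            // same trick for pointers in convP2B
//     int dst = src;                 // movI_nocopy / movP_nocopy
//     int cf  = (src != 0);          // NEG dst (carry flag)
//     return -src + src + cf;        // ADC dst,src  ->  0 or 1
//   }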
8662 
8663 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8664   match(Set dst (CmpLTMask p q));
8665   effect(KILL cr);
8666   ins_cost(400);
8667 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8669   format %{ "XOR    $dst,$dst\n\t"
8670             "CMP    $p,$q\n\t"
8671             "SETlt  $dst\n\t"
8672             "NEG    $dst" %}
8673   ins_encode %{
8674     Register Rp = $p$$Register;
8675     Register Rq = $q$$Register;
8676     Register Rd = $dst$$Register;
8678     __ xorl(Rd, Rd);
8679     __ cmpl(Rp, Rq);
8680     __ setb(Assembler::less, Rd);
8681     __ negl(Rd);
8682   %}
8683 
8684   ins_pipe(pipe_slow);
8685 %}
8686 
8687 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8688   match(Set dst (CmpLTMask dst zero));
8689   effect(DEF dst, KILL cr);
8690   ins_cost(100);
8691 
8692   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8693   ins_encode %{
8694   __ sarl($dst$$Register, 31);
8695   %}
8696   ins_pipe(ialu_reg);
8697 %}
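// Illustrative sketch only (plain C): CmpLTMask produces an all-ones/all-zeros mask
// from a signed compare.  The general rule builds it with SETlt/NEG; the
// compare-against-zero rule needs only an arithmetic shift of the sign bit.
//
//   int cmpLTMask (int p, int q) { return -(p < q); }   // XOR/CMP/SETlt/NEG: 0 or -1
//   int cmpLTMask0(int x)        { return x >> 31;   }  // SAR x,31 smears the sign bit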
8698 
8699 /* better to save a register than avoid a branch */
8700 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8701   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8702   effect(KILL cr);
8703   ins_cost(400);
8704   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8705             "JGE    done\n\t"
8706             "ADD    $p,$y\n"
8707             "done:  " %}
8708   ins_encode %{
8709     Register Rp = $p$$Register;
8710     Register Rq = $q$$Register;
8711     Register Ry = $y$$Register;
8712     Label done;
8713     __ subl(Rp, Rq);
8714     __ jccb(Assembler::greaterEqual, done);
8715     __ addl(Rp, Ry);
8716     __ bind(done);
8717   %}
8718 
8719   ins_pipe(pipe_cmplt);
8720 %}
8721 
8722 /* better to save a register than avoid a branch */
8723 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8724   match(Set y (AndI (CmpLTMask p q) y));
8725   effect(KILL cr);
8726 
8727   ins_cost(300);
8728 
8729   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8730             "JLT      done\n\t"
8731             "XORL     $y, $y\n"
8732             "done:  " %}
8733   ins_encode %{
8734     Register Rp = $p$$Register;
8735     Register Rq = $q$$Register;
8736     Register Ry = $y$$Register;
8737     Label done;
8738     __ cmpl(Rp, Rq);
8739     __ jccb(Assembler::less, done);
8740     __ xorl(Ry, Ry);
8741     __ bind(done);
8742   %}
8743 
8744   ins_pipe(pipe_cmplt);
8745 %}
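// Illustrative sketch only (plain C): the two fused CmpLTMask shapes above, which
// are cheaper as a short forward branch than as a materialized mask.
//
//   int cadd_cmpLTMask(int p, int q, int y) {   // p = (p - q) + ((p < q) ? y : 0)
//     int diff = p - q;                         // SUB p,q  (flags = signed compare)
//     if (p < q) diff += y;                     // JGE done ; ADD p,y
//     return diff;
//   }
//
//   int and_cmpLTMask(int p, int q, int y) {    // y = (p < q) ? y : 0
//     return (p < q) ? y : 0;                   // CMP p,q ; JLT done ; XOR y,y
//   }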
8746 
8747 /* If I enable this, I encourage spilling in the inner loop of compress.
8748 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8749   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8750 */
8751 //----------Overflow Math Instructions-----------------------------------------
8752 
8753 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8754 %{
8755   match(Set cr (OverflowAddI op1 op2));
8756   effect(DEF cr, USE_KILL op1, USE op2);
8757 
8758   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8759 
8760   ins_encode %{
8761     __ addl($op1$$Register, $op2$$Register);
8762   %}
8763   ins_pipe(ialu_reg_reg);
8764 %}
8765 
8766 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8767 %{
8768   match(Set cr (OverflowAddI op1 op2));
8769   effect(DEF cr, USE_KILL op1, USE op2);
8770 
8771   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8772 
8773   ins_encode %{
8774     __ addl($op1$$Register, $op2$$constant);
8775   %}
8776   ins_pipe(ialu_reg_reg);
8777 %}
8778 
8779 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8780 %{
8781   match(Set cr (OverflowSubI op1 op2));
8782 
8783   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8784   ins_encode %{
8785     __ cmpl($op1$$Register, $op2$$Register);
8786   %}
8787   ins_pipe(ialu_reg_reg);
8788 %}
8789 
8790 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8791 %{
8792   match(Set cr (OverflowSubI op1 op2));
8793 
8794   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8795   ins_encode %{
8796     __ cmpl($op1$$Register, $op2$$constant);
8797   %}
8798   ins_pipe(ialu_reg_reg);
8799 %}
8800 
8801 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8802 %{
8803   match(Set cr (OverflowSubI zero op2));
8804   effect(DEF cr, USE_KILL op2);
8805 
8806   format %{ "NEG    $op2\t# overflow check int" %}
8807   ins_encode %{
8808     __ negl($op2$$Register);
8809   %}
8810   ins_pipe(ialu_reg_reg);
8811 %}
8812 
8813 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8814 %{
8815   match(Set cr (OverflowMulI op1 op2));
8816   effect(DEF cr, USE_KILL op1, USE op2);
8817 
8818   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8819   ins_encode %{
8820     __ imull($op1$$Register, $op2$$Register);
8821   %}
8822   ins_pipe(ialu_reg_reg_alu0);
8823 %}
8824 
8825 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8826 %{
8827   match(Set cr (OverflowMulI op1 op2));
8828   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8829 
8830   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8831   ins_encode %{
8832     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8833   %}
8834   ins_pipe(ialu_reg_reg_alu0);
8835 %}
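// Illustrative sketch only (plain C): the Overflow* rules above let the Math.*Exact
// intrinsics branch directly on the OF flag produced by ADD/CMP/NEG/IMUL instead
// of recomputing the condition.  A portable statement of what the flag reports
// for the 32-bit add case:
//
//   bool add_overflows(int32_t a, int32_t b) {
//     int64_t wide = (int64_t)a + b;
//     return wide != (int32_t)wide;     // true iff OF would be set by ADD a,b
//   }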
8836 
8837 //----------Long Instructions------------------------------------------------
8838 // Add Long Register with Register
8839 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8840   match(Set dst (AddL dst src));
8841   effect(KILL cr);
8842   ins_cost(200);
8843   format %{ "ADD    $dst.lo,$src.lo\n\t"
8844             "ADC    $dst.hi,$src.hi" %}
8845   opcode(0x03, 0x13);
8846   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8847   ins_pipe( ialu_reg_reg_long );
8848 %}
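// Illustrative sketch only (plain C): 64-bit add on a 32-bit machine is a carry
// chain over the halves, which is why the long ALU rules pair a lo-half op with
// its carry-propagating hi-half twin (ADD/ADC here, SUB/SBB below).
//
//   void add64(uint32_t* hi, uint32_t* lo, uint32_t src_hi, uint32_t src_lo) {
//     uint32_t old_lo = *lo;
//     *lo += src_lo;                        // ADD dst.lo, src.lo
//     *hi += src_hi + (*lo < old_lo);       // ADC dst.hi, src.hi  (carry = unsigned wrap)
//   }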
8849 
8850 // Add Long Register with Immediate
8851 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8852   match(Set dst (AddL dst src));
8853   effect(KILL cr);
8854   format %{ "ADD    $dst.lo,$src.lo\n\t"
8855             "ADC    $dst.hi,$src.hi" %}
8856   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8857   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8858   ins_pipe( ialu_reg_long );
8859 %}
8860 
8861 // Add Long Register with Memory
8862 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8863   match(Set dst (AddL dst (LoadL mem)));
8864   effect(KILL cr);
8865   ins_cost(125);
8866   format %{ "ADD    $dst.lo,$mem\n\t"
8867             "ADC    $dst.hi,$mem+4" %}
8868   opcode(0x03, 0x13);
8869   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8870   ins_pipe( ialu_reg_long_mem );
8871 %}
8872 
8873 // Subtract Long Register with Register.
8874 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8875   match(Set dst (SubL dst src));
8876   effect(KILL cr);
8877   ins_cost(200);
8878   format %{ "SUB    $dst.lo,$src.lo\n\t"
8879             "SBB    $dst.hi,$src.hi" %}
8880   opcode(0x2B, 0x1B);
8881   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8882   ins_pipe( ialu_reg_reg_long );
8883 %}
8884 
8885 // Subtract Long Register with Immediate
8886 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8887   match(Set dst (SubL dst src));
8888   effect(KILL cr);
8889   format %{ "SUB    $dst.lo,$src.lo\n\t"
8890             "SBB    $dst.hi,$src.hi" %}
8891   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8892   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8893   ins_pipe( ialu_reg_long );
8894 %}
8895 
8896 // Subtract Long Register with Memory
8897 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8898   match(Set dst (SubL dst (LoadL mem)));
8899   effect(KILL cr);
8900   ins_cost(125);
8901   format %{ "SUB    $dst.lo,$mem\n\t"
8902             "SBB    $dst.hi,$mem+4" %}
8903   opcode(0x2B, 0x1B);
8904   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8905   ins_pipe( ialu_reg_long_mem );
8906 %}
8907 
8908 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8909   match(Set dst (SubL zero dst));
8910   effect(KILL cr);
8911   ins_cost(300);
8912   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8913   ins_encode( neg_long(dst) );
8914   ins_pipe( ialu_reg_reg_long );
8915 %}
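// Illustrative sketch only (plain C): two's-complement negation of a long in
// halves, matching the NEG/NEG/SBB sequence emitted by neg_long above.
//
//   void neg64(uint32_t* hi, uint32_t* lo) {
//     *hi = 0 - *hi;                        // NEG dst.hi
//     uint32_t borrow = (*lo != 0);         // NEG dst.lo sets CF iff lo was non-zero
//     *lo = 0 - *lo;
//     *hi -= borrow;                        // SBB dst.hi, 0
//   }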
8916 
8917 // And Long Register with Register
8918 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8919   match(Set dst (AndL dst src));
8920   effect(KILL cr);
8921   format %{ "AND    $dst.lo,$src.lo\n\t"
8922             "AND    $dst.hi,$src.hi" %}
8923   opcode(0x23,0x23);
8924   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8925   ins_pipe( ialu_reg_reg_long );
8926 %}
8927 
8928 // And Long Register with Immediate
8929 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8930   match(Set dst (AndL dst src));
8931   effect(KILL cr);
8932   format %{ "AND    $dst.lo,$src.lo\n\t"
8933             "AND    $dst.hi,$src.hi" %}
8934   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8935   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8936   ins_pipe( ialu_reg_long );
8937 %}
8938 
8939 // And Long Register with Memory
8940 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8941   match(Set dst (AndL dst (LoadL mem)));
8942   effect(KILL cr);
8943   ins_cost(125);
8944   format %{ "AND    $dst.lo,$mem\n\t"
8945             "AND    $dst.hi,$mem+4" %}
8946   opcode(0x23, 0x23);
8947   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8948   ins_pipe( ialu_reg_long_mem );
8949 %}
8950 
8951 // BMI1 instructions
8952 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8953   match(Set dst (AndL (XorL src1 minus_1) src2));
8954   predicate(UseBMI1Instructions);
8955   effect(KILL cr, TEMP dst);
8956 
8957   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8958             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8959          %}
8960 
8961   ins_encode %{
8962     Register Rdst = $dst$$Register;
8963     Register Rsrc1 = $src1$$Register;
8964     Register Rsrc2 = $src2$$Register;
8965     __ andnl(Rdst, Rsrc1, Rsrc2);
8966     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8967   %}
8968   ins_pipe(ialu_reg_reg_long);
8969 %}
8970 
8971 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8972   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8973   predicate(UseBMI1Instructions);
8974   effect(KILL cr, TEMP dst);
8975 
8976   ins_cost(125);
8977   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8978             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8979          %}
8980 
8981   ins_encode %{
8982     Register Rdst = $dst$$Register;
8983     Register Rsrc1 = $src1$$Register;
8984     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8985 
8986     __ andnl(Rdst, Rsrc1, $src2$$Address);
8987     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8988   %}
8989   ins_pipe(ialu_reg_mem);
8990 %}
8991 
8992 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8993   match(Set dst (AndL (SubL imm_zero src) src));
8994   predicate(UseBMI1Instructions);
8995   effect(KILL cr, TEMP dst);
8996 
8997   format %{ "MOVL   $dst.hi, 0\n\t"
8998             "BLSIL  $dst.lo, $src.lo\n\t"
8999             "JNZ    done\n\t"
9000             "BLSIL  $dst.hi, $src.hi\n"
9001             "done:"
9002          %}
9003 
9004   ins_encode %{
9005     Label done;
9006     Register Rdst = $dst$$Register;
9007     Register Rsrc = $src$$Register;
9008     __ movl(HIGH_FROM_LOW(Rdst), 0);
9009     __ blsil(Rdst, Rsrc);
9010     __ jccb(Assembler::notZero, done);
9011     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9012     __ bind(done);
9013   %}
9014   ins_pipe(ialu_reg);
9015 %}
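// Illustrative sketch only (plain C): isolating the lowest set bit of a long with
// two 32-bit BLSI steps, as in the JNZ-chained sequence above.  If the low half
// has any set bit the answer lives there and the high half stays zero; otherwise
// the high half is processed.
//
//   uint64_t blsi64(uint32_t hi, uint32_t lo) {
//     uint32_t r_lo = (0 - lo) & lo;        // BLSIL dst.lo, src.lo  (ZF iff lo == 0)
//     if (r_lo != 0) return r_lo;           // JNZ done  (dst.hi was pre-zeroed)
//     return (uint64_t)((0 - hi) & hi) << 32;
//   }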
9016 
9017 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9018   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9019   predicate(UseBMI1Instructions);
9020   effect(KILL cr, TEMP dst);
9021 
9022   ins_cost(125);
9023   format %{ "MOVL   $dst.hi, 0\n\t"
9024             "BLSIL  $dst.lo, $src\n\t"
9025             "JNZ    done\n\t"
9026             "BLSIL  $dst.hi, $src+4\n"
9027             "done:"
9028          %}
9029 
9030   ins_encode %{
9031     Label done;
9032     Register Rdst = $dst$$Register;
9033     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9034 
9035     __ movl(HIGH_FROM_LOW(Rdst), 0);
9036     __ blsil(Rdst, $src$$Address);
9037     __ jccb(Assembler::notZero, done);
9038     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9039     __ bind(done);
9040   %}
9041   ins_pipe(ialu_reg_mem);
9042 %}
9043 
9044 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9045 %{
9046   match(Set dst (XorL (AddL src minus_1) src));
9047   predicate(UseBMI1Instructions);
9048   effect(KILL cr, TEMP dst);
9049 
9050   format %{ "MOVL    $dst.hi, 0\n\t"
9051             "BLSMSKL $dst.lo, $src.lo\n\t"
9052             "JNC     done\n\t"
9053             "BLSMSKL $dst.hi, $src.hi\n"
9054             "done:"
9055          %}
9056 
9057   ins_encode %{
9058     Label done;
9059     Register Rdst = $dst$$Register;
9060     Register Rsrc = $src$$Register;
9061     __ movl(HIGH_FROM_LOW(Rdst), 0);
9062     __ blsmskl(Rdst, Rsrc);
9063     __ jccb(Assembler::carryClear, done);
9064     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9065     __ bind(done);
9066   %}
9067 
9068   ins_pipe(ialu_reg);
9069 %}
9070 
9071 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9072 %{
9073   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9074   predicate(UseBMI1Instructions);
9075   effect(KILL cr, TEMP dst);
9076 
9077   ins_cost(125);
9078   format %{ "MOVL    $dst.hi, 0\n\t"
9079             "BLSMSKL $dst.lo, $src\n\t"
9080             "JNC     done\n\t"
9081             "BLSMSKL $dst.hi, $src+4\n"
9082             "done:"
9083          %}
9084 
9085   ins_encode %{
9086     Label done;
9087     Register Rdst = $dst$$Register;
9088     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9089 
9090     __ movl(HIGH_FROM_LOW(Rdst), 0);
9091     __ blsmskl(Rdst, $src$$Address);
9092     __ jccb(Assembler::carryClear, done);
9093     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9094     __ bind(done);
9095   %}
9096 
9097   ins_pipe(ialu_reg_mem);
9098 %}
9099 
9100 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9101 %{
9102   match(Set dst (AndL (AddL src minus_1) src) );
9103   predicate(UseBMI1Instructions);
9104   effect(KILL cr, TEMP dst);
9105 
9106   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9107             "BLSRL  $dst.lo, $src.lo\n\t"
9108             "JNC    done\n\t"
9109             "BLSRL  $dst.hi, $src.hi\n"
9110             "done:"
9111   %}
9112 
9113   ins_encode %{
9114     Label done;
9115     Register Rdst = $dst$$Register;
9116     Register Rsrc = $src$$Register;
9117     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9118     __ blsrl(Rdst, Rsrc);
9119     __ jccb(Assembler::carryClear, done);
9120     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9121     __ bind(done);
9122   %}
9123 
9124   ins_pipe(ialu_reg);
9125 %}
9126 
9127 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9128 %{
9129   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9130   predicate(UseBMI1Instructions);
9131   effect(KILL cr, TEMP dst);
9132 
9133   ins_cost(125);
9134   format %{ "MOVL   $dst.hi, $src+4\n\t"
9135             "BLSRL  $dst.lo, $src\n\t"
9136             "JNC    done\n\t"
9137             "BLSRL  $dst.hi, $src+4\n"
9138             "done:"
9139   %}
9140 
9141   ins_encode %{
9142     Label done;
9143     Register Rdst = $dst$$Register;
9144     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9145     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9146     __ blsrl(Rdst, $src$$Address);
9147     __ jccb(Assembler::carryClear, done);
9148     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9149     __ bind(done);
9150   %}
9151 
9152   ins_pipe(ialu_reg_mem);
9153 %}
9154 
9155 // Or Long Register with Register
9156 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9157   match(Set dst (OrL dst src));
9158   effect(KILL cr);
9159   format %{ "OR     $dst.lo,$src.lo\n\t"
9160             "OR     $dst.hi,$src.hi" %}
9161   opcode(0x0B,0x0B);
9162   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9163   ins_pipe( ialu_reg_reg_long );
9164 %}
9165 
9166 // Or Long Register with Immediate
9167 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9168   match(Set dst (OrL dst src));
9169   effect(KILL cr);
9170   format %{ "OR     $dst.lo,$src.lo\n\t"
9171             "OR     $dst.hi,$src.hi" %}
9172   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9173   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9174   ins_pipe( ialu_reg_long );
9175 %}
9176 
9177 // Or Long Register with Memory
9178 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9179   match(Set dst (OrL dst (LoadL mem)));
9180   effect(KILL cr);
9181   ins_cost(125);
9182   format %{ "OR     $dst.lo,$mem\n\t"
9183             "OR     $dst.hi,$mem+4" %}
9184   opcode(0x0B,0x0B);
9185   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9186   ins_pipe( ialu_reg_long_mem );
9187 %}
9188 
9189 // Xor Long Register with Register
9190 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9191   match(Set dst (XorL dst src));
9192   effect(KILL cr);
9193   format %{ "XOR    $dst.lo,$src.lo\n\t"
9194             "XOR    $dst.hi,$src.hi" %}
9195   opcode(0x33,0x33);
9196   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9197   ins_pipe( ialu_reg_reg_long );
9198 %}
9199 
9200 // Xor Long Register with Immediate -1
9201 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9202   match(Set dst (XorL dst imm));
9203   format %{ "NOT    $dst.lo\n\t"
9204             "NOT    $dst.hi" %}
9205   ins_encode %{
9206      __ notl($dst$$Register);
9207      __ notl(HIGH_FROM_LOW($dst$$Register));
9208   %}
9209   ins_pipe( ialu_reg_long );
9210 %}
9211 
9212 // Xor Long Register with Immediate
9213 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9214   match(Set dst (XorL dst src));
9215   effect(KILL cr);
9216   format %{ "XOR    $dst.lo,$src.lo\n\t"
9217             "XOR    $dst.hi,$src.hi" %}
9218   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9219   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9220   ins_pipe( ialu_reg_long );
9221 %}
9222 
9223 // Xor Long Register with Memory
9224 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9225   match(Set dst (XorL dst (LoadL mem)));
9226   effect(KILL cr);
9227   ins_cost(125);
9228   format %{ "XOR    $dst.lo,$mem\n\t"
9229             "XOR    $dst.hi,$mem+4" %}
9230   opcode(0x33,0x33);
9231   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9232   ins_pipe( ialu_reg_long_mem );
9233 %}
9234 
9235 // Shift Left Long by 1
9236 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9237   predicate(UseNewLongLShift);
9238   match(Set dst (LShiftL dst cnt));
9239   effect(KILL cr);
9240   ins_cost(100);
9241   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9242             "ADC    $dst.hi,$dst.hi" %}
9243   ins_encode %{
9244     __ addl($dst$$Register,$dst$$Register);
9245     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9246   %}
9247   ins_pipe( ialu_reg_long );
9248 %}
9249 
9250 // Shift Left Long by 2
9251 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9252   predicate(UseNewLongLShift);
9253   match(Set dst (LShiftL dst cnt));
9254   effect(KILL cr);
9255   ins_cost(100);
9256   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9257             "ADC    $dst.hi,$dst.hi\n\t"
9258             "ADD    $dst.lo,$dst.lo\n\t"
9259             "ADC    $dst.hi,$dst.hi" %}
9260   ins_encode %{
9261     __ addl($dst$$Register,$dst$$Register);
9262     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9263     __ addl($dst$$Register,$dst$$Register);
9264     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9265   %}
9266   ins_pipe( ialu_reg_long );
9267 %}
9268 
9269 // Shift Left Long by 3
9270 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9271   predicate(UseNewLongLShift);
9272   match(Set dst (LShiftL dst cnt));
9273   effect(KILL cr);
9274   ins_cost(100);
9275   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9276             "ADC    $dst.hi,$dst.hi\n\t"
9277             "ADD    $dst.lo,$dst.lo\n\t"
9278             "ADC    $dst.hi,$dst.hi\n\t"
9279             "ADD    $dst.lo,$dst.lo\n\t"
9280             "ADC    $dst.hi,$dst.hi" %}
9281   ins_encode %{
9282     __ addl($dst$$Register,$dst$$Register);
9283     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9284     __ addl($dst$$Register,$dst$$Register);
9285     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9286     __ addl($dst$$Register,$dst$$Register);
9287     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9288   %}
9289   ins_pipe( ialu_reg_long );
9290 %}
9291 
9292 // Shift Left Long by 1-31
9293 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9294   match(Set dst (LShiftL dst cnt));
9295   effect(KILL cr);
9296   ins_cost(200);
9297   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9298             "SHL    $dst.lo,$cnt" %}
9299   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9300   ins_encode( move_long_small_shift(dst,cnt) );
9301   ins_pipe( ialu_reg_long );
9302 %}
9303 
9304 // Shift Left Long by 32-63
9305 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9306   match(Set dst (LShiftL dst cnt));
9307   effect(KILL cr);
9308   ins_cost(300);
9309   format %{ "MOV    $dst.hi,$dst.lo\n"
9310           "\tSHL    $dst.hi,$cnt-32\n"
9311           "\tXOR    $dst.lo,$dst.lo" %}
9312   opcode(0xC1, 0x4);  /* C1 /4 ib */
9313   ins_encode( move_long_big_shift_clr(dst,cnt) );
9314   ins_pipe( ialu_reg_long );
9315 %}
9316 
9317 // Shift Left Long by variable
9318 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9319   match(Set dst (LShiftL dst shift));
9320   effect(KILL cr);
9321   ins_cost(500+200);
9322   size(17);
9323   format %{ "TEST   $shift,32\n\t"
9324             "JEQ,s  small\n\t"
9325             "MOV    $dst.hi,$dst.lo\n\t"
9326             "XOR    $dst.lo,$dst.lo\n"
9327     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9328             "SHL    $dst.lo,$shift" %}
9329   ins_encode( shift_left_long( dst, shift ) );
9330   ins_pipe( pipe_slow );
9331 %}
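// Illustrative sketch only (plain C): a 64-bit left shift built from 32-bit halves.
// For counts 1-31 SHLD funnels bits from lo into hi; for counts 32-63 lo becomes
// the new hi (shifted by count-32) and lo is cleared.  The variable-count rule
// above tests bit 5 of ECX to choose between the two shapes.
//
//   void shl64(uint32_t* hi, uint32_t* lo, unsigned s) {   // 0 < s < 64
//     if (s < 32) {
//       *hi = (*hi << s) | (*lo >> (32 - s));   // SHLD dst.hi, dst.lo, s
//       *lo <<= s;                              // SHL  dst.lo, s
//     } else {
//       *hi = *lo << (s - 32);                  // MOV dst.hi,dst.lo ; SHL dst.hi, s-32
//       *lo = 0;                                // XOR dst.lo,dst.lo
//     }
//   }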
9332 
9333 // Shift Right Long by 1-31
9334 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9335   match(Set dst (URShiftL dst cnt));
9336   effect(KILL cr);
9337   ins_cost(200);
9338   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9339             "SHR    $dst.hi,$cnt" %}
9340   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9341   ins_encode( move_long_small_shift(dst,cnt) );
9342   ins_pipe( ialu_reg_long );
9343 %}
9344 
9345 // Shift Right Long by 32-63
9346 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9347   match(Set dst (URShiftL dst cnt));
9348   effect(KILL cr);
9349   ins_cost(300);
9350   format %{ "MOV    $dst.lo,$dst.hi\n"
9351           "\tSHR    $dst.lo,$cnt-32\n"
9352           "\tXOR    $dst.hi,$dst.hi" %}
9353   opcode(0xC1, 0x5);  /* C1 /5 ib */
9354   ins_encode( move_long_big_shift_clr(dst,cnt) );
9355   ins_pipe( ialu_reg_long );
9356 %}
9357 
9358 // Shift Right Long by variable
9359 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9360   match(Set dst (URShiftL dst shift));
9361   effect(KILL cr);
9362   ins_cost(600);
9363   size(17);
9364   format %{ "TEST   $shift,32\n\t"
9365             "JEQ,s  small\n\t"
9366             "MOV    $dst.lo,$dst.hi\n\t"
9367             "XOR    $dst.hi,$dst.hi\n"
9368     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9369             "SHR    $dst.hi,$shift" %}
9370   ins_encode( shift_right_long( dst, shift ) );
9371   ins_pipe( pipe_slow );
9372 %}
9373 
9374 // Shift Right Long by 1-31
9375 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9376   match(Set dst (RShiftL dst cnt));
9377   effect(KILL cr);
9378   ins_cost(200);
9379   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9380             "SAR    $dst.hi,$cnt" %}
9381   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9382   ins_encode( move_long_small_shift(dst,cnt) );
9383   ins_pipe( ialu_reg_long );
9384 %}
9385 
9386 // Shift Right Long by 32-63
9387 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9388   match(Set dst (RShiftL dst cnt));
9389   effect(KILL cr);
9390   ins_cost(300);
9391   format %{ "MOV    $dst.lo,$dst.hi\n"
9392           "\tSAR    $dst.lo,$cnt-32\n"
9393           "\tSAR    $dst.hi,31" %}
9394   opcode(0xC1, 0x7);  /* C1 /7 ib */
9395   ins_encode( move_long_big_shift_sign(dst,cnt) );
9396   ins_pipe( ialu_reg_long );
9397 %}
9398 
9399 // Shift Right arithmetic Long by variable
9400 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9401   match(Set dst (RShiftL dst shift));
9402   effect(KILL cr);
9403   ins_cost(600);
9404   size(18);
9405   format %{ "TEST   $shift,32\n\t"
9406             "JEQ,s  small\n\t"
9407             "MOV    $dst.lo,$dst.hi\n\t"
9408             "SAR    $dst.hi,31\n"
9409     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9410             "SAR    $dst.hi,$shift" %}
9411   ins_encode( shift_right_arith_long( dst, shift ) );
9412   ins_pipe( pipe_slow );
9413 %}
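// Illustrative sketch only (plain C): the arithmetic right-shift rules above in
// terms of 32-bit halves.  For counts >= 32 the old hi word, shifted, becomes the
// new lo, and the new hi is the sign bit smeared across all 32 bits (SAR dst.hi,31).
//
//   void sar64(int32_t* hi, uint32_t* lo, unsigned s) {    // 0 < s < 64
//     if (s < 32) {
//       *lo = (*lo >> s) | ((uint32_t)*hi << (32 - s));    // SHRD dst.lo, dst.hi, s
//       *hi >>= s;                                         // SAR  dst.hi, s
//     } else {
//       *lo = (uint32_t)(*hi >> (s - 32));                 // MOV dst.lo,dst.hi ; SAR dst.lo
//       *hi >>= 31;                                        // SAR dst.hi, 31
//     }
//   }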
9414 
9415 
9416 //----------Double Instructions------------------------------------------------
9417 // Double Math
9418 
9419 // Compare & branch
9420 
// P6 version of double compare, sets condition codes in EFLAGS
9422 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9423   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9424   match(Set cr (CmpD src1 src2));
9425   effect(KILL rax);
9426   ins_cost(150);
9427   format %{ "FLD    $src1\n\t"
9428             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9429             "JNP    exit\n\t"
9430             "MOV    ah,1       // saw a NaN, set CF\n\t"
9431             "SAHF\n"
9432      "exit:\tNOP               // avoid branch to branch" %}
9433   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9434   ins_encode( Push_Reg_DPR(src1),
9435               OpcP, RegOpc(src2),
9436               cmpF_P6_fixup );
9437   ins_pipe( pipe_slow );
9438 %}
9439 
9440 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9441   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9442   match(Set cr (CmpD src1 src2));
9443   ins_cost(150);
9444   format %{ "FLD    $src1\n\t"
9445             "FUCOMIP ST,$src2  // P6 instruction" %}
9446   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9447   ins_encode( Push_Reg_DPR(src1),
9448               OpcP, RegOpc(src2));
9449   ins_pipe( pipe_slow );
9450 %}
9451 
9452 // Compare & branch
9453 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9454   predicate(UseSSE<=1);
9455   match(Set cr (CmpD src1 src2));
9456   effect(KILL rax);
9457   ins_cost(200);
9458   format %{ "FLD    $src1\n\t"
9459             "FCOMp  $src2\n\t"
9460             "FNSTSW AX\n\t"
9461             "TEST   AX,0x400\n\t"
9462             "JZ,s   flags\n\t"
9463             "MOV    AH,1\t# unordered treat as LT\n"
9464     "flags:\tSAHF" %}
9465   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9466   ins_encode( Push_Reg_DPR(src1),
9467               OpcP, RegOpc(src2),
9468               fpu_flags);
9469   ins_pipe( pipe_slow );
9470 %}
9471 
9472 // Compare vs zero into -1,0,1
9473 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9474   predicate(UseSSE<=1);
9475   match(Set dst (CmpD3 src1 zero));
9476   effect(KILL cr, KILL rax);
9477   ins_cost(280);
9478   format %{ "FTSTD  $dst,$src1" %}
9479   opcode(0xE4, 0xD9);
9480   ins_encode( Push_Reg_DPR(src1),
9481               OpcS, OpcP, PopFPU,
9482               CmpF_Result(dst));
9483   ins_pipe( pipe_slow );
9484 %}
9485 
9486 // Compare into -1,0,1
9487 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9488   predicate(UseSSE<=1);
9489   match(Set dst (CmpD3 src1 src2));
9490   effect(KILL cr, KILL rax);
9491   ins_cost(300);
9492   format %{ "FCMPD  $dst,$src1,$src2" %}
9493   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9494   ins_encode( Push_Reg_DPR(src1),
9495               OpcP, RegOpc(src2),
9496               CmpF_Result(dst));
9497   ins_pipe( pipe_slow );
9498 %}
9499 
// double compare and set condition codes in EFLAGS by XMM regs
9501 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9502   predicate(UseSSE>=2);
9503   match(Set cr (CmpD src1 src2));
9504   ins_cost(145);
9505   format %{ "UCOMISD $src1,$src2\n\t"
9506             "JNP,s   exit\n\t"
9507             "PUSHF\t# saw NaN, set CF\n\t"
9508             "AND     [rsp], #0xffffff2b\n\t"
9509             "POPF\n"
9510     "exit:" %}
9511   ins_encode %{
9512     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9513     emit_cmpfp_fixup(_masm);
9514   %}
9515   ins_pipe( pipe_slow );
9516 %}
9517 
9518 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9519   predicate(UseSSE>=2);
9520   match(Set cr (CmpD src1 src2));
9521   ins_cost(100);
9522   format %{ "UCOMISD $src1,$src2" %}
9523   ins_encode %{
9524     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9525   %}
9526   ins_pipe( pipe_slow );
9527 %}
9528 
// Double compare vs memory, sets condition codes in EFLAGS by XMM regs
9530 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9531   predicate(UseSSE>=2);
9532   match(Set cr (CmpD src1 (LoadD src2)));
9533   ins_cost(145);
9534   format %{ "UCOMISD $src1,$src2\n\t"
9535             "JNP,s   exit\n\t"
9536             "PUSHF\t# saw NaN, set CF\n\t"
9537             "AND     [rsp], #0xffffff2b\n\t"
9538             "POPF\n"
9539     "exit:" %}
9540   ins_encode %{
9541     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9542     emit_cmpfp_fixup(_masm);
9543   %}
9544   ins_pipe( pipe_slow );
9545 %}
9546 
9547 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9548   predicate(UseSSE>=2);
9549   match(Set cr (CmpD src1 (LoadD src2)));
9550   ins_cost(100);
9551   format %{ "UCOMISD $src1,$src2" %}
9552   ins_encode %{
9553     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9554   %}
9555   ins_pipe( pipe_slow );
9556 %}
9557 
9558 // Compare into -1,0,1 in XMM
9559 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9560   predicate(UseSSE>=2);
9561   match(Set dst (CmpD3 src1 src2));
9562   effect(KILL cr);
9563   ins_cost(255);
9564   format %{ "UCOMISD $src1, $src2\n\t"
9565             "MOV     $dst, #-1\n\t"
9566             "JP,s    done\n\t"
9567             "JB,s    done\n\t"
9568             "SETNE   $dst\n\t"
9569             "MOVZB   $dst, $dst\n"
9570     "done:" %}
9571   ins_encode %{
9572     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9573     emit_cmpfp3(_masm, $dst$$Register);
9574   %}
9575   ins_pipe( pipe_slow );
9576 %}
9577 
9578 // Compare into -1,0,1 in XMM and memory
9579 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9580   predicate(UseSSE>=2);
9581   match(Set dst (CmpD3 src1 (LoadD src2)));
9582   effect(KILL cr);
9583   ins_cost(275);
9584   format %{ "UCOMISD $src1, $src2\n\t"
9585             "MOV     $dst, #-1\n\t"
9586             "JP,s    done\n\t"
9587             "JB,s    done\n\t"
9588             "SETNE   $dst\n\t"
9589             "MOVZB   $dst, $dst\n"
9590     "done:" %}
9591   ins_encode %{
9592     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9593     emit_cmpfp3(_masm, $dst$$Register);
9594   %}
9595   ins_pipe( pipe_slow );
9596 %}
9597 
9598 
9599 instruct subDPR_reg(regDPR dst, regDPR src) %{
9600   predicate (UseSSE <=1);
9601   match(Set dst (SubD dst src));
9602 
9603   format %{ "FLD    $src\n\t"
9604             "DSUBp  $dst,ST" %}
9605   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9606   ins_cost(150);
9607   ins_encode( Push_Reg_DPR(src),
9608               OpcP, RegOpc(dst) );
9609   ins_pipe( fpu_reg_reg );
9610 %}
9611 
9612 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9613   predicate (UseSSE <=1);
9614   match(Set dst (RoundDouble (SubD src1 src2)));
9615   ins_cost(250);
9616 
9617   format %{ "FLD    $src2\n\t"
9618             "DSUB   ST,$src1\n\t"
9619             "FSTP_D $dst\t# D-round" %}
9620   opcode(0xD8, 0x5);
9621   ins_encode( Push_Reg_DPR(src2),
9622               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9623   ins_pipe( fpu_mem_reg_reg );
9624 %}
9625 
9626 
9627 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9628   predicate (UseSSE <=1);
9629   match(Set dst (SubD dst (LoadD src)));
9630   ins_cost(150);
9631 
9632   format %{ "FLD    $src\n\t"
9633             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9635   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9636               OpcP, RegOpc(dst) );
9637   ins_pipe( fpu_reg_mem );
9638 %}
9639 
9640 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9641   predicate (UseSSE<=1);
9642   match(Set dst (AbsD src));
9643   ins_cost(100);
9644   format %{ "FABS" %}
9645   opcode(0xE1, 0xD9);
9646   ins_encode( OpcS, OpcP );
9647   ins_pipe( fpu_reg_reg );
9648 %}
9649 
9650 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9651   predicate(UseSSE<=1);
9652   match(Set dst (NegD src));
9653   ins_cost(100);
9654   format %{ "FCHS" %}
9655   opcode(0xE0, 0xD9);
9656   ins_encode( OpcS, OpcP );
9657   ins_pipe( fpu_reg_reg );
9658 %}
9659 
9660 instruct addDPR_reg(regDPR dst, regDPR src) %{
9661   predicate(UseSSE<=1);
9662   match(Set dst (AddD dst src));
9663   format %{ "FLD    $src\n\t"
9664             "DADD   $dst,ST" %}
9665   size(4);
9666   ins_cost(150);
9667   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9668   ins_encode( Push_Reg_DPR(src),
9669               OpcP, RegOpc(dst) );
9670   ins_pipe( fpu_reg_reg );
9671 %}
9672 
9673 
9674 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9675   predicate(UseSSE<=1);
9676   match(Set dst (RoundDouble (AddD src1 src2)));
9677   ins_cost(250);
9678 
9679   format %{ "FLD    $src2\n\t"
9680             "DADD   ST,$src1\n\t"
9681             "FSTP_D $dst\t# D-round" %}
9682   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9683   ins_encode( Push_Reg_DPR(src2),
9684               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9685   ins_pipe( fpu_mem_reg_reg );
9686 %}
9687 
9688 
9689 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9690   predicate(UseSSE<=1);
9691   match(Set dst (AddD dst (LoadD src)));
9692   ins_cost(150);
9693 
9694   format %{ "FLD    $src\n\t"
9695             "DADDp  $dst,ST" %}
9696   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9697   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9698               OpcP, RegOpc(dst) );
9699   ins_pipe( fpu_reg_mem );
9700 %}
9701 
9702 // add-to-memory
9703 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9704   predicate(UseSSE<=1);
9705   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9706   ins_cost(150);
9707 
9708   format %{ "FLD_D  $dst\n\t"
9709             "DADD   ST,$src\n\t"
9710             "FST_D  $dst" %}
9711   opcode(0xDD, 0x0);
9712   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9713               Opcode(0xD8), RegOpc(src),
9714               set_instruction_start,
9715               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9716   ins_pipe( fpu_reg_mem );
9717 %}
9718 
9719 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9720   predicate(UseSSE<=1);
9721   match(Set dst (AddD dst con));
9722   ins_cost(125);
9723   format %{ "FLD1\n\t"
9724             "DADDp  $dst,ST" %}
9725   ins_encode %{
9726     __ fld1();
9727     __ faddp($dst$$reg);
9728   %}
9729   ins_pipe(fpu_reg);
9730 %}
9731 
9732 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9733   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9734   match(Set dst (AddD dst con));
9735   ins_cost(200);
9736   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9737             "DADDp  $dst,ST" %}
9738   ins_encode %{
9739     __ fld_d($constantaddress($con));
9740     __ faddp($dst$$reg);
9741   %}
9742   ins_pipe(fpu_reg_mem);
9743 %}
9744 
9745 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9746   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9747   match(Set dst (RoundDouble (AddD src con)));
9748   ins_cost(200);
9749   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9750             "DADD   ST,$src\n\t"
9751             "FSTP_D $dst\t# D-round" %}
9752   ins_encode %{
9753     __ fld_d($constantaddress($con));
9754     __ fadd($src$$reg);
9755     __ fstp_d(Address(rsp, $dst$$disp));
9756   %}
9757   ins_pipe(fpu_mem_reg_con);
9758 %}
9759 
9760 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9761   predicate(UseSSE<=1);
9762   match(Set dst (MulD dst src));
9763   format %{ "FLD    $src\n\t"
9764             "DMULp  $dst,ST" %}
9765   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9766   ins_cost(150);
9767   ins_encode( Push_Reg_DPR(src),
9768               OpcP, RegOpc(dst) );
9769   ins_pipe( fpu_reg_reg );
9770 %}
9771 
9772 // Strict FP instruction biases argument before multiply then
9773 // biases result to avoid double rounding of subnormals.
9774 //
9775 // scale arg1 by multiplying arg1 by 2^(-15360)
9776 // load arg2
9777 // multiply scaled arg1 by arg2
9778 // rescale product by 2^(15360)
9779 //
9780 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9781   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9782   match(Set dst (MulD dst src));
9783   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9784 
9785   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9786             "DMULp  $dst,ST\n\t"
9787             "FLD    $src\n\t"
9788             "DMULp  $dst,ST\n\t"
9789             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9790             "DMULp  $dst,ST\n\t" %}
9791   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9792   ins_encode( strictfp_bias1(dst),
9793               Push_Reg_DPR(src),
9794               OpcP, RegOpc(dst),
9795               strictfp_bias2(dst) );
9796   ins_pipe( fpu_reg_reg );
9797 %}
9798 
9799 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9800   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9801   match(Set dst (MulD dst con));
9802   ins_cost(200);
9803   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9804             "DMULp  $dst,ST" %}
9805   ins_encode %{
9806     __ fld_d($constantaddress($con));
9807     __ fmulp($dst$$reg);
9808   %}
9809   ins_pipe(fpu_reg_mem);
9810 %}
9811 
9812 
9813 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9814   predicate( UseSSE<=1 );
9815   match(Set dst (MulD dst (LoadD src)));
9816   ins_cost(200);
9817   format %{ "FLD_D  $src\n\t"
9818             "DMULp  $dst,ST" %}
9819   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9820   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9821               OpcP, RegOpc(dst) );
9822   ins_pipe( fpu_reg_mem );
9823 %}
9824 
9825 //
9826 // Cisc-alternate to reg-reg multiply
9827 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9828   predicate( UseSSE<=1 );
9829   match(Set dst (MulD src (LoadD mem)));
9830   ins_cost(250);
9831   format %{ "FLD_D  $mem\n\t"
9832             "DMUL   ST,$src\n\t"
9833             "FSTP_D $dst" %}
9834   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9835   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9836               OpcReg_FPR(src),
9837               Pop_Reg_DPR(dst) );
9838   ins_pipe( fpu_reg_reg_mem );
9839 %}
9840 
9841 
9842 // MACRO3 -- addDPR a mulDPR
9843 // This instruction is a '2-address' instruction in that the result goes
9844 // back to src2.  This eliminates a move from the macro; possibly the
9845 // register allocator will have to add it back (and maybe not).
9846 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9847   predicate( UseSSE<=1 );
9848   match(Set src2 (AddD (MulD src0 src1) src2));
9849   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9850             "DMUL   ST,$src1\n\t"
9851             "DADDp  $src2,ST" %}
9852   ins_cost(250);
9853   opcode(0xDD); /* LoadD DD /0 */
9854   ins_encode( Push_Reg_FPR(src0),
9855               FMul_ST_reg(src1),
9856               FAddP_reg_ST(src2) );
9857   ins_pipe( fpu_reg_reg_reg );
9858 %}
9859 
9860 
9861 // MACRO3 -- subDPR a mulDPR
9862 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9863   predicate( UseSSE<=1 );
9864   match(Set src2 (SubD (MulD src0 src1) src2));
9865   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9866             "DMUL   ST,$src1\n\t"
9867             "DSUBRp $src2,ST" %}
9868   ins_cost(250);
9869   ins_encode( Push_Reg_FPR(src0),
9870               FMul_ST_reg(src1),
9871               Opcode(0xDE), Opc_plus(0xE0,src2));
9872   ins_pipe( fpu_reg_reg_reg );
9873 %}
9874 
9875 
9876 instruct divDPR_reg(regDPR dst, regDPR src) %{
9877   predicate( UseSSE<=1 );
9878   match(Set dst (DivD dst src));
9879 
9880   format %{ "FLD    $src\n\t"
9881             "FDIVp  $dst,ST" %}
9882   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9883   ins_cost(150);
9884   ins_encode( Push_Reg_DPR(src),
9885               OpcP, RegOpc(dst) );
9886   ins_pipe( fpu_reg_reg );
9887 %}
9888 
9889 // Strict FP instruction biases argument before division then
9890 // biases result, to avoid double rounding of subnormals.
9891 //
9892 // scale dividend by multiplying dividend by 2^(-15360)
9893 // load divisor
9894 // divide scaled dividend by divisor
9895 // rescale quotient by 2^(15360)
9896 //
9897 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9902 
9903   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9904             "DMULp  $dst,ST\n\t"
9905             "FLD    $src\n\t"
9906             "FDIVp  $dst,ST\n\t"
9907             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9908             "DMULp  $dst,ST\n\t" %}
9909   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9910   ins_encode( strictfp_bias1(dst),
9911               Push_Reg_DPR(src),
9912               OpcP, RegOpc(dst),
9913               strictfp_bias2(dst) );
9914   ins_pipe( fpu_reg_reg );
9915 %}
9916 
9917 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9918   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9919   match(Set dst (RoundDouble (DivD src1 src2)));
9920 
9921   format %{ "FLD    $src1\n\t"
9922             "FDIV   ST,$src2\n\t"
9923             "FSTP_D $dst\t# D-round" %}
9924   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9925   ins_encode( Push_Reg_DPR(src1),
9926               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9927   ins_pipe( fpu_mem_reg_reg );
9928 %}
9929 
9930 
9931 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9932   predicate(UseSSE<=1);
9933   match(Set dst (ModD dst src));
9934   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9935 
9936   format %{ "DMOD   $dst,$src" %}
9937   ins_cost(250);
9938   ins_encode(Push_Reg_Mod_DPR(dst, src),
9939               emitModDPR(),
9940               Push_Result_Mod_DPR(src),
9941               Pop_Reg_DPR(dst));
9942   ins_pipe( pipe_slow );
9943 %}
9944 
9945 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9946   predicate(UseSSE>=2);
9947   match(Set dst (ModD src0 src1));
9948   effect(KILL rax, KILL cr);
9949 
9950   format %{ "SUB    ESP,8\t # DMOD\n"
9951           "\tMOVSD  [ESP+0],$src1\n"
9952           "\tFLD_D  [ESP+0]\n"
9953           "\tMOVSD  [ESP+0],$src0\n"
9954           "\tFLD_D  [ESP+0]\n"
9955      "loop:\tFPREM\n"
9956           "\tFWAIT\n"
9957           "\tFNSTSW AX\n"
9958           "\tSAHF\n"
9959           "\tJP     loop\n"
9960           "\tFSTP_D [ESP+0]\n"
9961           "\tMOVSD  $dst,[ESP+0]\n"
9962           "\tADD    ESP,8\n"
9963           "\tFSTP   ST0\t # Restore FPU Stack"
9964     %}
9965   ins_cost(250);
9966   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9967   ins_pipe( pipe_slow );
9968 %}
9969 
9970 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9971   predicate (UseSSE<=1);
9972   match(Set dst(AtanD dst src));
9973   format %{ "DATA   $dst,$src" %}
9974   opcode(0xD9, 0xF3);
9975   ins_encode( Push_Reg_DPR(src),
9976               OpcP, OpcS, RegOpc(dst) );
9977   ins_pipe( pipe_slow );
9978 %}
9979 
9980 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9981   predicate (UseSSE>=2);
9982   match(Set dst(AtanD dst src));
9983   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9984   format %{ "DATA   $dst,$src" %}
9985   opcode(0xD9, 0xF3);
9986   ins_encode( Push_SrcD(src),
9987               OpcP, OpcS, Push_ResultD(dst) );
9988   ins_pipe( pipe_slow );
9989 %}
9990 
9991 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9992   predicate (UseSSE<=1);
9993   match(Set dst (SqrtD src));
9994   format %{ "DSQRT  $dst,$src" %}
9995   opcode(0xFA, 0xD9);
9996   ins_encode( Push_Reg_DPR(src),
9997               OpcS, OpcP, Pop_Reg_DPR(dst) );
9998   ins_pipe( pipe_slow );
9999 %}
10000 
10001 //-------------Float Instructions-------------------------------
10002 // Float Math
10003 
10004 // Code for float compare:
10005 //     fcompp();
10006 //     fwait(); fnstsw_ax();
10007 //     sahf();
10008 //     movl(dst, unordered_result);
10009 //     jcc(Assembler::parity, exit);
10010 //     movl(dst, less_result);
10011 //     jcc(Assembler::below, exit);
10012 //     movl(dst, equal_result);
10013 //     jcc(Assembler::equal, exit);
10014 //     movl(dst, greater_result);
10015 //   exit:
10016 
10017 // P6 version of float compare, sets condition codes in EFLAGS
10018 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10019   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10020   match(Set cr (CmpF src1 src2));
10021   effect(KILL rax);
10022   ins_cost(150);
10023   format %{ "FLD    $src1\n\t"
10024             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10025             "JNP    exit\n\t"
10026             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10027             "SAHF\n"
10028      "exit:\tNOP               // avoid branch to branch" %}
10029   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10030   ins_encode( Push_Reg_DPR(src1),
10031               OpcP, RegOpc(src2),
10032               cmpF_P6_fixup );
10033   ins_pipe( pipe_slow );
10034 %}
10035 
10036 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10037   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10038   match(Set cr (CmpF src1 src2));
10039   ins_cost(100);
10040   format %{ "FLD    $src1\n\t"
10041             "FUCOMIP ST,$src2  // P6 instruction" %}
10042   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10043   ins_encode( Push_Reg_DPR(src1),
10044               OpcP, RegOpc(src2));
10045   ins_pipe( pipe_slow );
10046 %}
10047 
10048 
10049 // Compare & branch
10050 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10051   predicate(UseSSE == 0);
10052   match(Set cr (CmpF src1 src2));
10053   effect(KILL rax);
10054   ins_cost(200);
10055   format %{ "FLD    $src1\n\t"
10056             "FCOMp  $src2\n\t"
10057             "FNSTSW AX\n\t"
10058             "TEST   AX,0x400\n\t"
10059             "JZ,s   flags\n\t"
10060             "MOV    AH,1\t# unordered treat as LT\n"
10061     "flags:\tSAHF" %}
10062   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10063   ins_encode( Push_Reg_DPR(src1),
10064               OpcP, RegOpc(src2),
10065               fpu_flags);
10066   ins_pipe( pipe_slow );
10067 %}
10068 
10069 // Compare vs zero into -1,0,1
10070 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10071   predicate(UseSSE == 0);
10072   match(Set dst (CmpF3 src1 zero));
10073   effect(KILL cr, KILL rax);
10074   ins_cost(280);
10075   format %{ "FTSTF  $dst,$src1" %}
10076   opcode(0xE4, 0xD9);
10077   ins_encode( Push_Reg_DPR(src1),
10078               OpcS, OpcP, PopFPU,
10079               CmpF_Result(dst));
10080   ins_pipe( pipe_slow );
10081 %}
10082 
10083 // Compare into -1,0,1
10084 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10085   predicate(UseSSE == 0);
10086   match(Set dst (CmpF3 src1 src2));
10087   effect(KILL cr, KILL rax);
10088   ins_cost(300);
10089   format %{ "FCMPF  $dst,$src1,$src2" %}
10090   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10091   ins_encode( Push_Reg_DPR(src1),
10092               OpcP, RegOpc(src2),
10093               CmpF_Result(dst));
10094   ins_pipe( pipe_slow );
10095 %}
10096 
10097 // float compare and set condition codes in EFLAGS by XMM regs
10098 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10099   predicate(UseSSE>=1);
10100   match(Set cr (CmpF src1 src2));
10101   ins_cost(145);
10102   format %{ "UCOMISS $src1,$src2\n\t"
10103             "JNP,s   exit\n\t"
10104             "PUSHF\t# saw NaN, set CF\n\t"
10105             "AND     [rsp], #0xffffff2b\n\t"
10106             "POPF\n"
10107     "exit:" %}
10108   ins_encode %{
10109     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10110     emit_cmpfp_fixup(_masm);
10111   %}
10112   ins_pipe( pipe_slow );
10113 %}
10114 
10115 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10116   predicate(UseSSE>=1);
10117   match(Set cr (CmpF src1 src2));
10118   ins_cost(100);
10119   format %{ "UCOMISS $src1,$src2" %}
10120   ins_encode %{
10121     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10122   %}
10123   ins_pipe( pipe_slow );
10124 %}
10125 
10126 // float compare and set condition codes in EFLAGS by XMM regs
10127 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10128   predicate(UseSSE>=1);
10129   match(Set cr (CmpF src1 (LoadF src2)));
10130   ins_cost(165);
10131   format %{ "UCOMISS $src1,$src2\n\t"
10132             "JNP,s   exit\n\t"
10133             "PUSHF\t# saw NaN, set CF\n\t"
10134             "AND     [rsp], #0xffffff2b\n\t"
10135             "POPF\n"
10136     "exit:" %}
10137   ins_encode %{
10138     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10139     emit_cmpfp_fixup(_masm);
10140   %}
10141   ins_pipe( pipe_slow );
10142 %}
10143 
10144 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10145   predicate(UseSSE>=1);
10146   match(Set cr (CmpF src1 (LoadF src2)));
10147   ins_cost(100);
10148   format %{ "UCOMISS $src1,$src2" %}
10149   ins_encode %{
10150     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10151   %}
10152   ins_pipe( pipe_slow );
10153 %}
10154 
10155 // Compare into -1,0,1 in XMM
10156 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10157   predicate(UseSSE>=1);
10158   match(Set dst (CmpF3 src1 src2));
10159   effect(KILL cr);
10160   ins_cost(255);
10161   format %{ "UCOMISS $src1, $src2\n\t"
10162             "MOV     $dst, #-1\n\t"
10163             "JP,s    done\n\t"
10164             "JB,s    done\n\t"
10165             "SETNE   $dst\n\t"
10166             "MOVZB   $dst, $dst\n"
10167     "done:" %}
10168   ins_encode %{
10169     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10170     emit_cmpfp3(_masm, $dst$$Register);
10171   %}
10172   ins_pipe( pipe_slow );
10173 %}
10174 
10175 // Compare into -1,0,1 in XMM and memory
10176 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10177   predicate(UseSSE>=1);
10178   match(Set dst (CmpF3 src1 (LoadF src2)));
10179   effect(KILL cr);
10180   ins_cost(275);
10181   format %{ "UCOMISS $src1, $src2\n\t"
10182             "MOV     $dst, #-1\n\t"
10183             "JP,s    done\n\t"
10184             "JB,s    done\n\t"
10185             "SETNE   $dst\n\t"
10186             "MOVZB   $dst, $dst\n"
10187     "done:" %}
10188   ins_encode %{
10189     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10190     emit_cmpfp3(_masm, $dst$$Register);
10191   %}
10192   ins_pipe( pipe_slow );
10193 %}
10194 
10195 // Spill to obtain 24-bit precision
10196 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10197   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10198   match(Set dst (SubF src1 src2));
10199 
10200   format %{ "FSUB   $dst,$src1 - $src2" %}
10201   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10202   ins_encode( Push_Reg_FPR(src1),
10203               OpcReg_FPR(src2),
10204               Pop_Mem_FPR(dst) );
10205   ins_pipe( fpu_mem_reg_reg );
10206 %}
10207 //
10208 // This instruction does not round to 24-bits
10209 instruct subFPR_reg(regFPR dst, regFPR src) %{
10210   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10211   match(Set dst (SubF dst src));
10212 
10213   format %{ "FSUB   $dst,$src" %}
10214   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10215   ins_encode( Push_Reg_FPR(src),
10216               OpcP, RegOpc(dst) );
10217   ins_pipe( fpu_reg_reg );
10218 %}
10219 
10220 // Spill to obtain 24-bit precision
10221 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10222   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10223   match(Set dst (AddF src1 src2));
10224 
10225   format %{ "FADD   $dst,$src1,$src2" %}
10226   opcode(0xD8, 0x0); /* D8 C0+i */
10227   ins_encode( Push_Reg_FPR(src2),
10228               OpcReg_FPR(src1),
10229               Pop_Mem_FPR(dst) );
10230   ins_pipe( fpu_mem_reg_reg );
10231 %}
10232 //
10233 // This instruction does not round to 24-bits
10234 instruct addFPR_reg(regFPR dst, regFPR src) %{
10235   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10236   match(Set dst (AddF dst src));
10237 
10238   format %{ "FLD    $src\n\t"
10239             "FADDp  $dst,ST" %}
10240   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10241   ins_encode( Push_Reg_FPR(src),
10242               OpcP, RegOpc(dst) );
10243   ins_pipe( fpu_reg_reg );
10244 %}
10245 
10246 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10247   predicate(UseSSE==0);
10248   match(Set dst (AbsF src));
10249   ins_cost(100);
10250   format %{ "FABS" %}
10251   opcode(0xE1, 0xD9);
10252   ins_encode( OpcS, OpcP );
10253   ins_pipe( fpu_reg_reg );
10254 %}
10255 
10256 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10257   predicate(UseSSE==0);
10258   match(Set dst (NegF src));
10259   ins_cost(100);
10260   format %{ "FCHS" %}
10261   opcode(0xE0, 0xD9);
10262   ins_encode( OpcS, OpcP );
10263   ins_pipe( fpu_reg_reg );
10264 %}
10265 
10266 // Cisc-alternate to addFPR_reg
10267 // Spill to obtain 24-bit precision
10268 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10269   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10270   match(Set dst (AddF src1 (LoadF src2)));
10271 
10272   format %{ "FLD    $src2\n\t"
10273             "FADD   ST,$src1\n\t"
10274             "FSTP_S $dst" %}
10275   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10276   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10277               OpcReg_FPR(src1),
10278               Pop_Mem_FPR(dst) );
10279   ins_pipe( fpu_mem_reg_mem );
10280 %}
10281 //
10282 // Cisc-alternate to addFPR_reg
10283 // This instruction does not round to 24-bits
10284 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10285   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10286   match(Set dst (AddF dst (LoadF src)));
10287 
10288   format %{ "FADD   $dst,$src" %}
10289   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10290   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10291               OpcP, RegOpc(dst) );
10292   ins_pipe( fpu_reg_mem );
10293 %}
10294 
// Following two instructions for _222_mpegaudio
10296 // Spill to obtain 24-bit precision
10297 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10298   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10299   match(Set dst (AddF src1 src2));
10300 
10301   format %{ "FADD   $dst,$src1,$src2" %}
10302   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10303   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10304               OpcReg_FPR(src2),
10305               Pop_Mem_FPR(dst) );
10306   ins_pipe( fpu_mem_reg_mem );
10307 %}
10308 
10309 // Cisc-spill variant
10310 // Spill to obtain 24-bit precision
10311 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10312   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10313   match(Set dst (AddF src1 (LoadF src2)));
10314 
10315   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10316   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10317   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10318               set_instruction_start,
10319               OpcP, RMopc_Mem(secondary,src1),
10320               Pop_Mem_FPR(dst) );
10321   ins_pipe( fpu_mem_mem_mem );
10322 %}
10323 
10324 // Spill to obtain 24-bit precision
10325 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10326   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10327   match(Set dst (AddF src1 src2));
10328 
10329   format %{ "FADD   $dst,$src1,$src2" %}
10330   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10331   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10332               set_instruction_start,
10333               OpcP, RMopc_Mem(secondary,src1),
10334               Pop_Mem_FPR(dst) );
10335   ins_pipe( fpu_mem_mem_mem );
10336 %}
10337 
10338 
10339 // Spill to obtain 24-bit precision
10340 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10341   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10342   match(Set dst (AddF src con));
10343   format %{ "FLD    $src\n\t"
10344             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10345             "FSTP_S $dst"  %}
10346   ins_encode %{
10347     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10348     __ fadd_s($constantaddress($con));
10349     __ fstp_s(Address(rsp, $dst$$disp));
10350   %}
10351   ins_pipe(fpu_mem_reg_con);
10352 %}
10353 //
10354 // This instruction does not round to 24-bits
10355 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10356   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src con));
10358   format %{ "FLD    $src\n\t"
10359             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10360             "FSTP   $dst"  %}
10361   ins_encode %{
10362     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10363     __ fadd_s($constantaddress($con));
10364     __ fstp_d($dst$$reg);
10365   %}
10366   ins_pipe(fpu_reg_reg_con);
10367 %}
10368 
10369 // Spill to obtain 24-bit precision
10370 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10371   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10372   match(Set dst (MulF src1 src2));
10373 
10374   format %{ "FLD    $src1\n\t"
10375             "FMUL   $src2\n\t"
10376             "FSTP_S $dst"  %}
10377   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10378   ins_encode( Push_Reg_FPR(src1),
10379               OpcReg_FPR(src2),
10380               Pop_Mem_FPR(dst) );
10381   ins_pipe( fpu_mem_reg_reg );
10382 %}
10383 //
10384 // This instruction does not round to 24-bits
10385 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10386   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10387   match(Set dst (MulF src1 src2));
10388 
10389   format %{ "FLD    $src1\n\t"
10390             "FMUL   $src2\n\t"
10391             "FSTP_S $dst"  %}
10392   opcode(0xD8, 0x1); /* D8 C8+i */
10393   ins_encode( Push_Reg_FPR(src2),
10394               OpcReg_FPR(src1),
10395               Pop_Reg_FPR(dst) );
10396   ins_pipe( fpu_reg_reg_reg );
10397 %}
10398 
10399 
10400 // Spill to obtain 24-bit precision
10401 // Cisc-alternate to reg-reg multiply
10402 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10403   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10404   match(Set dst (MulF src1 (LoadF src2)));
10405 
10406   format %{ "FLD_S  $src2\n\t"
10407             "FMUL   $src1\n\t"
10408             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10410   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10411               OpcReg_FPR(src1),
10412               Pop_Mem_FPR(dst) );
10413   ins_pipe( fpu_mem_reg_mem );
10414 %}
10415 //
10416 // This instruction does not round to 24-bits
10417 // Cisc-alternate to reg-reg multiply
10418 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10419   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10420   match(Set dst (MulF src1 (LoadF src2)));
10421 
10422   format %{ "FMUL   $dst,$src1,$src2" %}
10423   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10424   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10425               OpcReg_FPR(src1),
10426               Pop_Reg_FPR(dst) );
10427   ins_pipe( fpu_reg_reg_mem );
10428 %}
10429 
10430 // Spill to obtain 24-bit precision
10431 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10432   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10433   match(Set dst (MulF src1 src2));
10434 
10435   format %{ "FMUL   $dst,$src1,$src2" %}
10436   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               set_instruction_start,
10439               OpcP, RMopc_Mem(secondary,src1),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_mem_mem );
10442 %}
10443 
10444 // Spill to obtain 24-bit precision
10445 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10446   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10447   match(Set dst (MulF src con));
10448 
10449   format %{ "FLD    $src\n\t"
10450             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10451             "FSTP_S $dst"  %}
10452   ins_encode %{
10453     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10454     __ fmul_s($constantaddress($con));
10455     __ fstp_s(Address(rsp, $dst$$disp));
10456   %}
10457   ins_pipe(fpu_mem_reg_con);
10458 %}
10459 //
10460 // This instruction does not round to 24-bits
10461 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10462   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10463   match(Set dst (MulF src con));
10464 
10465   format %{ "FLD    $src\n\t"
10466             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10467             "FSTP   $dst"  %}
10468   ins_encode %{
10469     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10470     __ fmul_s($constantaddress($con));
10471     __ fstp_d($dst$$reg);
10472   %}
10473   ins_pipe(fpu_reg_reg_con);
10474 %}
10475 
10476 
10477 //
10478 // MACRO1 -- subsume unshared load into mulFPR
10479 // This instruction does not round to 24-bits
10480 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10481   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10482   match(Set dst (MulF (LoadF mem1) src));
10483 
10484   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10485             "FMUL   ST,$src\n\t"
10486             "FSTP   $dst" %}
10487   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10488   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10489               OpcReg_FPR(src),
10490               Pop_Reg_FPR(dst) );
10491   ins_pipe( fpu_reg_reg_mem );
10492 %}
10493 //
10494 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10495 // This instruction does not round to 24-bits
10496 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10497   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10498   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10499   ins_cost(95);
10500 
10501   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10502             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10503             "FADD   ST,$src2\n\t"
10504             "FSTP   $dst" %}
10505   opcode(0xD9); /* LoadF D9 /0 */
10506   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10507               FMul_ST_reg(src1),
10508               FAdd_ST_reg(src2),
10509               Pop_Reg_FPR(dst) );
10510   ins_pipe( fpu_reg_mem_reg_reg );
10511 %}
10512 
10513 // MACRO3 -- addFPR a mulFPR
10514 // This instruction does not round to 24-bits.  It is a '2-address'
10515 // instruction in that the result goes back to src2.  This eliminates
10516 // a move from the macro; possibly the register allocator will have
10517 // to add it back (and maybe not).
10518 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10519   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10520   match(Set src2 (AddF (MulF src0 src1) src2));
10521 
10522   format %{ "FLD    $src0     ===MACRO3===\n\t"
10523             "FMUL   ST,$src1\n\t"
10524             "FADDP  $src2,ST" %}
10525   opcode(0xD9); /* LoadF D9 /0 */
10526   ins_encode( Push_Reg_FPR(src0),
10527               FMul_ST_reg(src1),
10528               FAddP_reg_ST(src2) );
10529   ins_pipe( fpu_reg_reg_reg );
10530 %}
10531 
10532 // MACRO4 -- divFPR subFPR
10533 // This instruction does not round to 24-bits
10534 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10535   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10536   match(Set dst (DivF (SubF src2 src1) src3));
10537 
10538   format %{ "FLD    $src2   ===MACRO4===\n\t"
10539             "FSUB   ST,$src1\n\t"
10540             "FDIV   ST,$src3\n\t"
10541             "FSTP  $dst" %}
10542   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10543   ins_encode( Push_Reg_FPR(src2),
10544               subFPR_divFPR_encode(src1,src3),
10545               Pop_Reg_FPR(dst) );
10546   ins_pipe( fpu_reg_reg_reg_reg );
10547 %}
10548 
10549 // Spill to obtain 24-bit precision
10550 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10551   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10552   match(Set dst (DivF src1 src2));
10553 
10554   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10556   ins_encode( Push_Reg_FPR(src1),
10557               OpcReg_FPR(src2),
10558               Pop_Mem_FPR(dst) );
10559   ins_pipe( fpu_mem_reg_reg );
10560 %}
10561 //
10562 // This instruction does not round to 24-bits
10563 instruct divFPR_reg(regFPR dst, regFPR src) %{
10564   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10565   match(Set dst (DivF dst src));
10566 
10567   format %{ "FDIV   $dst,$src" %}
10568   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10569   ins_encode( Push_Reg_FPR(src),
10570               OpcP, RegOpc(dst) );
10571   ins_pipe( fpu_reg_reg );
10572 %}
10573 
10574 
10575 // Spill to obtain 24-bit precision
10576 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10577   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10578   match(Set dst (ModF src1 src2));
10579   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10580 
10581   format %{ "FMOD   $dst,$src1,$src2" %}
10582   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10583               emitModDPR(),
10584               Push_Result_Mod_DPR(src2),
10585               Pop_Mem_FPR(dst));
10586   ins_pipe( pipe_slow );
10587 %}
10588 //
10589 // This instruction does not round to 24-bits
10590 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10591   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10592   match(Set dst (ModF dst src));
10593   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10594 
10595   format %{ "FMOD   $dst,$src" %}
10596   ins_encode(Push_Reg_Mod_DPR(dst, src),
10597               emitModDPR(),
10598               Push_Result_Mod_DPR(src),
10599               Pop_Reg_FPR(dst));
10600   ins_pipe( pipe_slow );
10601 %}
10602 
10603 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10604   predicate(UseSSE>=1);
10605   match(Set dst (ModF src0 src1));
10606   effect(KILL rax, KILL cr);
10607   format %{ "SUB    ESP,4\t # FMOD\n"
10608           "\tMOVSS  [ESP+0],$src1\n"
10609           "\tFLD_S  [ESP+0]\n"
10610           "\tMOVSS  [ESP+0],$src0\n"
10611           "\tFLD_S  [ESP+0]\n"
10612      "loop:\tFPREM\n"
10613           "\tFWAIT\n"
10614           "\tFNSTSW AX\n"
10615           "\tSAHF\n"
10616           "\tJP     loop\n"
10617           "\tFSTP_S [ESP+0]\n"
10618           "\tMOVSS  $dst,[ESP+0]\n"
10619           "\tADD    ESP,4\n"
10620           "\tFSTP   ST0\t # Restore FPU Stack"
10621     %}
10622   ins_cost(250);
10623   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10624   ins_pipe( pipe_slow );
10625 %}
10626 
10627 
10628 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10630 
10631 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10632   predicate(UseSSE==0);
10633   match(Set dst (RoundFloat src));
10634   ins_cost(125);
10635   format %{ "FST_S  $dst,$src\t# F-round" %}
10636   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10637   ins_pipe( fpu_mem_reg );
10638 %}
10639 
10640 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10641   predicate(UseSSE<=1);
10642   match(Set dst (RoundDouble src));
10643   ins_cost(125);
10644   format %{ "FST_D  $dst,$src\t# D-round" %}
10645   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10646   ins_pipe( fpu_mem_reg );
10647 %}
10648 
// Force rounding to 24-bit precision and 8-bit exponent
10650 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10651   predicate(UseSSE==0);
10652   match(Set dst (ConvD2F src));
10653   format %{ "FST_S  $dst,$src\t# F-round" %}
10654   expand %{
10655     roundFloat_mem_reg(dst,src);
10656   %}
10657 %}
10658 
// Force rounding to 24-bit precision and 8-bit exponent
10660 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10661   predicate(UseSSE==1);
10662   match(Set dst (ConvD2F src));
10663   effect( KILL cr );
10664   format %{ "SUB    ESP,4\n\t"
10665             "FST_S  [ESP],$src\t# F-round\n\t"
10666             "MOVSS  $dst,[ESP]\n\t"
10667             "ADD ESP,4" %}
10668   ins_encode %{
10669     __ subptr(rsp, 4);
10670     if ($src$$reg != FPR1L_enc) {
10671       __ fld_s($src$$reg-1);
10672       __ fstp_s(Address(rsp, 0));
10673     } else {
10674       __ fst_s(Address(rsp, 0));
10675     }
10676     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10677     __ addptr(rsp, 4);
10678   %}
10679   ins_pipe( pipe_slow );
10680 %}
10681 
10682 // Force rounding double precision to single precision
10683 instruct convD2F_reg(regF dst, regD src) %{
10684   predicate(UseSSE>=2);
10685   match(Set dst (ConvD2F src));
10686   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10687   ins_encode %{
10688     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10689   %}
10690   ins_pipe( pipe_slow );
10691 %}
10692 
10693 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10694   predicate(UseSSE==0);
10695   match(Set dst (ConvF2D src));
10696   format %{ "FST_S  $dst,$src\t# D-round" %}
10697   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10698   ins_pipe( fpu_reg_reg );
10699 %}
10700 
10701 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10702   predicate(UseSSE==1);
10703   match(Set dst (ConvF2D src));
10704   format %{ "FST_D  $dst,$src\t# D-round" %}
10705   expand %{
10706     roundDouble_mem_reg(dst,src);
10707   %}
10708 %}
10709 
10710 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10711   predicate(UseSSE==1);
10712   match(Set dst (ConvF2D src));
10713   effect( KILL cr );
10714   format %{ "SUB    ESP,4\n\t"
10715             "MOVSS  [ESP] $src\n\t"
10716             "FLD_S  [ESP]\n\t"
10717             "ADD    ESP,4\n\t"
10718             "FSTP   $dst\t# D-round" %}
10719   ins_encode %{
10720     __ subptr(rsp, 4);
10721     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10722     __ fld_s(Address(rsp, 0));
10723     __ addptr(rsp, 4);
10724     __ fstp_d($dst$$reg);
10725   %}
10726   ins_pipe( pipe_slow );
10727 %}
10728 
10729 instruct convF2D_reg(regD dst, regF src) %{
10730   predicate(UseSSE>=2);
10731   match(Set dst (ConvF2D src));
10732   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10733   ins_encode %{
10734     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10735   %}
10736   ins_pipe( pipe_slow );
10737 %}
10738 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
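// Both the x87 FIST(P) stores and the SSE CVTTSD2SI/CVTTSS2SI conversions
// write the "integer indefinite" value (0x80000000, or 0x8000000000000000
// for longs) when the input is a NaN or out of range.  That is what the
// CMP against 0x80000000 in the conversions below is checking for; the
// d2i/d2l wrapper stubs then redo the conversion with full Java semantics.
// A genuine result of exactly MIN_VALUE also takes the slow path, which is
// merely slower, not wrong.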
10740 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10741   predicate(UseSSE<=1);
10742   match(Set dst (ConvD2I src));
10743   effect( KILL tmp, KILL cr );
10744   format %{ "FLD    $src\t# Convert double to int \n\t"
10745             "FLDCW  trunc mode\n\t"
10746             "SUB    ESP,4\n\t"
10747             "FISTp  [ESP + #0]\n\t"
10748             "FLDCW  std/24-bit mode\n\t"
10749             "POP    EAX\n\t"
10750             "CMP    EAX,0x80000000\n\t"
10751             "JNE,s  fast\n\t"
10752             "FLD_D  $src\n\t"
10753             "CALL   d2i_wrapper\n"
10754       "fast:" %}
10755   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10756   ins_pipe( pipe_slow );
10757 %}
10758 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10760 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10761   predicate(UseSSE>=2);
10762   match(Set dst (ConvD2I src));
10763   effect( KILL tmp, KILL cr );
10764   format %{ "CVTTSD2SI $dst, $src\n\t"
10765             "CMP    $dst,0x80000000\n\t"
10766             "JNE,s  fast\n\t"
10767             "SUB    ESP, 8\n\t"
10768             "MOVSD  [ESP], $src\n\t"
10769             "FLD_D  [ESP]\n\t"
10770             "ADD    ESP, 8\n\t"
10771             "CALL   d2i_wrapper\n"
10772       "fast:" %}
10773   ins_encode %{
10774     Label fast;
10775     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10776     __ cmpl($dst$$Register, 0x80000000);
10777     __ jccb(Assembler::notEqual, fast);
10778     __ subptr(rsp, 8);
10779     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10780     __ fld_d(Address(rsp, 0));
10781     __ addptr(rsp, 8);
10782     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10783     __ bind(fast);
10784   %}
10785   ins_pipe( pipe_slow );
10786 %}
10787 
10788 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10789   predicate(UseSSE<=1);
10790   match(Set dst (ConvD2L src));
10791   effect( KILL cr );
10792   format %{ "FLD    $src\t# Convert double to long\n\t"
10793             "FLDCW  trunc mode\n\t"
10794             "SUB    ESP,8\n\t"
10795             "FISTp  [ESP + #0]\n\t"
10796             "FLDCW  std/24-bit mode\n\t"
10797             "POP    EAX\n\t"
10798             "POP    EDX\n\t"
10799             "CMP    EDX,0x80000000\n\t"
10800             "JNE,s  fast\n\t"
10801             "TEST   EAX,EAX\n\t"
10802             "JNE,s  fast\n\t"
10803             "FLD    $src\n\t"
10804             "CALL   d2l_wrapper\n"
10805       "fast:" %}
10806   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10807   ins_pipe( pipe_slow );
10808 %}
10809 
10810 // XMM lacks a float/double->long conversion, so use the old FPU stack.
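// (In 32-bit mode there is no 64-bit register form of CVTTSD2SI/CVTTSS2SI,
// so a truncating float/double-to-long conversion has to go through FISTP.)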
10811 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10812   predicate (UseSSE>=2);
10813   match(Set dst (ConvD2L src));
10814   effect( KILL cr );
10815   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10816             "MOVSD  [ESP],$src\n\t"
10817             "FLD_D  [ESP]\n\t"
10818             "FLDCW  trunc mode\n\t"
10819             "FISTp  [ESP + #0]\n\t"
10820             "FLDCW  std/24-bit mode\n\t"
10821             "POP    EAX\n\t"
10822             "POP    EDX\n\t"
10823             "CMP    EDX,0x80000000\n\t"
10824             "JNE,s  fast\n\t"
10825             "TEST   EAX,EAX\n\t"
10826             "JNE,s  fast\n\t"
10827             "SUB    ESP,8\n\t"
10828             "MOVSD  [ESP],$src\n\t"
10829             "FLD_D  [ESP]\n\t"
10830             "ADD    ESP,8\n\t"
10831             "CALL   d2l_wrapper\n"
10832       "fast:" %}
10833   ins_encode %{
10834     Label fast;
10835     __ subptr(rsp, 8);
10836     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10837     __ fld_d(Address(rsp, 0));
10838     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10839     __ fistp_d(Address(rsp, 0));
10840     // Restore the rounding mode, mask the exception
10841     if (Compile::current()->in_24_bit_fp_mode()) {
10842       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10843     } else {
10844       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10845     }
10846     // Load the converted long, adjust CPU stack
10847     __ pop(rax);
10848     __ pop(rdx);
10849     __ cmpl(rdx, 0x80000000);
10850     __ jccb(Assembler::notEqual, fast);
10851     __ testl(rax, rax);
10852     __ jccb(Assembler::notEqual, fast);
10853     __ subptr(rsp, 8);
10854     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10855     __ fld_d(Address(rsp, 0));
10856     __ addptr(rsp, 8);
10857     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10858     __ bind(fast);
10859   %}
10860   ins_pipe( pipe_slow );
10861 %}
10862 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
10869 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10870   predicate(UseSSE==0);
10871   match(Set dst (ConvF2I src));
10872   effect( KILL tmp, KILL cr );
10873   format %{ "FLD    $src\t# Convert float to int \n\t"
10874             "FLDCW  trunc mode\n\t"
10875             "SUB    ESP,4\n\t"
10876             "FISTp  [ESP + #0]\n\t"
10877             "FLDCW  std/24-bit mode\n\t"
10878             "POP    EAX\n\t"
10879             "CMP    EAX,0x80000000\n\t"
10880             "JNE,s  fast\n\t"
10881             "FLD    $src\n\t"
10882             "CALL   d2i_wrapper\n"
10883       "fast:" %}
10884   // DPR2I_encoding works for FPR2I
10885   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10886   ins_pipe( pipe_slow );
10887 %}
10888 
10889 // Convert a float in xmm to an int reg.
10890 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10891   predicate(UseSSE>=1);
10892   match(Set dst (ConvF2I src));
10893   effect( KILL tmp, KILL cr );
10894   format %{ "CVTTSS2SI $dst, $src\n\t"
10895             "CMP    $dst,0x80000000\n\t"
10896             "JNE,s  fast\n\t"
10897             "SUB    ESP, 4\n\t"
10898             "MOVSS  [ESP], $src\n\t"
10899             "FLD    [ESP]\n\t"
10900             "ADD    ESP, 4\n\t"
10901             "CALL   d2i_wrapper\n"
10902       "fast:" %}
10903   ins_encode %{
10904     Label fast;
10905     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10906     __ cmpl($dst$$Register, 0x80000000);
10907     __ jccb(Assembler::notEqual, fast);
10908     __ subptr(rsp, 4);
10909     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10910     __ fld_s(Address(rsp, 0));
10911     __ addptr(rsp, 4);
10912     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10913     __ bind(fast);
10914   %}
10915   ins_pipe( pipe_slow );
10916 %}
10917 
10918 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10919   predicate(UseSSE==0);
10920   match(Set dst (ConvF2L src));
10921   effect( KILL cr );
10922   format %{ "FLD    $src\t# Convert float to long\n\t"
10923             "FLDCW  trunc mode\n\t"
10924             "SUB    ESP,8\n\t"
10925             "FISTp  [ESP + #0]\n\t"
10926             "FLDCW  std/24-bit mode\n\t"
10927             "POP    EAX\n\t"
10928             "POP    EDX\n\t"
10929             "CMP    EDX,0x80000000\n\t"
10930             "JNE,s  fast\n\t"
10931             "TEST   EAX,EAX\n\t"
10932             "JNE,s  fast\n\t"
10933             "FLD    $src\n\t"
10934             "CALL   d2l_wrapper\n"
10935       "fast:" %}
10936   // DPR2L_encoding works for FPR2L
10937   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10938   ins_pipe( pipe_slow );
10939 %}
10940 
10941 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10942 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10943   predicate (UseSSE>=1);
10944   match(Set dst (ConvF2L src));
10945   effect( KILL cr );
10946   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10947             "MOVSS  [ESP],$src\n\t"
10948             "FLD_S  [ESP]\n\t"
10949             "FLDCW  trunc mode\n\t"
10950             "FISTp  [ESP + #0]\n\t"
10951             "FLDCW  std/24-bit mode\n\t"
10952             "POP    EAX\n\t"
10953             "POP    EDX\n\t"
10954             "CMP    EDX,0x80000000\n\t"
10955             "JNE,s  fast\n\t"
10956             "TEST   EAX,EAX\n\t"
10957             "JNE,s  fast\n\t"
10958             "SUB    ESP,4\t# Convert float to long\n\t"
10959             "MOVSS  [ESP],$src\n\t"
10960             "FLD_S  [ESP]\n\t"
10961             "ADD    ESP,4\n\t"
10962             "CALL   d2l_wrapper\n"
10963       "fast:" %}
10964   ins_encode %{
10965     Label fast;
10966     __ subptr(rsp, 8);
10967     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10968     __ fld_s(Address(rsp, 0));
10969     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10970     __ fistp_d(Address(rsp, 0));
10971     // Restore the rounding mode, mask the exception
10972     if (Compile::current()->in_24_bit_fp_mode()) {
10973       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10974     } else {
10975       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10976     }
10977     // Load the converted long, adjust CPU stack
10978     __ pop(rax);
10979     __ pop(rdx);
10980     __ cmpl(rdx, 0x80000000);
10981     __ jccb(Assembler::notEqual, fast);
10982     __ testl(rax, rax);
10983     __ jccb(Assembler::notEqual, fast);
10984     __ subptr(rsp, 4);
10985     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10986     __ fld_s(Address(rsp, 0));
10987     __ addptr(rsp, 4);
10988     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10989     __ bind(fast);
10990   %}
10991   ins_pipe( pipe_slow );
10992 %}
10993 
10994 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10995   predicate( UseSSE<=1 );
10996   match(Set dst (ConvI2D src));
10997   format %{ "FILD   $src\n\t"
10998             "FSTP   $dst" %}
10999   opcode(0xDB, 0x0);  /* DB /0 */
11000   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11001   ins_pipe( fpu_reg_mem );
11002 %}
11003 
11004 instruct convI2D_reg(regD dst, rRegI src) %{
11005   predicate( UseSSE>=2 && !UseXmmI2D );
11006   match(Set dst (ConvI2D src));
11007   format %{ "CVTSI2SD $dst,$src" %}
11008   ins_encode %{
11009     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11010   %}
11011   ins_pipe( pipe_slow );
11012 %}
11013 
11014 instruct convI2D_mem(regD dst, memory mem) %{
11015   predicate( UseSSE>=2 );
11016   match(Set dst (ConvI2D (LoadI mem)));
11017   format %{ "CVTSI2SD $dst,$mem" %}
11018   ins_encode %{
11019     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11020   %}
11021   ins_pipe( pipe_slow );
11022 %}
11023 
11024 instruct convXI2D_reg(regD dst, rRegI src)
11025 %{
11026   predicate( UseSSE>=2 && UseXmmI2D );
11027   match(Set dst (ConvI2D src));
11028 
11029   format %{ "MOVD  $dst,$src\n\t"
11030             "CVTDQ2PD $dst,$dst\t# i2d" %}
11031   ins_encode %{
11032     __ movdl($dst$$XMMRegister, $src$$Register);
11033     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11034   %}
11035   ins_pipe(pipe_slow); // XXX
11036 %}
11037 
11038 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11039   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11040   match(Set dst (ConvI2D (LoadI mem)));
11041   format %{ "FILD   $mem\n\t"
11042             "FSTP   $dst" %}
11043   opcode(0xDB);      /* DB /0 */
11044   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11045               Pop_Reg_DPR(dst));
11046   ins_pipe( fpu_reg_mem );
11047 %}
11048 
11049 // Convert a byte to a float; no rounding step needed.
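// (A value masked to 0..255 is exactly representable in single precision, so the
// extra store that would force 24-bit rounding can be skipped.)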
11050 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11051   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11052   match(Set dst (ConvI2F src));
11053   format %{ "FILD   $src\n\t"
11054             "FSTP   $dst" %}
11055 
11056   opcode(0xDB, 0x0);  /* DB /0 */
11057   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11058   ins_pipe( fpu_reg_mem );
11059 %}
11060 
11061 // In 24-bit mode, force exponent rounding by storing back out
11062 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11063   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11064   match(Set dst (ConvI2F src));
11065   ins_cost(200);
11066   format %{ "FILD   $src\n\t"
11067             "FSTP_S $dst" %}
11068   opcode(0xDB, 0x0);  /* DB /0 */
11069   ins_encode( Push_Mem_I(src),
11070               Pop_Mem_FPR(dst));
11071   ins_pipe( fpu_mem_mem );
11072 %}
11073 
11074 // In 24-bit mode, force exponent rounding by storing back out
11075 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11076   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11077   match(Set dst (ConvI2F (LoadI mem)));
11078   ins_cost(200);
11079   format %{ "FILD   $mem\n\t"
11080             "FSTP_S $dst" %}
11081   opcode(0xDB);  /* DB /0 */
11082   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11083               Pop_Mem_FPR(dst));
11084   ins_pipe( fpu_mem_mem );
11085 %}
11086 
11087 // This instruction does not round to 24-bits
11088 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11089   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11090   match(Set dst (ConvI2F src));
11091   format %{ "FILD   $src\n\t"
11092             "FSTP   $dst" %}
11093   opcode(0xDB, 0x0);  /* DB /0 */
11094   ins_encode( Push_Mem_I(src),
11095               Pop_Reg_FPR(dst));
11096   ins_pipe( fpu_reg_mem );
11097 %}
11098 
11099 // This instruction does not round to 24-bits
11100 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11101   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11102   match(Set dst (ConvI2F (LoadI mem)));
11103   format %{ "FILD   $mem\n\t"
11104             "FSTP   $dst" %}
11105   opcode(0xDB);      /* DB /0 */
11106   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11107               Pop_Reg_FPR(dst));
11108   ins_pipe( fpu_reg_mem );
11109 %}
11110 
11111 // Convert an int to a float in xmm; no rounding step needed.
11112 instruct convI2F_reg(regF dst, rRegI src) %{
11113   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11114   match(Set dst (ConvI2F src));
11115   format %{ "CVTSI2SS $dst, $src" %}
11116   ins_encode %{
11117     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11118   %}
11119   ins_pipe( pipe_slow );
11120 %}
11121 
11122  instruct convXI2F_reg(regF dst, rRegI src)
11123 %{
11124   predicate( UseSSE>=2 && UseXmmI2F );
11125   match(Set dst (ConvI2F src));
11126 
11127   format %{ "MOVD  $dst,$src\n\t"
11128             "CVTDQ2PS $dst,$dst\t# i2f" %}
11129   ins_encode %{
11130     __ movdl($dst$$XMMRegister, $src$$Register);
11131     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11132   %}
11133   ins_pipe(pipe_slow); // XXX
11134 %}
11135 
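// Sign-extend int to long: copy the int into both halves of the register pair, then
// arithmetic-shift the high half right by 31 so it holds only copies of the sign bit.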
11136 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11137   match(Set dst (ConvI2L src));
11138   effect(KILL cr);
11139   ins_cost(375);
11140   format %{ "MOV    $dst.lo,$src\n\t"
11141             "MOV    $dst.hi,$src\n\t"
11142             "SAR    $dst.hi,31" %}
11143   ins_encode(convert_int_long(dst,src));
11144   ins_pipe( ialu_reg_reg_long );
11145 %}
11146 
11147 // Zero-extend convert int to long
11148 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11149   match(Set dst (AndL (ConvI2L src) mask) );
11150   effect( KILL flags );
11151   ins_cost(250);
11152   format %{ "MOV    $dst.lo,$src\n\t"
11153             "XOR    $dst.hi,$dst.hi" %}
11154   opcode(0x33); // XOR
11155   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11156   ins_pipe( ialu_reg_reg_long );
11157 %}
11158 
11159 // Zero-extend long
11160 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11161   match(Set dst (AndL src mask) );
11162   effect( KILL flags );
11163   ins_cost(250);
11164   format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
11166   opcode(0x33); // XOR
11167   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11168   ins_pipe( ialu_reg_reg_long );
11169 %}
11170 
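// Long-to-FP conversions: 32-bit SSE has no instruction that converts a full 64-bit
// integer, so even the UseSSE>=2 forms below push the long onto the stack, convert it
// with x87 FILD, and then move the rounded result into an XMM register.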
11171 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11172   predicate (UseSSE<=1);
11173   match(Set dst (ConvL2D src));
11174   effect( KILL cr );
11175   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11176             "PUSH   $src.lo\n\t"
11177             "FILD   ST,[ESP + #0]\n\t"
11178             "ADD    ESP,8\n\t"
11179             "FSTP_D $dst\t# D-round" %}
11180   opcode(0xDF, 0x5);  /* DF /5 */
11181   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11182   ins_pipe( pipe_slow );
11183 %}
11184 
11185 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11186   predicate (UseSSE>=2);
11187   match(Set dst (ConvL2D src));
11188   effect( KILL cr );
11189   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11190             "PUSH   $src.lo\n\t"
11191             "FILD_D [ESP]\n\t"
11192             "FSTP_D [ESP]\n\t"
11193             "MOVSD  $dst,[ESP]\n\t"
11194             "ADD    ESP,8" %}
11195   opcode(0xDF, 0x5);  /* DF /5 */
11196   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11197   ins_pipe( pipe_slow );
11198 %}
11199 
11200 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11201   predicate (UseSSE>=1);
11202   match(Set dst (ConvL2F src));
11203   effect( KILL cr );
11204   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11205             "PUSH   $src.lo\n\t"
11206             "FILD_D [ESP]\n\t"
11207             "FSTP_S [ESP]\n\t"
11208             "MOVSS  $dst,[ESP]\n\t"
11209             "ADD    ESP,8" %}
11210   opcode(0xDF, 0x5);  /* DF /5 */
11211   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11212   ins_pipe( pipe_slow );
11213 %}
11214 
11215 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11216   match(Set dst (ConvL2F src));
11217   effect( KILL cr );
11218   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11219             "PUSH   $src.lo\n\t"
11220             "FILD   ST,[ESP + #0]\n\t"
11221             "ADD    ESP,8\n\t"
11222             "FSTP_S $dst\t# F-round" %}
11223   opcode(0xDF, 0x5);  /* DF /5 */
11224   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11225   ins_pipe( pipe_slow );
11226 %}
11227 
11228 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11229   match(Set dst (ConvL2I src));
11230   effect( DEF dst, USE src );
11231   format %{ "MOV    $dst,$src.lo" %}
11232   ins_encode(enc_CopyL_Lo(dst,src));
11233   ins_pipe( ialu_reg_reg );
11234 %}
11235 
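// The MoveF2I/MoveI2F and MoveD2L/MoveL2D nodes below are raw bit-pattern moves (in
// the style of Float.floatToRawIntBits / Float.intBitsToFloat), not numeric
// conversions; the variants differ only in where the operands live (stack slot,
// integer register(s), x87 or XMM).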
11236 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11237   match(Set dst (MoveF2I src));
11238   effect( DEF dst, USE src );
11239   ins_cost(100);
11240   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11241   ins_encode %{
11242     __ movl($dst$$Register, Address(rsp, $src$$disp));
11243   %}
11244   ins_pipe( ialu_reg_mem );
11245 %}
11246 
11247 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11248   predicate(UseSSE==0);
11249   match(Set dst (MoveF2I src));
11250   effect( DEF dst, USE src );
11251 
11252   ins_cost(125);
11253   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11254   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11255   ins_pipe( fpu_mem_reg );
11256 %}
11257 
11258 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11259   predicate(UseSSE>=1);
11260   match(Set dst (MoveF2I src));
11261   effect( DEF dst, USE src );
11262 
11263   ins_cost(95);
11264   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11265   ins_encode %{
11266     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11267   %}
11268   ins_pipe( pipe_slow );
11269 %}
11270 
11271 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11272   predicate(UseSSE>=2);
11273   match(Set dst (MoveF2I src));
11274   effect( DEF dst, USE src );
11275   ins_cost(85);
11276   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11277   ins_encode %{
11278     __ movdl($dst$$Register, $src$$XMMRegister);
11279   %}
11280   ins_pipe( pipe_slow );
11281 %}
11282 
11283 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11284   match(Set dst (MoveI2F src));
11285   effect( DEF dst, USE src );
11286 
11287   ins_cost(100);
11288   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11289   ins_encode %{
11290     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11291   %}
11292   ins_pipe( ialu_mem_reg );
11293 %}
11294 
11295 
11296 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11297   predicate(UseSSE==0);
11298   match(Set dst (MoveI2F src));
11299   effect(DEF dst, USE src);
11300 
11301   ins_cost(125);
11302   format %{ "FLD_S  $src\n\t"
11303             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11304   opcode(0xD9);               /* D9 /0, FLD m32real */
11305   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11306               Pop_Reg_FPR(dst) );
11307   ins_pipe( fpu_reg_mem );
11308 %}
11309 
11310 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11311   predicate(UseSSE>=1);
11312   match(Set dst (MoveI2F src));
11313   effect( DEF dst, USE src );
11314 
11315   ins_cost(95);
11316   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11317   ins_encode %{
11318     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11319   %}
11320   ins_pipe( pipe_slow );
11321 %}
11322 
11323 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11324   predicate(UseSSE>=2);
11325   match(Set dst (MoveI2F src));
11326   effect( DEF dst, USE src );
11327 
11328   ins_cost(85);
11329   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11330   ins_encode %{
11331     __ movdl($dst$$XMMRegister, $src$$Register);
11332   %}
11333   ins_pipe( pipe_slow );
11334 %}
11335 
11336 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11337   match(Set dst (MoveD2L src));
11338   effect(DEF dst, USE src);
11339 
11340   ins_cost(250);
11341   format %{ "MOV    $dst.lo,$src\n\t"
11342             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11343   opcode(0x8B, 0x8B);
11344   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11345   ins_pipe( ialu_mem_long_reg );
11346 %}
11347 
11348 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11349   predicate(UseSSE<=1);
11350   match(Set dst (MoveD2L src));
11351   effect(DEF dst, USE src);
11352 
11353   ins_cost(125);
11354   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11355   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11356   ins_pipe( fpu_mem_reg );
11357 %}
11358 
11359 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11360   predicate(UseSSE>=2);
11361   match(Set dst (MoveD2L src));
11362   effect(DEF dst, USE src);
11363   ins_cost(95);
11364   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11365   ins_encode %{
11366     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11367   %}
11368   ins_pipe( pipe_slow );
11369 %}
11370 
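// PSHUFLW with immediate 0x4E swaps the two 32-bit halves of the low quadword, so the
// second MOVD below picks up the upper half of the double.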
11371 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11372   predicate(UseSSE>=2);
11373   match(Set dst (MoveD2L src));
11374   effect(DEF dst, USE src, TEMP tmp);
11375   ins_cost(85);
11376   format %{ "MOVD   $dst.lo,$src\n\t"
11377             "PSHUFLW $tmp,$src,0x4E\n\t"
11378             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11379   ins_encode %{
11380     __ movdl($dst$$Register, $src$$XMMRegister);
11381     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11382     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11383   %}
11384   ins_pipe( pipe_slow );
11385 %}
11386 
11387 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11388   match(Set dst (MoveL2D src));
11389   effect(DEF dst, USE src);
11390 
11391   ins_cost(200);
11392   format %{ "MOV    $dst,$src.lo\n\t"
11393             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11394   opcode(0x89, 0x89);
11395   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11396   ins_pipe( ialu_mem_long_reg );
11397 %}
11398 
11399 
11400 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11401   predicate(UseSSE<=1);
11402   match(Set dst (MoveL2D src));
11403   effect(DEF dst, USE src);
11404   ins_cost(125);
11405 
11406   format %{ "FLD_D  $src\n\t"
11407             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11408   opcode(0xDD);               /* DD /0, FLD m64real */
11409   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11410               Pop_Reg_DPR(dst) );
11411   ins_pipe( fpu_reg_mem );
11412 %}
11413 
11414 
11415 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11416   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11417   match(Set dst (MoveL2D src));
11418   effect(DEF dst, USE src);
11419 
11420   ins_cost(95);
11421   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11422   ins_encode %{
11423     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
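// Note: both MoveL2D stack-load forms (above and below) call movdbl(); the macro
// assembler is expected to emit MOVSD or MOVLPD according to UseXmmLoadAndClearUpper,
// matching these two predicates.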
11428 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11429   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11430   match(Set dst (MoveL2D src));
11431   effect(DEF dst, USE src);
11432 
11433   ins_cost(95);
11434   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11435   ins_encode %{
11436     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11437   %}
11438   ins_pipe( pipe_slow );
11439 %}
11440 
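// The two halves of the long are moved into separate XMM registers and PUNPCKLDQ
// interleaves their low dwords, reassembling the 64-bit bit pattern in $dst.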
11441 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11442   predicate(UseSSE>=2);
11443   match(Set dst (MoveL2D src));
11444   effect(TEMP dst, USE src, TEMP tmp);
11445   ins_cost(85);
11446   format %{ "MOVD   $dst,$src.lo\n\t"
11447             "MOVD   $tmp,$src.hi\n\t"
11448             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11449   ins_encode %{
11450     __ movdl($dst$$XMMRegister, $src$$Register);
11451     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11452     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11453   %}
11454   ins_pipe( pipe_slow );
11455 %}
11456 
11457 
11458 // =======================================================================
11459 // fast clearing of an array
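// Short arrays (below the InitArrayShortSize threshold) are cleared with the inline
// dword-store loop shown in the format below; longer ones (and the is_large form that
// follows) use REP STOSB when UseFastStosb is enabled, otherwise plain REP STOS.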
11460 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11461   predicate(!((ClearArrayNode*)n)->is_large());
11462   match(Set dummy (ClearArray cnt base));
11463   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11464 
11465   format %{ $$template
11466     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,ECX\n\t"
11468     $$emit$$"JG     LARGE\n\t"
11469     $$emit$$"SHL    ECX, 1\n\t"
11470     $$emit$$"DEC    ECX\n\t"
11471     $$emit$$"JS     DONE\t# Zero length\n\t"
11472     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11473     $$emit$$"DEC    ECX\n\t"
11474     $$emit$$"JGE    LOOP\n\t"
11475     $$emit$$"JMP    DONE\n\t"
11476     $$emit$$"# LARGE:\n\t"
11477     if (UseFastStosb) {
11478        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11479        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11480     } else {
11481        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11482        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11483     }
11484     $$emit$$"# DONE"
11485   %}
11486   ins_encode %{
11487     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11488   %}
11489   ins_pipe( pipe_slow );
11490 %}
11491 
11492 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11493   predicate(((ClearArrayNode*)n)->is_large());
11494   match(Set dummy (ClearArray cnt base));
11495   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11496   format %{ $$template
11497     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11498     if (UseFastStosb) {
11499        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11500        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11501     } else {
11502        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11503        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11504     }
11505     $$emit$$"# DONE"
11506   %}
11507   ins_encode %{
11508     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11509   %}
11510   ins_pipe( pipe_slow );
11511 %}
11512 
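// StrIntrinsicNode encodings used below: LL = both strings Latin-1 byte[], UU = both
// UTF-16 char[], LU and UL = mixed encodings.  Note that the UL flavor passes its
// operands to the string_compare stub in swapped order (see its encode block).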
11513 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11514                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11515   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11516   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11517   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11518 
11519   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11520   ins_encode %{
11521     __ string_compare($str1$$Register, $str2$$Register,
11522                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11523                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11524   %}
11525   ins_pipe( pipe_slow );
11526 %}
11527 
11528 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11529                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11530   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11531   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11532   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11533 
11534   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11535   ins_encode %{
11536     __ string_compare($str1$$Register, $str2$$Register,
11537                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11538                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11539   %}
11540   ins_pipe( pipe_slow );
11541 %}
11542 
11543 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11544                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11545   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11546   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11547   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11548 
11549   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11550   ins_encode %{
11551     __ string_compare($str1$$Register, $str2$$Register,
11552                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11553                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11554   %}
11555   ins_pipe( pipe_slow );
11556 %}
11557 
11558 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11559                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11560   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11561   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11562   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11563 
11564   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11565   ins_encode %{
11566     __ string_compare($str2$$Register, $str1$$Register,
11567                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11568                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11569   %}
11570   ins_pipe( pipe_slow );
11571 %}
11572 
11573 // fast string equals
11574 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11575                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11576   match(Set result (StrEquals (Binary str1 str2) cnt));
11577   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11578 
11579   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11580   ins_encode %{
11581     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11582                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11583                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11584   %}
11585 
11586   ins_pipe( pipe_slow );
11587 %}
11588 
11589 // fast search of substring with known size.
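// string_indexofC8 is the specialization for constant needles of at least one full
// 16-byte vector (16 Latin-1 bytes or 8 UTF-16 chars); shorter constant needles use
// the general string_indexof routine, which may stage data through the stack when it
// crosses a page boundary.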
11590 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11591                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11592   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11593   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11594   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11595 
11596   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11597   ins_encode %{
11598     int icnt2 = (int)$int_cnt2$$constant;
11599     if (icnt2 >= 16) {
11600       // IndexOf for constant substrings with size >= 16 elements
      // which do not need to be loaded through the stack.
11602       __ string_indexofC8($str1$$Register, $str2$$Register,
11603                           $cnt1$$Register, $cnt2$$Register,
11604                           icnt2, $result$$Register,
11605                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11606     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11608       __ string_indexof($str1$$Register, $str2$$Register,
11609                         $cnt1$$Register, $cnt2$$Register,
11610                         icnt2, $result$$Register,
11611                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11612     }
11613   %}
11614   ins_pipe( pipe_slow );
11615 %}
11616 
11617 // fast search of substring with known size.
11618 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11619                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11620   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11621   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11622   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11623 
11624   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11625   ins_encode %{
11626     int icnt2 = (int)$int_cnt2$$constant;
11627     if (icnt2 >= 8) {
11628       // IndexOf for constant substrings with size >= 8 elements
      // which do not need to be loaded through the stack.
11630       __ string_indexofC8($str1$$Register, $str2$$Register,
11631                           $cnt1$$Register, $cnt2$$Register,
11632                           icnt2, $result$$Register,
11633                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11634     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11636       __ string_indexof($str1$$Register, $str2$$Register,
11637                         $cnt1$$Register, $cnt2$$Register,
11638                         icnt2, $result$$Register,
11639                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11640     }
11641   %}
11642   ins_pipe( pipe_slow );
11643 %}
11644 
11645 // fast search of substring with known size.
11646 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11647                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11648   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11649   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11650   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11651 
11652   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11653   ins_encode %{
11654     int icnt2 = (int)$int_cnt2$$constant;
11655     if (icnt2 >= 8) {
11656       // IndexOf for constant substrings with size >= 8 elements
      // which do not need to be loaded through the stack.
11658       __ string_indexofC8($str1$$Register, $str2$$Register,
11659                           $cnt1$$Register, $cnt2$$Register,
11660                           icnt2, $result$$Register,
11661                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11662     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11664       __ string_indexof($str1$$Register, $str2$$Register,
11665                         $cnt1$$Register, $cnt2$$Register,
11666                         icnt2, $result$$Register,
11667                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11668     }
11669   %}
11670   ins_pipe( pipe_slow );
11671 %}
11672 
11673 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11674                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11675   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11676   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11677   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11678 
11679   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11680   ins_encode %{
11681     __ string_indexof($str1$$Register, $str2$$Register,
11682                       $cnt1$$Register, $cnt2$$Register,
11683                       (-1), $result$$Register,
11684                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11685   %}
11686   ins_pipe( pipe_slow );
11687 %}
11688 
11689 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11690                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11691   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11692   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11693   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11694 
11695   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11696   ins_encode %{
11697     __ string_indexof($str1$$Register, $str2$$Register,
11698                       $cnt1$$Register, $cnt2$$Register,
11699                       (-1), $result$$Register,
11700                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11701   %}
11702   ins_pipe( pipe_slow );
11703 %}
11704 
11705 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11706                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11707   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11708   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11709   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11710 
11711   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11712   ins_encode %{
11713     __ string_indexof($str1$$Register, $str2$$Register,
11714                       $cnt1$$Register, $cnt2$$Register,
11715                       (-1), $result$$Register,
11716                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11717   %}
11718   ins_pipe( pipe_slow );
11719 %}
11720 
11721 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11722                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11723   predicate(UseSSE42Intrinsics);
11724   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11725   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11726   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11727   ins_encode %{
11728     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11729                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11730   %}
11731   ins_pipe( pipe_slow );
11732 %}
11733 
11734 // fast array equals
11735 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11736                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11737 %{
11738   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11739   match(Set result (AryEq ary1 ary2));
11740   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11741   //ins_cost(300);
11742 
11743   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11744   ins_encode %{
11745     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11746                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11747                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11748   %}
11749   ins_pipe( pipe_slow );
11750 %}
11751 
11752 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11753                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11754 %{
11755   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11756   match(Set result (AryEq ary1 ary2));
11757   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11758   //ins_cost(300);
11759 
11760   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11761   ins_encode %{
11762     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11763                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11764                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11765   %}
11766   ins_pipe( pipe_slow );
11767 %}
11768 
11769 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11770                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11771 %{
11772   match(Set result (HasNegatives ary1 len));
11773   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11774 
11775   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11776   ins_encode %{
11777     __ has_negatives($ary1$$Register, $len$$Register,
11778                      $result$$Register, $tmp3$$Register,
11779                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11780   %}
11781   ins_pipe( pipe_slow );
11782 %}
11783 
11784 // fast char[] to byte[] compression
11785 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11786                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11787   match(Set result (StrCompressedCopy src (Binary dst len)));
11788   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11789 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11791   ins_encode %{
11792     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11793                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11794                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11795   %}
11796   ins_pipe( pipe_slow );
11797 %}
11798 
11799 // fast byte[] to char[] inflation
11800 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11801                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11802   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11803   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11804 
11805   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11806   ins_encode %{
11807     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11808                           $tmp1$$XMMRegister, $tmp2$$Register);
11809   %}
11810   ins_pipe( pipe_slow );
11811 %}
11812 
11813 // encode char[] to byte[] in ISO_8859_1
11814 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11815                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11816                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11817   match(Set result (EncodeISOArray src (Binary dst len)));
11818   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11819 
11820   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11821   ins_encode %{
11822     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11823                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11824                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11825   %}
11826   ins_pipe( pipe_slow );
11827 %}
11828 
11829 
11830 //----------Control Flow Instructions------------------------------------------
11831 // Signed compare Instructions
11832 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11833   match(Set cr (CmpI op1 op2));
11834   effect( DEF cr, USE op1, USE op2 );
11835   format %{ "CMP    $op1,$op2" %}
11836   opcode(0x3B);  /* Opcode 3B /r */
11837   ins_encode( OpcP, RegReg( op1, op2) );
11838   ins_pipe( ialu_cr_reg_reg );
11839 %}
11840 
11841 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11842   match(Set cr (CmpI op1 op2));
11843   effect( DEF cr, USE op1 );
11844   format %{ "CMP    $op1,$op2" %}
11845   opcode(0x81,0x07);  /* Opcode 81 /7 */
11846   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11847   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11848   ins_pipe( ialu_cr_reg_imm );
11849 %}
11850 
11851 // Cisc-spilled version of cmpI_eReg
11852 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11853   match(Set cr (CmpI op1 (LoadI op2)));
11854 
11855   format %{ "CMP    $op1,$op2" %}
11856   ins_cost(500);
11857   opcode(0x3B);  /* Opcode 3B /r */
11858   ins_encode( OpcP, RegMem( op1, op2) );
11859   ins_pipe( ialu_cr_reg_mem );
11860 %}
11861 
11862 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11863   match(Set cr (CmpI src zero));
11864   effect( DEF cr, USE src );
11865 
11866   format %{ "TEST   $src,$src" %}
11867   opcode(0x85);
11868   ins_encode( OpcP, RegReg( src, src ) );
11869   ins_pipe( ialu_cr_reg_imm );
11870 %}
11871 
11872 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11873   match(Set cr (CmpI (AndI src con) zero));
11874 
11875   format %{ "TEST   $src,$con" %}
11876   opcode(0xF7,0x00);
11877   ins_encode( OpcP, RegOpc(src), Con32(con) );
11878   ins_pipe( ialu_cr_reg_imm );
11879 %}
11880 
11881 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11882   match(Set cr (CmpI (AndI src mem) zero));
11883 
11884   format %{ "TEST   $src,$mem" %}
11885   opcode(0x85);
11886   ins_encode( OpcP, RegMem( src, mem ) );
11887   ins_pipe( ialu_cr_reg_mem );
11888 %}
11889 
// Unsigned compare Instructions; really the same as the signed ones except that
// they produce an eFlagsRegU instead of an eFlagsReg.
11892 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11893   match(Set cr (CmpU op1 op2));
11894 
11895   format %{ "CMPu   $op1,$op2" %}
11896   opcode(0x3B);  /* Opcode 3B /r */
11897   ins_encode( OpcP, RegReg( op1, op2) );
11898   ins_pipe( ialu_cr_reg_reg );
11899 %}
11900 
11901 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11902   match(Set cr (CmpU op1 op2));
11903 
11904   format %{ "CMPu   $op1,$op2" %}
11905   opcode(0x81,0x07);  /* Opcode 81 /7 */
11906   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11907   ins_pipe( ialu_cr_reg_imm );
11908 %}
11909 
// Cisc-spilled version of compU_eReg
11911 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11912   match(Set cr (CmpU op1 (LoadI op2)));
11913 
11914   format %{ "CMPu   $op1,$op2" %}
11915   ins_cost(500);
11916   opcode(0x3B);  /* Opcode 3B /r */
11917   ins_encode( OpcP, RegMem( op1, op2) );
11918   ins_pipe( ialu_cr_reg_mem );
11919 %}
11920 
11921 // // Cisc-spilled version of cmpU_eReg
11922 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11923 //  match(Set cr (CmpU (LoadI op1) op2));
11924 //
11925 //  format %{ "CMPu   $op1,$op2" %}
11926 //  ins_cost(500);
11927 //  opcode(0x39);  /* Opcode 39 /r */
11928 //  ins_encode( OpcP, RegMem( op1, op2) );
11929 //%}
11930 
11931 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11932   match(Set cr (CmpU src zero));
11933 
11934   format %{ "TESTu  $src,$src" %}
11935   opcode(0x85);
11936   ins_encode( OpcP, RegReg( src, src ) );
11937   ins_pipe( ialu_cr_reg_imm );
11938 %}
11939 
11940 // Unsigned pointer compare Instructions
11941 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11942   match(Set cr (CmpP op1 op2));
11943 
11944   format %{ "CMPu   $op1,$op2" %}
11945   opcode(0x3B);  /* Opcode 3B /r */
11946   ins_encode( OpcP, RegReg( op1, op2) );
11947   ins_pipe( ialu_cr_reg_reg );
11948 %}
11949 
11950 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11951   match(Set cr (CmpP op1 op2));
11952 
11953   format %{ "CMPu   $op1,$op2" %}
11954   opcode(0x81,0x07);  /* Opcode 81 /7 */
11955   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11956   ins_pipe( ialu_cr_reg_imm );
11957 %}
11958 
// Cisc-spilled version of compP_eReg
11960 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11961   match(Set cr (CmpP op1 (LoadP op2)));
11962 
11963   format %{ "CMPu   $op1,$op2" %}
11964   ins_cost(500);
11965   opcode(0x3B);  /* Opcode 3B /r */
11966   ins_encode( OpcP, RegMem( op1, op2) );
11967   ins_pipe( ialu_cr_reg_mem );
11968 %}
11969 
11970 // // Cisc-spilled version of cmpP_eReg
11971 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11972 //  match(Set cr (CmpP (LoadP op1) op2));
11973 //
11974 //  format %{ "CMPu   $op1,$op2" %}
11975 //  ins_cost(500);
11976 //  opcode(0x39);  /* Opcode 39 /r */
11977 //  ins_encode( OpcP, RegMem( op1, op2) );
11978 //%}
11979 
11980 // Compare raw pointer (used in out-of-heap check).
11981 // Only works because non-oop pointers must be raw pointers
11982 // and raw pointers have no anti-dependencies.
11983 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11984   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11985   match(Set cr (CmpP op1 (LoadP op2)));
11986 
11987   format %{ "CMPu   $op1,$op2" %}
11988   opcode(0x3B);  /* Opcode 3B /r */
11989   ins_encode( OpcP, RegMem( op1, op2) );
11990   ins_pipe( ialu_cr_reg_mem );
11991 %}
11992 
11993 //
11994 // This will generate a signed flags result. This should be ok
11995 // since any compare to a zero should be eq/neq.
11996 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11997   match(Set cr (CmpP src zero));
11998 
11999   format %{ "TEST   $src,$src" %}
12000   opcode(0x85);
12001   ins_encode( OpcP, RegReg( src, src ) );
12002   ins_pipe( ialu_cr_reg_imm );
12003 %}
12004 
12005 // Cisc-spilled version of testP_reg
12006 // This will generate a signed flags result. This should be ok
12007 // since any compare to a zero should be eq/neq.
12008 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12009   match(Set cr (CmpP (LoadP op) zero));
12010 
12011   format %{ "TEST   $op,0xFFFFFFFF" %}
12012   ins_cost(500);
12013   opcode(0xF7);               /* Opcode F7 /0 */
12014   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12015   ins_pipe( ialu_cr_reg_imm );
12016 %}
12017 
12018 // Yanked all unsigned pointer compare operations.
12019 // Pointer compares are done with CmpP which is already unsigned.
12020 
12021 //----------Max and Min--------------------------------------------------------
12022 // Min Instructions
12023 ////
12024 //   *** Min and Max using the conditional move are slower than the
12025 //   *** branch version on a Pentium III.
12026 // // Conditional move for min
12027 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12028 //  effect( USE_DEF op2, USE op1, USE cr );
12029 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12030 //  opcode(0x4C,0x0F);
12031 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12032 //  ins_pipe( pipe_cmov_reg );
12033 //%}
12034 //
12035 //// Min Register with Register (P6 version)
12036 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12037 //  predicate(VM_Version::supports_cmov() );
12038 //  match(Set op2 (MinI op1 op2));
12039 //  ins_cost(200);
12040 //  expand %{
12041 //    eFlagsReg cr;
12042 //    compI_eReg(cr,op1,op2);
12043 //    cmovI_reg_lt(op2,op1,cr);
12044 //  %}
12045 //%}
12046 
12047 // Min Register with Register (generic version)
12048 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12049   match(Set dst (MinI dst src));
12050   effect(KILL flags);
12051   ins_cost(300);
12052 
12053   format %{ "MIN    $dst,$src" %}
12054   opcode(0xCC);
12055   ins_encode( min_enc(dst,src) );
12056   ins_pipe( pipe_slow );
12057 %}
12058 
12059 // Max Register with Register
12060 //   *** Min and Max using the conditional move are slower than the
12061 //   *** branch version on a Pentium III.
12062 // // Conditional move for max
12063 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12064 //  effect( USE_DEF op2, USE op1, USE cr );
12065 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12066 //  opcode(0x4F,0x0F);
12067 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12068 //  ins_pipe( pipe_cmov_reg );
12069 //%}
12070 //
12071 // // Max Register with Register (P6 version)
12072 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12073 //  predicate(VM_Version::supports_cmov() );
12074 //  match(Set op2 (MaxI op1 op2));
12075 //  ins_cost(200);
12076 //  expand %{
12077 //    eFlagsReg cr;
12078 //    compI_eReg(cr,op1,op2);
12079 //    cmovI_reg_gt(op2,op1,cr);
12080 //  %}
12081 //%}
12082 
12083 // Max Register with Register (generic version)
12084 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12085   match(Set dst (MaxI dst src));
12086   effect(KILL flags);
12087   ins_cost(300);
12088 
12089   format %{ "MAX    $dst,$src" %}
12090   opcode(0xCC);
12091   ins_encode( max_enc(dst,src) );
12092   ins_pipe( pipe_slow );
12093 %}
12094 
12095 // ============================================================================
// Counted Loop limit node, which represents the exact final iterator value.
// Note: the resulting value must fit into the integer range, since counted
// loops have a limit check for overflow.
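// Example: init = 0, limit = 10, stride = 3 gives 0 + 3 * ((10 - 0 + 3 - 1) / 3) =
// 3 * 4 = 12, the first iterator value at or beyond the original limit.  The subtract
// and the rounding add are carried out in 64 bits (limit in EAX:EDX, init in
// $init:$tmp) so the intermediate value cannot overflow the int range.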
12099 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12100   match(Set limit (LoopLimit (Binary init limit) stride));
12101   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12102   ins_cost(300);
12103 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12105   ins_encode %{
12106     int strd = (int)$stride$$constant;
12107     assert(strd != 1 && strd != -1, "sanity");
12108     int m1 = (strd > 0) ? 1 : -1;
12109     // Convert limit to long (EAX:EDX)
12110     __ cdql();
12111     // Convert init to long (init:tmp)
12112     __ movl($tmp$$Register, $init$$Register);
12113     __ sarl($tmp$$Register, 31);
12114     // $limit - $init
12115     __ subl($limit$$Register, $init$$Register);
12116     __ sbbl($limit_hi$$Register, $tmp$$Register);
12117     // + ($stride - 1)
12118     if (strd > 0) {
12119       __ addl($limit$$Register, (strd - 1));
12120       __ adcl($limit_hi$$Register, 0);
12121       __ movl($tmp$$Register, strd);
12122     } else {
12123       __ addl($limit$$Register, (strd + 1));
12124       __ adcl($limit_hi$$Register, -1);
12125       __ lneg($limit_hi$$Register, $limit$$Register);
12126       __ movl($tmp$$Register, -strd);
12127     }
    // signed division: (EAX:EDX) / pos_stride
12129     __ idivl($tmp$$Register);
12130     if (strd < 0) {
12131       // restore sign
12132       __ negl($tmp$$Register);
12133     }
12134     // (EAX) * stride
12135     __ mull($tmp$$Register);
12136     // + init (ignore upper bits)
12137     __ addl($limit$$Register, $init$$Register);
12138   %}
12139   ins_pipe( pipe_slow );
12140 %}
12141 
12142 // ============================================================================
12143 // Branch Instructions
12144 // Jump Table
12145 instruct jumpXtnd(rRegI switch_val) %{
12146   match(Jump switch_val);
12147   ins_cost(350);
12148   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12149   ins_encode %{
12150     // Jump to Address(table_base + switch_reg)
12151     Address index(noreg, $switch_val$$Register, Address::times_1);
12152     __ jump(ArrayAddress($constantaddress, index));
12153   %}
12154   ins_pipe(pipe_jmp);
12155 %}
12156 
12157 // Jump Direct - Label defines a relative address from JMP+1
12158 instruct jmpDir(label labl) %{
12159   match(Goto);
12160   effect(USE labl);
12161 
12162   ins_cost(300);
12163   format %{ "JMP    $labl" %}
12164   size(5);
12165   ins_encode %{
12166     Label* L = $labl$$label;
12167     __ jmp(*L, false); // Always long jump
12168   %}
12169   ins_pipe( pipe_jmp );
12170 %}
12171 
12172 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12173 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12174   match(If cop cr);
12175   effect(USE labl);
12176 
12177   ins_cost(300);
12178   format %{ "J$cop    $labl" %}
12179   size(6);
12180   ins_encode %{
12181     Label* L = $labl$$label;
12182     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12183   %}
12184   ins_pipe( pipe_jcc );
12185 %}
12186 
12187 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12188 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12189   predicate(!n->has_vector_mask_set());
12190   match(CountedLoopEnd cop cr);
12191   effect(USE labl);
12192 
12193   ins_cost(300);
12194   format %{ "J$cop    $labl\t# Loop end" %}
12195   size(6);
12196   ins_encode %{
12197     Label* L = $labl$$label;
12198     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12199   %}
12200   ins_pipe( pipe_jcc );
12201 %}
12202 
12203 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12204 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12205   predicate(!n->has_vector_mask_set());
12206   match(CountedLoopEnd cop cmp);
12207   effect(USE labl);
12208 
12209   ins_cost(300);
12210   format %{ "J$cop,u  $labl\t# Loop end" %}
12211   size(6);
12212   ins_encode %{
12213     Label* L = $labl$$label;
12214     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12215   %}
12216   ins_pipe( pipe_jcc );
12217 %}
12218 
12219 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12220   predicate(!n->has_vector_mask_set());
12221   match(CountedLoopEnd cop cmp);
12222   effect(USE labl);
12223 
12224   ins_cost(200);
12225   format %{ "J$cop,u  $labl\t# Loop end" %}
12226   size(6);
12227   ins_encode %{
12228     Label* L = $labl$$label;
12229     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12230   %}
12231   ins_pipe( pipe_jcc );
12232 %}
12233 
12234 // mask version
12235 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12236 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12237   predicate(n->has_vector_mask_set());
12238   match(CountedLoopEnd cop cr);
12239   effect(USE labl);
12240 
12241   ins_cost(400);
12242   format %{ "J$cop    $labl\t# Loop end\n\t"
12243             "restorevectmask \t# vector mask restore for loops" %}
12244   size(10);
12245   ins_encode %{
12246     Label* L = $labl$$label;
12247     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12248     __ restorevectmask();
12249   %}
12250   ins_pipe( pipe_jcc );
12251 %}
12252 
12253 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12254 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12255   predicate(n->has_vector_mask_set());
12256   match(CountedLoopEnd cop cmp);
12257   effect(USE labl);
12258 
12259   ins_cost(400);
12260   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12261             "restorevectmask \t# vector mask restore for loops" %}
12262   size(10);
12263   ins_encode %{
12264     Label* L = $labl$$label;
12265     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12266     __ restorevectmask();
12267   %}
12268   ins_pipe( pipe_jcc );
12269 %}
12270 
12271 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12272   predicate(n->has_vector_mask_set());
12273   match(CountedLoopEnd cop cmp);
12274   effect(USE labl);
12275 
12276   ins_cost(300);
12277   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12278             "restorevectmask \t# vector mask restore for loops" %}
12279   size(10);
12280   ins_encode %{
12281     Label* L = $labl$$label;
12282     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12283     __ restorevectmask();
12284   %}
12285   ins_pipe( pipe_jcc );
12286 %}
12287 
12288 // Jump Direct Conditional - using unsigned comparison
12289 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12290   match(If cop cmp);
12291   effect(USE labl);
12292 
12293   ins_cost(300);
12294   format %{ "J$cop,u  $labl" %}
12295   size(6);
12296   ins_encode %{
12297     Label* L = $labl$$label;
12298     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12299   %}
12300   ins_pipe(pipe_jcc);
12301 %}
12302 
12303 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12304   match(If cop cmp);
12305   effect(USE labl);
12306 
12307   ins_cost(200);
12308   format %{ "J$cop,u  $labl" %}
12309   size(6);
12310   ins_encode %{
12311     Label* L = $labl$$label;
12312     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12313   %}
12314   ins_pipe(pipe_jcc);
12315 %}
12316 
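// cmpOpUCF2 handles the unordered case of an FP compare, where a NaN operand sets the
// parity flag: for a != test the parity jump also goes to the target (NaN compares
// not-equal), while for == a parity jump skips the branch so NaN never compares equal.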
12317 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12318   match(If cop cmp);
12319   effect(USE labl);
12320 
12321   ins_cost(200);
12322   format %{ $$template
12323     if ($cop$$cmpcode == Assembler::notEqual) {
12324       $$emit$$"JP,u   $labl\n\t"
12325       $$emit$$"J$cop,u   $labl"
12326     } else {
12327       $$emit$$"JP,u   done\n\t"
12328       $$emit$$"J$cop,u   $labl\n\t"
12329       $$emit$$"done:"
12330     }
12331   %}
12332   ins_encode %{
12333     Label* l = $labl$$label;
12334     if ($cop$$cmpcode == Assembler::notEqual) {
12335       __ jcc(Assembler::parity, *l, false);
12336       __ jcc(Assembler::notEqual, *l, false);
12337     } else if ($cop$$cmpcode == Assembler::equal) {
12338       Label done;
12339       __ jccb(Assembler::parity, done);
12340       __ jcc(Assembler::equal, *l, false);
12341       __ bind(done);
12342     } else {
12343        ShouldNotReachHere();
12344     }
12345   %}
12346   ins_pipe(pipe_jcc);
12347 %}
12348 
12349 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary-superklass
// array for an instance of the superklass.  On a hit, set a hidden internal cache
// (the cache is checked with exposed code in gen_subtype_check()).  Return NZ for
// a miss or zero for a hit.  The encoding ALSO sets flags.
12354 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12355   match(Set result (PartialSubtypeCheck sub super));
12356   effect( KILL rcx, KILL cr );
12357 
12358   ins_cost(1100);  // slightly larger than the next version
12359   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12360             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12361             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12362             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12363             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12364             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12366      "miss:\t" %}
12367 
12368   opcode(0x1); // Force a XOR of EDI
12369   ins_encode( enc_PartialSubtypeCheck() );
12370   ins_pipe( pipe_slow );
12371 %}
12372 
12373 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12374   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12375   effect( KILL rcx, KILL result );
12376 
12377   ins_cost(1000);
12378   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12379             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12380             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12381             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12382             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12383             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12384      "miss:\t" %}
12385 
12386   opcode(0x0);  // No need to XOR EDI
12387   ins_encode( enc_PartialSubtypeCheck() );
12388   ins_pipe( pipe_slow );
12389 %}
12390 
12391 // ============================================================================
12392 // Branch Instructions -- short offset versions
12393 //
// These instructions replace jumps with a long offset (the default match) by
// jumps with a shorter offset.  They are all tagged with the ins_short_branch
// attribute, which causes the ADLC to suppress the match rules in general
// matching.  Instead, the ADLC generates a conversion method in the MachNode
// which can be used to do in-place replacement of the long variant with the
// shorter variant.  The compiler decides whether the short form can be used via
// the is_short_branch_offset() predicate in the machine-specific code section of
// this file.
12402 
12403 // Jump Direct - Label defines a relative address from JMP+1
12404 instruct jmpDir_short(label labl) %{
12405   match(Goto);
12406   effect(USE labl);
12407 
12408   ins_cost(300);
12409   format %{ "JMP,s  $labl" %}
12410   size(2);
12411   ins_encode %{
12412     Label* L = $labl$$label;
12413     __ jmpb(*L);
12414   %}
12415   ins_pipe( pipe_jmp );
12416   ins_short_branch(1);
12417 %}
12418 
12419 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12420 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12421   match(If cop cr);
12422   effect(USE labl);
12423 
12424   ins_cost(300);
12425   format %{ "J$cop,s  $labl" %}
12426   size(2);
12427   ins_encode %{
12428     Label* L = $labl$$label;
12429     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12430   %}
12431   ins_pipe( pipe_jcc );
12432   ins_short_branch(1);
12433 %}
12434 
12435 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12436 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12437   match(CountedLoopEnd cop cr);
12438   effect(USE labl);
12439 
12440   ins_cost(300);
12441   format %{ "J$cop,s  $labl\t# Loop end" %}
12442   size(2);
12443   ins_encode %{
12444     Label* L = $labl$$label;
12445     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12446   %}
12447   ins_pipe( pipe_jcc );
12448   ins_short_branch(1);
12449 %}
12450 
12451 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12452 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12453   match(CountedLoopEnd cop cmp);
12454   effect(USE labl);
12455 
12456   ins_cost(300);
12457   format %{ "J$cop,us $labl\t# Loop end" %}
12458   size(2);
12459   ins_encode %{
12460     Label* L = $labl$$label;
12461     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12462   %}
12463   ins_pipe( pipe_jcc );
12464   ins_short_branch(1);
12465 %}
12466 
12467 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12468   match(CountedLoopEnd cop cmp);
12469   effect(USE labl);
12470 
12471   ins_cost(300);
12472   format %{ "J$cop,us $labl\t# Loop end" %}
12473   size(2);
12474   ins_encode %{
12475     Label* L = $labl$$label;
12476     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12477   %}
12478   ins_pipe( pipe_jcc );
12479   ins_short_branch(1);
12480 %}
12481 
12482 // Jump Direct Conditional - using unsigned comparison
12483 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12484   match(If cop cmp);
12485   effect(USE labl);
12486 
12487   ins_cost(300);
12488   format %{ "J$cop,us $labl" %}
12489   size(2);
12490   ins_encode %{
12491     Label* L = $labl$$label;
12492     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12493   %}
12494   ins_pipe( pipe_jcc );
12495   ins_short_branch(1);
12496 %}
12497 
12498 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12499   match(If cop cmp);
12500   effect(USE labl);
12501 
12502   ins_cost(300);
12503   format %{ "J$cop,us $labl" %}
12504   size(2);
12505   ins_encode %{
12506     Label* L = $labl$$label;
12507     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12508   %}
12509   ins_pipe( pipe_jcc );
12510   ins_short_branch(1);
12511 %}
12512 
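// An unordered floating-point compare leaves the parity flag set when either
// operand is NaN; the extra JP emitted below routes that case so NaN always
// lands on the "not equal" side of an eq/ne test.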
12513 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12514   match(If cop cmp);
12515   effect(USE labl);
12516 
12517   ins_cost(300);
12518   format %{ $$template
12519     if ($cop$$cmpcode == Assembler::notEqual) {
12520       $$emit$$"JP,u,s   $labl\n\t"
12521       $$emit$$"J$cop,u,s   $labl"
12522     } else {
12523       $$emit$$"JP,u,s   done\n\t"
12524       $$emit$$"J$cop,u,s  $labl\n\t"
12525       $$emit$$"done:"
12526     }
12527   %}
12528   size(4);
12529   ins_encode %{
12530     Label* l = $labl$$label;
12531     if ($cop$$cmpcode == Assembler::notEqual) {
12532       __ jccb(Assembler::parity, *l);
12533       __ jccb(Assembler::notEqual, *l);
12534     } else if ($cop$$cmpcode == Assembler::equal) {
12535       Label done;
12536       __ jccb(Assembler::parity, done);
12537       __ jccb(Assembler::equal, *l);
12538       __ bind(done);
12539     } else {
12540        ShouldNotReachHere();
12541     }
12542   %}
12543   ins_pipe(pipe_jcc);
12544   ins_short_branch(1);
12545 %}
12546 
12547 // ============================================================================
12548 // Long Compare
12549 //
12550 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12551 // is tricky.  The flavor of compare used depends on whether we are testing
12552 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12553 // The GE test is the negated LT test.  The LE test can be had by commuting
12554 // the operands (yielding a GE test) and then negating; negate again for the
12555 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12556 // NE test is negated from that.
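//
// As a plain C++ sketch of the scheme (illustrative only; the function and
// parameter names are not part of the VM):
//
//   // Three-way compare of a long held as a signed high word and an
//   // unsigned low word, mirroring cmpL3_reg_reg below.
//   static int cmp_long_halves(jint xhi, juint xlo, jint yhi, juint ylo) {
//     if (xhi != yhi) return (xhi < yhi) ? -1 : 1;  // signed compare, high words
//     if (xlo != ylo) return (xlo < ylo) ? -1 : 1;  // unsigned compare, low words
//     return 0;
//   }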
12557 
12558 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12559 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12560 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12561 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12562 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12563 // foo match ends up with the wrong leaf.  One fix is to not match both
12564 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12565 // both forms beat the trinary form of long-compare and both are very useful
12566 // on Intel which has so few registers.
12567 
12568 // Manifest a CmpL result in an integer register.  Very painful.
12569 // This is the test to avoid.
12570 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12571   match(Set dst (CmpL3 src1 src2));
12572   effect( KILL flags );
12573   ins_cost(1000);
12574   format %{ "XOR    $dst,$dst\n\t"
12575             "CMP    $src1.hi,$src2.hi\n\t"
12576             "JLT,s  m_one\n\t"
12577             "JGT,s  p_one\n\t"
12578             "CMP    $src1.lo,$src2.lo\n\t"
12579             "JB,s   m_one\n\t"
12580             "JEQ,s  done\n"
12581     "p_one:\tINC    $dst\n\t"
12582             "JMP,s  done\n"
12583     "m_one:\tDEC    $dst\n"
12584      "done:" %}
12585   ins_encode %{
12586     Label p_one, m_one, done;
12587     __ xorptr($dst$$Register, $dst$$Register);
12588     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12589     __ jccb(Assembler::less,    m_one);
12590     __ jccb(Assembler::greater, p_one);
12591     __ cmpl($src1$$Register, $src2$$Register);
12592     __ jccb(Assembler::below,   m_one);
12593     __ jccb(Assembler::equal,   done);
12594     __ bind(p_one);
12595     __ incrementl($dst$$Register);
12596     __ jmpb(done);
12597     __ bind(m_one);
12598     __ decrementl($dst$$Register);
12599     __ bind(done);
12600   %}
12601   ins_pipe( pipe_slow );
12602 %}
12603 
12604 //======
12605 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12606 // compares.  Can be used for LE or GT compares by reversing arguments.
12607 // NOT GOOD FOR EQ/NE tests.
12608 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12609   match( Set flags (CmpL src zero ));
12610   ins_cost(100);
12611   format %{ "TEST   $src.hi,$src.hi" %}
12612   opcode(0x85);
12613   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12614   ins_pipe( ialu_cr_reg_reg );
12615 %}
12616 
12617 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12618 // compares.  Can be used for LE or GT compares by reversing arguments.
12619 // NOT GOOD FOR EQ/NE tests.
12620 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12621   match( Set flags (CmpL src1 src2 ));
12622   effect( TEMP tmp );
12623   ins_cost(300);
12624   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12625             "MOV    $tmp,$src1.hi\n\t"
12626             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12627   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12628   ins_pipe( ialu_cr_reg_reg );
12629 %}
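
// The CMP/SBB pair above computes the borrow chain of the full 64-bit
// subtraction src1 - src2 without keeping the result; only the flags of the
// final SBB are used.  A sketch of the equivalent test (names here are
// illustrative only):
//
//   static bool long_signed_lt(jint hi1, juint lo1, jint hi2, juint lo2) {
//     int   borrow = (lo1 < lo2) ? 1 : 0;              // CMP $src1.lo,$src2.lo
//     jlong hidif  = (jlong)hi1 - (jlong)hi2 - borrow; // MOV $tmp,hi1; SBB $tmp,hi2
//     return hidif < 0;                                // what JL / JGE then test
//   }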
12630 
12631 // Long compares reg < zero/reg OR reg >= zero/reg.
12632 // Just a wrapper for a normal branch, plus the predicate test.
12633 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12634   match(If cmp flags);
12635   effect(USE labl);
12636   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12637   expand %{
12638     jmpCon(cmp,flags,labl);    // JLT or JGE...
12639   %}
12640 %}
12641 
12642 // Compare 2 longs and CMOVE longs.
12643 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12644   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12645   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12646   ins_cost(400);
12647   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12648             "CMOV$cmp $dst.hi,$src.hi" %}
12649   opcode(0x0F,0x40);
12650   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12651   ins_pipe( pipe_cmov_reg_long );
12652 %}
12653 
12654 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12655   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12656   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12657   ins_cost(500);
12658   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12659             "CMOV$cmp $dst.hi,$src.hi" %}
12660   opcode(0x0F,0x40);
12661   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12662   ins_pipe( pipe_cmov_reg_long );
12663 %}
12664 
12665 // Compare 2 longs and CMOVE ints.
12666 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12667   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12668   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12669   ins_cost(200);
12670   format %{ "CMOV$cmp $dst,$src" %}
12671   opcode(0x0F,0x40);
12672   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12673   ins_pipe( pipe_cmov_reg );
12674 %}
12675 
12676 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12677   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12678   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12679   ins_cost(250);
12680   format %{ "CMOV$cmp $dst,$src" %}
12681   opcode(0x0F,0x40);
12682   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12683   ins_pipe( pipe_cmov_mem );
12684 %}
12685 
12686 // Compare 2 longs and CMOVE ptrs.
12687 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12688   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12689   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12690   ins_cost(200);
12691   format %{ "CMOV$cmp $dst,$src" %}
12692   opcode(0x0F,0x40);
12693   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12694   ins_pipe( pipe_cmov_reg );
12695 %}
12696 
12697 // Compare 2 longs and CMOVE doubles
12698 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12699   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12700   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12701   ins_cost(200);
12702   expand %{
12703     fcmovDPR_regS(cmp,flags,dst,src);
12704   %}
12705 %}
12706 
12707 // Compare 2 longs and CMOVE doubles
12708 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12709   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12710   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12711   ins_cost(200);
12712   expand %{
12713     fcmovD_regS(cmp,flags,dst,src);
12714   %}
12715 %}
12716 
12717 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12718   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12719   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12720   ins_cost(200);
12721   expand %{
12722     fcmovFPR_regS(cmp,flags,dst,src);
12723   %}
12724 %}
12725 
12726 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12727   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12728   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12729   ins_cost(200);
12730   expand %{
12731     fcmovF_regS(cmp,flags,dst,src);
12732   %}
12733 %}
12734 
12735 //======
12736 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12737 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12738   match( Set flags (CmpL src zero ));
12739   effect(TEMP tmp);
12740   ins_cost(200);
12741   format %{ "MOV    $tmp,$src.lo\n\t"
12742             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12743   ins_encode( long_cmp_flags0( src, tmp ) );
12744   ins_pipe( ialu_reg_reg_long );
12745 %}
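
// A long is zero exactly when the OR of its two halves is zero, so the single
// OR above sets ZF correctly for EQ/NE tests against zero.  Illustrative
// sketch only:
//
//   static bool long_is_zero(juint hi, juint lo) { return (hi | lo) == 0; }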
12746 
12747 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12748 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12749   match( Set flags (CmpL src1 src2 ));
12750   ins_cost(200+300);
12751   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12752             "JNE,s  skip\n\t"
12753             "CMP    $src1.hi,$src2.hi\n\t"
12754      "skip:\t" %}
12755   ins_encode( long_cmp_flags1( src1, src2 ) );
12756   ins_pipe( ialu_cr_reg_reg );
12757 %}
12758 
12759 // Long compare reg == zero/reg OR reg != zero/reg
12760 // Just a wrapper for a normal branch, plus the predicate test.
12761 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12762   match(If cmp flags);
12763   effect(USE labl);
12764   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12765   expand %{
12766     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12767   %}
12768 %}
12769 
12770 // Compare 2 longs and CMOVE longs.
12771 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12772   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12773   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12774   ins_cost(400);
12775   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12776             "CMOV$cmp $dst.hi,$src.hi" %}
12777   opcode(0x0F,0x40);
12778   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12779   ins_pipe( pipe_cmov_reg_long );
12780 %}
12781 
12782 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12783   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12784   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12785   ins_cost(500);
12786   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12787             "CMOV$cmp $dst.hi,$src.hi" %}
12788   opcode(0x0F,0x40);
12789   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12790   ins_pipe( pipe_cmov_reg_long );
12791 %}
12792 
12793 // Compare 2 longs and CMOVE ints.
12794 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12795   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12796   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12797   ins_cost(200);
12798   format %{ "CMOV$cmp $dst,$src" %}
12799   opcode(0x0F,0x40);
12800   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12801   ins_pipe( pipe_cmov_reg );
12802 %}
12803 
12804 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12805   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12806   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12807   ins_cost(250);
12808   format %{ "CMOV$cmp $dst,$src" %}
12809   opcode(0x0F,0x40);
12810   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12811   ins_pipe( pipe_cmov_mem );
12812 %}
12813 
12814 // Compare 2 longs and CMOVE ptrs.
12815 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12816   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12817   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12818   ins_cost(200);
12819   format %{ "CMOV$cmp $dst,$src" %}
12820   opcode(0x0F,0x40);
12821   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12822   ins_pipe( pipe_cmov_reg );
12823 %}
12824 
12825 // Compare 2 longs and CMOVE doubles
12826 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12827   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12828   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12829   ins_cost(200);
12830   expand %{
12831     fcmovDPR_regS(cmp,flags,dst,src);
12832   %}
12833 %}
12834 
12835 // Compare 2 longs and CMOVE doubles
12836 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12837   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12838   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12839   ins_cost(200);
12840   expand %{
12841     fcmovD_regS(cmp,flags,dst,src);
12842   %}
12843 %}
12844 
12845 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12846   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12847   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12848   ins_cost(200);
12849   expand %{
12850     fcmovFPR_regS(cmp,flags,dst,src);
12851   %}
12852 %}
12853 
12854 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12855   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12856   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12857   ins_cost(200);
12858   expand %{
12859     fcmovF_regS(cmp,flags,dst,src);
12860   %}
12861 %}
12862 
12863 //======
12864 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12865 // Same as cmpL_reg_flags_LEGT except that src must be negated (0 - src).
12866 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12867   match( Set flags (CmpL src zero ));
12868   effect( TEMP tmp );
12869   ins_cost(300);
12870   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12871             "CMP    $tmp,$src.lo\n\t"
12872             "SBB    $tmp,$src.hi" %}
12873   ins_encode( long_cmp_flags3(src, tmp) );
12874   ins_pipe( ialu_reg_reg_long );
12875 %}
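
// The XOR/CMP/SBB above computes 0 - src, keeping only the flags, which is why
// the branch must use the commuted condition.  A sketch of the test it enables
// (illustrative names only):
//
//   static bool long_gt_zero(jint hi, juint lo) {
//     int   borrow = (0u < lo) ? 1 : 0;             // CMP $tmp,$src.lo with $tmp == 0
//     jlong hidif  = (jlong)0 - (jlong)hi - borrow; // SBB $tmp,$src.hi
//     return hidif < 0;                             // 0 - src < 0  <==>  src > 0
//   }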
12876 
12877 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12878 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12879 // requires a commuted test to get the same result.
12880 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12881   match( Set flags (CmpL src1 src2 ));
12882   effect( TEMP tmp );
12883   ins_cost(300);
12884   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12885             "MOV    $tmp,$src2.hi\n\t"
12886             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12887   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12888   ins_pipe( ialu_cr_reg_reg );
12889 %}
12890 
12891 // Long compares reg <= zero/reg OR reg > zero/reg.
12892 // Just a wrapper for a normal branch, plus the predicate test.
12893 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12894   match(If cmp flags);
12895   effect(USE labl);
12896   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12897   ins_cost(300);
12898   expand %{
12899     jmpCon(cmp,flags,labl);    // JGT or JLE...
12900   %}
12901 %}
12902 
12903 // Compare 2 longs and CMOVE longs.
12904 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12905   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12906   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12907   ins_cost(400);
12908   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12909             "CMOV$cmp $dst.hi,$src.hi" %}
12910   opcode(0x0F,0x40);
12911   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12912   ins_pipe( pipe_cmov_reg_long );
12913 %}
12914 
12915 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12916   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12917   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12918   ins_cost(500);
12919   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12920             "CMOV$cmp $dst.hi,$src.hi+4" %}
12921   opcode(0x0F,0x40);
12922   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12923   ins_pipe( pipe_cmov_reg_long );
12924 %}
12925 
12926 // Compare 2 longs and CMOVE ints.
12927 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12928   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12929   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12930   ins_cost(200);
12931   format %{ "CMOV$cmp $dst,$src" %}
12932   opcode(0x0F,0x40);
12933   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12934   ins_pipe( pipe_cmov_reg );
12935 %}
12936 
12937 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12938   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12939   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12940   ins_cost(250);
12941   format %{ "CMOV$cmp $dst,$src" %}
12942   opcode(0x0F,0x40);
12943   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12944   ins_pipe( pipe_cmov_mem );
12945 %}
12946 
12947 // Compare 2 longs and CMOVE ptrs.
12948 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12949   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12950   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12951   ins_cost(200);
12952   format %{ "CMOV$cmp $dst,$src" %}
12953   opcode(0x0F,0x40);
12954   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12955   ins_pipe( pipe_cmov_reg );
12956 %}
12957 
12958 // Compare 2 longs and CMOVE doubles
12959 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12960   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12961   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12962   ins_cost(200);
12963   expand %{
12964     fcmovDPR_regS(cmp,flags,dst,src);
12965   %}
12966 %}
12967 
12968 // Compare 2 longs and CMOVE doubles
12969 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12970   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12971   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12972   ins_cost(200);
12973   expand %{
12974     fcmovD_regS(cmp,flags,dst,src);
12975   %}
12976 %}
12977 
12978 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12979   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12980   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12981   ins_cost(200);
12982   expand %{
12983     fcmovFPR_regS(cmp,flags,dst,src);
12984   %}
12985 %}
12986 
12987 
12988 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12989   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12990   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12991   ins_cost(200);
12992   expand %{
12993     fcmovF_regS(cmp,flags,dst,src);
12994   %}
12995 %}
12996 
12997 
12998 // ============================================================================
12999 // Procedure Call/Return Instructions
13000 // Call Java Static Instruction
13001 // Note: If this code changes, the corresponding ret_addr_offset() and
13002 //       compute_padding() functions will have to be adjusted.
13003 instruct CallStaticJavaDirect(method meth) %{
13004   match(CallStaticJava);
13005   effect(USE meth);
13006 
13007   ins_cost(300);
13008   format %{ "CALL,static " %}
13009   opcode(0xE8); /* E8 cd */
13010   ins_encode( pre_call_resets,
13011               Java_Static_Call( meth ),
13012               call_epilog,
13013               post_call_FPU );
13014   ins_pipe( pipe_slow );
13015   ins_alignment(4);
13016 %}
13017 
13018 // Call Java Dynamic Instruction
13019 // Note: If this code changes, the corresponding ret_addr_offset() and
13020 //       compute_padding() functions will have to be adjusted.
13021 instruct CallDynamicJavaDirect(method meth) %{
13022   match(CallDynamicJava);
13023   effect(USE meth);
13024 
13025   ins_cost(300);
13026   format %{ "MOV    EAX,(oop)-1\n\t"
13027             "CALL,dynamic" %}
13028   opcode(0xE8); /* E8 cd */
13029   ins_encode( pre_call_resets,
13030               Java_Dynamic_Call( meth ),
13031               call_epilog,
13032               post_call_FPU );
13033   ins_pipe( pipe_slow );
13034   ins_alignment(4);
13035 %}
13036 
13037 // Call Runtime Instruction
13038 instruct CallRuntimeDirect(method meth) %{
13039   match(CallRuntime);
13040   effect(USE meth);
13041 
13042   ins_cost(300);
13043   format %{ "CALL,runtime " %}
13044   opcode(0xE8); /* E8 cd */
13045   // Use FFREEs to clear entries in float stack
13046   ins_encode( pre_call_resets,
13047               FFree_Float_Stack_All,
13048               Java_To_Runtime( meth ),
13049               post_call_FPU );
13050   ins_pipe( pipe_slow );
13051 %}
13052 
13053 // Call runtime without safepoint
13054 instruct CallLeafDirect(method meth) %{
13055   match(CallLeaf);
13056   effect(USE meth);
13057 
13058   ins_cost(300);
13059   format %{ "CALL_LEAF,runtime " %}
13060   opcode(0xE8); /* E8 cd */
13061   ins_encode( pre_call_resets,
13062               FFree_Float_Stack_All,
13063               Java_To_Runtime( meth ),
13064               Verify_FPU_For_Leaf, post_call_FPU );
13065   ins_pipe( pipe_slow );
13066 %}
13067 
13068 instruct CallLeafNoFPDirect(method meth) %{
13069   match(CallLeafNoFP);
13070   effect(USE meth);
13071 
13072   ins_cost(300);
13073   format %{ "CALL_LEAF_NOFP,runtime " %}
13074   opcode(0xE8); /* E8 cd */
13075   ins_encode(Java_To_Runtime(meth));
13076   ins_pipe( pipe_slow );
13077 %}
13078 
13079 
13080 // Return Instruction
13081 // Remove the return address & jump to it.
13082 instruct Ret() %{
13083   match(Return);
13084   format %{ "RET" %}
13085   opcode(0xC3);
13086   ins_encode(OpcP);
13087   ins_pipe( pipe_jmp );
13088 %}
13089 
13090 // Tail Call; Jump from runtime stub to Java code.
13091 // Also known as an 'interprocedural jump'.
13092 // Target of jump will eventually return to caller.
13093 // TailJump below removes the return address.
13094 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13095   match(TailCall jump_target method_oop );
13096   ins_cost(300);
13097   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13098   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13099   ins_encode( OpcP, RegOpc(jump_target) );
13100   ins_pipe( pipe_jmp );
13101 %}
13102 
13103 
13104 // Tail Jump; remove the return address; jump to target.
13105 // TailCall above leaves the return address around.
13106 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13107   match( TailJump jump_target ex_oop );
13108   ins_cost(300);
13109   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13110             "JMP    $jump_target " %}
13111   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13112   ins_encode( enc_pop_rdx,
13113               OpcP, RegOpc(jump_target) );
13114   ins_pipe( pipe_jmp );
13115 %}
13116 
13117 // Create exception oop: created by stack-crawling runtime code.
13118 // Created exception is now available to this handler, and is set up
13119 // just prior to jumping to this handler.  No code emitted.
13120 instruct CreateException( eAXRegP ex_oop )
13121 %{
13122   match(Set ex_oop (CreateEx));
13123 
13124   size(0);
13125   // use the following format syntax
13126   format %{ "# exception oop is in EAX; no code emitted" %}
13127   ins_encode();
13128   ins_pipe( empty );
13129 %}
13130 
13131 
13132 // Rethrow exception:
13133 // The exception oop will come in the first argument position.
13134 // Then JUMP (not call) to the rethrow stub code.
13135 instruct RethrowException()
13136 %{
13137   match(Rethrow);
13138 
13139   // use the following format syntax
13140   format %{ "JMP    rethrow_stub" %}
13141   ins_encode(enc_rethrow);
13142   ins_pipe( pipe_jmp );
13143 %}
13144 
13145 // inlined locking and unlocking
13146 
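// The fast paths below are emitted by MacroAssembler::fast_lock and
// MacroAssembler::fast_unlock (called from the encodings); they set the flags
// register so that only contended cases fall into the runtime slow path.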
13147 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13148   predicate(Compile::current()->use_rtm());
13149   match(Set cr (FastLock object box));
13150   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13151   ins_cost(300);
13152   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13153   ins_encode %{
13154     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13155                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13156                  _counters, _rtm_counters, _stack_rtm_counters,
13157                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13158                  true, ra_->C->profile_rtm());
13159   %}
13160   ins_pipe(pipe_slow);
13161 %}
13162 
13163 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13164   predicate(!Compile::current()->use_rtm());
13165   match(Set cr (FastLock object box));
13166   effect(TEMP tmp, TEMP scr, USE_KILL box);
13167   ins_cost(300);
13168   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13169   ins_encode %{
13170     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13171                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13172   %}
13173   ins_pipe(pipe_slow);
13174 %}
13175 
13176 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13177   match(Set cr (FastUnlock object box));
13178   effect(TEMP tmp, USE_KILL box);
13179   ins_cost(300);
13180   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13181   ins_encode %{
13182     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13183   %}
13184   ins_pipe(pipe_slow);
13185 %}
13186 
13187 
13188 
13189 // ============================================================================
13190 // Safepoint Instruction
13191 instruct safePoint_poll(eFlagsReg cr) %{
13192   match(SafePoint);
13193   effect(KILL cr);
13194 
13195   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13196   // On SPARC that might be acceptable as we can generate the address with
13197   // just a sethi, saving an or.  By polling at offset 0 we can end up putting
13198   // additional pressure on cache index 0 of the D$ (data cache).  Because of
13199   // alignment (just like the situation at hand) the lower cache indices tend
13200   // to see more traffic.  It would be better to change the polling address to
13201   // offset 0 of the last cache line in the polling page.
13202 
13203   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13204   ins_cost(125);
13205   size(6);
13206   ins_encode( Safepoint_Poll() );
13207   ins_pipe( ialu_reg_mem );
13208 %}
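
// Conceptually the poll is just a read of the global polling page: the page is
// normally readable, and when the VM requests a safepoint it protects the page
// so the TEST above faults and the signal handler parks this thread.  A minimal
// model, not the actual VM code:
//
//   volatile intptr_t* polling_page;             // mapped readable normally
//   static void poll() { (void)*polling_page; }  // faults only at a safepoint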
13209 
13210 
13211 // ============================================================================
13212 // This name is KNOWN by the ADLC and cannot be changed.
13213 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13214 // for this guy.
13215 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13216   match(Set dst (ThreadLocal));
13217   effect(DEF dst, KILL cr);
13218 
13219   format %{ "MOV    $dst, Thread::current()" %}
13220   ins_encode %{
13221     Register dstReg = as_Register($dst$$reg);
13222     __ get_thread(dstReg);
13223   %}
13224   ins_pipe( ialu_reg_fat );
13225 %}
13226 
13227 
13228 
13229 //----------PEEPHOLE RULES-----------------------------------------------------
13230 // These must follow all instruction definitions as they use the names
13231 // defined in the instruction definitions.
13232 //
13233 // peepmatch ( root_instr_name [preceding_instruction]* );
13234 //
13235 // peepconstraint %{
13236 // (instruction_number.operand_name relational_op instruction_number.operand_name
13237 //  [, ...] );
13238 // // instruction numbers are zero-based using left to right order in peepmatch
13239 //
13240 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13241 // // provide an instruction_number.operand_name for each operand that appears
13242 // // in the replacement instruction's match rule
13243 //
13244 // ---------VM FLAGS---------------------------------------------------------
13245 //
13246 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13247 //
13248 // Each peephole rule is given an identifying number starting with zero and
13249 // increasing by one in the order seen by the parser.  An individual peephole
13250 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13251 // on the command-line.
13252 //
13253 // ---------CURRENT LIMITATIONS----------------------------------------------
13254 //
13255 // Only match adjacent instructions in same basic block
13256 // Only equality constraints
13257 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13258 // Only one replacement instruction
13259 //
13260 // ---------EXAMPLE----------------------------------------------------------
13261 //
13262 // // pertinent parts of existing instructions in architecture description
13263 // instruct movI(rRegI dst, rRegI src) %{
13264 //   match(Set dst (CopyI src));
13265 // %}
13266 //
13267 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13268 //   match(Set dst (AddI dst src));
13269 //   effect(KILL cr);
13270 // %}
13271 //
13272 // // Change (inc mov) to lea
13273 // peephole %{
13274 //   // increment preceded by register-register move
13275 //   peepmatch ( incI_eReg movI );
13276 //   // require that the destination register of the increment
13277 //   // match the destination register of the move
13278 //   peepconstraint ( 0.dst == 1.dst );
13279 //   // construct a replacement instruction that sets
13280 //   // the destination to ( move's source register + one )
13281 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13282 // %}
13283 //
13284 // Implementation no longer uses movX instructions since
13285 // machine-independent system no longer uses CopyX nodes.
13286 //
13287 // peephole %{
13288 //   peepmatch ( incI_eReg movI );
13289 //   peepconstraint ( 0.dst == 1.dst );
13290 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13291 // %}
13292 //
13293 // peephole %{
13294 //   peepmatch ( decI_eReg movI );
13295 //   peepconstraint ( 0.dst == 1.dst );
13296 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13297 // %}
13298 //
13299 // peephole %{
13300 //   peepmatch ( addI_eReg_imm movI );
13301 //   peepconstraint ( 0.dst == 1.dst );
13302 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13303 // %}
13304 //
13305 // peephole %{
13306 //   peepmatch ( addP_eReg_imm movP );
13307 //   peepconstraint ( 0.dst == 1.dst );
13308 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13309 // %}
13310 
13311 // // Change load of spilled value to only a spill
13312 // instruct storeI(memory mem, rRegI src) %{
13313 //   match(Set mem (StoreI mem src));
13314 // %}
13315 //
13316 // instruct loadI(rRegI dst, memory mem) %{
13317 //   match(Set dst (LoadI mem));
13318 // %}
13319 //
13320 peephole %{
13321   peepmatch ( loadI storeI );
13322   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13323   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13324 %}
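
// The rule above removes a reload of a value that was just stored.  With
// illustrative registers and offsets it turns
//   MOV    [ESP+#12],EBX   // storeI
//   MOV    EBX,[ESP+#12]   // loadI of the same slot into the same register
// into just the store, since the constraint requires the load's destination to
// equal the store's source and both to address the same memory location.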
13325 
13326 //----------SMARTSPILL RULES---------------------------------------------------
13327 // These must follow all instruction definitions as they use the names
13328 // defined in the instruction definitions.