1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously save-on-entry (SOE) for Java code.
  64 // SOE was then turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
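     // How to read the entries below, following the reg_def format documented above:
     // for example, EBX is declared with register save type SOC, C-convention save
     // type SOE, is spilled/filled as an int (Op_RegI), and has hardware encoding 3,
     // the bit pattern placed in the register fields of ModRM/SIB bytes.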
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Ok, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emission of assembly for a machnode. During the emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering forces the
  86 // instruction encoding to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between the register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // a 2-address instruction out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
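     // For example, in the EDX:EAX long pair EAX encodes as 0 and HIGH_FROM_LOW(0) == 2 == EDX;
     // the EBX:ECX (3:1) and EDI:EBP (7:5) pairs noted above alloc_class chunk0 follow the same +2 pattern.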
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
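     // A note on the layout above: fp_signmask_pool itself is not necessarily 16-byte
     // aligned, so each mask is stored 16 bytes apart starting at index 1*2 and
     // double_quadword() rounds the address down with &~0xF.  The extra 128-bit slot
     // declared in the pool guarantees the rounded-down operands still land inside the
     // buffer; slot 0 is deliberately left unused as padding.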
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
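     // The 10 bytes above presumably break down as the 5-byte MOV that loads the
     // inline-cache holder into EAX plus the 5-byte CALL rel32, matching the
     // "skip MOV instruction" accounting in CallDynamicJavaDirectNode::compute_padding below.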
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // With absolute addressing it does not; with thread-local polling it does.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
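     // Worked example for the two compute_padding() functions above (assuming
     // alignment_required() == 4): if the call's 32-bit displacement would otherwise
     // start at offset 7, align_up(7, 4) - 7 == 1, so a single byte of padding is
     // emitted ahead of the call sequence.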
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
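     // The ModRM byte packs mod (2 bits), reg (3 bits), and r/m (3 bits).  For example,
     // emit_rm(cbuf, 0x3, 0x00, ESP_enc) yields 0xC4, the ModRM byte emitted after
     // opcode 0x81 in the epilog below to encode "ADD ESP, imm32".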
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // 8-bit displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // 32-bit displacement
 412   }
 413 }
 414 
 415 // Encodes a register-memory operand (rRegI ereg, memory mem), i.e. emit_reg_mem.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
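     // Example encodings produced above, with reg_encoding = EAX (0):
     //   [EBX]           -> ModRM 0x03                 (mod=00, no SIB, no displacement)
     //   [EBX + 8]       -> ModRM 0x43, disp8 0x08     (mod=01)
     //   [EBX + 0x100]   -> ModRM 0x83, disp32         (mod=10)
     //   absolute addr   -> ModRM 0x05, disp32         (mod=00, r/m=101, no SIB)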
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
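     // Opcode 0x8B is MOV r32, r/m32; with mod = 0x3 this is a plain register-to-register
     // move.  For example, dst = ECX (1), src = EAX (0) emits 8B C8, i.e. "MOV ECX, EAX".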
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
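     // Materializes the three-way result of an unordered FP compare: dst is left at -1
     // for "less than" or NaN (parity set), becomes 0 for "equal", and 1 for "greater than".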
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because constant users might be
 628     // emitted before the MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for the return address and EBP.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for the return address and EBP.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
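       // 0x58 | EBP_enc (5) == 0x5D, the single-byte "POP EBP".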
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
 789     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
 790     //                          since it maps more cases to a single-byte displacement.
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
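       // EBX_num and EDX_num are used here only for their hardware encodings (3 and 2),
       // which become the ModRM /digit: with opcodes 0xD9/0xDD, /3 selects FSTP (store
       // and pop) while /2 selects FST (store without popping).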
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popq    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popq    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// This is UltraSparc-specific; for x86, true just means we have fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
// Needs 2 CMOVs for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
    // These do not use EBP as the address register; they use { EDX, EBX, EDI, ESI }
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating a negative divisor
  // always yields a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
// Returns true if the high 32 bits of the value are known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
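//
// For example (illustrative sketch only, simplified from the real rules, which
// also declare effects, a format string, and an ins_pipe), an integer-add rule
// pairs these keywords like this:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                        // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit primary opcode, then mod/rm
//   %}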
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : rax,: dividend                         min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: rax,: quotient  (= rax, idiv reg)       min_int
1670     //         rdx: remainder (= rax, irem reg)       0
1671     //
    //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
//   The following encoding is no longer used, but may be restored if the calling
//   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap rbx, and rcx before and after the
2095   //       cmpxchg8 instruction because the instruction uses
2096   //       rcx as the high order word of the new value to store but
2097   //       our register encoding uses rbx,.
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  rbx,ecx
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     emit_opcode(cbuf,0xF0);
2105     // CMPXCHG8 [Eptr]
2106     emit_opcode(cbuf,0x0F);
2107     emit_opcode(cbuf,0xC7);
2108     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109     // XCHG  rbx,ecx
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit mode
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148     int res_encoding = $res$$reg;
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     int reg_encoding = $ereg$$reg;
2168     int base  = $mem$$base;
2169     int index = $mem$$index;
2170     int scale = $mem$$scale;
2171     int displace = $mem$$disp;
2172     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178     int base  = $mem$$base;
2179     int index = $mem$$index;
2180     int scale = $mem$$scale;
2181     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187     int r1, r2;
2188     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190     emit_opcode(cbuf,0x0F);
2191     emit_opcode(cbuf,$tertiary);
2192     emit_rm(cbuf, 0x3, r1, r2);
2193     emit_d8(cbuf,$cnt$$constant);
2194     emit_d8(cbuf,$primary);
2195     emit_rm(cbuf, 0x3, $secondary, r1);
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200     emit_opcode( cbuf, 0x8B ); // Move
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_d8(cbuf,$primary);
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     int r1, r2;
2214     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216 
2217     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218     emit_rm(cbuf, 0x3, r1, r2);
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_opcode(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, r1);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_opcode(cbuf,0x33);  // XOR r2,r2
2225     emit_rm(cbuf, 0x3, r2, r2);
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     emit_opcode(cbuf,$opcode$$constant);
2232     int reg_encoding = $rm_reg$$reg;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp + $disp_for_half$$constant;
2237     relocInfo::relocType disp_reloc = relocInfo::none;
2238     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     int rm_byte_opcode = $rm_opcode$$constant;
2248     int base     = $mem$$base;
2249     int index    = $mem$$index;
2250     int scale    = $mem$$scale;
2251     int displace = $mem$$disp;
2252     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     int rm_byte_opcode = $rm_opcode$$constant;
2258     int base     = $mem$$base;
2259     int index    = $mem$$index;
2260     int scale    = $mem$$scale;
2261     int displace = $mem$$disp;
2262     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jmp dst < src around move
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // jmp dst > src around move
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
2652     // jp  ::loop
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
2709 // less_result     =  1;
2710 // greater_result  = -1;
2711 // equal_result    = 0;
2712 // nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
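
  // A rough C sketch of what the sequence above computes (illustration only,
  // not emitted code): sign-extend a 32-bit value into a register pair.
  //
  //   int32_t src;
  //   int32_t lo = src;          // MOV $dst.lo,$src
  //   int32_t hi = src >> 31;    // copy then SAR 31 replicates the sign bit
  //   // dst.hi:dst.lo now holds (int64_t)src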
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
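
  // Arithmetic behind the encoding above (a sketch, not emitted code): for a
  // shift count with 32 <= cnt <= 63, ((int64_t)src1 * src2) >> cnt needs only
  // the high word of the 64-bit product.
  //
  //   int64_t p  = (int64_t)src1 * src2;   // IMUL leaves the product in EDX:EAX
  //   int32_t hi = (int32_t)(p >> 32);     // EDX
  //   int32_t r  = hi >> (cnt - 32);       // SAR EDX,cnt-32 (omitted when cnt == 32)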
2800 
2801   // This version is identical to convert_long_double but omits the trailing ADD ESP,8
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
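
  // A rough C sketch of what the one-operand IMUL above computes (illustration
  // only); the next encoding, long_uint_multiply, is the unsigned MUL analogue.
  //
  //   int32_t a /* EAX */, b /* src */;
  //   int64_t product = (int64_t)a * (int64_t)b;   // EDX:EAX = full 64-bit product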
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2849     // ADD    EDX,$tmp
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
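
  // Sketch of the decomposition used above (illustration, not emitted code):
  // writing x = x_hi:x_lo and y = y_hi:y_lo, only three 32x32 multiplies
  // contribute to the low 64 bits of the product.
  //
  //   uint64_t lo_lo  = (uint64_t)x_lo * y_lo;        // MUL, full 64-bit result
  //   uint32_t cross  = x_hi * y_lo + x_lo * y_hi;    // two IMULs + ADD, low 32 bits only
  //   uint64_t result = lo_lo + ((uint64_t)cross << 32);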
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2867     // ADD    EDX,$tmp
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
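
  // Shared idea behind the flags2/flags3 encodings above (illustration, not
  // emitted code): a signed 64-bit compare is a 64-bit subtract performed only
  // for its flags.
  //
  //   CMP  lo1,lo2          ; borrow (CF) out of the low words
  //   MOV  tmp,hi1
  //   SBB  tmp,hi2          ; tmp = hi1 - hi2 - borrow
  //
  // After the SBB, the sign and overflow flags describe src1 - src2 as a full
  // 64-bit value, so the signed conditions (LT, GE, ...) test correctly;
  // flags3 is the same sequence with src1 == 0.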
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
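
  // Why the trailing SBB is needed (a sketch of the arithmetic, not emitted
  // code): for a 64-bit value hi:lo,
  //
  //   -(hi:lo) = (lo == 0) ? (-hi : 0) : (~hi : -lo)
  //
  // NEG lo sets the carry flag exactly when lo was non-zero, so SBB hi,0
  // subtracts the extra borrow from the already-negated high word.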
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
2978   // Convert a double to an int.  Java semantics require we do complex
2979   // manglelations in the corner cases.  So we set the rounding mode to
2980   // 'zero', store the darned double down as an int, and reset the
2981   // rounding mode to 'nearest'.  The hardware throws an exception which
2982   // patches up the correct value directly to the stack.
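  //
  // For reference, the Java-level semantics being implemented are, roughly (a
  // C sketch, not the emitted code): NaN converts to 0, out-of-range values
  // clamp to the int bounds, everything else truncates toward zero.
  //
  //   int32_t java_d2i(double d) {
  //     if (d != d)                 return 0;          // NaN
  //     if (d <= (double)INT32_MIN) return INT32_MIN;
  //     if (d >= (double)INT32_MAX) return INT32_MAX;
  //     return (int32_t)d;                             // truncate toward zero
  //   }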
2983   enc_class DPR2I_encoding( regDPR src ) %{
2984     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985     // exceptions here, so that a NAN or other corner-case value will
2986     // throw an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3030     // Allocate two words
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
3046     // Load the converted long; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050     emit_d8    (cbuf,0xFA);       // rdx
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3076     // FADD   ST,src2  /* D8 C0+i */
3077     emit_opcode(cbuf, 0xD8);
3078     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3079     //could use FADDP  src2,fpST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top)
3090     // FSUB   ST,$src1  /* D8 E0+i */
3091     emit_opcode(cbuf, 0xD8);
3092     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094     // FDIV   ST,$src2  /* D8 F0+i */
3095     emit_opcode(cbuf, 0xD8);
3096     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
3105     // FMUL   ST,src2  /* D8 C8+i */
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top)
3113     // FADD   ST,$src  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x05;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);
3143     int rm_byte_opcode = 0x07;
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150   %}
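
  // Why the FPU round trip in the two volatile-long encodings above (an
  // illustration of the hazard, not emitted code): two separate 32-bit moves
  // would let a concurrent reader observe a torn value,
  //
  //   // writer                    // concurrent reader
  //   mem.lo = v.lo;               lo = mem.lo;    // sees the new low word
  //   mem.hi = v.hi;               hi = mem.hi;    // ...but still the old high word
  //
  // whereas the 8-byte FILD/FISTP performs the access as a single 64-bit
  // memory operation, giving the atomicity Java requires for volatile longs.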
3151 
3152   // Safepoint Poll.  This polls the safepoint page, and causes an
3153   // exception if it is not readable. Unfortunately, it kills the condition code
3154   // in the process.
3155   // We currently use TESTL [spp],EDI.
3156   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160     emit_opcode(cbuf,0x85);
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163   %}
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
3210 //         are owned by the CALLEE.  Holes should not be necessary in the
3211 //         incoming area, as the Java calling convention is completely under
3212 //         the control of the AD file.  Doubles can be sorted and packed to
3213 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
3225   // These three registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted Java to compiled Java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for
3250   // return address and one for rbp (must save rbp)
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262   // Otherwise, it is above the locks and verification slot and alignment word
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
3275     // No difference between ingoing/outgoing, so just pass false
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating
3281   // arguments either in registers or in stack slots.  Passed an array
3282   // of ideal registers called "sig" and a "length" count.  Stack-slot
3283   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284   // arguments for a CALLEE.  Incoming stack arguments are
3285   // automatically biased by the preserve_stack_slots field above.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307   // Location of return values
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                 // non-matching short branch variant of some
3330                                 // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
3513 // Long Immediate minus one
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127.
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Long Immediate: value fits in a signed 32-bit int
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 // Double Immediate zero
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero
3596 operand immD0() %{
3597   // Do additional (and counter-intuitive) test against NaN to work around VC++
3598   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599   // compare equal to -0.0.
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
3659 // Constants for increment
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));
3773   match(RegI);
3774   match(eCXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));
3785   match(RegI);
3786   match(eBXRegI);
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));
3797   match(RegI);
3798   match(eAXRegI);
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808 // //
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // On Windows 95, EBP is not safe to use for implicit null tests.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));
3873   match(RegP);
3874   match(eCXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));
3885   match(RegP);
3886   match(eBXRegP);
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));
3937   match(RegL);
3938   match(eADXRegL);
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990   predicate(false);
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Float register operands
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operands
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operands
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // Float register operands
4100 operand vlRegF() %{
4101    constraint(ALLOC_IN_RC(float_reg_vl));
4102    match(RegF);
4103 
4104    format %{ %}
4105    interface(REG_INTER);
4106 %}
4107 
4108 // XMM Double register operands
4109 operand regD() %{
4110   predicate( UseSSE>=2 );
4111   constraint(ALLOC_IN_RC(double_reg_legacy));
4112   match(RegD);
4113   format %{ %}
4114   interface(REG_INTER);
4115 %}
4116 
4117 // Double register operands
4118 operand vlRegD() %{
4119    constraint(ALLOC_IN_RC(double_reg_vl));
4120    match(RegD);
4121 
4122    format %{ %}
4123    interface(REG_INTER);
4124 %}
4125 
4126 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4127 // runtime code generation via reg_class_dynamic.
4128 operand vecS() %{
4129   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4130   match(VecS);
4131 
4132   format %{ %}
4133   interface(REG_INTER);
4134 %}
4135 
4136 operand legVecS() %{
4137   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4138   match(VecS);
4139 
4140   format %{ %}
4141   interface(REG_INTER);
4142 %}
4143 
4144 operand vecD() %{
4145   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4146   match(VecD);
4147 
4148   format %{ %}
4149   interface(REG_INTER);
4150 %}
4151 
4152 operand legVecD() %{
4153   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4154   match(VecD);
4155 
4156   format %{ %}
4157   interface(REG_INTER);
4158 %}
4159 
4160 operand vecX() %{
4161   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4162   match(VecX);
4163 
4164   format %{ %}
4165   interface(REG_INTER);
4166 %}
4167 
4168 operand legVecX() %{
4169   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4170   match(VecX);
4171 
4172   format %{ %}
4173   interface(REG_INTER);
4174 %}
4175 
4176 operand vecY() %{
4177   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4178   match(VecY);
4179 
4180   format %{ %}
4181   interface(REG_INTER);
4182 %}
4183 
4184 operand legVecY() %{
4185   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186   match(VecY);
4187 
4188   format %{ %}
4189   interface(REG_INTER);
4190 %}
4191 
4192 //----------Memory Operands----------------------------------------------------
4193 // Direct Memory Operand
4194 operand direct(immP addr) %{
4195   match(addr);
4196 
4197   format %{ "[$addr]" %}
4198   interface(MEMORY_INTER) %{
4199     base(0xFFFFFFFF);
4200     index(0x4);
4201     scale(0x0);
4202     disp($addr);
4203   %}
4204 %}
4205 
4206 // Indirect Memory Operand
4207 operand indirect(eRegP reg) %{
4208   constraint(ALLOC_IN_RC(int_reg));
4209   match(reg);
4210 
4211   format %{ "[$reg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index(0x4);
4215     scale(0x0);
4216     disp(0x0);
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Short Offset Operand
4221 operand indOffset8(eRegP reg, immI8 off) %{
4222   match(AddP reg off);
4223 
4224   format %{ "[$reg + $off]" %}
4225   interface(MEMORY_INTER) %{
4226     base($reg);
4227     index(0x4);
4228     scale(0x0);
4229     disp($off);
4230   %}
4231 %}
4232 
4233 // Indirect Memory Plus Long Offset Operand
4234 operand indOffset32(eRegP reg, immI off) %{
4235   match(AddP reg off);
4236 
4237   format %{ "[$reg + $off]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index(0x4);
4241     scale(0x0);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 // Indirect Memory Plus Long Offset Operand
4247 operand indOffset32X(rRegI reg, immP off) %{
4248   match(AddP off reg);
4249 
4250   format %{ "[$reg + $off]" %}
4251   interface(MEMORY_INTER) %{
4252     base($reg);
4253     index(0x4);
4254     scale(0x0);
4255     disp($off);
4256   %}
4257 %}
4258 
4259 // Indirect Memory Plus Index Register Plus Offset Operand
4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261   match(AddP (AddP reg ireg) off);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $off + $ireg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($ireg);
4268     scale(0x0);
4269     disp($off);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Index Register Plus Offset Operand
4274 operand indIndex(eRegP reg, rRegI ireg) %{
4275   match(AddP reg ireg);
4276 
4277   op_cost(10);
4278   format %{"[$reg + $ireg]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index($ireg);
4282     scale(0x0);
4283     disp(0x0);
4284   %}
4285 %}
4286 
4287 // // -------------------------------------------------------------------------
4288 // // 486 architecture doesn't support "scale * index + offset" without a base
4289 // // -------------------------------------------------------------------------
4290 // // Scaled Memory Operands
4291 // // Indirect Memory Times Scale Plus Offset Operand
4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293 //   match(AddP off (LShiftI ireg scale));
4294 //
4295 //   op_cost(10);
4296 //   format %{"[$off + $ireg << $scale]" %}
4297 //   interface(MEMORY_INTER) %{
4298 //     base(0x4);
4299 //     index($ireg);
4300 //     scale($scale);
4301 //     disp($off);
4302 //   %}
4303 // %}
4304 
4305 // Indirect Memory Times Scale Plus Index Register
4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307   match(AddP reg (LShiftI ireg scale));
4308 
4309   op_cost(10);
4310   format %{"[$reg + $ireg << $scale]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index($ireg);
4314     scale($scale);
4315     disp(0x0);
4316   %}
4317 %}
4318 
4319 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321   match(AddP (AddP reg (LShiftI ireg scale)) off);
4322 
4323   op_cost(10);
4324   format %{"[$reg + $off + $ireg << $scale]" %}
4325   interface(MEMORY_INTER) %{
4326     base($reg);
4327     index($ireg);
4328     scale($scale);
4329     disp($off);
4330   %}
4331 %}
4332 
4333 //----------Load Long Memory Operands------------------------------------------
4334 // The load-long idiom will use its address expression again after loading
4335 // the first word of the long.  If the load-long destination overlaps with
4336 // registers used in the addressing expression, the 2nd half will be loaded
4337 // from a clobbered address.  Fix this by requiring that load-long use
4338 // address registers that do not overlap with the load-long target.
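//
// Example of the hazard being avoided (illustration only): loading a long into
// EDX:EAX from an address whose base register is EAX,
//
//   mov EAX, [EAX + 4]     ; low word loaded, base register clobbered
//   mov EDX, [EAX + 8]     ; high word now read from the wrong address
//
// Keeping the address in a register outside the destination pair (ESI here)
// lets the second load reuse the original, unclobbered address.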
4339 
4340 // load-long support
4341 operand load_long_RegP() %{
4342   constraint(ALLOC_IN_RC(esi_reg));
4343   match(RegP);
4344   match(eSIRegP);
4345   op_cost(100);
4346   format %{  %}
4347   interface(REG_INTER);
4348 %}
4349 
4350 // Indirect Memory Operand Long
4351 operand load_long_indirect(load_long_RegP reg) %{
4352   constraint(ALLOC_IN_RC(esi_reg));
4353   match(reg);
4354 
4355   format %{ "[$reg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);
4358     index(0x4);
4359     scale(0x0);
4360     disp(0x0);
4361   %}
4362 %}
4363 
4364 // Indirect Memory Plus Long Offset Operand
4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366   match(AddP reg off);
4367 
4368   format %{ "[$reg + $off]" %}
4369   interface(MEMORY_INTER) %{
4370     base($reg);
4371     index(0x4);
4372     scale(0x0);
4373     disp($off);
4374   %}
4375 %}
4376 
4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4378 
4379  
4380 operand legRegF() %{
4381   predicate( UseSSE>=1 );
4382   constraint(ALLOC_IN_RC(float_reg_legacy));
4383   match(RegF);
4384   format %{ %}
4385   interface(REG_INTER);
4386 %}
4387 
4388 operand legRegD() %{
4389   predicate( UseSSE>=2 );
4390   constraint(ALLOC_IN_RC(double_reg_legacy));
4391   match(RegD);
4392   format %{ %}
4393   interface(REG_INTER);
4394 %}
4395 
4396 
4397 
4398 //----------Special Memory Operands--------------------------------------------
4399 // Stack Slot Operand - This operand is used for loading and storing temporary
4400 //                      values on the stack where a match requires a value to
4401 //                      flow through memory.
4402 operand stackSlotP(sRegP reg) %{
4403   constraint(ALLOC_IN_RC(stack_slots));
4404   // No match rule because this operand is only generated in matching
4405   format %{ "[$reg]" %}
4406   interface(MEMORY_INTER) %{
4407     base(0x4);   // ESP
4408     index(0x4);  // No Index
4409     scale(0x0);  // No Scale
4410     disp($reg);  // Stack Offset
4411   %}
4412 %}
4413 
4414 operand stackSlotI(sRegI reg) %{
4415   constraint(ALLOC_IN_RC(stack_slots));
4416   // No match rule because this operand is only generated in matching
4417   format %{ "[$reg]" %}
4418   interface(MEMORY_INTER) %{
4419     base(0x4);   // ESP
4420     index(0x4);  // No Index
4421     scale(0x0);  // No Scale
4422     disp($reg);  // Stack Offset
4423   %}
4424 %}
4425 
4426 operand stackSlotF(sRegF reg) %{
4427   constraint(ALLOC_IN_RC(stack_slots));
4428   // No match rule because this operand is only generated in matching
4429   format %{ "[$reg]" %}
4430   interface(MEMORY_INTER) %{
4431     base(0x4);   // ESP
4432     index(0x4);  // No Index
4433     scale(0x0);  // No Scale
4434     disp($reg);  // Stack Offset
4435   %}
4436 %}
4437 
4438 operand stackSlotD(sRegD reg) %{
4439   constraint(ALLOC_IN_RC(stack_slots));
4440   // No match rule because this operand is only generated in matching
4441   format %{ "[$reg]" %}
4442   interface(MEMORY_INTER) %{
4443     base(0x4);   // ESP
4444     index(0x4);  // No Index
4445     scale(0x0);  // No Scale
4446     disp($reg);  // Stack Offset
4447   %}
4448 %}
4449 
4450 operand stackSlotL(sRegL reg) %{
4451   constraint(ALLOC_IN_RC(stack_slots));
4452   // No match rule because this operand is only generated in matching
4453   format %{ "[$reg]" %}
4454   interface(MEMORY_INTER) %{
4455     base(0x4);   // ESP
4456     index(0x4);  // No Index
4457     scale(0x0);  // No Scale
4458     disp($reg);  // Stack Offset
4459   %}
4460 %}
4461 
4462 //----------Memory Operands - Win95 Implicit Null Variants----------------
4463 // Indirect Memory Operand
4464 operand indirect_win95_safe(eRegP_no_EBP reg)
4465 %{
4466   constraint(ALLOC_IN_RC(int_reg));
4467   match(reg);
4468 
4469   op_cost(100);
4470   format %{ "[$reg]" %}
4471   interface(MEMORY_INTER) %{
4472     base($reg);
4473     index(0x4);
4474     scale(0x0);
4475     disp(0x0);
4476   %}
4477 %}
4478 
4479 // Indirect Memory Plus Short Offset Operand
4480 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4481 %{
4482   match(AddP reg off);
4483 
4484   op_cost(100);
4485   format %{ "[$reg + $off]" %}
4486   interface(MEMORY_INTER) %{
4487     base($reg);
4488     index(0x4);
4489     scale(0x0);
4490     disp($off);
4491   %}
4492 %}
4493 
4494 // Indirect Memory Plus Long Offset Operand
4495 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4496 %{
4497   match(AddP reg off);
4498 
4499   op_cost(100);
4500   format %{ "[$reg + $off]" %}
4501   interface(MEMORY_INTER) %{
4502     base($reg);
4503     index(0x4);
4504     scale(0x0);
4505     disp($off);
4506   %}
4507 %}
4508 
4509 // Indirect Memory Plus Index Register Plus Offset Operand
4510 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4511 %{
4512   match(AddP (AddP reg ireg) off);
4513 
4514   op_cost(100);
4515   format %{"[$reg + $off + $ireg]" %}
4516   interface(MEMORY_INTER) %{
4517     base($reg);
4518     index($ireg);
4519     scale(0x0);
4520     disp($off);
4521   %}
4522 %}
4523 
4524 // Indirect Memory Times Scale Plus Index Register
4525 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4526 %{
4527   match(AddP reg (LShiftI ireg scale));
4528 
4529   op_cost(100);
4530   format %{"[$reg + $ireg << $scale]" %}
4531   interface(MEMORY_INTER) %{
4532     base($reg);
4533     index($ireg);
4534     scale($scale);
4535     disp(0x0);
4536   %}
4537 %}
4538 
4539 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4540 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4541 %{
4542   match(AddP (AddP reg (LShiftI ireg scale)) off);
4543 
4544   op_cost(100);
4545   format %{"[$reg + $off + $ireg << $scale]" %}
4546   interface(MEMORY_INTER) %{
4547     base($reg);
4548     index($ireg);
4549     scale($scale);
4550     disp($off);
4551   %}
4552 %}
4553 
4554 //----------Conditional Branch Operands----------------------------------------
4555 // Comparison Op  - This is the operation of the comparison, and is limited to
4556 //                  the following set of codes:
4557 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4558 //
4559 // Other attributes of the comparison, such as unsignedness, are specified
4560 // by the comparison instruction that sets a condition code flags register.
4561 // That result is represented by a flags operand whose subtype is appropriate
4562 // to the unsignedness (etc.) of the comparison.
4563 //
4564 // Later, the instruction which matches both the Comparison Op (a Bool) and
4565 // the flags (produced by the Cmp) specifies the coding of the comparison op
4566 // by matching a specific subtype of Bool operand below, such as cmpOpU.
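//
// As an illustrative sketch (the real definitions appear further below in this
// file), a conditional-branch instruct matches both pieces at once, roughly:
//
//   instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
//     match(If cop cr);
//     format %{ "J$cop    $labl" %}
//     ...
//   %}
//
// The cmpOp operand supplies the condition-code bits listed below, while the
// eFlagsReg operand stands for the flags produced by the preceding Cmp.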
4567 
4568 // Comparison Code
4569 operand cmpOp() %{
4570   match(Bool);
4571 
4572   format %{ "" %}
4573   interface(COND_INTER) %{
4574     equal(0x4, "e");
4575     not_equal(0x5, "ne");
4576     less(0xC, "l");
4577     greater_equal(0xD, "ge");
4578     less_equal(0xE, "le");
4579     greater(0xF, "g");
4580     overflow(0x0, "o");
4581     no_overflow(0x1, "no");
4582   %}
4583 %}
4584 
4585 // Comparison Code, unsigned compare.  Used by FP also, with
4586 // C2 (unordered) turned into GT or LT already.  The other bits
4587 // C0 and C3 are turned into Carry & Zero flags.
4588 operand cmpOpU() %{
4589   match(Bool);
4590 
4591   format %{ "" %}
4592   interface(COND_INTER) %{
4593     equal(0x4, "e");
4594     not_equal(0x5, "ne");
4595     less(0x2, "b");
4596     greater_equal(0x3, "nb");
4597     less_equal(0x6, "be");
4598     greater(0x7, "nbe");
4599     overflow(0x0, "o");
4600     no_overflow(0x1, "no");
4601   %}
4602 %}
4603 
4604 // Floating comparisons that don't require any fixup for the unordered case
4605 operand cmpOpUCF() %{
4606   match(Bool);
4607   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4608             n->as_Bool()->_test._test == BoolTest::ge ||
4609             n->as_Bool()->_test._test == BoolTest::le ||
4610             n->as_Bool()->_test._test == BoolTest::gt);
4611   format %{ "" %}
4612   interface(COND_INTER) %{
4613     equal(0x4, "e");
4614     not_equal(0x5, "ne");
4615     less(0x2, "b");
4616     greater_equal(0x3, "nb");
4617     less_equal(0x6, "be");
4618     greater(0x7, "nbe");
4619     overflow(0x0, "o");
4620     no_overflow(0x1, "no");
4621   %}
4622 %}
4623 
4624 
4625 // Floating comparisons that can be fixed up with extra conditional jumps
4626 operand cmpOpUCF2() %{
4627   match(Bool);
4628   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4629             n->as_Bool()->_test._test == BoolTest::eq);
4630   format %{ "" %}
4631   interface(COND_INTER) %{
4632     equal(0x4, "e");
4633     not_equal(0x5, "ne");
4634     less(0x2, "b");
4635     greater_equal(0x3, "nb");
4636     less_equal(0x6, "be");
4637     greater(0x7, "nbe");
4638     overflow(0x0, "o");
4639     no_overflow(0x1, "no");
4640   %}
4641 %}
4642 
4643 // Comparison Code for FP conditional move
4644 operand cmpOp_fcmov() %{
4645   match(Bool);
4646 
4647   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4648             n->as_Bool()->_test._test != BoolTest::no_overflow);
4649   format %{ "" %}
4650   interface(COND_INTER) %{
4651     equal        (0x0C8);
4652     not_equal    (0x1C8);
4653     less         (0x0C0);
4654     greater_equal(0x1C0);
4655     less_equal   (0x0D0);
4656     greater      (0x1D0);
4657     overflow(0x0, "o"); // not really supported by the instruction
4658     no_overflow(0x1, "no"); // not really supported by the instruction
4659   %}
4660 %}
4661 
4662 // Comparison Code used in long compares
4663 operand cmpOp_commute() %{
4664   match(Bool);
4665 
4666   format %{ "" %}
4667   interface(COND_INTER) %{
4668     equal(0x4, "e");
4669     not_equal(0x5, "ne");
4670     less(0xF, "g");
4671     greater_equal(0xE, "le");
4672     less_equal(0xD, "ge");
4673     greater(0xC, "l");
4674     overflow(0x0, "o");
4675     no_overflow(0x1, "no");
4676   %}
4677 %}
4678 
4679 // Comparison Code used in unsigned long compares
4680 operand cmpOpU_commute() %{
4681   match(Bool);
4682 
4683   format %{ "" %}
4684   interface(COND_INTER) %{
4685     equal(0x4, "e");
4686     not_equal(0x5, "ne");
4687     less(0x7, "nbe");
4688     greater_equal(0x6, "be");
4689     less_equal(0x3, "nb");
4690     greater(0x2, "b");
4691     overflow(0x0, "o");
4692     no_overflow(0x1, "no");
4693   %}
4694 %}
4695 
4696 //----------OPERAND CLASSES----------------------------------------------------
4697 // Operand Classes are groups of operands that are used to simplify
4698 // instruction definitions by not requiring the AD writer to specify separate
4699 // instructions for every form of operand when the instruction accepts
4700 // multiple operand types with the same basic encoding and format.  The classic
4701 // case of this is memory operands.
4702 
4703 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4704                indIndex, indIndexScale, indIndexScaleOffset);
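
// With this opclass a single instruct covers all of the addressing forms
// above; e.g. the integer load further below is written once as (a sketch of
// the existing pattern, not an extra definition):
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// and the matcher substitutes the concrete operand (indirect, indOffset8,
// ...), whose base/index/scale/disp are read through its MEMORY_INTER
// interface.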
4705 
4706 // Long memory operations are encoded in 2 instructions and a +4 offset.
4707 // This means some kind of offset is always required and you cannot use
4708 // an oop as the offset (as is done when working on static globals).
4709 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4710                     indIndex, indIndexScale, indIndexScaleOffset);
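
// For example, the loadL instruct further below emits two 32-bit moves whose
// addresses differ only in the displacement (a sketch of its encoding; the
// names here are placeholders):
//
//   __ movl(dst_lo, Address::make_raw(base, index, scale, disp,     relocInfo::none));
//   __ movl(dst_hi, Address::make_raw(base, index, scale, disp + 4, relocInfo::none));
//
// which is why the operand must always be able to carry a plain +4 offset.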
4711 
4712 
4713 //----------PIPELINE-----------------------------------------------------------
4714 // Rules which define the behavior of the target architecture's pipeline.
4715 pipeline %{
4716 
4717 //----------ATTRIBUTES---------------------------------------------------------
4718 attributes %{
4719   variable_size_instructions;        // Variable size instructions
4720   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4721   instruction_unit_size = 1;         // Instruction size is measured in units of 1 byte
4722   instruction_fetch_unit_size = 16;  // The processor fetches one line
4723   instruction_fetch_units = 1;       // of 16 bytes
4724 
4725   // List of nop instructions
4726   nops( MachNop );
4727 %}
4728 
4729 //----------RESOURCES----------------------------------------------------------
4730 // Resources are the functional units available to the machine
4731 
4732 // Generic P2/P3 pipeline
4733 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4734 // 3 instructions decoded per cycle.
4735 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4736 // 2 ALU op, only ALU0 handles mul/div instructions.
4737 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4738            MS0, MS1, MEM = MS0 | MS1,
4739            BR, FPU,
4740            ALU0, ALU1, ALU = ALU0 | ALU1 );
4741 
4742 //----------PIPELINE DESCRIPTION-----------------------------------------------
4743 // Pipeline Description specifies the stages in the machine's pipeline
4744 
4745 // Generic P2/P3 pipeline
4746 pipe_desc(S0, S1, S2, S3, S4, S5);
4747 
4748 //----------PIPELINE CLASSES---------------------------------------------------
4749 // Pipeline Classes describe the stages in which input and output are
4750 // referenced by the hardware pipeline.
4751 
4752 // Naming convention: ialu or fpu
4753 // Then: _reg
4754 // Then: _reg if there is a 2nd register
4755 // Then: _long if it's a pair of instructions implementing a long
4756 // Then: _fat if it requires the big decoder
4757 //   Or: _mem if it requires the big decoder and a memory unit.
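//
// Reading a name under this convention: "ialu_reg_long_fat" is an integer ALU
// operation with a register destination, implemented as a pair of
// instructions (long), and requiring the big decoder D0 (fat).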
4758 
4759 // Integer ALU reg operation
4760 pipe_class ialu_reg(rRegI dst) %{
4761     single_instruction;
4762     dst    : S4(write);
4763     dst    : S3(read);
4764     DECODE : S0;        // any decoder
4765     ALU    : S3;        // any alu
4766 %}
4767 
4768 // Long ALU reg operation
4769 pipe_class ialu_reg_long(eRegL dst) %{
4770     instruction_count(2);
4771     dst    : S4(write);
4772     dst    : S3(read);
4773     DECODE : S0(2);     // any 2 decoders
4774     ALU    : S3(2);     // both alus
4775 %}
4776 
4777 // Integer ALU reg operation using big decoder
4778 pipe_class ialu_reg_fat(rRegI dst) %{
4779     single_instruction;
4780     dst    : S4(write);
4781     dst    : S3(read);
4782     D0     : S0;        // big decoder only
4783     ALU    : S3;        // any alu
4784 %}
4785 
4786 // Long ALU reg operation using big decoder
4787 pipe_class ialu_reg_long_fat(eRegL dst) %{
4788     instruction_count(2);
4789     dst    : S4(write);
4790     dst    : S3(read);
4791     D0     : S0(2);     // big decoder only; twice
4792     ALU    : S3(2);     // any 2 alus
4793 %}
4794 
4795 // Integer ALU reg-reg operation
4796 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4797     single_instruction;
4798     dst    : S4(write);
4799     src    : S3(read);
4800     DECODE : S0;        // any decoder
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Long ALU reg-reg operation
4805 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4806     instruction_count(2);
4807     dst    : S4(write);
4808     src    : S3(read);
4809     DECODE : S0(2);     // any 2 decoders
4810     ALU    : S3(2);     // both alus
4811 %}
4812 
4813 // Integer ALU reg-reg operation using big decoder
4814 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4815     single_instruction;
4816     dst    : S4(write);
4817     src    : S3(read);
4818     D0     : S0;        // big decoder only
4819     ALU    : S3;        // any alu
4820 %}
4821 
4822 // Long ALU reg-reg operation using big decoder
4823 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4824     instruction_count(2);
4825     dst    : S4(write);
4826     src    : S3(read);
4827     D0     : S0(2);     // big decoder only; twice
4828     ALU    : S3(2);     // both alus
4829 %}
4830 
4831 // Integer ALU reg-mem operation
4832 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4833     single_instruction;
4834     dst    : S5(write);
4835     mem    : S3(read);
4836     D0     : S0;        // big decoder only
4837     ALU    : S4;        // any alu
4838     MEM    : S3;        // any mem
4839 %}
4840 
4841 // Long ALU reg-mem operation
4842 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4843     instruction_count(2);
4844     dst    : S5(write);
4845     mem    : S3(read);
4846     D0     : S0(2);     // big decoder only; twice
4847     ALU    : S4(2);     // any 2 alus
4848     MEM    : S3(2);     // both mems
4849 %}
4850 
4851 // Integer mem operation (prefetch)
4852 pipe_class ialu_mem(memory mem)
4853 %{
4854     single_instruction;
4855     mem    : S3(read);
4856     D0     : S0;        // big decoder only
4857     MEM    : S3;        // any mem
4858 %}
4859 
4860 // Integer Store to Memory
4861 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4862     single_instruction;
4863     mem    : S3(read);
4864     src    : S5(read);
4865     D0     : S0;        // big decoder only
4866     ALU    : S4;        // any alu
4867     MEM    : S3;
4868 %}
4869 
4870 // Long Store to Memory
4871 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4872     instruction_count(2);
4873     mem    : S3(read);
4874     src    : S5(read);
4875     D0     : S0(2);     // big decoder only; twice
4876     ALU    : S4(2);     // any 2 alus
4877     MEM    : S3(2);     // Both mems
4878 %}
4879 
4880 // Integer Store to Memory
4881 pipe_class ialu_mem_imm(memory mem) %{
4882     single_instruction;
4883     mem    : S3(read);
4884     D0     : S0;        // big decoder only
4885     ALU    : S4;        // any alu
4886     MEM    : S3;
4887 %}
4888 
4889 // Integer ALU0 reg-reg operation
4890 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4891     single_instruction;
4892     dst    : S4(write);
4893     src    : S3(read);
4894     D0     : S0;        // Big decoder only
4895     ALU0   : S3;        // only alu0
4896 %}
4897 
4898 // Integer ALU0 reg-mem operation
4899 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4900     single_instruction;
4901     dst    : S5(write);
4902     mem    : S3(read);
4903     D0     : S0;        // big decoder only
4904     ALU0   : S4;        // ALU0 only
4905     MEM    : S3;        // any mem
4906 %}
4907 
4908 // Integer ALU reg-reg operation
4909 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4910     single_instruction;
4911     cr     : S4(write);
4912     src1   : S3(read);
4913     src2   : S3(read);
4914     DECODE : S0;        // any decoder
4915     ALU    : S3;        // any alu
4916 %}
4917 
4918 // Integer ALU reg-imm operation
4919 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4920     single_instruction;
4921     cr     : S4(write);
4922     src1   : S3(read);
4923     DECODE : S0;        // any decoder
4924     ALU    : S3;        // any alu
4925 %}
4926 
4927 // Integer ALU reg-mem operation
4928 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4929     single_instruction;
4930     cr     : S4(write);
4931     src1   : S3(read);
4932     src2   : S3(read);
4933     D0     : S0;        // big decoder only
4934     ALU    : S4;        // any alu
4935     MEM    : S3;
4936 %}
4937 
4938 // Conditional move reg-reg
4939 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4940     instruction_count(4);
4941     y      : S4(read);
4942     q      : S3(read);
4943     p      : S3(read);
4944     DECODE : S0(4);     // any decoder
4945 %}
4946 
4947 // Conditional move reg-reg
4948 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4949     single_instruction;
4950     dst    : S4(write);
4951     src    : S3(read);
4952     cr     : S3(read);
4953     DECODE : S0;        // any decoder
4954 %}
4955 
4956 // Conditional move reg-mem
4957 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4958     single_instruction;
4959     dst    : S4(write);
4960     src    : S3(read);
4961     cr     : S3(read);
4962     DECODE : S0;        // any decoder
4963     MEM    : S3;
4964 %}
4965 
4966 // Conditional move reg-reg long
4967 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4968     single_instruction;
4969     dst    : S4(write);
4970     src    : S3(read);
4971     cr     : S3(read);
4972     DECODE : S0(2);     // any 2 decoders
4973 %}
4974 
4975 // Conditional move double reg-reg
4976 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4977     single_instruction;
4978     dst    : S4(write);
4979     src    : S3(read);
4980     cr     : S3(read);
4981     DECODE : S0;        // any decoder
4982 %}
4983 
4984 // Float reg-reg operation
4985 pipe_class fpu_reg(regDPR dst) %{
4986     instruction_count(2);
4987     dst    : S3(read);
4988     DECODE : S0(2);     // any 2 decoders
4989     FPU    : S3;
4990 %}
4991 
4992 // Float reg-reg operation
4993 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4994     instruction_count(2);
4995     dst    : S4(write);
4996     src    : S3(read);
4997     DECODE : S0(2);     // any 2 decoders
4998     FPU    : S3;
4999 %}
5000 
5001 // Float reg-reg operation
5002 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
5003     instruction_count(3);
5004     dst    : S4(write);
5005     src1   : S3(read);
5006     src2   : S3(read);
5007     DECODE : S0(3);     // any 3 decoders
5008     FPU    : S3(2);
5009 %}
5010 
5011 // Float reg-reg operation
5012 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5013     instruction_count(4);
5014     dst    : S4(write);
5015     src1   : S3(read);
5016     src2   : S3(read);
5017     src3   : S3(read);
5018     DECODE : S0(4);     // any 4 decoders
5019     FPU    : S3(2);
5020 %}
5021 
5022 // Float reg-reg operation
5023 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5024     instruction_count(4);
5025     dst    : S4(write);
5026     src1   : S3(read);
5027     src2   : S3(read);
5028     src3   : S3(read);
5029     DECODE : S1(3);     // any 3 decoders
5030     D0     : S0;        // Big decoder only
5031     FPU    : S3(2);
5032     MEM    : S3;
5033 %}
5034 
5035 // Float reg-mem operation
5036 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5037     instruction_count(2);
5038     dst    : S5(write);
5039     mem    : S3(read);
5040     D0     : S0;        // big decoder only
5041     DECODE : S1;        // any decoder for FPU POP
5042     FPU    : S4;
5043     MEM    : S3;        // any mem
5044 %}
5045 
5046 // Float reg-mem operation
5047 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5048     instruction_count(3);
5049     dst    : S5(write);
5050     src1   : S3(read);
5051     mem    : S3(read);
5052     D0     : S0;        // big decoder only
5053     DECODE : S1(2);     // any decoder for FPU POP
5054     FPU    : S4;
5055     MEM    : S3;        // any mem
5056 %}
5057 
5058 // Float mem-reg operation
5059 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5060     instruction_count(2);
5061     src    : S5(read);
5062     mem    : S3(read);
5063     DECODE : S0;        // any decoder for FPU PUSH
5064     D0     : S1;        // big decoder only
5065     FPU    : S4;
5066     MEM    : S3;        // any mem
5067 %}
5068 
5069 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5070     instruction_count(3);
5071     src1   : S3(read);
5072     src2   : S3(read);
5073     mem    : S3(read);
5074     DECODE : S0(2);     // any decoder for FPU PUSH
5075     D0     : S1;        // big decoder only
5076     FPU    : S4;
5077     MEM    : S3;        // any mem
5078 %}
5079 
5080 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5081     instruction_count(3);
5082     src1   : S3(read);
5083     src2   : S3(read);
5084     mem    : S4(read);
5085     DECODE : S0;        // any decoder for FPU PUSH
5086     D0     : S0(2);     // big decoder only
5087     FPU    : S4;
5088     MEM    : S3(2);     // any mem
5089 %}
5090 
5091 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5092     instruction_count(2);
5093     src1   : S3(read);
5094     dst    : S4(read);
5095     D0     : S0(2);     // big decoder only
5096     MEM    : S3(2);     // any mem
5097 %}
5098 
5099 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5100     instruction_count(3);
5101     src1   : S3(read);
5102     src2   : S3(read);
5103     dst    : S4(read);
5104     D0     : S0(3);     // big decoder only
5105     FPU    : S4;
5106     MEM    : S3(3);     // any mem
5107 %}
5108 
5109 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5110     instruction_count(3);
5111     src1   : S4(read);
5112     mem    : S4(read);
5113     DECODE : S0;        // any decoder for FPU PUSH
5114     D0     : S0(2);     // big decoder only
5115     FPU    : S4;
5116     MEM    : S3(2);     // any mem
5117 %}
5118 
5119 // Float load constant
5120 pipe_class fpu_reg_con(regDPR dst) %{
5121     instruction_count(2);
5122     dst    : S5(write);
5123     D0     : S0;        // big decoder only for the load
5124     DECODE : S1;        // any decoder for FPU POP
5125     FPU    : S4;
5126     MEM    : S3;        // any mem
5127 %}
5128 
5129 // Float load constant
5130 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5131     instruction_count(3);
5132     dst    : S5(write);
5133     src    : S3(read);
5134     D0     : S0;        // big decoder only for the load
5135     DECODE : S1(2);     // any decoder for FPU POP
5136     FPU    : S4;
5137     MEM    : S3;        // any mem
5138 %}
5139 
5140 // Unconditional branch
5141 pipe_class pipe_jmp( label labl ) %{
5142     single_instruction;
5143     BR   : S3;
5144 %}
5145 
5146 // Conditional branch
5147 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5148     single_instruction;
5149     cr    : S1(read);
5150     BR    : S3;
5151 %}
5152 
5153 // Allocation idiom
5154 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5155     instruction_count(1); force_serialization;
5156     fixed_latency(6);
5157     heap_ptr : S3(read);
5158     DECODE   : S0(3);
5159     D0       : S2;
5160     MEM      : S3;
5161     ALU      : S3(2);
5162     dst      : S5(write);
5163     BR       : S5;
5164 %}
5165 
5166 // Generic big/slow expanded idiom
5167 pipe_class pipe_slow(  ) %{
5168     instruction_count(10); multiple_bundles; force_serialization;
5169     fixed_latency(100);
5170     D0  : S0(2);
5171     MEM : S3(2);
5172 %}
5173 
5174 // The real do-nothing guy
5175 pipe_class empty( ) %{
5176     instruction_count(0);
5177 %}
5178 
5179 // Define the class for the Nop node
5180 define %{
5181    MachNop = empty;
5182 %}
5183 
5184 %}
5185 
5186 //----------INSTRUCTIONS-------------------------------------------------------
5187 //
5188 // match      -- States which machine-independent subtree may be replaced
5189 //               by this instruction.
5190 // ins_cost   -- The estimated cost of this instruction is used by instruction
5191 //               selection to identify a minimum cost tree of machine
5192 //               instructions that matches a tree of machine-independent
5193 //               instructions.
5194 // format     -- A string providing the disassembly for this instruction.
5195 //               The value of an instruction's operand may be inserted
5196 //               by referring to it with a '$' prefix.
5197 // opcode     -- Three instruction opcodes may be provided.  These are referred
5198 //               to within an encode class as $primary, $secondary, and $tertiary
5199 //               respectively.  The primary opcode is commonly used to
5200 //               indicate the type of machine instruction, while secondary
5201 //               and tertiary are often used for prefix options or addressing
5202 //               modes.
5203 // ins_encode -- A list of encode classes with parameters. The encode class
5204 //               name must have been defined in an 'enc_class' specification
5205 //               in the encode section of the architecture description.
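//
// A minimal sketch tying these attributes together (modeled on the loadP
// instruct defined below; shown here only as an illustration):
//
//   instruct loadP(eRegP dst, memory mem) %{
//     match(Set dst (LoadP mem));          // replaces a machine-independent LoadP
//     ins_cost(125);                       // cost fed to instruction selection
//     format %{ "MOV    $dst,$mem" %}      // disassembly string, '$' substituted
//     opcode(0x8B);                        // available as $primary in encode classes
//     ins_encode( OpcP, RegMem(dst,mem) ); // enc_class names from the encode section
//     ins_pipe( ialu_reg_mem );
//   %}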
5206 
5207 //----------BSWAP-Instruction--------------------------------------------------
5208 instruct bytes_reverse_int(rRegI dst) %{
5209   match(Set dst (ReverseBytesI dst));
5210 
5211   format %{ "BSWAP  $dst" %}
5212   opcode(0x0F, 0xC8);
5213   ins_encode( OpcP, OpcSReg(dst) );
5214   ins_pipe( ialu_reg );
5215 %}
5216 
5217 instruct bytes_reverse_long(eRegL dst) %{
5218   match(Set dst (ReverseBytesL dst));
5219 
5220   format %{ "BSWAP  $dst.lo\n\t"
5221             "BSWAP  $dst.hi\n\t"
5222             "XCHG   $dst.lo $dst.hi" %}
5223 
5224   ins_cost(125);
5225   ins_encode( bswap_long_bytes(dst) );
5226   ins_pipe( ialu_reg_reg);
5227 %}
5228 
5229 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5230   match(Set dst (ReverseBytesUS dst));
5231   effect(KILL cr);
5232 
5233   format %{ "BSWAP  $dst\n\t"
5234             "SHR    $dst,16\n\t" %}
5235   ins_encode %{
5236     __ bswapl($dst$$Register);
5237     __ shrl($dst$$Register, 16);
5238   %}
5239   ins_pipe( ialu_reg );
5240 %}
5241 
5242 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5243   match(Set dst (ReverseBytesS dst));
5244   effect(KILL cr);
5245 
5246   format %{ "BSWAP  $dst\n\t"
5247             "SAR    $dst,16\n\t" %}
5248   ins_encode %{
5249     __ bswapl($dst$$Register);
5250     __ sarl($dst$$Register, 16);
5251   %}
5252   ins_pipe( ialu_reg );
5253 %}
5254 
5255 
5256 //---------- Zeros Count Instructions ------------------------------------------
5257 
5258 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5259   predicate(UseCountLeadingZerosInstruction);
5260   match(Set dst (CountLeadingZerosI src));
5261   effect(KILL cr);
5262 
5263   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5264   ins_encode %{
5265     __ lzcntl($dst$$Register, $src$$Register);
5266   %}
5267   ins_pipe(ialu_reg);
5268 %}
5269 
5270 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5271   predicate(!UseCountLeadingZerosInstruction);
5272   match(Set dst (CountLeadingZerosI src));
5273   effect(KILL cr);
5274 
5275   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5276             "JNZ    skip\n\t"
5277             "MOV    $dst, -1\n"
5278       "skip:\n\t"
5279             "NEG    $dst\n\t"
5280             "ADD    $dst, 31" %}
5281   ins_encode %{
5282     Register Rdst = $dst$$Register;
5283     Register Rsrc = $src$$Register;
5284     Label skip;
5285     __ bsrl(Rdst, Rsrc);
5286     __ jccb(Assembler::notZero, skip);
5287     __ movl(Rdst, -1);
5288     __ bind(skip);
5289     __ negl(Rdst);
5290     __ addl(Rdst, BitsPerInt - 1);
5291   %}
5292   ins_pipe(ialu_reg);
5293 %}
5294 
5295 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5296   predicate(UseCountLeadingZerosInstruction);
5297   match(Set dst (CountLeadingZerosL src));
5298   effect(TEMP dst, KILL cr);
5299 
5300   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5301             "JNC    done\n\t"
5302             "LZCNT  $dst, $src.lo\n\t"
5303             "ADD    $dst, 32\n"
5304       "done:" %}
5305   ins_encode %{
5306     Register Rdst = $dst$$Register;
5307     Register Rsrc = $src$$Register;
5308     Label done;
5309     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5310     __ jccb(Assembler::carryClear, done);
5311     __ lzcntl(Rdst, Rsrc);
5312     __ addl(Rdst, BitsPerInt);
5313     __ bind(done);
5314   %}
5315   ins_pipe(ialu_reg);
5316 %}
5317 
5318 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5319   predicate(!UseCountLeadingZerosInstruction);
5320   match(Set dst (CountLeadingZerosL src));
5321   effect(TEMP dst, KILL cr);
5322 
5323   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5324             "JZ     msw_is_zero\n\t"
5325             "ADD    $dst, 32\n\t"
5326             "JMP    not_zero\n"
5327       "msw_is_zero:\n\t"
5328             "BSR    $dst, $src.lo\n\t"
5329             "JNZ    not_zero\n\t"
5330             "MOV    $dst, -1\n"
5331       "not_zero:\n\t"
5332             "NEG    $dst\n\t"
5333             "ADD    $dst, 63\n" %}
5334   ins_encode %{
5335     Register Rdst = $dst$$Register;
5336     Register Rsrc = $src$$Register;
5337     Label msw_is_zero;
5338     Label not_zero;
5339     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5340     __ jccb(Assembler::zero, msw_is_zero);
5341     __ addl(Rdst, BitsPerInt);
5342     __ jmpb(not_zero);
5343     __ bind(msw_is_zero);
5344     __ bsrl(Rdst, Rsrc);
5345     __ jccb(Assembler::notZero, not_zero);
5346     __ movl(Rdst, -1);
5347     __ bind(not_zero);
5348     __ negl(Rdst);
5349     __ addl(Rdst, BitsPerLong - 1);
5350   %}
5351   ins_pipe(ialu_reg);
5352 %}
5353 
5354 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5355   predicate(UseCountTrailingZerosInstruction);
5356   match(Set dst (CountTrailingZerosI src));
5357   effect(KILL cr);
5358 
5359   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5360   ins_encode %{
5361     __ tzcntl($dst$$Register, $src$$Register);
5362   %}
5363   ins_pipe(ialu_reg);
5364 %}
5365 
5366 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5367   predicate(!UseCountTrailingZerosInstruction);
5368   match(Set dst (CountTrailingZerosI src));
5369   effect(KILL cr);
5370 
5371   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5372             "JNZ    done\n\t"
5373             "MOV    $dst, 32\n"
5374       "done:" %}
5375   ins_encode %{
5376     Register Rdst = $dst$$Register;
5377     Label done;
5378     __ bsfl(Rdst, $src$$Register);
5379     __ jccb(Assembler::notZero, done);
5380     __ movl(Rdst, BitsPerInt);
5381     __ bind(done);
5382   %}
5383   ins_pipe(ialu_reg);
5384 %}
5385 
5386 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5387   predicate(UseCountTrailingZerosInstruction);
5388   match(Set dst (CountTrailingZerosL src));
5389   effect(TEMP dst, KILL cr);
5390 
5391   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5392             "JNC    done\n\t"
5393             "TZCNT  $dst, $src.hi\n\t"
5394             "ADD    $dst, 32\n"
5395             "done:" %}
5396   ins_encode %{
5397     Register Rdst = $dst$$Register;
5398     Register Rsrc = $src$$Register;
5399     Label done;
5400     __ tzcntl(Rdst, Rsrc);
5401     __ jccb(Assembler::carryClear, done);
5402     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5403     __ addl(Rdst, BitsPerInt);
5404     __ bind(done);
5405   %}
5406   ins_pipe(ialu_reg);
5407 %}
5408 
5409 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5410   predicate(!UseCountTrailingZerosInstruction);
5411   match(Set dst (CountTrailingZerosL src));
5412   effect(TEMP dst, KILL cr);
5413 
5414   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5415             "JNZ    done\n\t"
5416             "BSF    $dst, $src.hi\n\t"
5417             "JNZ    msw_not_zero\n\t"
5418             "MOV    $dst, 32\n"
5419       "msw_not_zero:\n\t"
5420             "ADD    $dst, 32\n"
5421       "done:" %}
5422   ins_encode %{
5423     Register Rdst = $dst$$Register;
5424     Register Rsrc = $src$$Register;
5425     Label msw_not_zero;
5426     Label done;
5427     __ bsfl(Rdst, Rsrc);
5428     __ jccb(Assembler::notZero, done);
5429     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5430     __ jccb(Assembler::notZero, msw_not_zero);
5431     __ movl(Rdst, BitsPerInt);
5432     __ bind(msw_not_zero);
5433     __ addl(Rdst, BitsPerInt);
5434     __ bind(done);
5435   %}
5436   ins_pipe(ialu_reg);
5437 %}
5438 
5439 
5440 //---------- Population Count Instructions -------------------------------------
5441 
5442 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5443   predicate(UsePopCountInstruction);
5444   match(Set dst (PopCountI src));
5445   effect(KILL cr);
5446 
5447   format %{ "POPCNT $dst, $src" %}
5448   ins_encode %{
5449     __ popcntl($dst$$Register, $src$$Register);
5450   %}
5451   ins_pipe(ialu_reg);
5452 %}
5453 
5454 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5455   predicate(UsePopCountInstruction);
5456   match(Set dst (PopCountI (LoadI mem)));
5457   effect(KILL cr);
5458 
5459   format %{ "POPCNT $dst, $mem" %}
5460   ins_encode %{
5461     __ popcntl($dst$$Register, $mem$$Address);
5462   %}
5463   ins_pipe(ialu_reg);
5464 %}
5465 
5466 // Note: Long.bitCount(long) returns an int.
5467 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5468   predicate(UsePopCountInstruction);
5469   match(Set dst (PopCountL src));
5470   effect(KILL cr, TEMP tmp, TEMP dst);
5471 
5472   format %{ "POPCNT $dst, $src.lo\n\t"
5473             "POPCNT $tmp, $src.hi\n\t"
5474             "ADD    $dst, $tmp" %}
5475   ins_encode %{
5476     __ popcntl($dst$$Register, $src$$Register);
5477     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5478     __ addl($dst$$Register, $tmp$$Register);
5479   %}
5480   ins_pipe(ialu_reg);
5481 %}
5482 
5483 // Note: Long.bitCount(long) returns an int.
5484 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5485   predicate(UsePopCountInstruction);
5486   match(Set dst (PopCountL (LoadL mem)));
5487   effect(KILL cr, TEMP tmp, TEMP dst);
5488 
5489   format %{ "POPCNT $dst, $mem\n\t"
5490             "POPCNT $tmp, $mem+4\n\t"
5491             "ADD    $dst, $tmp" %}
5492   ins_encode %{
5493     //__ popcntl($dst$$Register, $mem$$Address$$first);
5494     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5495     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5496     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5497     __ addl($dst$$Register, $tmp$$Register);
5498   %}
5499   ins_pipe(ialu_reg);
5500 %}
5501 
5502 
5503 //----------Load/Store/Move Instructions---------------------------------------
5504 //----------Load Instructions--------------------------------------------------
5505 // Load Byte (8bit signed)
5506 instruct loadB(xRegI dst, memory mem) %{
5507   match(Set dst (LoadB mem));
5508 
5509   ins_cost(125);
5510   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5511 
5512   ins_encode %{
5513     __ movsbl($dst$$Register, $mem$$Address);
5514   %}
5515 
5516   ins_pipe(ialu_reg_mem);
5517 %}
5518 
5519 // Load Byte (8bit signed) into Long Register
5520 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5521   match(Set dst (ConvI2L (LoadB mem)));
5522   effect(KILL cr);
5523 
5524   ins_cost(375);
5525   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5526             "MOV    $dst.hi,$dst.lo\n\t"
5527             "SAR    $dst.hi,7" %}
5528 
5529   ins_encode %{
5530     __ movsbl($dst$$Register, $mem$$Address);
5531     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5532     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
5533   %}
5534 
5535   ins_pipe(ialu_reg_mem);
5536 %}
5537 
5538 // Load Unsigned Byte (8bit UNsigned)
5539 instruct loadUB(xRegI dst, memory mem) %{
5540   match(Set dst (LoadUB mem));
5541 
5542   ins_cost(125);
5543   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5544 
5545   ins_encode %{
5546     __ movzbl($dst$$Register, $mem$$Address);
5547   %}
5548 
5549   ins_pipe(ialu_reg_mem);
5550 %}
5551 
5552 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5553 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5554   match(Set dst (ConvI2L (LoadUB mem)));
5555   effect(KILL cr);
5556 
5557   ins_cost(250);
5558   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5559             "XOR    $dst.hi,$dst.hi" %}
5560 
5561   ins_encode %{
5562     Register Rdst = $dst$$Register;
5563     __ movzbl(Rdst, $mem$$Address);
5564     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5565   %}
5566 
5567   ins_pipe(ialu_reg_mem);
5568 %}
5569 
5570 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5571 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5572   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5573   effect(KILL cr);
5574 
5575   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5576             "XOR    $dst.hi,$dst.hi\n\t"
5577             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5578   ins_encode %{
5579     Register Rdst = $dst$$Register;
5580     __ movzbl(Rdst, $mem$$Address);
5581     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5582     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5583   %}
5584   ins_pipe(ialu_reg_mem);
5585 %}
5586 
5587 // Load Short (16bit signed)
5588 instruct loadS(rRegI dst, memory mem) %{
5589   match(Set dst (LoadS mem));
5590 
5591   ins_cost(125);
5592   format %{ "MOVSX  $dst,$mem\t# short" %}
5593 
5594   ins_encode %{
5595     __ movswl($dst$$Register, $mem$$Address);
5596   %}
5597 
5598   ins_pipe(ialu_reg_mem);
5599 %}
5600 
5601 // Load Short (16 bit signed) to Byte (8 bit signed)
5602 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5603   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5604 
5605   ins_cost(125);
5606   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5607   ins_encode %{
5608     __ movsbl($dst$$Register, $mem$$Address);
5609   %}
5610   ins_pipe(ialu_reg_mem);
5611 %}
5612 
5613 // Load Short (16bit signed) into Long Register
5614 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5615   match(Set dst (ConvI2L (LoadS mem)));
5616   effect(KILL cr);
5617 
5618   ins_cost(375);
5619   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5620             "MOV    $dst.hi,$dst.lo\n\t"
5621             "SAR    $dst.hi,15" %}
5622 
5623   ins_encode %{
5624     __ movswl($dst$$Register, $mem$$Address);
5625     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5626     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
5627   %}
5628 
5629   ins_pipe(ialu_reg_mem);
5630 %}
5631 
5632 // Load Unsigned Short/Char (16bit unsigned)
5633 instruct loadUS(rRegI dst, memory mem) %{
5634   match(Set dst (LoadUS mem));
5635 
5636   ins_cost(125);
5637   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5638 
5639   ins_encode %{
5640     __ movzwl($dst$$Register, $mem$$Address);
5641   %}
5642 
5643   ins_pipe(ialu_reg_mem);
5644 %}
5645 
5646 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5647 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5648   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5649 
5650   ins_cost(125);
5651   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5652   ins_encode %{
5653     __ movsbl($dst$$Register, $mem$$Address);
5654   %}
5655   ins_pipe(ialu_reg_mem);
5656 %}
5657 
5658 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5659 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5660   match(Set dst (ConvI2L (LoadUS mem)));
5661   effect(KILL cr);
5662 
5663   ins_cost(250);
5664   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5665             "XOR    $dst.hi,$dst.hi" %}
5666 
5667   ins_encode %{
5668     __ movzwl($dst$$Register, $mem$$Address);
5669     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5670   %}
5671 
5672   ins_pipe(ialu_reg_mem);
5673 %}
5674 
5675 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5676 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5677   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5678   effect(KILL cr);
5679 
5680   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5681             "XOR    $dst.hi,$dst.hi" %}
5682   ins_encode %{
5683     Register Rdst = $dst$$Register;
5684     __ movzbl(Rdst, $mem$$Address);
5685     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5686   %}
5687   ins_pipe(ialu_reg_mem);
5688 %}
5689 
5690 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5691 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5692   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5693   effect(KILL cr);
5694 
5695   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5696             "XOR    $dst.hi,$dst.hi\n\t"
5697             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5698   ins_encode %{
5699     Register Rdst = $dst$$Register;
5700     __ movzwl(Rdst, $mem$$Address);
5701     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5702     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5703   %}
5704   ins_pipe(ialu_reg_mem);
5705 %}
5706 
5707 // Load Integer
5708 instruct loadI(rRegI dst, memory mem) %{
5709   match(Set dst (LoadI mem));
5710 
5711   ins_cost(125);
5712   format %{ "MOV    $dst,$mem\t# int" %}
5713 
5714   ins_encode %{
5715     __ movl($dst$$Register, $mem$$Address);
5716   %}
5717 
5718   ins_pipe(ialu_reg_mem);
5719 %}
5720 
5721 // Load Integer (32 bit signed) to Byte (8 bit signed)
5722 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5723   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5724 
5725   ins_cost(125);
5726   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5727   ins_encode %{
5728     __ movsbl($dst$$Register, $mem$$Address);
5729   %}
5730   ins_pipe(ialu_reg_mem);
5731 %}
5732 
5733 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5734 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5735   match(Set dst (AndI (LoadI mem) mask));
5736 
5737   ins_cost(125);
5738   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5739   ins_encode %{
5740     __ movzbl($dst$$Register, $mem$$Address);
5741   %}
5742   ins_pipe(ialu_reg_mem);
5743 %}
5744 
5745 // Load Integer (32 bit signed) to Short (16 bit signed)
5746 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5747   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5748 
5749   ins_cost(125);
5750   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5751   ins_encode %{
5752     __ movswl($dst$$Register, $mem$$Address);
5753   %}
5754   ins_pipe(ialu_reg_mem);
5755 %}
5756 
5757 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5758 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5759   match(Set dst (AndI (LoadI mem) mask));
5760 
5761   ins_cost(125);
5762   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5763   ins_encode %{
5764     __ movzwl($dst$$Register, $mem$$Address);
5765   %}
5766   ins_pipe(ialu_reg_mem);
5767 %}
5768 
5769 // Load Integer into Long Register
5770 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5771   match(Set dst (ConvI2L (LoadI mem)));
5772   effect(KILL cr);
5773 
5774   ins_cost(375);
5775   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5776             "MOV    $dst.hi,$dst.lo\n\t"
5777             "SAR    $dst.hi,31" %}
5778 
5779   ins_encode %{
5780     __ movl($dst$$Register, $mem$$Address);
5781     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5782     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5783   %}
5784 
5785   ins_pipe(ialu_reg_mem);
5786 %}
5787 
5788 // Load Integer with mask 0xFF into Long Register
5789 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5790   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5791   effect(KILL cr);
5792 
5793   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5794             "XOR    $dst.hi,$dst.hi" %}
5795   ins_encode %{
5796     Register Rdst = $dst$$Register;
5797     __ movzbl(Rdst, $mem$$Address);
5798     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5799   %}
5800   ins_pipe(ialu_reg_mem);
5801 %}
5802 
5803 // Load Integer with mask 0xFFFF into Long Register
5804 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5805   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5806   effect(KILL cr);
5807 
5808   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5809             "XOR    $dst.hi,$dst.hi" %}
5810   ins_encode %{
5811     Register Rdst = $dst$$Register;
5812     __ movzwl(Rdst, $mem$$Address);
5813     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5814   %}
5815   ins_pipe(ialu_reg_mem);
5816 %}
5817 
5818 // Load Integer with 31-bit mask into Long Register
5819 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5820   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5821   effect(KILL cr);
5822 
5823   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5824             "XOR    $dst.hi,$dst.hi\n\t"
5825             "AND    $dst.lo,$mask" %}
5826   ins_encode %{
5827     Register Rdst = $dst$$Register;
5828     __ movl(Rdst, $mem$$Address);
5829     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5830     __ andl(Rdst, $mask$$constant);
5831   %}
5832   ins_pipe(ialu_reg_mem);
5833 %}
5834 
5835 // Load Unsigned Integer into Long Register
5836 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5837   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5838   effect(KILL cr);
5839 
5840   ins_cost(250);
5841   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5842             "XOR    $dst.hi,$dst.hi" %}
5843 
5844   ins_encode %{
5845     __ movl($dst$$Register, $mem$$Address);
5846     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5847   %}
5848 
5849   ins_pipe(ialu_reg_mem);
5850 %}
5851 
5852 // Load Long.  Cannot clobber address while loading, so restrict address
5853 // register to ESI
5854 instruct loadL(eRegL dst, load_long_memory mem) %{
5855   predicate(!((LoadLNode*)n)->require_atomic_access());
5856   match(Set dst (LoadL mem));
5857 
5858   ins_cost(250);
5859   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5860             "MOV    $dst.hi,$mem+4" %}
5861 
5862   ins_encode %{
5863     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5864     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5865     __ movl($dst$$Register, Amemlo);
5866     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5867   %}
5868 
5869   ins_pipe(ialu_reg_long_mem);
5870 %}
5871 
5872 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5873 // then store it down to the stack and reload on the int
5874 // side.
5875 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5876   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5877   match(Set dst (LoadL mem));
5878 
5879   ins_cost(200);
5880   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5881             "FISTp  $dst" %}
5882   ins_encode(enc_loadL_volatile(mem,dst));
5883   ins_pipe( fpu_reg_mem );
5884 %}
5885 
5886 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5887   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5888   match(Set dst (LoadL mem));
5889   effect(TEMP tmp);
5890   ins_cost(180);
5891   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5892             "MOVSD  $dst,$tmp" %}
5893   ins_encode %{
5894     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5895     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5896   %}
5897   ins_pipe( pipe_slow );
5898 %}
5899 
5900 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5901   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5902   match(Set dst (LoadL mem));
5903   effect(TEMP tmp);
5904   ins_cost(160);
5905   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5906             "MOVD   $dst.lo,$tmp\n\t"
5907             "PSRLQ  $tmp,32\n\t"
5908             "MOVD   $dst.hi,$tmp" %}
5909   ins_encode %{
5910     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5911     __ movdl($dst$$Register, $tmp$$XMMRegister);
5912     __ psrlq($tmp$$XMMRegister, 32);
5913     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5914   %}
5915   ins_pipe( pipe_slow );
5916 %}
5917 
5918 // Load Range
5919 instruct loadRange(rRegI dst, memory mem) %{
5920   match(Set dst (LoadRange mem));
5921 
5922   ins_cost(125);
5923   format %{ "MOV    $dst,$mem" %}
5924   opcode(0x8B);
5925   ins_encode( OpcP, RegMem(dst,mem));
5926   ins_pipe( ialu_reg_mem );
5927 %}
5928 
5929 
5930 // Load Pointer
5931 instruct loadP(eRegP dst, memory mem) %{
5932   match(Set dst (LoadP mem));
5933 
5934   ins_cost(125);
5935   format %{ "MOV    $dst,$mem" %}
5936   opcode(0x8B);
5937   ins_encode( OpcP, RegMem(dst,mem));
5938   ins_pipe( ialu_reg_mem );
5939 %}
5940 
5941 // Load Klass Pointer
5942 instruct loadKlass(eRegP dst, memory mem) %{
5943   match(Set dst (LoadKlass mem));
5944 
5945   ins_cost(125);
5946   format %{ "MOV    $dst,$mem" %}
5947   opcode(0x8B);
5948   ins_encode( OpcP, RegMem(dst,mem));
5949   ins_pipe( ialu_reg_mem );
5950 %}
5951 
5952 // Load Double
5953 instruct loadDPR(regDPR dst, memory mem) %{
5954   predicate(UseSSE<=1);
5955   match(Set dst (LoadD mem));
5956 
5957   ins_cost(150);
5958   format %{ "FLD_D  ST,$mem\n\t"
5959             "FSTP   $dst" %}
5960   opcode(0xDD);               /* DD /0 */
5961   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5962               Pop_Reg_DPR(dst) );
5963   ins_pipe( fpu_reg_mem );
5964 %}
5965 
5966 // Load Double to XMM
5967 instruct loadD(regD dst, memory mem) %{
5968   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5969   match(Set dst (LoadD mem));
5970   ins_cost(145);
5971   format %{ "MOVSD  $dst,$mem" %}
5972   ins_encode %{
5973     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5974   %}
5975   ins_pipe( pipe_slow );
5976 %}
5977 
5978 instruct loadD_partial(regD dst, memory mem) %{
5979   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5980   match(Set dst (LoadD mem));
5981   ins_cost(145);
5982   format %{ "MOVLPD $dst,$mem" %}
5983   ins_encode %{
5984     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5985   %}
5986   ins_pipe( pipe_slow );
5987 %}
5988 
5989 // Load to XMM register (single-precision floating point)
5990 // MOVSS instruction
5991 instruct loadF(regF dst, memory mem) %{
5992   predicate(UseSSE>=1);
5993   match(Set dst (LoadF mem));
5994   ins_cost(145);
5995   format %{ "MOVSS  $dst,$mem" %}
5996   ins_encode %{
5997     __ movflt ($dst$$XMMRegister, $mem$$Address);
5998   %}
5999   ins_pipe( pipe_slow );
6000 %}
6001 
6002 // Load Float
6003 instruct MoveF2LEG(legRegF dst, regF src) %{
6004   match(Set dst src);
6005   format %{ "movss $dst,$src\t! if src != dst load float (4 bytes)" %}
6006   ins_encode %{
6007     if ($dst$$reg != $src$$reg) {
6008       __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6009     }
6010   %}
6011   ins_pipe( fpu_reg_reg );
6012 %}
6013 
6014 // Load Float
6015 instruct MoveLEG2F(regF dst, legRegF src) %{
6016   match(Set dst src);
6017   format %{ "movss $dst,$src\t! if src != dst load float (4 bytes)" %}
6018   ins_encode %{
6019     if ($dst$$reg != $src$$reg) {
6020       __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6021     }
6022   %}
6023   ins_pipe( fpu_reg_reg );
6024 %}
6025 
6026 
6027 
6028 // Load Float
6029 instruct loadFPR(regFPR dst, memory mem) %{
6030   predicate(UseSSE==0);
6031   match(Set dst (LoadF mem));
6032 
6033   ins_cost(150);
6034   format %{ "FLD_S  ST,$mem\n\t"
6035             "FSTP   $dst" %}
6036   opcode(0xD9);               /* D9 /0 */
6037   ins_encode( OpcP, RMopc_Mem(0x00,mem),
6038               Pop_Reg_FPR(dst) );
6039   ins_pipe( fpu_reg_mem );
6040 %}
6041 
6042 // Load Effective Address
6043 instruct leaP8(eRegP dst, indOffset8 mem) %{
6044   match(Set dst mem);
6045 
6046   ins_cost(110);
6047   format %{ "LEA    $dst,$mem" %}
6048   opcode(0x8D);
6049   ins_encode( OpcP, RegMem(dst,mem));
6050   ins_pipe( ialu_reg_reg_fat );
6051 %}
6052 
6053 instruct leaP32(eRegP dst, indOffset32 mem) %{
6054   match(Set dst mem);
6055 
6056   ins_cost(110);
6057   format %{ "LEA    $dst,$mem" %}
6058   opcode(0x8D);
6059   ins_encode( OpcP, RegMem(dst,mem));
6060   ins_pipe( ialu_reg_reg_fat );
6061 %}
6062 
6063 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6064   match(Set dst mem);
6065 
6066   ins_cost(110);
6067   format %{ "LEA    $dst,$mem" %}
6068   opcode(0x8D);
6069   ins_encode( OpcP, RegMem(dst,mem));
6070   ins_pipe( ialu_reg_reg_fat );
6071 %}
6072 
6073 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6074   match(Set dst mem);
6075 
6076   ins_cost(110);
6077   format %{ "LEA    $dst,$mem" %}
6078   opcode(0x8D);
6079   ins_encode( OpcP, RegMem(dst,mem));
6080   ins_pipe( ialu_reg_reg_fat );
6081 %}
6082 
6083 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6084   match(Set dst mem);
6085 
6086   ins_cost(110);
6087   format %{ "LEA    $dst,$mem" %}
6088   opcode(0x8D);
6089   ins_encode( OpcP, RegMem(dst,mem));
6090   ins_pipe( ialu_reg_reg_fat );
6091 %}
6092 
6093 // Load Constant
6094 instruct loadConI(rRegI dst, immI src) %{
6095   match(Set dst src);
6096 
6097   format %{ "MOV    $dst,$src" %}
6098   ins_encode( LdImmI(dst, src) );
6099   ins_pipe( ialu_reg_fat );
6100 %}
6101 
6102 // Load Constant zero
6103 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6104   match(Set dst src);
6105   effect(KILL cr);
6106 
6107   ins_cost(50);
6108   format %{ "XOR    $dst,$dst" %}
6109   opcode(0x33);  /* + rd */
6110   ins_encode( OpcP, RegReg( dst, dst ) );
6111   ins_pipe( ialu_reg );
6112 %}
6113 
6114 instruct loadConP(eRegP dst, immP src) %{
6115   match(Set dst src);
6116 
6117   format %{ "MOV    $dst,$src" %}
6118   opcode(0xB8);  /* + rd */
6119   ins_encode( LdImmP(dst, src) );
6120   ins_pipe( ialu_reg_fat );
6121 %}
6122 
6123 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6124   match(Set dst src);
6125   effect(KILL cr);
6126   ins_cost(200);
6127   format %{ "MOV    $dst.lo,$src.lo\n\t"
6128             "MOV    $dst.hi,$src.hi" %}
6129   opcode(0xB8);
6130   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6131   ins_pipe( ialu_reg_long_fat );
6132 %}
6133 
6134 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6135   match(Set dst src);
6136   effect(KILL cr);
6137   ins_cost(150);
6138   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6139             "XOR    $dst.hi,$dst.hi" %}
6140   opcode(0x33,0x33);
6141   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6142   ins_pipe( ialu_reg_long );
6143 %}
6144 
6145 // The instruction usage is guarded by predicate in operand immFPR().
6146 instruct loadConFPR(regFPR dst, immFPR con) %{
6147   match(Set dst con);
6148   ins_cost(125);
6149   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6150             "FSTP   $dst" %}
6151   ins_encode %{
6152     __ fld_s($constantaddress($con));
6153     __ fstp_d($dst$$reg);
6154   %}
6155   ins_pipe(fpu_reg_con);
6156 %}
6157 
6158 // The instruction usage is guarded by predicate in operand immFPR0().
6159 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6160   match(Set dst con);
6161   ins_cost(125);
6162   format %{ "FLDZ   ST\n\t"
6163             "FSTP   $dst" %}
6164   ins_encode %{
6165     __ fldz();
6166     __ fstp_d($dst$$reg);
6167   %}
6168   ins_pipe(fpu_reg_con);
6169 %}
6170 
6171 // The instruction usage is guarded by predicate in operand immFPR1().
6172 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6173   match(Set dst con);
6174   ins_cost(125);
6175   format %{ "FLD1   ST\n\t"
6176             "FSTP   $dst" %}
6177   ins_encode %{
6178     __ fld1();
6179     __ fstp_d($dst$$reg);
6180   %}
6181   ins_pipe(fpu_reg_con);
6182 %}
6183 
6184 // The instruction usage is guarded by predicate in operand immF().
6185 instruct loadConF(regF dst, immF con) %{
6186   match(Set dst con);
6187   ins_cost(125);
6188   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6189   ins_encode %{
6190     __ movflt($dst$$XMMRegister, $constantaddress($con));
6191   %}
6192   ins_pipe(pipe_slow);
6193 %}
6194 
6195 // The instruction usage is guarded by predicate in operand immF0().
6196 instruct loadConF0(regF dst, immF0 src) %{
6197   match(Set dst src);
6198   ins_cost(100);
6199   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6200   ins_encode %{
6201     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6202   %}
6203   ins_pipe(pipe_slow);
6204 %}
6205 
6206 // The instruction usage is guarded by predicate in operand immDPR().
6207 instruct loadConDPR(regDPR dst, immDPR con) %{
6208   match(Set dst con);
6209   ins_cost(125);
6210 
6211   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6212             "FSTP   $dst" %}
6213   ins_encode %{
6214     __ fld_d($constantaddress($con));
6215     __ fstp_d($dst$$reg);
6216   %}
6217   ins_pipe(fpu_reg_con);
6218 %}
6219 
6220 // The instruction usage is guarded by predicate in operand immDPR0().
6221 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6222   match(Set dst con);
6223   ins_cost(125);
6224 
6225   format %{ "FLDZ   ST\n\t"
6226             "FSTP   $dst" %}
6227   ins_encode %{
6228     __ fldz();
6229     __ fstp_d($dst$$reg);
6230   %}
6231   ins_pipe(fpu_reg_con);
6232 %}
6233 
6234 // The instruction usage is guarded by predicate in operand immDPR1().
6235 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6236   match(Set dst con);
6237   ins_cost(125);
6238 
6239   format %{ "FLD1   ST\n\t"
6240             "FSTP   $dst" %}
6241   ins_encode %{
6242     __ fld1();
6243     __ fstp_d($dst$$reg);
6244   %}
6245   ins_pipe(fpu_reg_con);
6246 %}
6247 
6248 // The instruction usage is guarded by predicate in operand immD().
6249 instruct loadConD(regD dst, immD con) %{
6250   match(Set dst con);
6251   ins_cost(125);
6252   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6253   ins_encode %{
6254     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6255   %}
6256   ins_pipe(pipe_slow);
6257 %}
6258 
6259 // Load Double
6260 instruct MoveD2LEG(legRegD dst, regD src) %{
6261   match(Set dst src);
6262   format %{ "movsd $dst,$src\t! if src != dst load double (8 bytes)" %}
6263   ins_encode %{
6264     if ($dst$$reg != $src$$reg) {
6265       __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6266     }
6267   %}
6268   ins_pipe( fpu_reg_reg );
6269 %}
6270 
6271 // Load Double
6272 instruct MoveLEG2D(regD dst, legRegD src) %{
6273   match(Set dst src);
6274   format %{ "movsd $dst,$src\t! if src != dst load double (8 bytes)" %}
6275   ins_encode %{
6276     if ($dst$$reg != $src$$reg) {
6277       __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6278     }
6279   %}
6280   ins_pipe( fpu_reg_reg );
6281 %}
6282 
6283 
6284 // The instruction usage is guarded by predicate in operand immD0().
6285 instruct loadConD0(regD dst, immD0 src) %{
6286   match(Set dst src);
6287   ins_cost(100);
6288   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6289   ins_encode %{
6290     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6291   %}
6292   ins_pipe( pipe_slow );
6293 %}
6294 
6295 // Load Stack Slot
6296 instruct loadSSI(rRegI dst, stackSlotI src) %{
6297   match(Set dst src);
6298   ins_cost(125);
6299 
6300   format %{ "MOV    $dst,$src" %}
6301   opcode(0x8B);
6302   ins_encode( OpcP, RegMem(dst,src));
6303   ins_pipe( ialu_reg_mem );
6304 %}
6305 
6306 instruct loadSSL(eRegL dst, stackSlotL src) %{
6307   match(Set dst src);
6308 
6309   ins_cost(200);
6310   format %{ "MOV    $dst,$src.lo\n\t"
6311             "MOV    $dst+4,$src.hi" %}
6312   opcode(0x8B, 0x8B);
6313   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6314   ins_pipe( ialu_mem_long_reg );
6315 %}
6316 
6317 // Load Stack Slot
6318 instruct loadSSP(eRegP dst, stackSlotP src) %{
6319   match(Set dst src);
6320   ins_cost(125);
6321 
6322   format %{ "MOV    $dst,$src" %}
6323   opcode(0x8B);
6324   ins_encode( OpcP, RegMem(dst,src));
6325   ins_pipe( ialu_reg_mem );
6326 %}
6327 
6328 // Load Stack Slot
6329 instruct loadSSF(regFPR dst, stackSlotF src) %{
6330   match(Set dst src);
6331   ins_cost(125);
6332 
6333   format %{ "FLD_S  $src\n\t"
6334             "FSTP   $dst" %}
6335   opcode(0xD9);               /* D9 /0, FLD m32real */
6336   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6337               Pop_Reg_FPR(dst) );
6338   ins_pipe( fpu_reg_mem );
6339 %}
6340 
6341 // Load Stack Slot
6342 instruct loadSSD(regDPR dst, stackSlotD src) %{
6343   match(Set dst src);
6344   ins_cost(125);
6345 
6346   format %{ "FLD_D  $src\n\t"
6347             "FSTP   $dst" %}
6348   opcode(0xDD);               /* DD /0, FLD m64real */
6349   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6350               Pop_Reg_DPR(dst) );
6351   ins_pipe( fpu_reg_mem );
6352 %}
6353 
6354 // Prefetch instructions for allocation.
6355 // Must be safe to execute with invalid address (cannot fault).
6356 
6357 instruct prefetchAlloc0( memory mem ) %{
6358   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6359   match(PrefetchAllocation mem);
6360   ins_cost(0);
6361   size(0);
6362   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6363   ins_encode();
6364   ins_pipe(empty);
6365 %}
6366 
6367 instruct prefetchAlloc( memory mem ) %{
6368   predicate(AllocatePrefetchInstr==3);
6369   match( PrefetchAllocation mem );
6370   ins_cost(100);
6371 
6372   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6373   ins_encode %{
6374     __ prefetchw($mem$$Address);
6375   %}
6376   ins_pipe(ialu_mem);
6377 %}
6378 
6379 instruct prefetchAllocNTA( memory mem ) %{
6380   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6381   match(PrefetchAllocation mem);
6382   ins_cost(100);
6383 
6384   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6385   ins_encode %{
6386     __ prefetchnta($mem$$Address);
6387   %}
6388   ins_pipe(ialu_mem);
6389 %}
6390 
6391 instruct prefetchAllocT0( memory mem ) %{
6392   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6393   match(PrefetchAllocation mem);
6394   ins_cost(100);
6395 
6396   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6397   ins_encode %{
6398     __ prefetcht0($mem$$Address);
6399   %}
6400   ins_pipe(ialu_mem);
6401 %}
6402 
6403 instruct prefetchAllocT2( memory mem ) %{
6404   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6405   match(PrefetchAllocation mem);
6406   ins_cost(100);
6407 
6408   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6409   ins_encode %{
6410     __ prefetcht2($mem$$Address);
6411   %}
6412   ins_pipe(ialu_mem);
6413 %}
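
// A rough summary of the flavor selection above, written as the equivalent
// MacroAssembler dispatch (illustration only, not an encoding used by these
// rules; the helper name is hypothetical):
//
//   void prefetch_allocation_sketch(MacroAssembler* masm, Address obj) {
//     switch (AllocatePrefetchInstr) {
//       case 0: masm->prefetchnta(obj); break;  // non-temporal, default with SSE
//       case 1: masm->prefetcht0(obj);  break;  // pull into L1 and L2
//       case 2: masm->prefetcht2(obj);  break;  // pull into L2 only
//       case 3: masm->prefetchw(obj);   break;  // prefetch with intent to write
//     }
//     // With UseSSE==0 and AllocatePrefetchInstr!=3 nothing is emitted (prefetchAlloc0).
//   }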
6414 
6415 //----------Store Instructions-------------------------------------------------
6416 
6417 // Store Byte
6418 instruct storeB(memory mem, xRegI src) %{
6419   match(Set mem (StoreB mem src));
6420 
6421   ins_cost(125);
6422   format %{ "MOV8   $mem,$src" %}
6423   opcode(0x88);
6424   ins_encode( OpcP, RegMem( src, mem ) );
6425   ins_pipe( ialu_mem_reg );
6426 %}
6427 
6428 // Store Char/Short
6429 instruct storeC(memory mem, rRegI src) %{
6430   match(Set mem (StoreC mem src));
6431 
6432   ins_cost(125);
6433   format %{ "MOV16  $mem,$src" %}
6434   opcode(0x89, 0x66);
6435   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6436   ins_pipe( ialu_mem_reg );
6437 %}
6438 
6439 // Store Integer
6440 instruct storeI(memory mem, rRegI src) %{
6441   match(Set mem (StoreI mem src));
6442 
6443   ins_cost(125);
6444   format %{ "MOV    $mem,$src" %}
6445   opcode(0x89);
6446   ins_encode( OpcP, RegMem( src, mem ) );
6447   ins_pipe( ialu_mem_reg );
6448 %}
6449 
6450 // Store Long
6451 instruct storeL(long_memory mem, eRegL src) %{
6452   predicate(!((StoreLNode*)n)->require_atomic_access());
6453   match(Set mem (StoreL mem src));
6454 
6455   ins_cost(200);
6456   format %{ "MOV    $mem,$src.lo\n\t"
6457             "MOV    $mem+4,$src.hi" %}
6458   opcode(0x89, 0x89);
6459   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6460   ins_pipe( ialu_mem_long_reg );
6461 %}
6462 
6463 // Store Long to Integer
6464 instruct storeL2I(memory mem, eRegL src) %{
6465   match(Set mem (StoreI mem (ConvL2I src)));
6466 
6467   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6468   ins_encode %{
6469     __ movl($mem$$Address, $src$$Register);
6470   %}
6471   ins_pipe(ialu_mem_reg);
6472 %}
6473 
6474 // Volatile Store Long.  Must be atomic, so move it into
6475 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6476 // target address before the store (for null-ptr checks)
6477 // so the memory operand is used twice in the encoding.
6478 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6479   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6480   match(Set mem (StoreL mem src));
6481   effect( KILL cr );
6482   ins_cost(400);
6483   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6484             "FILD   $src\n\t"
6485             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6486   opcode(0x3B);
6487   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6488   ins_pipe( fpu_reg_mem );
6489 %}
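
// Illustration of the x87 path above (not an additional rule): a pair of 32-bit
// MOVs would not be atomic, so the long is bounced through the x87 stack where a
// single 8-byte FISTP writes all 64 bits at once.  Roughly what the probe plus
// enc_storeL_volatile sequence amounts to, sketched with MacroAssembler-style
// calls (names assumed, for illustration only):
//
//   __ cmpl(rax, mem_addr);          // touch $mem first: implicit null check
//   __ fild_d(Address(rsp, disp));   // push the 64-bit source onto the x87 stack
//   __ fistp_d(mem_addr);            // one 8-byte store -> atomic on IA-32
//
// The storeLX_volatile / storeLX_reg_volatile rules below do the same job with
// MOVSD (and MOVD+PUNPCKLDQ) once SSE2 is available.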
6490 
6491 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6492   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6493   match(Set mem (StoreL mem src));
6494   effect( TEMP tmp, KILL cr );
6495   ins_cost(380);
6496   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6497             "MOVSD  $tmp,$src\n\t"
6498             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6499   ins_encode %{
6500     __ cmpl(rax, $mem$$Address);
6501     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6502     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6503   %}
6504   ins_pipe( pipe_slow );
6505 %}
6506 
6507 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6508   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6509   match(Set mem (StoreL mem src));
6510   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6511   ins_cost(360);
6512   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6513             "MOVD   $tmp,$src.lo\n\t"
6514             "MOVD   $tmp2,$src.hi\n\t"
6515             "PUNPCKLDQ $tmp,$tmp2\n\t"
6516             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6517   ins_encode %{
6518     __ cmpl(rax, $mem$$Address);
6519     __ movdl($tmp$$XMMRegister, $src$$Register);
6520     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6521     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6522     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6523   %}
6524   ins_pipe( pipe_slow );
6525 %}
6526 
6527 // Store Pointer; for storing unknown oops and raw pointers
6528 instruct storeP(memory mem, anyRegP src) %{
6529   match(Set mem (StoreP mem src));
6530 
6531   ins_cost(125);
6532   format %{ "MOV    $mem,$src" %}
6533   opcode(0x89);
6534   ins_encode( OpcP, RegMem( src, mem ) );
6535   ins_pipe( ialu_mem_reg );
6536 %}
6537 
6538 // Store Integer Immediate
6539 instruct storeImmI(memory mem, immI src) %{
6540   match(Set mem (StoreI mem src));
6541 
6542   ins_cost(150);
6543   format %{ "MOV    $mem,$src" %}
6544   opcode(0xC7);               /* C7 /0 */
6545   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6546   ins_pipe( ialu_mem_imm );
6547 %}
6548 
6549 // Store Short/Char Immediate
6550 instruct storeImmI16(memory mem, immI16 src) %{
6551   predicate(UseStoreImmI16);
6552   match(Set mem (StoreC mem src));
6553 
6554   ins_cost(150);
6555   format %{ "MOV16  $mem,$src" %}
6556   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6557   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6558   ins_pipe( ialu_mem_imm );
6559 %}
6560 
6561 // Store Pointer Immediate; null pointers or constant oops that do not
6562 // need card-mark barriers.
6563 instruct storeImmP(memory mem, immP src) %{
6564   match(Set mem (StoreP mem src));
6565 
6566   ins_cost(150);
6567   format %{ "MOV    $mem,$src" %}
6568   opcode(0xC7);               /* C7 /0 */
6569   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6570   ins_pipe( ialu_mem_imm );
6571 %}
6572 
6573 // Store Byte Immediate
6574 instruct storeImmB(memory mem, immI8 src) %{
6575   match(Set mem (StoreB mem src));
6576 
6577   ins_cost(150);
6578   format %{ "MOV8   $mem,$src" %}
6579   opcode(0xC6);               /* C6 /0 */
6580   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6581   ins_pipe( ialu_mem_imm );
6582 %}
6583 
6584 // Store CMS card-mark Immediate
6585 instruct storeImmCM(memory mem, immI8 src) %{
6586   match(Set mem (StoreCM mem src));
6587 
6588   ins_cost(150);
6589   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6590   opcode(0xC6);               /* C6 /0 */
6591   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6592   ins_pipe( ialu_mem_imm );
6593 %}
6594 
6595 // Store Double
6596 instruct storeDPR( memory mem, regDPR1 src) %{
6597   predicate(UseSSE<=1);
6598   match(Set mem (StoreD mem src));
6599 
6600   ins_cost(100);
6601   format %{ "FST_D  $mem,$src" %}
6602   opcode(0xDD);       /* DD /2 */
6603   ins_encode( enc_FPR_store(mem,src) );
6604   ins_pipe( fpu_mem_reg );
6605 %}
6606 
6607 // Store double does rounding on x86
6608 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6609   predicate(UseSSE<=1);
6610   match(Set mem (StoreD mem (RoundDouble src)));
6611 
6612   ins_cost(100);
6613   format %{ "FST_D  $mem,$src\t# round" %}
6614   opcode(0xDD);       /* DD /2 */
6615   ins_encode( enc_FPR_store(mem,src) );
6616   ins_pipe( fpu_mem_reg );
6617 %}
6618 
6619 // Store XMM register to memory (double-precision floating point)
6620 // MOVSD instruction
6621 instruct storeD(memory mem, regD src) %{
6622   predicate(UseSSE>=2);
6623   match(Set mem (StoreD mem src));
6624   ins_cost(95);
6625   format %{ "MOVSD  $mem,$src" %}
6626   ins_encode %{
6627     __ movdbl($mem$$Address, $src$$XMMRegister);
6628   %}
6629   ins_pipe( pipe_slow );
6630 %}
6631 
6632 // Load Double
6633 instruct MoveD2VL(vlRegD dst, regD src) %{
6634   match(Set dst src);
6635   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6636   ins_encode %{
6637     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6638   %}
6639   ins_pipe( fpu_reg_reg );
6640 %}
6641 
6642 // Load Double
6643 instruct MoveVL2D(regD dst, vlRegD src) %{
6644   match(Set dst src);
6645   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6646   ins_encode %{
6647     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6648   %}
6649   ins_pipe( fpu_reg_reg );
6650 %}
6651 
6652 // Store XMM register to memory (single-precision floating point)
6653 // MOVSS instruction
6654 instruct storeF(memory mem, regF src) %{
6655   predicate(UseSSE>=1);
6656   match(Set mem (StoreF mem src));
6657   ins_cost(95);
6658   format %{ "MOVSS  $mem,$src" %}
6659   ins_encode %{
6660     __ movflt($mem$$Address, $src$$XMMRegister);
6661   %}
6662   ins_pipe( pipe_slow );
6663 %}
6664 
6665 // Load Float
6666 instruct MoveF2VL(vlRegF dst, regF src) %{
6667   match(Set dst src);
6668   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6669   ins_encode %{
6670     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6671   %}
6672   ins_pipe( fpu_reg_reg );
6673 %}
6674 
6675 // Load Float
6676 instruct MoveVL2F(regF dst, vlRegF src) %{
6677   match(Set dst src);
6678   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6679   ins_encode %{
6680     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6681   %}
6682   ins_pipe( fpu_reg_reg );
6683 %}
6684 
6685 // Store Float
6686 instruct storeFPR( memory mem, regFPR1 src) %{
6687   predicate(UseSSE==0);
6688   match(Set mem (StoreF mem src));
6689 
6690   ins_cost(100);
6691   format %{ "FST_S  $mem,$src" %}
6692   opcode(0xD9);       /* D9 /2 */
6693   ins_encode( enc_FPR_store(mem,src) );
6694   ins_pipe( fpu_mem_reg );
6695 %}
6696 
6697 // Store Float does rounding on x86
6698 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6699   predicate(UseSSE==0);
6700   match(Set mem (StoreF mem (RoundFloat src)));
6701 
6702   ins_cost(100);
6703   format %{ "FST_S  $mem,$src\t# round" %}
6704   opcode(0xD9);       /* D9 /2 */
6705   ins_encode( enc_FPR_store(mem,src) );
6706   ins_pipe( fpu_mem_reg );
6707 %}
6708 
6709 // Store Float does rounding on x86
6710 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6711   predicate(UseSSE<=1);
6712   match(Set mem (StoreF mem (ConvD2F src)));
6713 
6714   ins_cost(100);
6715   format %{ "FST_S  $mem,$src\t# D-round" %}
6716   opcode(0xD9);       /* D9 /2 */
6717   ins_encode( enc_FPR_store(mem,src) );
6718   ins_pipe( fpu_mem_reg );
6719 %}
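
// Background note on the "*_rounded" forms above (a sketch of the assumed
// rationale): with SSE disabled the value lives on the x87 stack in extended
// precision, and FST_S/FST_D narrows it to the 32/64-bit memory format on the
// way out.  That narrowing is the rounding RoundFloat/RoundDouble ask for, so
// the round and the store collapse into the single FST matched here.  In C terms:
//
//   long double st0 = ...;      // x87 stack value, wider than the memory format
//   float  f = (float)st0;      // what FST_S m32 performs while storing
//   double d = (double)st0;     // what FST_D m64 performs while storing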
6720 
6721 // Store immediate Float value (it is faster than store from FPU register)
6722 // The instruction usage is guarded by predicate in operand immFPR().
6723 instruct storeFPR_imm( memory mem, immFPR src) %{
6724   match(Set mem (StoreF mem src));
6725 
6726   ins_cost(50);
6727   format %{ "MOV    $mem,$src\t# store float" %}
6728   opcode(0xC7);               /* C7 /0 */
6729   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6730   ins_pipe( ialu_mem_imm );
6731 %}
6732 
6733 // Store immediate Float value (it is faster than store from XMM register)
6734 // The instruction usage is guarded by predicate in operand immF().
6735 instruct storeF_imm( memory mem, immF src) %{
6736   match(Set mem (StoreF mem src));
6737 
6738   ins_cost(50);
6739   format %{ "MOV    $mem,$src\t# store float" %}
6740   opcode(0xC7);               /* C7 /0 */
6741   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6742   ins_pipe( ialu_mem_imm );
6743 %}
6744 
6745 // Store Integer to stack slot
6746 instruct storeSSI(stackSlotI dst, rRegI src) %{
6747   match(Set dst src);
6748 
6749   ins_cost(100);
6750   format %{ "MOV    $dst,$src" %}
6751   opcode(0x89);
6752   ins_encode( OpcPRegSS( dst, src ) );
6753   ins_pipe( ialu_mem_reg );
6754 %}
6755 
6756 // Store Pointer to stack slot
6757 instruct storeSSP(stackSlotP dst, eRegP src) %{
6758   match(Set dst src);
6759 
6760   ins_cost(100);
6761   format %{ "MOV    $dst,$src" %}
6762   opcode(0x89);
6763   ins_encode( OpcPRegSS( dst, src ) );
6764   ins_pipe( ialu_mem_reg );
6765 %}
6766 
6767 // Store Long to stack slot
6768 instruct storeSSL(stackSlotL dst, eRegL src) %{
6769   match(Set dst src);
6770 
6771   ins_cost(200);
6772   format %{ "MOV    $dst,$src.lo\n\t"
6773             "MOV    $dst+4,$src.hi" %}
6774   opcode(0x89, 0x89);
6775   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6776   ins_pipe( ialu_mem_long_reg );
6777 %}
6778 
6779 //----------MemBar Instructions-----------------------------------------------
6780 // Memory barrier flavors
6781 
6782 instruct membar_acquire() %{
6783   match(MemBarAcquire);
6784   match(LoadFence);
6785   ins_cost(400);
6786 
6787   size(0);
6788   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6789   ins_encode();
6790   ins_pipe(empty);
6791 %}
6792 
6793 instruct membar_acquire_lock() %{
6794   match(MemBarAcquireLock);
6795   ins_cost(0);
6796 
6797   size(0);
6798   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6799   ins_encode( );
6800   ins_pipe(empty);
6801 %}
6802 
6803 instruct membar_release() %{
6804   match(MemBarRelease);
6805   match(StoreFence);
6806   ins_cost(400);
6807 
6808   size(0);
6809   format %{ "MEMBAR-release ! (empty encoding)" %}
6810   ins_encode( );
6811   ins_pipe(empty);
6812 %}
6813 
6814 instruct membar_release_lock() %{
6815   match(MemBarReleaseLock);
6816   ins_cost(0);
6817 
6818   size(0);
6819   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6820   ins_encode( );
6821   ins_pipe(empty);
6822 %}
6823 
6824 instruct membar_volatile(eFlagsReg cr) %{
6825   match(MemBarVolatile);
6826   effect(KILL cr);
6827   ins_cost(400);
6828 
6829   format %{
6830     $$template
6831     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6832   %}
6833   ins_encode %{
6834     __ membar(Assembler::StoreLoad);
6835   %}
6836   ins_pipe(pipe_slow);
6837 %}
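
// Background sketch: membar(Assembler::StoreLoad) is expected to expand to the
// locked ADD shown in the format string rather than MFENCE.  A locked
// read-modify-write of a dummy stack location orders all earlier stores before
// any later load, which is the only barrier x86 actually needs here, and it is
// cheaper than MFENCE on many processors.  Approximately:
//
//   __ lock();
//   __ addl(Address(rsp, 0), 0);   // no data changes; serializes stores vs. later loads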
6838 
6839 instruct unnecessary_membar_volatile() %{
6840   match(MemBarVolatile);
6841   predicate(Matcher::post_store_load_barrier(n));
6842   ins_cost(0);
6843 
6844   size(0);
6845   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6846   ins_encode( );
6847   ins_pipe(empty);
6848 %}
6849 
6850 instruct membar_storestore() %{
6851   match(MemBarStoreStore);
6852   ins_cost(0);
6853 
6854   size(0);
6855   format %{ "MEMBAR-storestore (empty encoding)" %}
6856   ins_encode( );
6857   ins_pipe(empty);
6858 %}
6859 
6860 //----------Move Instructions--------------------------------------------------
6861 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6862   match(Set dst (CastX2P src));
6863   format %{ "# X2P  $dst, $src" %}
6864   ins_encode( /*empty encoding*/ );
6865   ins_cost(0);
6866   ins_pipe(empty);
6867 %}
6868 
6869 instruct castP2X(rRegI dst, eRegP src ) %{
6870   match(Set dst (CastP2X src));
6871   ins_cost(50);
6872   format %{ "MOV    $dst, $src\t# CastP2X" %}
6873   ins_encode( enc_Copy( dst, src) );
6874   ins_pipe( ialu_reg_reg );
6875 %}
6876 
6877 //----------Conditional Move---------------------------------------------------
6878 // Conditional move
6879 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6880   predicate(!VM_Version::supports_cmov() );
6881   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6882   ins_cost(200);
6883   format %{ "J$cop,us skip\t# signed cmove\n\t"
6884             "MOV    $dst,$src\n"
6885       "skip:" %}
6886   ins_encode %{
6887     Label Lskip;
6888     // Invert sense of branch from sense of CMOV
6889     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6890     __ movl($dst$$Register, $src$$Register);
6891     __ bind(Lskip);
6892   %}
6893   ins_pipe( pipe_cmov_reg );
6894 %}
6895 
6896 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6897   predicate(!VM_Version::supports_cmov() );
6898   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6899   ins_cost(200);
6900   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6901             "MOV    $dst,$src\n"
6902       "skip:" %}
6903   ins_encode %{
6904     Label Lskip;
6905     // Invert sense of branch from sense of CMOV
6906     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6907     __ movl($dst$$Register, $src$$Register);
6908     __ bind(Lskip);
6909   %}
6910   ins_pipe( pipe_cmov_reg );
6911 %}
6912 
6913 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6914   predicate(VM_Version::supports_cmov() );
6915   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6916   ins_cost(200);
6917   format %{ "CMOV$cop $dst,$src" %}
6918   opcode(0x0F,0x40);
6919   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6920   ins_pipe( pipe_cmov_reg );
6921 %}
6922 
6923 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6924   predicate(VM_Version::supports_cmov() );
6925   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6926   ins_cost(200);
6927   format %{ "CMOV$cop $dst,$src" %}
6928   opcode(0x0F,0x40);
6929   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6930   ins_pipe( pipe_cmov_reg );
6931 %}
6932 
6933 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6934   predicate(VM_Version::supports_cmov() );
6935   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6936   ins_cost(200);
6937   expand %{
6938     cmovI_regU(cop, cr, dst, src);
6939   %}
6940 %}
6941 
6942 // Conditional move
6943 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6944   predicate(VM_Version::supports_cmov() );
6945   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6946   ins_cost(250);
6947   format %{ "CMOV$cop $dst,$src" %}
6948   opcode(0x0F,0x40);
6949   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6950   ins_pipe( pipe_cmov_mem );
6951 %}
6952 
6953 // Conditional move
6954 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6955   predicate(VM_Version::supports_cmov() );
6956   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6957   ins_cost(250);
6958   format %{ "CMOV$cop $dst,$src" %}
6959   opcode(0x0F,0x40);
6960   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6961   ins_pipe( pipe_cmov_mem );
6962 %}
6963 
6964 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6965   predicate(VM_Version::supports_cmov() );
6966   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6967   ins_cost(250);
6968   expand %{
6969     cmovI_memU(cop, cr, dst, src);
6970   %}
6971 %}
6972 
6973 // Conditional move
6974 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6975   predicate(VM_Version::supports_cmov() );
6976   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6977   ins_cost(200);
6978   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6979   opcode(0x0F,0x40);
6980   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6981   ins_pipe( pipe_cmov_reg );
6982 %}
6983 
6984 // Conditional move (non-P6 version)
6985 // Note:  a CMoveP is generated for  stubs and native wrappers
6986 //        regardless of whether we are on a P6, so we
6987 //        emulate a cmov here
6988 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6989   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6990   ins_cost(300);
6991   format %{ "Jn$cop   skip\n\t"
6992           "MOV    $dst,$src\t# pointer\n"
6993       "skip:" %}
6994   opcode(0x8b);
6995   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6996   ins_pipe( pipe_cmov_reg );
6997 %}
6998 
6999 // Conditional move
7000 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7001   predicate(VM_Version::supports_cmov() );
7002   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "CMOV$cop $dst,$src\t# ptr" %}
7005   opcode(0x0F,0x40);
7006   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7007   ins_pipe( pipe_cmov_reg );
7008 %}
7009 
7010 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7011   predicate(VM_Version::supports_cmov() );
7012   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7013   ins_cost(200);
7014   expand %{
7015     cmovP_regU(cop, cr, dst, src);
7016   %}
7017 %}
7018 
7019 // DISABLED: Requires the ADLC to emit a bottom_type call that
7020 // correctly meets the two pointer arguments; one is an incoming
7021 // register but the other is a memory operand.  ALSO appears to
7022 // be buggy with implicit null checks.
7023 //
7024 //// Conditional move
7025 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
7026 //  predicate(VM_Version::supports_cmov() );
7027 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7028 //  ins_cost(250);
7029 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7030 //  opcode(0x0F,0x40);
7031 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7032 //  ins_pipe( pipe_cmov_mem );
7033 //%}
7034 //
7035 //// Conditional move
7036 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7037 //  predicate(VM_Version::supports_cmov() );
7038 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7039 //  ins_cost(250);
7040 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7041 //  opcode(0x0F,0x40);
7042 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7043 //  ins_pipe( pipe_cmov_mem );
7044 //%}
7045 
7046 // Conditional move
7047 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
7048   predicate(UseSSE<=1);
7049   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7050   ins_cost(200);
7051   format %{ "FCMOV$cop $dst,$src\t# double" %}
7052   opcode(0xDA);
7053   ins_encode( enc_cmov_dpr(cop,src) );
7054   ins_pipe( pipe_cmovDPR_reg );
7055 %}
7056 
7057 // Conditional move
7058 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7059   predicate(UseSSE==0);
7060   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7061   ins_cost(200);
7062   format %{ "FCMOV$cop $dst,$src\t# float" %}
7063   opcode(0xDA);
7064   ins_encode( enc_cmov_dpr(cop,src) );
7065   ins_pipe( pipe_cmovDPR_reg );
7066 %}
7067 
7068 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7069 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7070   predicate(UseSSE<=1);
7071   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7072   ins_cost(200);
7073   format %{ "Jn$cop   skip\n\t"
7074             "MOV    $dst,$src\t# double\n"
7075       "skip:" %}
7076   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7077   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7078   ins_pipe( pipe_cmovDPR_reg );
7079 %}
7080 
7081 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7082 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7083   predicate(UseSSE==0);
7084   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7085   ins_cost(200);
7086   format %{ "Jn$cop    skip\n\t"
7087             "MOV    $dst,$src\t# float\n"
7088       "skip:" %}
7089   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7090   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7091   ins_pipe( pipe_cmovDPR_reg );
7092 %}
7093 
7094 // No CMOVE with SSE/SSE2
7095 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7096   predicate (UseSSE>=1);
7097   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7098   ins_cost(200);
7099   format %{ "Jn$cop   skip\n\t"
7100             "MOVSS  $dst,$src\t# float\n"
7101       "skip:" %}
7102   ins_encode %{
7103     Label skip;
7104     // Invert sense of branch from sense of CMOV
7105     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7106     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7107     __ bind(skip);
7108   %}
7109   ins_pipe( pipe_slow );
7110 %}
7111 
7112 // No CMOVE with SSE/SSE2
7113 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7114   predicate (UseSSE>=2);
7115   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7116   ins_cost(200);
7117   format %{ "Jn$cop   skip\n\t"
7118             "MOVSD  $dst,$src\t# double\n"
7119       "skip:" %}
7120   ins_encode %{
7121     Label skip;
7122     // Invert sense of branch from sense of CMOV
7123     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7124     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7125     __ bind(skip);
7126   %}
7127   ins_pipe( pipe_slow );
7128 %}
7129 
7130 // unsigned version
7131 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7132   predicate (UseSSE>=1);
7133   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7134   ins_cost(200);
7135   format %{ "Jn$cop   skip\n\t"
7136             "MOVSS  $dst,$src\t# float\n"
7137       "skip:" %}
7138   ins_encode %{
7139     Label skip;
7140     // Invert sense of branch from sense of CMOV
7141     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7142     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7143     __ bind(skip);
7144   %}
7145   ins_pipe( pipe_slow );
7146 %}
7147 
7148 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7149   predicate (UseSSE>=1);
7150   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7151   ins_cost(200);
7152   expand %{
7153     fcmovF_regU(cop, cr, dst, src);
7154   %}
7155 %}
7156 
7157 // unsigned version
7158 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7159   predicate (UseSSE>=2);
7160   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7161   ins_cost(200);
7162   format %{ "Jn$cop   skip\n\t"
7163             "MOVSD  $dst,$src\t# double\n"
7164       "skip:" %}
7165   ins_encode %{
7166     Label skip;
7167     // Invert sense of branch from sense of CMOV
7168     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7169     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7170     __ bind(skip);
7171   %}
7172   ins_pipe( pipe_slow );
7173 %}
7174 
7175 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7176   predicate (UseSSE>=2);
7177   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7178   ins_cost(200);
7179   expand %{
7180     fcmovD_regU(cop, cr, dst, src);
7181   %}
7182 %}
7183 
7184 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7185   predicate(VM_Version::supports_cmov() );
7186   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7187   ins_cost(200);
7188   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7189             "CMOV$cop $dst.hi,$src.hi" %}
7190   opcode(0x0F,0x40);
7191   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7192   ins_pipe( pipe_cmov_reg_long );
7193 %}
7194 
7195 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7196   predicate(VM_Version::supports_cmov() );
7197   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7198   ins_cost(200);
7199   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7200             "CMOV$cop $dst.hi,$src.hi" %}
7201   opcode(0x0F,0x40);
7202   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7203   ins_pipe( pipe_cmov_reg_long );
7204 %}
7205 
7206 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7207   predicate(VM_Version::supports_cmov() );
7208   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7209   ins_cost(200);
7210   expand %{
7211     cmovL_regU(cop, cr, dst, src);
7212   %}
7213 %}
7214 
7215 //----------Arithmetic Instructions--------------------------------------------
7216 //----------Addition Instructions----------------------------------------------
7217 
7218 // Integer Addition Instructions
7219 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7220   match(Set dst (AddI dst src));
7221   effect(KILL cr);
7222 
7223   size(2);
7224   format %{ "ADD    $dst,$src" %}
7225   opcode(0x03);
7226   ins_encode( OpcP, RegReg( dst, src) );
7227   ins_pipe( ialu_reg_reg );
7228 %}
7229 
7230 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7231   match(Set dst (AddI dst src));
7232   effect(KILL cr);
7233 
7234   format %{ "ADD    $dst,$src" %}
7235   opcode(0x81, 0x00); /* /0 id */
7236   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7237   ins_pipe( ialu_reg );
7238 %}
7239 
7240 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7241   predicate(UseIncDec);
7242   match(Set dst (AddI dst src));
7243   effect(KILL cr);
7244 
7245   size(1);
7246   format %{ "INC    $dst" %}
7247   opcode(0x40); /*  */
7248   ins_encode( Opc_plus( primary, dst ) );
7249   ins_pipe( ialu_reg );
7250 %}
7251 
7252 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7253   match(Set dst (AddI src0 src1));
7254   ins_cost(110);
7255 
7256   format %{ "LEA    $dst,[$src0 + $src1]" %}
7257   opcode(0x8D); /* 0x8D /r */
7258   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7259   ins_pipe( ialu_reg_reg );
7260 %}
7261 
7262 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7263   match(Set dst (AddP src0 src1));
7264   ins_cost(110);
7265 
7266   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7267   opcode(0x8D); /* 0x8D /r */
7268   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7269   ins_pipe( ialu_reg_reg );
7270 %}
7271 
7272 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7273   predicate(UseIncDec);
7274   match(Set dst (AddI dst src));
7275   effect(KILL cr);
7276 
7277   size(1);
7278   format %{ "DEC    $dst" %}
7279   opcode(0x48); /*  */
7280   ins_encode( Opc_plus( primary, dst ) );
7281   ins_pipe( ialu_reg );
7282 %}
7283 
7284 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7285   match(Set dst (AddP dst src));
7286   effect(KILL cr);
7287 
7288   size(2);
7289   format %{ "ADD    $dst,$src" %}
7290   opcode(0x03);
7291   ins_encode( OpcP, RegReg( dst, src) );
7292   ins_pipe( ialu_reg_reg );
7293 %}
7294 
7295 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7296   match(Set dst (AddP dst src));
7297   effect(KILL cr);
7298 
7299   format %{ "ADD    $dst,$src" %}
7300   opcode(0x81,0x00); /* Opcode 81 /0 id */
7301   // ins_encode( RegImm( dst, src) );
7302   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7303   ins_pipe( ialu_reg );
7304 %}
7305 
7306 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7307   match(Set dst (AddI dst (LoadI src)));
7308   effect(KILL cr);
7309 
7310   ins_cost(125);
7311   format %{ "ADD    $dst,$src" %}
7312   opcode(0x03);
7313   ins_encode( OpcP, RegMem( dst, src) );
7314   ins_pipe( ialu_reg_mem );
7315 %}
7316 
7317 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7318   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7319   effect(KILL cr);
7320 
7321   ins_cost(150);
7322   format %{ "ADD    $dst,$src" %}
7323   opcode(0x01);  /* Opcode 01 /r */
7324   ins_encode( OpcP, RegMem( src, dst ) );
7325   ins_pipe( ialu_mem_reg );
7326 %}
7327 
7328 // Add Memory with Immediate
7329 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7330   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7331   effect(KILL cr);
7332 
7333   ins_cost(125);
7334   format %{ "ADD    $dst,$src" %}
7335   opcode(0x81);               /* Opcode 81 /0 id */
7336   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7337   ins_pipe( ialu_mem_imm );
7338 %}
7339 
7340 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7341   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7342   effect(KILL cr);
7343 
7344   ins_cost(125);
7345   format %{ "INC    $dst" %}
7346   opcode(0xFF);               /* Opcode FF /0 */
7347   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7348   ins_pipe( ialu_mem_imm );
7349 %}
7350 
7351 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7352   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7353   effect(KILL cr);
7354 
7355   ins_cost(125);
7356   format %{ "DEC    $dst" %}
7357   opcode(0xFF);               /* Opcode FF /1 */
7358   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7359   ins_pipe( ialu_mem_imm );
7360 %}
7361 
7362 
7363 instruct checkCastPP( eRegP dst ) %{
7364   match(Set dst (CheckCastPP dst));
7365 
7366   size(0);
7367   format %{ "#checkcastPP of $dst" %}
7368   ins_encode( /*empty encoding*/ );
7369   ins_pipe( empty );
7370 %}
7371 
7372 instruct castPP( eRegP dst ) %{
7373   match(Set dst (CastPP dst));
7374   format %{ "#castPP of $dst" %}
7375   ins_encode( /*empty encoding*/ );
7376   ins_pipe( empty );
7377 %}
7378 
7379 instruct castII( rRegI dst ) %{
7380   match(Set dst (CastII dst));
7381   format %{ "#castII of $dst" %}
7382   ins_encode( /*empty encoding*/ );
7383   ins_cost(0);
7384   ins_pipe( empty );
7385 %}
7386 
7387 
7388 // Load-locked - same as a regular pointer load when used with compare-swap
7389 instruct loadPLocked(eRegP dst, memory mem) %{
7390   match(Set dst (LoadPLocked mem));
7391 
7392   ins_cost(125);
7393   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7394   opcode(0x8B);
7395   ins_encode( OpcP, RegMem(dst,mem));
7396   ins_pipe( ialu_reg_mem );
7397 %}
7398 
7399 // Conditional-store of the updated heap-top.
7400 // Used during allocation of the shared heap.
7401 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7402 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7403   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7404   // EAX is killed if there is contention, but then it's also unused.
7405   // In the common case of no contention, EAX holds the new oop address.
7406   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7407   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7408   ins_pipe( pipe_cmpxchg );
7409 %}
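
// Rough picture of the allocation fast path these two rules serve (pseudo-code,
// names illustrative only): LoadPLocked reads the current heap top into EAX, the
// compiled code adds the object size, and StorePConditional publishes the new
// top only if nobody advanced it in between.
//
//   HeapWord* old_top = *top_addr;            // loadPLocked  -> EAX
//   HeapWord* new_top = old_top + obj_words;
//   lock cmpxchg [top_addr], new_top;         // this rule: ZF (EQ) set on success
//   if (!ZF) retry;                           // contention: another thread won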
7410 
7411 // Conditional-store of an int value.
7412 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7413 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7414   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7415   effect(KILL oldval);
7416   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7417   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7418   ins_pipe( pipe_cmpxchg );
7419 %}
7420 
7421 // Conditional-store of a long value.
7422 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7423 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7424   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7425   effect(KILL oldval);
7426   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7427             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7428             "XCHG   EBX,ECX"
7429   %}
7430   ins_encode %{
7431     // Note: we need to swap rbx and rcx before and after the
7432     //       cmpxchg8 instruction because the instruction uses
7433     //       rcx as the high order word of the new value to store but
7434     //       our register encoding uses rbx.
7435     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7436     __ lock();
7437     __ cmpxchg8($mem$$Address);
7438     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7439   %}
7440   ins_pipe( pipe_cmpxchg );
7441 %}
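
// For reference, the hardware contract behind the swap dance above, restated as
// pseudo-code (CMPXCHG8B semantics, flags reported in ZF):
//
//   if (m64 == EDX:EAX) { m64 = ECX:EBX; ZF = 1; }   // expected value matched
//   else                { EDX:EAX = m64; ZF = 0; }   // current value returned
//
// The instruction hard-wires ECX as the high half and EBX as the low half of the
// value to store, while the eBCXRegL operand pairs them the other way around (as
// the comment above notes), hence the XCHG before and after.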
7442 
7443 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7444 
7445 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7446   predicate(VM_Version::supports_cx8());
7447   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7448   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7449   effect(KILL cr, KILL oldval);
7450   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7451             "MOV    $res,0\n\t"
7452             "JNE,s  fail\n\t"
7453             "MOV    $res,1\n"
7454           "fail:" %}
7455   ins_encode( enc_cmpxchg8(mem_ptr),
7456               enc_flags_ne_to_boolean(res) );
7457   ins_pipe( pipe_cmpxchg );
7458 %}
7459 
7460 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7461   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7462   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7463   effect(KILL cr, KILL oldval);
7464   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7465             "MOV    $res,0\n\t"
7466             "JNE,s  fail\n\t"
7467             "MOV    $res,1\n"
7468           "fail:" %}
7469   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7470   ins_pipe( pipe_cmpxchg );
7471 %}
7472 
7473 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7474   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7475   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7476   effect(KILL cr, KILL oldval);
7477   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7478             "MOV    $res,0\n\t"
7479             "JNE,s  fail\n\t"
7480             "MOV    $res,1\n"
7481           "fail:" %}
7482   ins_encode( enc_cmpxchgb(mem_ptr),
7483               enc_flags_ne_to_boolean(res) );
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
7487 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7488   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7489   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7490   effect(KILL cr, KILL oldval);
7491   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7492             "MOV    $res,0\n\t"
7493             "JNE,s  fail\n\t"
7494             "MOV    $res,1\n"
7495           "fail:" %}
7496   ins_encode( enc_cmpxchgw(mem_ptr),
7497               enc_flags_ne_to_boolean(res) );
7498   ins_pipe( pipe_cmpxchg );
7499 %}
7500 
7501 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7502   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7503   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7504   effect(KILL cr, KILL oldval);
7505   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7506             "MOV    $res,0\n\t"
7507             "JNE,s  fail\n\t"
7508             "MOV    $res,1\n"
7509           "fail:" %}
7510   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7511   ins_pipe( pipe_cmpxchg );
7512 %}
7513 
7514 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7515   predicate(VM_Version::supports_cx8());
7516   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7517   effect(KILL cr);
7518   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7519   ins_encode( enc_cmpxchg8(mem_ptr) );
7520   ins_pipe( pipe_cmpxchg );
7521 %}
7522 
7523 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7524   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7525   effect(KILL cr);
7526   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7527   ins_encode( enc_cmpxchg(mem_ptr) );
7528   ins_pipe( pipe_cmpxchg );
7529 %}
7530 
7531 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7532   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7533   effect(KILL cr);
7534   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7535   ins_encode( enc_cmpxchgb(mem_ptr) );
7536   ins_pipe( pipe_cmpxchg );
7537 %}
7538 
7539 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7540   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7541   effect(KILL cr);
7542   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7543   ins_encode( enc_cmpxchgw(mem_ptr) );
7544   ins_pipe( pipe_cmpxchg );
7545 %}
7546 
7547 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7548   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7549   effect(KILL cr);
7550   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7551   ins_encode( enc_cmpxchg(mem_ptr) );
7552   ins_pipe( pipe_cmpxchg );
7553 %}
7554 
7555 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7556   predicate(n->as_LoadStore()->result_not_used());
7557   match(Set dummy (GetAndAddB mem add));
7558   effect(KILL cr);
7559   format %{ "ADDB  [$mem],$add" %}
7560   ins_encode %{
7561     __ lock();
7562     __ addb($mem$$Address, $add$$constant);
7563   %}
7564   ins_pipe( pipe_cmpxchg );
7565 %}
7566 
7567 // Important to match to xRegI: only 8-bit regs.
7568 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7569   match(Set newval (GetAndAddB mem newval));
7570   effect(KILL cr);
7571   format %{ "XADDB  [$mem],$newval" %}
7572   ins_encode %{
7573     __ lock();
7574     __ xaddb($mem$$Address, $newval$$Register);
7575   %}
7576   ins_pipe( pipe_cmpxchg );
7577 %}
7578 
7579 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7580   predicate(n->as_LoadStore()->result_not_used());
7581   match(Set dummy (GetAndAddS mem add));
7582   effect(KILL cr);
7583   format %{ "ADDS  [$mem],$add" %}
7584   ins_encode %{
7585     __ lock();
7586     __ addw($mem$$Address, $add$$constant);
7587   %}
7588   ins_pipe( pipe_cmpxchg );
7589 %}
7590 
7591 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7592   match(Set newval (GetAndAddS mem newval));
7593   effect(KILL cr);
7594   format %{ "XADDS  [$mem],$newval" %}
7595   ins_encode %{
7596     __ lock();
7597     __ xaddw($mem$$Address, $newval$$Register);
7598   %}
7599   ins_pipe( pipe_cmpxchg );
7600 %}
7601 
7602 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7603   predicate(n->as_LoadStore()->result_not_used());
7604   match(Set dummy (GetAndAddI mem add));
7605   effect(KILL cr);
7606   format %{ "ADDL  [$mem],$add" %}
7607   ins_encode %{
7608     __ lock();
7609     __ addl($mem$$Address, $add$$constant);
7610   %}
7611   ins_pipe( pipe_cmpxchg );
7612 %}
7613 
7614 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7615   match(Set newval (GetAndAddI mem newval));
7616   effect(KILL cr);
7617   format %{ "XADDL  [$mem],$newval" %}
7618   ins_encode %{
7619     __ lock();
7620     __ xaddl($mem$$Address, $newval$$Register);
7621   %}
7622   ins_pipe( pipe_cmpxchg );
7623 %}
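
// Note on the *_no_res forms above: when the program ignores the fetched value
// (predicate result_not_used()), the old value is dead, so a plain locked ADD is
// enough and no register has to be tied up for the return.  Illustrative mapping
// (source-level pseudo-code, names are examples only):
//
//   counter.getAndAdd(5);            // result unused -> lock addl [mem], 5    (xaddI_no_res)
//   int old = counter.getAndAdd(5);  // result used   -> lock xaddl reg,[mem]  (xaddI)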
7624 
7625 // Important to match to xRegI: only 8-bit regs.
7626 instruct xchgB( memory mem, xRegI newval) %{
7627   match(Set newval (GetAndSetB mem newval));
7628   format %{ "XCHGB  $newval,[$mem]" %}
7629   ins_encode %{
7630     __ xchgb($newval$$Register, $mem$$Address);
7631   %}
7632   ins_pipe( pipe_cmpxchg );
7633 %}
7634 
7635 instruct xchgS( memory mem, rRegI newval) %{
7636   match(Set newval (GetAndSetS mem newval));
7637   format %{ "XCHGW  $newval,[$mem]" %}
7638   ins_encode %{
7639     __ xchgw($newval$$Register, $mem$$Address);
7640   %}
7641   ins_pipe( pipe_cmpxchg );
7642 %}
7643 
7644 instruct xchgI( memory mem, rRegI newval) %{
7645   match(Set newval (GetAndSetI mem newval));
7646   format %{ "XCHGL  $newval,[$mem]" %}
7647   ins_encode %{
7648     __ xchgl($newval$$Register, $mem$$Address);
7649   %}
7650   ins_pipe( pipe_cmpxchg );
7651 %}
7652 
7653 instruct xchgP( memory mem, pRegP newval) %{
7654   match(Set newval (GetAndSetP mem newval));
7655   format %{ "XCHGL  $newval,[$mem]" %}
7656   ins_encode %{
7657     __ xchgl($newval$$Register, $mem$$Address);
7658   %}
7659   ins_pipe( pipe_cmpxchg );
7660 %}
7661 
7662 //----------Subtraction Instructions-------------------------------------------
7663 
7664 // Integer Subtraction Instructions
7665 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7666   match(Set dst (SubI dst src));
7667   effect(KILL cr);
7668 
7669   size(2);
7670   format %{ "SUB    $dst,$src" %}
7671   opcode(0x2B);
7672   ins_encode( OpcP, RegReg( dst, src) );
7673   ins_pipe( ialu_reg_reg );
7674 %}
7675 
7676 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7677   match(Set dst (SubI dst src));
7678   effect(KILL cr);
7679 
7680   format %{ "SUB    $dst,$src" %}
7681   opcode(0x81,0x05);  /* Opcode 81 /5 */
7682   // ins_encode( RegImm( dst, src) );
7683   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7684   ins_pipe( ialu_reg );
7685 %}
7686 
7687 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7688   match(Set dst (SubI dst (LoadI src)));
7689   effect(KILL cr);
7690 
7691   ins_cost(125);
7692   format %{ "SUB    $dst,$src" %}
7693   opcode(0x2B);
7694   ins_encode( OpcP, RegMem( dst, src) );
7695   ins_pipe( ialu_reg_mem );
7696 %}
7697 
7698 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7699   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7700   effect(KILL cr);
7701 
7702   ins_cost(150);
7703   format %{ "SUB    $dst,$src" %}
7704   opcode(0x29);  /* Opcode 29 /r */
7705   ins_encode( OpcP, RegMem( src, dst ) );
7706   ins_pipe( ialu_mem_reg );
7707 %}
7708 
7709 // Subtract from a pointer
7710 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7711   match(Set dst (AddP dst (SubI zero src)));
7712   effect(KILL cr);
7713 
7714   size(2);
7715   format %{ "SUB    $dst,$src" %}
7716   opcode(0x2B);
7717   ins_encode( OpcP, RegReg( dst, src) );
7718   ins_pipe( ialu_reg_reg );
7719 %}
7720 
7721 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7722   match(Set dst (SubI zero dst));
7723   effect(KILL cr);
7724 
7725   size(2);
7726   format %{ "NEG    $dst" %}
7727   opcode(0xF7,0x03);  // Opcode F7 /3
7728   ins_encode( OpcP, RegOpc( dst ) );
7729   ins_pipe( ialu_reg );
7730 %}
7731 
7732 //----------Multiplication/Division Instructions-------------------------------
7733 // Integer Multiplication Instructions
7734 // Multiply Register
7735 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7736   match(Set dst (MulI dst src));
7737   effect(KILL cr);
7738 
7739   size(3);
7740   ins_cost(300);
7741   format %{ "IMUL   $dst,$src" %}
7742   opcode(0xAF, 0x0F);
7743   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7744   ins_pipe( ialu_reg_reg_alu0 );
7745 %}
7746 
7747 // Multiply 32-bit Immediate
7748 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7749   match(Set dst (MulI src imm));
7750   effect(KILL cr);
7751 
7752   ins_cost(300);
7753   format %{ "IMUL   $dst,$src,$imm" %}
7754   opcode(0x69);  /* 69 /r id */
7755   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7756   ins_pipe( ialu_reg_reg_alu0 );
7757 %}
7758 
7759 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7760   match(Set dst src);
7761   effect(KILL cr);
7762 
7763   // Note that this is artificially increased to make it more expensive than loadConL
7764   ins_cost(250);
7765   format %{ "MOV    EAX,$src\t// low word only" %}
7766   opcode(0xB8);
7767   ins_encode( LdImmL_Lo(dst, src) );
7768   ins_pipe( ialu_reg_fat );
7769 %}
7770 
7771 // Multiply by 32-bit Immediate, taking the shifted high order results
7772 //  (special case for shift by 32)
7773 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7774   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7775   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7776              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7777              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7778   effect(USE src1, KILL cr);
7779 
7780   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7781   ins_cost(0*100 + 1*400 - 150);
7782   format %{ "IMUL   EDX:EAX,$src1" %}
7783   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7784   ins_pipe( pipe_slow );
7785 %}
7786 
7787 // Multiply by 32-bit Immediate, taking the shifted high order results
7788 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7789   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7790   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7791              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7792              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7793   effect(USE src1, KILL cr);
7794 
7795   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7796   ins_cost(1*100 + 1*400 - 150);
7797   format %{ "IMUL   EDX:EAX,$src1\n\t"
7798             "SAR    EDX,$cnt-32" %}
7799   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7800   ins_pipe( pipe_slow );
7801 %}
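
// Source pattern targeted by the two rules above (sketch): the matched tree
// ConvL2I(RShiftL(MulL(ConvI2L x) conL) cnt) corresponds to expressions such as
//
//   int hi = (int)(((long) x * C) >> 32);    // mulI_imm_high, cnt == 32
//   int hi = (int)(((long) x * C) >> cnt);   // mulI_imm_RShift_high, larger counts
//
// where C is a constant that fits in 32 bits (see the predicates).
// loadConL_low_only parks C in EAX, the one-operand IMUL leaves the full product
// in EDX:EAX, and EDX already is the >>32 result; larger shift counts just SAR
// EDX by cnt-32.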
7802 
7803 // Multiply Memory 32-bit Immediate
7804 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7805   match(Set dst (MulI (LoadI src) imm));
7806   effect(KILL cr);
7807 
7808   ins_cost(300);
7809   format %{ "IMUL   $dst,$src,$imm" %}
7810   opcode(0x69);  /* 69 /r id */
7811   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7812   ins_pipe( ialu_reg_mem_alu0 );
7813 %}
7814 
7815 // Multiply Memory
7816 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7817   match(Set dst (MulI dst (LoadI src)));
7818   effect(KILL cr);
7819 
7820   ins_cost(350);
7821   format %{ "IMUL   $dst,$src" %}
7822   opcode(0xAF, 0x0F);
7823   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7824   ins_pipe( ialu_reg_mem_alu0 );
7825 %}
7826 
7827 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7828 %{
7829   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7830   effect(KILL cr, KILL src2);
7831 
7832   expand %{ mulI_eReg(dst, src1, cr);
7833            mulI_eReg(src2, src3, cr);
7834            addI_eReg(dst, src2, cr); %}
7835 %}
7836 
7837 // Multiply Register Int to Long
7838 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7839   // Basic Idea: long = (long)int * (long)int
7840   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7841   effect(DEF dst, USE src, USE src1, KILL flags);
7842 
7843   ins_cost(300);
7844   format %{ "IMUL   $dst,$src1" %}
7845 
7846   ins_encode( long_int_multiply( dst, src1 ) );
7847   ins_pipe( ialu_reg_reg_alu0 );
7848 %}
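
// Sketch of the source-level shape this rule picks up: both inputs are
// sign-extended ints, so a single one-operand IMUL produces the full 64-bit
// product directly in EDX:EAX, with no 64x64 decomposition needed.
//
//   long p = (long) a * (long) b;   // 32x32 -> 64-bit signed multiply, one IMUL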
7849 
7850 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7851   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7852   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7853   effect(KILL flags);
7854 
7855   ins_cost(300);
7856   format %{ "MUL    $dst,$src1" %}
7857 
7858   ins_encode( long_uint_multiply(dst, src1) );
7859   ins_pipe( ialu_reg_reg_alu0 );
7860 %}
7861 
7862 // Multiply Register Long
7863 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7864   match(Set dst (MulL dst src));
7865   effect(KILL cr, TEMP tmp);
7866   ins_cost(4*100+3*400);
7867 // Basic idea: lo(result) = lo(x_lo * y_lo)
7868 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7869   format %{ "MOV    $tmp,$src.lo\n\t"
7870             "IMUL   $tmp,EDX\n\t"
7871             "MOV    EDX,$src.hi\n\t"
7872             "IMUL   EDX,EAX\n\t"
7873             "ADD    $tmp,EDX\n\t"
7874             "MUL    EDX:EAX,$src.lo\n\t"
7875             "ADD    EDX,$tmp" %}
7876   ins_encode( long_multiply( dst, src, tmp ) );
7877   ins_pipe( pipe_slow );
7878 %}
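
// The decomposition spelled out above, as a C-style sketch (illustration only);
// any carry out of bit 63 is discarded, matching Java's wrap-around semantics
// for long multiplication:
//
//   uint64_t lo_prod = (uint64_t) x_lo * y_lo;          // one-operand MUL -> EDX:EAX
//   uint32_t r_lo    = (uint32_t) lo_prod;
//   uint32_t r_hi    = (uint32_t)(lo_prod >> 32)
//                    + x_hi * y_lo                      // cross terms, low 32 bits only
//                    + x_lo * y_hi;
//
// The _lhi0/_rhi0/_hi0 variants below drop whichever cross term is provably zero.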
7879 
7880 // Multiply Register Long where the left operand's high 32 bits are zero
7881 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7882   predicate(is_operand_hi32_zero(n->in(1)));
7883   match(Set dst (MulL dst src));
7884   effect(KILL cr, TEMP tmp);
7885   ins_cost(2*100+2*400);
7886 // Basic idea: lo(result) = lo(x_lo * y_lo)
7887 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7888   format %{ "MOV    $tmp,$src.hi\n\t"
7889             "IMUL   $tmp,EAX\n\t"
7890             "MUL    EDX:EAX,$src.lo\n\t"
7891             "ADD    EDX,$tmp" %}
7892   ins_encode %{
7893     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7894     __ imull($tmp$$Register, rax);
7895     __ mull($src$$Register);
7896     __ addl(rdx, $tmp$$Register);
7897   %}
7898   ins_pipe( pipe_slow );
7899 %}
7900 
7901 // Multiply Register Long where the right operand's high 32 bits are zero
7902 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7903   predicate(is_operand_hi32_zero(n->in(2)));
7904   match(Set dst (MulL dst src));
7905   effect(KILL cr, TEMP tmp);
7906   ins_cost(2*100+2*400);
7907 // Basic idea: lo(result) = lo(x_lo * y_lo)
7908 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7909   format %{ "MOV    $tmp,$src.lo\n\t"
7910             "IMUL   $tmp,EDX\n\t"
7911             "MUL    EDX:EAX,$src.lo\n\t"
7912             "ADD    EDX,$tmp" %}
7913   ins_encode %{
7914     __ movl($tmp$$Register, $src$$Register);
7915     __ imull($tmp$$Register, rdx);
7916     __ mull($src$$Register);
7917     __ addl(rdx, $tmp$$Register);
7918   %}
7919   ins_pipe( pipe_slow );
7920 %}
7921 
7922 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7923 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7924   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7925   match(Set dst (MulL dst src));
7926   effect(KILL cr);
7927   ins_cost(1*400);
7928 // Basic idea: lo(result) = lo(x_lo * y_lo)
7929 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7930   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7931   ins_encode %{
7932     __ mull($src$$Register);
7933   %}
7934   ins_pipe( pipe_slow );
7935 %}
7936 
7937 // Multiply Register Long by small constant
7938 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7939   match(Set dst (MulL dst src));
7940   effect(KILL cr, TEMP tmp);
7941   ins_cost(2*100+2*400);
7942   size(12);
7943 // Basic idea: lo(result) = lo(src * EAX)
7944 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7945   format %{ "IMUL   $tmp,EDX,$src\n\t"
7946             "MOV    EDX,$src\n\t"
7947             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7948             "ADD    EDX,$tmp" %}
7949   ins_encode( long_multiply_con( dst, src, tmp ) );
7950   ins_pipe( pipe_slow );
7951 %}
7952 
7953 // Integer DIV with Register
7954 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7955   match(Set rax (DivI rax div));
7956   effect(KILL rdx, KILL cr);
7957   size(26);
7958   ins_cost(30*100+10*100);
7959   format %{ "CMP    EAX,0x80000000\n\t"
7960             "JNE,s  normal\n\t"
7961             "XOR    EDX,EDX\n\t"
7962             "CMP    ECX,-1\n\t"
7963             "JE,s   done\n"
7964     "normal: CDQ\n\t"
7965             "IDIV   $div\n\t"
7966     "done:"        %}
7967   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7968   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7969   ins_pipe( ialu_reg_reg_alu0 );
7970 %}
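
// Explanatory note: the guard above exists because IDIV raises a hardware
// divide-error fault when the quotient overflows, and min_jint / -1 cannot
// be represented.  Java requires min_jint / -1 == min_jint (remainder 0),
// so the sequence detects EAX == 0x80000000 with ECX == -1 and skips the
// IDIV, leaving EAX unchanged and EDX zeroed; the guard and the CDQ are
// both produced by the cdq_enc encoding class.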
7971 
7972 // Divide Register Long
7973 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7974   match(Set dst (DivL src1 src2));
7975   effect( KILL cr, KILL cx, KILL bx );
7976   ins_cost(10000);
7977   format %{ "PUSH   $src1.hi\n\t"
7978             "PUSH   $src1.lo\n\t"
7979             "PUSH   $src2.hi\n\t"
7980             "PUSH   $src2.lo\n\t"
7981             "CALL   SharedRuntime::ldiv\n\t"
7982             "ADD    ESP,16" %}
7983   ins_encode( long_div(src1,src2) );
7984   ins_pipe( pipe_slow );
7985 %}
7986 
7987 // Integer DIVMOD with Register, both quotient and mod results
7988 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7989   match(DivModI rax div);
7990   effect(KILL cr);
7991   size(26);
7992   ins_cost(30*100+10*100);
7993   format %{ "CMP    EAX,0x80000000\n\t"
7994             "JNE,s  normal\n\t"
7995             "XOR    EDX,EDX\n\t"
7996             "CMP    ECX,-1\n\t"
7997             "JE,s   done\n"
7998     "normal: CDQ\n\t"
7999             "IDIV   $div\n\t"
8000     "done:"        %}
8001   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8002   ins_encode( cdq_enc, OpcP, RegOpc(div) );
8003   ins_pipe( pipe_slow );
8004 %}
8005 
8006 // Integer MOD with Register
8007 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8008   match(Set rdx (ModI rax div));
8009   effect(KILL rax, KILL cr);
8010 
8011   size(26);
8012   ins_cost(300);
8013   format %{ "CDQ\n\t"
8014             "IDIV   $div" %}
8015   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8016   ins_encode( cdq_enc, OpcP, RegOpc(div) );
8017   ins_pipe( ialu_reg_reg_alu0 );
8018 %}
8019 
8020 // Remainder Register Long
8021 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8022   match(Set dst (ModL src1 src2));
8023   effect( KILL cr, KILL cx, KILL bx );
8024   ins_cost(10000);
8025   format %{ "PUSH   $src1.hi\n\t"
8026             "PUSH   $src1.lo\n\t"
8027             "PUSH   $src2.hi\n\t"
8028             "PUSH   $src2.lo\n\t"
8029             "CALL   SharedRuntime::lrem\n\t"
8030             "ADD    ESP,16" %}
8031   ins_encode( long_mod(src1,src2) );
8032   ins_pipe( pipe_slow );
8033 %}
8034 
8035 // Divide Register Long (no special case since divisor != -1)
8036 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8037   match(Set dst (DivL dst imm));
8038   effect( TEMP tmp, TEMP tmp2, KILL cr );
8039   ins_cost(1000);
8040   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8041             "XOR    $tmp2,$tmp2\n\t"
8042             "CMP    $tmp,EDX\n\t"
8043             "JA,s   fast\n\t"
8044             "MOV    $tmp2,EAX\n\t"
8045             "MOV    EAX,EDX\n\t"
8046             "MOV    EDX,0\n\t"
8047             "JLE,s  pos\n\t"
8048             "LNEG   EAX : $tmp2\n\t"
8049             "DIV    $tmp # unsigned division\n\t"
8050             "XCHG   EAX,$tmp2\n\t"
8051             "DIV    $tmp\n\t"
8052             "LNEG   $tmp2 : EAX\n\t"
8053             "JMP,s  done\n"
8054     "pos:\n\t"
8055             "DIV    $tmp\n\t"
8056             "XCHG   EAX,$tmp2\n"
8057     "fast:\n\t"
8058             "DIV    $tmp\n"
8059     "done:\n\t"
8060             "MOV    EDX,$tmp2\n\t"
8061             "NEG    EDX:EAX # if $imm < 0" %}
8062   ins_encode %{
8063     int con = (int)$imm$$constant;
8064     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8065     int pcon = (con > 0) ? con : -con;
8066     Label Lfast, Lpos, Ldone;
8067 
8068     __ movl($tmp$$Register, pcon);
8069     __ xorl($tmp2$$Register,$tmp2$$Register);
8070     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8071     __ jccb(Assembler::above, Lfast); // result fits into 32 bits
8072 
8073     __ movl($tmp2$$Register, $dst$$Register); // save
8074     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8075     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8076     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8077 
8078     // Negative dividend.
8079     // convert value to positive to use unsigned division
8080     __ lneg($dst$$Register, $tmp2$$Register);
8081     __ divl($tmp$$Register);
8082     __ xchgl($dst$$Register, $tmp2$$Register);
8083     __ divl($tmp$$Register);
8084     // revert result back to negative
8085     __ lneg($tmp2$$Register, $dst$$Register);
8086     __ jmpb(Ldone);
8087 
8088     __ bind(Lpos);
8089     __ divl($tmp$$Register); // Use unsigned division
8090     __ xchgl($dst$$Register, $tmp2$$Register);
8091     // Fall through to the final divide; tmp2 has the 32-bit hi result
8092 
8093     __ bind(Lfast);
8094     // fast path: src is positive
8095     __ divl($tmp$$Register); // Use unsigned division
8096 
8097     __ bind(Ldone);
8098     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8099     if (con < 0) {
8100       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8101     }
8102   %}
8103   ins_pipe( pipe_slow );
8104 %}
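
// Sketch of the algorithm above: dividing EDX:EAX by a positive 32-bit
// constant pcon is schoolbook long division in base 2^32 using unsigned DIV
// twice:
//   q_hi = hi / pcon;   r = hi % pcon;
//   q_lo = (r:lo) / pcon       // r < pcon, so this DIV cannot overflow
// where r:lo means r*2^32 + lo.  The fast path applies when hi < pcon
// (quotient fits in 32 bits); a negative dividend or a negative $imm is
// handled by negating before and after the unsigned division.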
8105 
8106 // Remainder Register Long (remainder fits into 32 bits)
8107 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8108   match(Set dst (ModL dst imm));
8109   effect( TEMP tmp, TEMP tmp2, KILL cr );
8110   ins_cost(1000);
8111   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8112             "CMP    $tmp,EDX\n\t"
8113             "JA,s   fast\n\t"
8114             "MOV    $tmp2,EAX\n\t"
8115             "MOV    EAX,EDX\n\t"
8116             "MOV    EDX,0\n\t"
8117             "JLE,s  pos\n\t"
8118             "LNEG   EAX : $tmp2\n\t"
8119             "DIV    $tmp # unsigned division\n\t"
8120             "MOV    EAX,$tmp2\n\t"
8121             "DIV    $tmp\n\t"
8122             "NEG    EDX\n\t"
8123             "JMP,s  done\n"
8124     "pos:\n\t"
8125             "DIV    $tmp\n\t"
8126             "MOV    EAX,$tmp2\n"
8127     "fast:\n\t"
8128             "DIV    $tmp\n"
8129     "done:\n\t"
8130             "MOV    EAX,EDX\n\t"
8131             "SAR    EDX,31\n\t" %}
8132   ins_encode %{
8133     int con = (int)$imm$$constant;
8134     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8135     int pcon = (con > 0) ? con : -con;
8136     Label  Lfast, Lpos, Ldone;
8137 
8138     __ movl($tmp$$Register, pcon);
8139     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8140     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
8141 
8142     __ movl($tmp2$$Register, $dst$$Register); // save
8143     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8144     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8145     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8146 
8147     // Negative dividend.
8148     // convert value to positive to use unsigned division
8149     __ lneg($dst$$Register, $tmp2$$Register);
8150     __ divl($tmp$$Register);
8151     __ movl($dst$$Register, $tmp2$$Register);
8152     __ divl($tmp$$Register);
8153     // revert remainder back to negative
8154     __ negl(HIGH_FROM_LOW($dst$$Register));
8155     __ jmpb(Ldone);
8156 
8157     __ bind(Lpos);
8158     __ divl($tmp$$Register);
8159     __ movl($dst$$Register, $tmp2$$Register);
8160 
8161     __ bind(Lfast);
8162     // fast path: src is positive
8163     __ divl($tmp$$Register);
8164 
8165     __ bind(Ldone);
8166     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8167     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8168 
8169   %}
8170   ins_pipe( pipe_slow );
8171 %}
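
// Same two-step unsigned division as the ldiv form above, but only the
// final remainder (left in EDX by the second DIV) is kept.  It always fits
// in 32 bits because its magnitude is below abs($imm), so the high word of
// the result is just the sign extension produced by the trailing SAR.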
8172 
8173 // Integer Shift Instructions
8174 // Shift Left by one
8175 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8176   match(Set dst (LShiftI dst shift));
8177   effect(KILL cr);
8178 
8179   size(2);
8180   format %{ "SHL    $dst,$shift" %}
8181   opcode(0xD1, 0x4);  /* D1 /4 */
8182   ins_encode( OpcP, RegOpc( dst ) );
8183   ins_pipe( ialu_reg );
8184 %}
8185 
8186 // Shift Left by 8-bit immediate
8187 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8188   match(Set dst (LShiftI dst shift));
8189   effect(KILL cr);
8190 
8191   size(3);
8192   format %{ "SHL    $dst,$shift" %}
8193   opcode(0xC1, 0x4);  /* C1 /4 ib */
8194   ins_encode( RegOpcImm( dst, shift) );
8195   ins_pipe( ialu_reg );
8196 %}
8197 
8198 // Shift Left by variable
8199 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8200   match(Set dst (LShiftI dst shift));
8201   effect(KILL cr);
8202 
8203   size(2);
8204   format %{ "SHL    $dst,$shift" %}
8205   opcode(0xD3, 0x4);  /* D3 /4 */
8206   ins_encode( OpcP, RegOpc( dst ) );
8207   ins_pipe( ialu_reg_reg );
8208 %}
8209 
8210 // Arithmetic shift right by one
8211 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8212   match(Set dst (RShiftI dst shift));
8213   effect(KILL cr);
8214 
8215   size(2);
8216   format %{ "SAR    $dst,$shift" %}
8217   opcode(0xD1, 0x7);  /* D1 /7 */
8218   ins_encode( OpcP, RegOpc( dst ) );
8219   ins_pipe( ialu_reg );
8220 %}
8221 
8222 // Arithmetic shift right by one
8223 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8224   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8225   effect(KILL cr);
8226   format %{ "SAR    $dst,$shift" %}
8227   opcode(0xD1, 0x7);  /* D1 /7 */
8228   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8229   ins_pipe( ialu_mem_imm );
8230 %}
8231 
8232 // Arithmetic Shift Right by 8-bit immediate
8233 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8234   match(Set dst (RShiftI dst shift));
8235   effect(KILL cr);
8236 
8237   size(3);
8238   format %{ "SAR    $dst,$shift" %}
8239   opcode(0xC1, 0x7);  /* C1 /7 ib */
8240   ins_encode( RegOpcImm( dst, shift ) );
8241   ins_pipe( ialu_mem_imm );
8242 %}
8243 
8244 // Arithmetic Shift Right by 8-bit immediate
8245 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8246   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8247   effect(KILL cr);
8248 
8249   format %{ "SAR    $dst,$shift" %}
8250   opcode(0xC1, 0x7);  /* C1 /7 ib */
8251   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8252   ins_pipe( ialu_mem_imm );
8253 %}
8254 
8255 // Arithmetic Shift Right by variable
8256 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8257   match(Set dst (RShiftI dst shift));
8258   effect(KILL cr);
8259 
8260   size(2);
8261   format %{ "SAR    $dst,$shift" %}
8262   opcode(0xD3, 0x7);  /* D3 /7 */
8263   ins_encode( OpcP, RegOpc( dst ) );
8264   ins_pipe( ialu_reg_reg );
8265 %}
8266 
8267 // Logical shift right by one
8268 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8269   match(Set dst (URShiftI dst shift));
8270   effect(KILL cr);
8271 
8272   size(2);
8273   format %{ "SHR    $dst,$shift" %}
8274   opcode(0xD1, 0x5);  /* D1 /5 */
8275   ins_encode( OpcP, RegOpc( dst ) );
8276   ins_pipe( ialu_reg );
8277 %}
8278 
8279 // Logical Shift Right by 8-bit immediate
8280 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8281   match(Set dst (URShiftI dst shift));
8282   effect(KILL cr);
8283 
8284   size(3);
8285   format %{ "SHR    $dst,$shift" %}
8286   opcode(0xC1, 0x5);  /* C1 /5 ib */
8287   ins_encode( RegOpcImm( dst, shift) );
8288   ins_pipe( ialu_reg );
8289 %}
8290 
8291 
8292 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8293 // This idiom is used by the compiler for the i2b bytecode.
8294 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8295   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8296 
8297   size(3);
8298   format %{ "MOVSX  $dst,$src :8" %}
8299   ins_encode %{
8300     __ movsbl($dst$$Register, $src$$Register);
8301   %}
8302   ins_pipe(ialu_reg_reg);
8303 %}
8304 
8305 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8306 // This idiom is used by the compiler for the i2s bytecode.
8307 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8308   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8309 
8310   size(3);
8311   format %{ "MOVSX  $dst,$src :16" %}
8312   ins_encode %{
8313     __ movswl($dst$$Register, $src$$Register);
8314   %}
8315   ins_pipe(ialu_reg_reg);
8316 %}
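
// Example of the shift idiom matched by i2b/i2s above: for i2b,
//   (x << 24) >> 24  sign-extends the low byte, e.g. x = 0x000000FF becomes
// 0xFFFFFFFF (-1), which MOVSX computes in a single instruction instead of
// two shifts.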
8317 
8318 
8319 // Logical Shift Right by variable
8320 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8321   match(Set dst (URShiftI dst shift));
8322   effect(KILL cr);
8323 
8324   size(2);
8325   format %{ "SHR    $dst,$shift" %}
8326   opcode(0xD3, 0x5);  /* D3 /5 */
8327   ins_encode( OpcP, RegOpc( dst ) );
8328   ins_pipe( ialu_reg_reg );
8329 %}
8330 
8331 
8332 //----------Logical Instructions-----------------------------------------------
8333 //----------Integer Logical Instructions---------------------------------------
8334 // And Instructions
8335 // And Register with Register
8336 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8337   match(Set dst (AndI dst src));
8338   effect(KILL cr);
8339 
8340   size(2);
8341   format %{ "AND    $dst,$src" %}
8342   opcode(0x23);
8343   ins_encode( OpcP, RegReg( dst, src) );
8344   ins_pipe( ialu_reg_reg );
8345 %}
8346 
8347 // And Register with Immediate
8348 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8349   match(Set dst (AndI dst src));
8350   effect(KILL cr);
8351 
8352   format %{ "AND    $dst,$src" %}
8353   opcode(0x81,0x04);  /* Opcode 81 /4 */
8354   // ins_encode( RegImm( dst, src) );
8355   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8356   ins_pipe( ialu_reg );
8357 %}
8358 
8359 // And Register with Memory
8360 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8361   match(Set dst (AndI dst (LoadI src)));
8362   effect(KILL cr);
8363 
8364   ins_cost(125);
8365   format %{ "AND    $dst,$src" %}
8366   opcode(0x23);
8367   ins_encode( OpcP, RegMem( dst, src) );
8368   ins_pipe( ialu_reg_mem );
8369 %}
8370 
8371 // And Memory with Register
8372 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8373   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8374   effect(KILL cr);
8375 
8376   ins_cost(150);
8377   format %{ "AND    $dst,$src" %}
8378   opcode(0x21);  /* Opcode 21 /r */
8379   ins_encode( OpcP, RegMem( src, dst ) );
8380   ins_pipe( ialu_mem_reg );
8381 %}
8382 
8383 // And Memory with Immediate
8384 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8385   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8386   effect(KILL cr);
8387 
8388   ins_cost(125);
8389   format %{ "AND    $dst,$src" %}
8390   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8391   // ins_encode( MemImm( dst, src) );
8392   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8393   ins_pipe( ialu_mem_imm );
8394 %}
8395 
8396 // BMI1 instructions
8397 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8398   match(Set dst (AndI (XorI src1 minus_1) src2));
8399   predicate(UseBMI1Instructions);
8400   effect(KILL cr);
8401 
8402   format %{ "ANDNL  $dst, $src1, $src2" %}
8403 
8404   ins_encode %{
8405     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8406   %}
8407   ins_pipe(ialu_reg);
8408 %}
8409 
8410 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8411   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8412   predicate(UseBMI1Instructions);
8413   effect(KILL cr);
8414 
8415   ins_cost(125);
8416   format %{ "ANDNL  $dst, $src1, $src2" %}
8417 
8418   ins_encode %{
8419     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8420   %}
8421   ins_pipe(ialu_reg_mem);
8422 %}
8423 
8424 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8425   match(Set dst (AndI (SubI imm_zero src) src));
8426   predicate(UseBMI1Instructions);
8427   effect(KILL cr);
8428 
8429   format %{ "BLSIL  $dst, $src" %}
8430 
8431   ins_encode %{
8432     __ blsil($dst$$Register, $src$$Register);
8433   %}
8434   ins_pipe(ialu_reg);
8435 %}
8436 
8437 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8438   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8439   predicate(UseBMI1Instructions);
8440   effect(KILL cr);
8441 
8442   ins_cost(125);
8443   format %{ "BLSIL  $dst, $src" %}
8444 
8445   ins_encode %{
8446     __ blsil($dst$$Register, $src$$Address);
8447   %}
8448   ins_pipe(ialu_reg_mem);
8449 %}
8450 
8451 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8452 %{
8453   match(Set dst (XorI (AddI src minus_1) src));
8454   predicate(UseBMI1Instructions);
8455   effect(KILL cr);
8456 
8457   format %{ "BLSMSKL $dst, $src" %}
8458 
8459   ins_encode %{
8460     __ blsmskl($dst$$Register, $src$$Register);
8461   %}
8462 
8463   ins_pipe(ialu_reg);
8464 %}
8465 
8466 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8467 %{
8468   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8469   predicate(UseBMI1Instructions);
8470   effect(KILL cr);
8471 
8472   ins_cost(125);
8473   format %{ "BLSMSKL $dst, $src" %}
8474 
8475   ins_encode %{
8476     __ blsmskl($dst$$Register, $src$$Address);
8477   %}
8478 
8479   ins_pipe(ialu_reg_mem);
8480 %}
8481 
8482 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8483 %{
8484   match(Set dst (AndI (AddI src minus_1) src) );
8485   predicate(UseBMI1Instructions);
8486   effect(KILL cr);
8487 
8488   format %{ "BLSRL  $dst, $src" %}
8489 
8490   ins_encode %{
8491     __ blsrl($dst$$Register, $src$$Register);
8492   %}
8493 
8494   ins_pipe(ialu_reg);
8495 %}
8496 
8497 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8498 %{
8499   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8500   predicate(UseBMI1Instructions);
8501   effect(KILL cr);
8502 
8503   ins_cost(125);
8504   format %{ "BLSRL  $dst, $src" %}
8505 
8506   ins_encode %{
8507     __ blsrl($dst$$Register, $src$$Address);
8508   %}
8509 
8510   ins_pipe(ialu_reg_mem);
8511 %}
8512 
8513 // Or Instructions
8514 // Or Register with Register
8515 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8516   match(Set dst (OrI dst src));
8517   effect(KILL cr);
8518 
8519   size(2);
8520   format %{ "OR     $dst,$src" %}
8521   opcode(0x0B);
8522   ins_encode( OpcP, RegReg( dst, src) );
8523   ins_pipe( ialu_reg_reg );
8524 %}
8525 
8526 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8527   match(Set dst (OrI dst (CastP2X src)));
8528   effect(KILL cr);
8529 
8530   size(2);
8531   format %{ "OR     $dst,$src" %}
8532   opcode(0x0B);
8533   ins_encode( OpcP, RegReg( dst, src) );
8534   ins_pipe( ialu_reg_reg );
8535 %}
8536 
8537 
8538 // Or Register with Immediate
8539 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8540   match(Set dst (OrI dst src));
8541   effect(KILL cr);
8542 
8543   format %{ "OR     $dst,$src" %}
8544   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8545   // ins_encode( RegImm( dst, src) );
8546   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8547   ins_pipe( ialu_reg );
8548 %}
8549 
8550 // Or Register with Memory
8551 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8552   match(Set dst (OrI dst (LoadI src)));
8553   effect(KILL cr);
8554 
8555   ins_cost(125);
8556   format %{ "OR     $dst,$src" %}
8557   opcode(0x0B);
8558   ins_encode( OpcP, RegMem( dst, src) );
8559   ins_pipe( ialu_reg_mem );
8560 %}
8561 
8562 // Or Memory with Register
8563 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8564   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8565   effect(KILL cr);
8566 
8567   ins_cost(150);
8568   format %{ "OR     $dst,$src" %}
8569   opcode(0x09);  /* Opcode 09 /r */
8570   ins_encode( OpcP, RegMem( src, dst ) );
8571   ins_pipe( ialu_mem_reg );
8572 %}
8573 
8574 // Or Memory with Immediate
8575 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8576   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8577   effect(KILL cr);
8578 
8579   ins_cost(125);
8580   format %{ "OR     $dst,$src" %}
8581   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8582   // ins_encode( MemImm( dst, src) );
8583   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8584   ins_pipe( ialu_mem_imm );
8585 %}
8586 
8587 // ROL/ROR
8588 // ROL expand
8589 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8590   effect(USE_DEF dst, USE shift, KILL cr);
8591 
8592   format %{ "ROL    $dst, $shift" %}
8593   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8594   ins_encode( OpcP, RegOpc( dst ));
8595   ins_pipe( ialu_reg );
8596 %}
8597 
8598 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8599   effect(USE_DEF dst, USE shift, KILL cr);
8600 
8601   format %{ "ROL    $dst, $shift" %}
8602   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8603   ins_encode( RegOpcImm(dst, shift) );
8604   ins_pipe(ialu_reg);
8605 %}
8606 
8607 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8608   effect(USE_DEF dst, USE shift, KILL cr);
8609 
8610   format %{ "ROL    $dst, $shift" %}
8611   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8612   ins_encode(OpcP, RegOpc(dst));
8613   ins_pipe( ialu_reg_reg );
8614 %}
8615 // end of ROL expand
8616 
8617 // ROL 32bit by one once
8618 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8619   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8620 
8621   expand %{
8622     rolI_eReg_imm1(dst, lshift, cr);
8623   %}
8624 %}
8625 
8626 // ROL 32bit var by imm8 once
8627 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8628   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8629   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8630 
8631   expand %{
8632     rolI_eReg_imm8(dst, lshift, cr);
8633   %}
8634 %}
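
// The predicate above accepts the shift-count pair only when
// (lshift + rshift) is a multiple of 32, because
//   (x << s) | (x >>> (32 - s))
// is a rotate-left by s only under that condition; e.g. s = 8 pairs a left
// shift of 8 with an unsigned right shift of 24.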
8635 
8636 // ROL 32bit var by var once
8637 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8638   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8639 
8640   expand %{
8641     rolI_eReg_CL(dst, shift, cr);
8642   %}
8643 %}
8644 
8645 // ROL 32bit var by var once
8646 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8647   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8648 
8649   expand %{
8650     rolI_eReg_CL(dst, shift, cr);
8651   %}
8652 %}
8653 
8654 // ROR expand
8655 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8656   effect(USE_DEF dst, USE shift, KILL cr);
8657 
8658   format %{ "ROR    $dst, $shift" %}
8659   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8660   ins_encode( OpcP, RegOpc( dst ) );
8661   ins_pipe( ialu_reg );
8662 %}
8663 
8664 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8665   effect (USE_DEF dst, USE shift, KILL cr);
8666 
8667   format %{ "ROR    $dst, $shift" %}
8668   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8669   ins_encode( RegOpcImm(dst, shift) );
8670   ins_pipe( ialu_reg );
8671 %}
8672 
8673 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8674   effect(USE_DEF dst, USE shift, KILL cr);
8675 
8676   format %{ "ROR    $dst, $shift" %}
8677   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8678   ins_encode(OpcP, RegOpc(dst));
8679   ins_pipe( ialu_reg_reg );
8680 %}
8681 // end of ROR expand
8682 
8683 // ROR 32bit by one once
8684 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8685   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8686 
8687   expand %{
8688     rorI_eReg_imm1(dst, rshift, cr);
8689   %}
8690 %}
8691 
8692 // ROR 32bit by immI8 once
8693 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8694   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8695   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8696 
8697   expand %{
8698     rorI_eReg_imm8(dst, rshift, cr);
8699   %}
8700 %}
8701 
8702 // ROR 32bit var by var once
8703 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8704   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8705 
8706   expand %{
8707     rorI_eReg_CL(dst, shift, cr);
8708   %}
8709 %}
8710 
8711 // ROR 32bit var by var once
8712 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8713   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8714 
8715   expand %{
8716     rorI_eReg_CL(dst, shift, cr);
8717   %}
8718 %}
8719 
8720 // Xor Instructions
8721 // Xor Register with Register
8722 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8723   match(Set dst (XorI dst src));
8724   effect(KILL cr);
8725 
8726   size(2);
8727   format %{ "XOR    $dst,$src" %}
8728   opcode(0x33);
8729   ins_encode( OpcP, RegReg( dst, src) );
8730   ins_pipe( ialu_reg_reg );
8731 %}
8732 
8733 // Xor Register with Immediate -1
8734 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8735   match(Set dst (XorI dst imm));
8736 
8737   size(2);
8738   format %{ "NOT    $dst" %}
8739   ins_encode %{
8740      __ notl($dst$$Register);
8741   %}
8742   ins_pipe( ialu_reg );
8743 %}
8744 
8745 // Xor Register with Immediate
8746 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8747   match(Set dst (XorI dst src));
8748   effect(KILL cr);
8749 
8750   format %{ "XOR    $dst,$src" %}
8751   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8752   // ins_encode( RegImm( dst, src) );
8753   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8754   ins_pipe( ialu_reg );
8755 %}
8756 
8757 // Xor Register with Memory
8758 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8759   match(Set dst (XorI dst (LoadI src)));
8760   effect(KILL cr);
8761 
8762   ins_cost(125);
8763   format %{ "XOR    $dst,$src" %}
8764   opcode(0x33);
8765   ins_encode( OpcP, RegMem(dst, src) );
8766   ins_pipe( ialu_reg_mem );
8767 %}
8768 
8769 // Xor Memory with Register
8770 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8771   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8772   effect(KILL cr);
8773 
8774   ins_cost(150);
8775   format %{ "XOR    $dst,$src" %}
8776   opcode(0x31);  /* Opcode 31 /r */
8777   ins_encode( OpcP, RegMem( src, dst ) );
8778   ins_pipe( ialu_mem_reg );
8779 %}
8780 
8781 // Xor Memory with Immediate
8782 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8783   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8784   effect(KILL cr);
8785 
8786   ins_cost(125);
8787   format %{ "XOR    $dst,$src" %}
8788   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8789   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8790   ins_pipe( ialu_mem_imm );
8791 %}
8792 
8793 //----------Convert Int to Boolean---------------------------------------------
8794 
8795 instruct movI_nocopy(rRegI dst, rRegI src) %{
8796   effect( DEF dst, USE src );
8797   format %{ "MOV    $dst,$src" %}
8798   ins_encode( enc_Copy( dst, src) );
8799   ins_pipe( ialu_reg_reg );
8800 %}
8801 
8802 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8803   effect( USE_DEF dst, USE src, KILL cr );
8804 
8805   size(4);
8806   format %{ "NEG    $dst\n\t"
8807             "ADC    $dst,$src" %}
8808   ins_encode( neg_reg(dst),
8809               OpcRegReg(0x13,dst,src) );
8810   ins_pipe( ialu_reg_reg_long );
8811 %}
8812 
8813 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8814   match(Set dst (Conv2B src));
8815 
8816   expand %{
8817     movI_nocopy(dst,src);
8818     ci2b(dst,src,cr);
8819   %}
8820 %}
8821 
8822 instruct movP_nocopy(rRegI dst, eRegP src) %{
8823   effect( DEF dst, USE src );
8824   format %{ "MOV    $dst,$src" %}
8825   ins_encode( enc_Copy( dst, src) );
8826   ins_pipe( ialu_reg_reg );
8827 %}
8828 
8829 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8830   effect( USE_DEF dst, USE src, KILL cr );
8831   format %{ "NEG    $dst\n\t"
8832             "ADC    $dst,$src" %}
8833   ins_encode( neg_reg(dst),
8834               OpcRegReg(0x13,dst,src) );
8835   ins_pipe( ialu_reg_reg_long );
8836 %}
8837 
8838 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8839   match(Set dst (Conv2B src));
8840 
8841   expand %{
8842     movP_nocopy(dst,src);
8843     cp2b(dst,src,cr);
8844   %}
8845 %}
8846 
8847 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8848   match(Set dst (CmpLTMask p q));
8849   effect(KILL cr);
8850   ins_cost(400);
8851 
8852   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8853   format %{ "XOR    $dst,$dst\n\t"
8854             "CMP    $p,$q\n\t"
8855             "SETlt  $dst\n\t"
8856             "NEG    $dst" %}
8857   ins_encode %{
8858     Register Rp = $p$$Register;
8859     Register Rq = $q$$Register;
8860     Register Rd = $dst$$Register;
8862     __ xorl(Rd, Rd);
8863     __ cmpl(Rp, Rq);
8864     __ setb(Assembler::less, Rd);
8865     __ negl(Rd);
8866   %}
8867 
8868   ins_pipe(pipe_slow);
8869 %}
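
// CmpLTMask produces an all-ones mask when p < q (signed) and zero
// otherwise.  The sequence above is branch-free: SETlt writes 0 or 1 into
// the low byte of the zeroed destination and NEG turns 1 into 0xFFFFFFFF,
// e.g. p = 3, q = 5  ->  SETlt gives 1  ->  NEG gives -1 (all ones).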
8870 
8871 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8872   match(Set dst (CmpLTMask dst zero));
8873   effect(DEF dst, KILL cr);
8874   ins_cost(100);
8875 
8876   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8877   ins_encode %{
8878   __ sarl($dst$$Register, 31);
8879   %}
8880   ins_pipe(ialu_reg);
8881 %}
8882 
8883 /* better to save a register than avoid a branch */
8884 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8885   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8886   effect(KILL cr);
8887   ins_cost(400);
8888   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8889             "JGE    done\n\t"
8890             "ADD    $p,$y\n"
8891             "done:  " %}
8892   ins_encode %{
8893     Register Rp = $p$$Register;
8894     Register Rq = $q$$Register;
8895     Register Ry = $y$$Register;
8896     Label done;
8897     __ subl(Rp, Rq);
8898     __ jccb(Assembler::greaterEqual, done);
8899     __ addl(Rp, Ry);
8900     __ bind(done);
8901   %}
8902 
8903   ins_pipe(pipe_cmplt);
8904 %}
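
// The pattern above folds p = (p - q) + ((p < q) ? y : 0): rather than
// materializing the CmpLTMask, the SUB sets the flags and a short
// conditional branch adds y only when p < q (signed), trading a branch for
// one fewer live register, as the comment above notes.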
8905 
8906 /* better to save a register than avoid a branch */
8907 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8908   match(Set y (AndI (CmpLTMask p q) y));
8909   effect(KILL cr);
8910 
8911   ins_cost(300);
8912 
8913   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8914             "JLT      done\n\t"
8915             "XORL     $y, $y\n"
8916             "done:  " %}
8917   ins_encode %{
8918     Register Rp = $p$$Register;
8919     Register Rq = $q$$Register;
8920     Register Ry = $y$$Register;
8921     Label done;
8922     __ cmpl(Rp, Rq);
8923     __ jccb(Assembler::less, done);
8924     __ xorl(Ry, Ry);
8925     __ bind(done);
8926   %}
8927 
8928   ins_pipe(pipe_cmplt);
8929 %}
8930 
8931 /* If I enable this, I encourage spilling in the inner loop of compress.
8932 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8933   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8934 */
8935 //----------Overflow Math Instructions-----------------------------------------
8936 
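// The OverflowAddI/OverflowSubI/OverflowMulI forms below produce only a
// flags result: the ADD/CMP/NEG/IMUL is executed so that the consuming
// branch can test the overflow flag.  Where the instruction overwrites an
// input, that operand is USE_KILLed (or a TEMP is used instead); the
// CMP-based subtract checks leave their inputs intact.
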
8937 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8938 %{
8939   match(Set cr (OverflowAddI op1 op2));
8940   effect(DEF cr, USE_KILL op1, USE op2);
8941 
8942   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8943 
8944   ins_encode %{
8945     __ addl($op1$$Register, $op2$$Register);
8946   %}
8947   ins_pipe(ialu_reg_reg);
8948 %}
8949 
8950 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8951 %{
8952   match(Set cr (OverflowAddI op1 op2));
8953   effect(DEF cr, USE_KILL op1, USE op2);
8954 
8955   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8956 
8957   ins_encode %{
8958     __ addl($op1$$Register, $op2$$constant);
8959   %}
8960   ins_pipe(ialu_reg_reg);
8961 %}
8962 
8963 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8964 %{
8965   match(Set cr (OverflowSubI op1 op2));
8966 
8967   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8968   ins_encode %{
8969     __ cmpl($op1$$Register, $op2$$Register);
8970   %}
8971   ins_pipe(ialu_reg_reg);
8972 %}
8973 
8974 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8975 %{
8976   match(Set cr (OverflowSubI op1 op2));
8977 
8978   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8979   ins_encode %{
8980     __ cmpl($op1$$Register, $op2$$constant);
8981   %}
8982   ins_pipe(ialu_reg_reg);
8983 %}
8984 
8985 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8986 %{
8987   match(Set cr (OverflowSubI zero op2));
8988   effect(DEF cr, USE_KILL op2);
8989 
8990   format %{ "NEG    $op2\t# overflow check int" %}
8991   ins_encode %{
8992     __ negl($op2$$Register);
8993   %}
8994   ins_pipe(ialu_reg_reg);
8995 %}
8996 
8997 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8998 %{
8999   match(Set cr (OverflowMulI op1 op2));
9000   effect(DEF cr, USE_KILL op1, USE op2);
9001 
9002   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
9003   ins_encode %{
9004     __ imull($op1$$Register, $op2$$Register);
9005   %}
9006   ins_pipe(ialu_reg_reg_alu0);
9007 %}
9008 
9009 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
9010 %{
9011   match(Set cr (OverflowMulI op1 op2));
9012   effect(DEF cr, TEMP tmp, USE op1, USE op2);
9013 
9014   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
9015   ins_encode %{
9016     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
9017   %}
9018   ins_pipe(ialu_reg_reg_alu0);
9019 %}
9020 
9021 //----------Long Instructions------------------------------------------------
9022 // Add Long Register with Register
9023 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9024   match(Set dst (AddL dst src));
9025   effect(KILL cr);
9026   ins_cost(200);
9027   format %{ "ADD    $dst.lo,$src.lo\n\t"
9028             "ADC    $dst.hi,$src.hi" %}
9029   opcode(0x03, 0x13);
9030   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9031   ins_pipe( ialu_reg_reg_long );
9032 %}
9033 
9034 // Add Long Register with Immediate
9035 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9036   match(Set dst (AddL dst src));
9037   effect(KILL cr);
9038   format %{ "ADD    $dst.lo,$src.lo\n\t"
9039             "ADC    $dst.hi,$src.hi" %}
9040   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
9041   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9042   ins_pipe( ialu_reg_long );
9043 %}
9044 
9045 // Add Long Register with Memory
9046 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9047   match(Set dst (AddL dst (LoadL mem)));
9048   effect(KILL cr);
9049   ins_cost(125);
9050   format %{ "ADD    $dst.lo,$mem\n\t"
9051             "ADC    $dst.hi,$mem+4" %}
9052   opcode(0x03, 0x13);
9053   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9054   ins_pipe( ialu_reg_long_mem );
9055 %}
9056 
9057 // Subtract Long Register with Register.
9058 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9059   match(Set dst (SubL dst src));
9060   effect(KILL cr);
9061   ins_cost(200);
9062   format %{ "SUB    $dst.lo,$src.lo\n\t"
9063             "SBB    $dst.hi,$src.hi" %}
9064   opcode(0x2B, 0x1B);
9065   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9066   ins_pipe( ialu_reg_reg_long );
9067 %}
9068 
9069 // Subtract Long Register with Immediate
9070 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9071   match(Set dst (SubL dst src));
9072   effect(KILL cr);
9073   format %{ "SUB    $dst.lo,$src.lo\n\t"
9074             "SBB    $dst.hi,$src.hi" %}
9075   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9076   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9077   ins_pipe( ialu_reg_long );
9078 %}
9079 
9080 // Subtract Long Register with Memory
9081 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9082   match(Set dst (SubL dst (LoadL mem)));
9083   effect(KILL cr);
9084   ins_cost(125);
9085   format %{ "SUB    $dst.lo,$mem\n\t"
9086             "SBB    $dst.hi,$mem+4" %}
9087   opcode(0x2B, 0x1B);
9088   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9089   ins_pipe( ialu_reg_long_mem );
9090 %}
9091 
9092 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9093   match(Set dst (SubL zero dst));
9094   effect(KILL cr);
9095   ins_cost(300);
9096   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9097   ins_encode( neg_long(dst) );
9098   ins_pipe( ialu_reg_reg_long );
9099 %}
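
// Two's-complement negation of a 64-bit pair without a spare register:
// NEG $dst.hi, then NEG $dst.lo (which sets CF exactly when the low word
// was non-zero), then SBB $dst.hi,0 folds that borrow into the high word.
// Example: -(0x00000001:00000000) = 0xFFFFFFFF:00000000.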
9100 
9101 // And Long Register with Register
9102 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9103   match(Set dst (AndL dst src));
9104   effect(KILL cr);
9105   format %{ "AND    $dst.lo,$src.lo\n\t"
9106             "AND    $dst.hi,$src.hi" %}
9107   opcode(0x23,0x23);
9108   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9109   ins_pipe( ialu_reg_reg_long );
9110 %}
9111 
9112 // And Long Register with Immediate
9113 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9114   match(Set dst (AndL dst src));
9115   effect(KILL cr);
9116   format %{ "AND    $dst.lo,$src.lo\n\t"
9117             "AND    $dst.hi,$src.hi" %}
9118   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9119   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9120   ins_pipe( ialu_reg_long );
9121 %}
9122 
9123 // And Long Register with Memory
9124 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9125   match(Set dst (AndL dst (LoadL mem)));
9126   effect(KILL cr);
9127   ins_cost(125);
9128   format %{ "AND    $dst.lo,$mem\n\t"
9129             "AND    $dst.hi,$mem+4" %}
9130   opcode(0x23, 0x23);
9131   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9132   ins_pipe( ialu_reg_long_mem );
9133 %}
9134 
9135 // BMI1 instructions
9136 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9137   match(Set dst (AndL (XorL src1 minus_1) src2));
9138   predicate(UseBMI1Instructions);
9139   effect(KILL cr, TEMP dst);
9140 
9141   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9142             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9143          %}
9144 
9145   ins_encode %{
9146     Register Rdst = $dst$$Register;
9147     Register Rsrc1 = $src1$$Register;
9148     Register Rsrc2 = $src2$$Register;
9149     __ andnl(Rdst, Rsrc1, Rsrc2);
9150     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9151   %}
9152   ins_pipe(ialu_reg_reg_long);
9153 %}
9154 
9155 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9156   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9157   predicate(UseBMI1Instructions);
9158   effect(KILL cr, TEMP dst);
9159 
9160   ins_cost(125);
9161   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9162             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9163          %}
9164 
9165   ins_encode %{
9166     Register Rdst = $dst$$Register;
9167     Register Rsrc1 = $src1$$Register;
9168     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9169 
9170     __ andnl(Rdst, Rsrc1, $src2$$Address);
9171     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9172   %}
9173   ins_pipe(ialu_reg_mem);
9174 %}
9175 
9176 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9177   match(Set dst (AndL (SubL imm_zero src) src));
9178   predicate(UseBMI1Instructions);
9179   effect(KILL cr, TEMP dst);
9180 
9181   format %{ "MOVL   $dst.hi, 0\n\t"
9182             "BLSIL  $dst.lo, $src.lo\n\t"
9183             "JNZ    done\n\t"
9184             "BLSIL  $dst.hi, $src.hi\n"
9185             "done:"
9186          %}
9187 
9188   ins_encode %{
9189     Label done;
9190     Register Rdst = $dst$$Register;
9191     Register Rsrc = $src$$Register;
9192     __ movl(HIGH_FROM_LOW(Rdst), 0);
9193     __ blsil(Rdst, Rsrc);
9194     __ jccb(Assembler::notZero, done);
9195     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9196     __ bind(done);
9197   %}
9198   ins_pipe(ialu_reg);
9199 %}
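
// How the 64-bit BLSI (isolate lowest set bit, x & -x) is split above:
// BLSIL on the low word yields the correct low half and leaves ZF set only
// when that word was zero.  If the low word was non-zero the high half of
// the result must be 0 (already written); otherwise the lowest set bit is
// in the high word and a second BLSIL computes it.  Example:
//   src = 0x00000003:00000000 -> low BLSIL gives 0 (ZF=1) -> high BLSIL
//   gives 0x00000001, so the result is 0x00000001:00000000.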
9200 
9201 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9202   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9203   predicate(UseBMI1Instructions);
9204   effect(KILL cr, TEMP dst);
9205 
9206   ins_cost(125);
9207   format %{ "MOVL   $dst.hi, 0\n\t"
9208             "BLSIL  $dst.lo, $src\n\t"
9209             "JNZ    done\n\t"
9210             "BLSIL  $dst.hi, $src+4\n"
9211             "done:"
9212          %}
9213 
9214   ins_encode %{
9215     Label done;
9216     Register Rdst = $dst$$Register;
9217     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9218 
9219     __ movl(HIGH_FROM_LOW(Rdst), 0);
9220     __ blsil(Rdst, $src$$Address);
9221     __ jccb(Assembler::notZero, done);
9222     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9223     __ bind(done);
9224   %}
9225   ins_pipe(ialu_reg_mem);
9226 %}
9227 
9228 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9229 %{
9230   match(Set dst (XorL (AddL src minus_1) src));
9231   predicate(UseBMI1Instructions);
9232   effect(KILL cr, TEMP dst);
9233 
9234   format %{ "MOVL    $dst.hi, 0\n\t"
9235             "BLSMSKL $dst.lo, $src.lo\n\t"
9236             "JNC     done\n\t"
9237             "BLSMSKL $dst.hi, $src.hi\n"
9238             "done:"
9239          %}
9240 
9241   ins_encode %{
9242     Label done;
9243     Register Rdst = $dst$$Register;
9244     Register Rsrc = $src$$Register;
9245     __ movl(HIGH_FROM_LOW(Rdst), 0);
9246     __ blsmskl(Rdst, Rsrc);
9247     __ jccb(Assembler::carryClear, done);
9248     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9249     __ bind(done);
9250   %}
9251 
9252   ins_pipe(ialu_reg);
9253 %}
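
// BLSMSK (mask up to and including the lowest set bit, x ^ (x - 1)) and
// BLSR below (clear lowest set bit, x & (x - 1)) are split across the two
// words the same way, but key on CF instead of ZF: the 32-bit forms set CF
// when the low source word was zero, i.e. exactly when the borrow from the
// x - 1 propagates into the high word and the high half still needs
// processing.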
9254 
9255 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9256 %{
9257   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9258   predicate(UseBMI1Instructions);
9259   effect(KILL cr, TEMP dst);
9260 
9261   ins_cost(125);
9262   format %{ "MOVL    $dst.hi, 0\n\t"
9263             "BLSMSKL $dst.lo, $src\n\t"
9264             "JNC     done\n\t"
9265             "BLSMSKL $dst.hi, $src+4\n"
9266             "done:"
9267          %}
9268 
9269   ins_encode %{
9270     Label done;
9271     Register Rdst = $dst$$Register;
9272     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9273 
9274     __ movl(HIGH_FROM_LOW(Rdst), 0);
9275     __ blsmskl(Rdst, $src$$Address);
9276     __ jccb(Assembler::carryClear, done);
9277     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9278     __ bind(done);
9279   %}
9280 
9281   ins_pipe(ialu_reg_mem);
9282 %}
9283 
9284 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9285 %{
9286   match(Set dst (AndL (AddL src minus_1) src) );
9287   predicate(UseBMI1Instructions);
9288   effect(KILL cr, TEMP dst);
9289 
9290   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9291             "BLSRL  $dst.lo, $src.lo\n\t"
9292             "JNC    done\n\t"
9293             "BLSRL  $dst.hi, $src.hi\n"
9294             "done:"
9295   %}
9296 
9297   ins_encode %{
9298     Label done;
9299     Register Rdst = $dst$$Register;
9300     Register Rsrc = $src$$Register;
9301     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9302     __ blsrl(Rdst, Rsrc);
9303     __ jccb(Assembler::carryClear, done);
9304     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9305     __ bind(done);
9306   %}
9307 
9308   ins_pipe(ialu_reg);
9309 %}
9310 
9311 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9312 %{
9313   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9314   predicate(UseBMI1Instructions);
9315   effect(KILL cr, TEMP dst);
9316 
9317   ins_cost(125);
9318   format %{ "MOVL   $dst.hi, $src+4\n\t"
9319             "BLSRL  $dst.lo, $src\n\t"
9320             "JNC    done\n\t"
9321             "BLSRL  $dst.hi, $src+4\n"
9322             "done:"
9323   %}
9324 
9325   ins_encode %{
9326     Label done;
9327     Register Rdst = $dst$$Register;
9328     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9329     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9330     __ blsrl(Rdst, $src$$Address);
9331     __ jccb(Assembler::carryClear, done);
9332     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9333     __ bind(done);
9334   %}
9335 
9336   ins_pipe(ialu_reg_mem);
9337 %}
9338 
9339 // Or Long Register with Register
9340 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9341   match(Set dst (OrL dst src));
9342   effect(KILL cr);
9343   format %{ "OR     $dst.lo,$src.lo\n\t"
9344             "OR     $dst.hi,$src.hi" %}
9345   opcode(0x0B,0x0B);
9346   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9347   ins_pipe( ialu_reg_reg_long );
9348 %}
9349 
9350 // Or Long Register with Immediate
9351 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9352   match(Set dst (OrL dst src));
9353   effect(KILL cr);
9354   format %{ "OR     $dst.lo,$src.lo\n\t"
9355             "OR     $dst.hi,$src.hi" %}
9356   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9357   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9358   ins_pipe( ialu_reg_long );
9359 %}
9360 
9361 // Or Long Register with Memory
9362 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9363   match(Set dst (OrL dst (LoadL mem)));
9364   effect(KILL cr);
9365   ins_cost(125);
9366   format %{ "OR     $dst.lo,$mem\n\t"
9367             "OR     $dst.hi,$mem+4" %}
9368   opcode(0x0B,0x0B);
9369   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9370   ins_pipe( ialu_reg_long_mem );
9371 %}
9372 
9373 // Xor Long Register with Register
9374 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9375   match(Set dst (XorL dst src));
9376   effect(KILL cr);
9377   format %{ "XOR    $dst.lo,$src.lo\n\t"
9378             "XOR    $dst.hi,$src.hi" %}
9379   opcode(0x33,0x33);
9380   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9381   ins_pipe( ialu_reg_reg_long );
9382 %}
9383 
9384 // Xor Long Register with Immediate -1
9385 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9386   match(Set dst (XorL dst imm));
9387   format %{ "NOT    $dst.lo\n\t"
9388             "NOT    $dst.hi" %}
9389   ins_encode %{
9390      __ notl($dst$$Register);
9391      __ notl(HIGH_FROM_LOW($dst$$Register));
9392   %}
9393   ins_pipe( ialu_reg_long );
9394 %}
9395 
9396 // Xor Long Register with Immediate
9397 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9398   match(Set dst (XorL dst src));
9399   effect(KILL cr);
9400   format %{ "XOR    $dst.lo,$src.lo\n\t"
9401             "XOR    $dst.hi,$src.hi" %}
9402   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9403   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9404   ins_pipe( ialu_reg_long );
9405 %}
9406 
9407 // Xor Long Register with Memory
9408 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9409   match(Set dst (XorL dst (LoadL mem)));
9410   effect(KILL cr);
9411   ins_cost(125);
9412   format %{ "XOR    $dst.lo,$mem\n\t"
9413             "XOR    $dst.hi,$mem+4" %}
9414   opcode(0x33,0x33);
9415   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9416   ins_pipe( ialu_reg_long_mem );
9417 %}
9418 
9419 // Shift Left Long by 1
9420 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9421   predicate(UseNewLongLShift);
9422   match(Set dst (LShiftL dst cnt));
9423   effect(KILL cr);
9424   ins_cost(100);
9425   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9426             "ADC    $dst.hi,$dst.hi" %}
9427   ins_encode %{
9428     __ addl($dst$$Register,$dst$$Register);
9429     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9430   %}
9431   ins_pipe( ialu_reg_long );
9432 %}
9433 
9434 // Shift Left Long by 2
9435 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9436   predicate(UseNewLongLShift);
9437   match(Set dst (LShiftL dst cnt));
9438   effect(KILL cr);
9439   ins_cost(100);
9440   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9441             "ADC    $dst.hi,$dst.hi\n\t"
9442             "ADD    $dst.lo,$dst.lo\n\t"
9443             "ADC    $dst.hi,$dst.hi" %}
9444   ins_encode %{
9445     __ addl($dst$$Register,$dst$$Register);
9446     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9447     __ addl($dst$$Register,$dst$$Register);
9448     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9449   %}
9450   ins_pipe( ialu_reg_long );
9451 %}
9452 
9453 // Shift Left Long by 3
9454 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9455   predicate(UseNewLongLShift);
9456   match(Set dst (LShiftL dst cnt));
9457   effect(KILL cr);
9458   ins_cost(100);
9459   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9460             "ADC    $dst.hi,$dst.hi\n\t"
9461             "ADD    $dst.lo,$dst.lo\n\t"
9462             "ADC    $dst.hi,$dst.hi\n\t"
9463             "ADD    $dst.lo,$dst.lo\n\t"
9464             "ADC    $dst.hi,$dst.hi" %}
9465   ins_encode %{
9466     __ addl($dst$$Register,$dst$$Register);
9467     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9468     __ addl($dst$$Register,$dst$$Register);
9469     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9470     __ addl($dst$$Register,$dst$$Register);
9471     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9472   %}
9473   ins_pipe( ialu_reg_long );
9474 %}
9475 
9476 // Shift Left Long by 1-31
9477 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9478   match(Set dst (LShiftL dst cnt));
9479   effect(KILL cr);
9480   ins_cost(200);
9481   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9482             "SHL    $dst.lo,$cnt" %}
9483   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9484   ins_encode( move_long_small_shift(dst,cnt) );
9485   ins_pipe( ialu_reg_long );
9486 %}
9487 
9488 // Shift Left Long by 32-63
9489 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9490   match(Set dst (LShiftL dst cnt));
9491   effect(KILL cr);
9492   ins_cost(300);
9493   format %{ "MOV    $dst.hi,$dst.lo\n"
9494           "\tSHL    $dst.hi,$cnt-32\n"
9495           "\tXOR    $dst.lo,$dst.lo" %}
9496   opcode(0xC1, 0x4);  /* C1 /4 ib */
9497   ins_encode( move_long_big_shift_clr(dst,cnt) );
9498   ins_pipe( ialu_reg_long );
9499 %}
9500 
9501 // Shift Left Long by variable
9502 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9503   match(Set dst (LShiftL dst shift));
9504   effect(KILL cr);
9505   ins_cost(500+200);
9506   size(17);
9507   format %{ "TEST   $shift,32\n\t"
9508             "JEQ,s  small\n\t"
9509             "MOV    $dst.hi,$dst.lo\n\t"
9510             "XOR    $dst.lo,$dst.lo\n"
9511     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9512             "SHL    $dst.lo,$shift" %}
9513   ins_encode( shift_left_long( dst, shift ) );
9514   ins_pipe( pipe_slow );
9515 %}
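
// Variable 64-bit shifts need the explicit TEST $shift,32 above because the
// hardware masks shift counts to 5 bits, so the SHLD/SHL pair alone only
// covers counts 0-31.  For counts >= 32 the low word is first moved into
// the high word (zero- or sign-filling in the right-shift forms below), and
// the same pair then applies the residual shift of (count & 31).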
9516 
9517 // Shift Right Long by 1-31
9518 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9519   match(Set dst (URShiftL dst cnt));
9520   effect(KILL cr);
9521   ins_cost(200);
9522   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9523             "SHR    $dst.hi,$cnt" %}
9524   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9525   ins_encode( move_long_small_shift(dst,cnt) );
9526   ins_pipe( ialu_reg_long );
9527 %}
9528 
9529 // Shift Right Long by 32-63
9530 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9531   match(Set dst (URShiftL dst cnt));
9532   effect(KILL cr);
9533   ins_cost(300);
9534   format %{ "MOV    $dst.lo,$dst.hi\n"
9535           "\tSHR    $dst.lo,$cnt-32\n"
9536           "\tXOR    $dst.hi,$dst.hi" %}
9537   opcode(0xC1, 0x5);  /* C1 /5 ib */
9538   ins_encode( move_long_big_shift_clr(dst,cnt) );
9539   ins_pipe( ialu_reg_long );
9540 %}
9541 
9542 // Shift Right Long by variable
9543 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9544   match(Set dst (URShiftL dst shift));
9545   effect(KILL cr);
9546   ins_cost(600);
9547   size(17);
9548   format %{ "TEST   $shift,32\n\t"
9549             "JEQ,s  small\n\t"
9550             "MOV    $dst.lo,$dst.hi\n\t"
9551             "XOR    $dst.hi,$dst.hi\n"
9552     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9553             "SHR    $dst.hi,$shift" %}
9554   ins_encode( shift_right_long( dst, shift ) );
9555   ins_pipe( pipe_slow );
9556 %}
9557 
9558 // Shift Right arithmetic Long by 1-31
9559 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9560   match(Set dst (RShiftL dst cnt));
9561   effect(KILL cr);
9562   ins_cost(200);
9563   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9564             "SAR    $dst.hi,$cnt" %}
9565   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9566   ins_encode( move_long_small_shift(dst,cnt) );
9567   ins_pipe( ialu_reg_long );
9568 %}
9569 
9570 // Shift Right Long by 32-63
9571 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9572   match(Set dst (RShiftL dst cnt));
9573   effect(KILL cr);
9574   ins_cost(300);
9575   format %{ "MOV    $dst.lo,$dst.hi\n"
9576           "\tSAR    $dst.lo,$cnt-32\n"
9577           "\tSAR    $dst.hi,31" %}
9578   opcode(0xC1, 0x7);  /* C1 /7 ib */
9579   ins_encode( move_long_big_shift_sign(dst,cnt) );
9580   ins_pipe( ialu_reg_long );
9581 %}
9582 
9583 // Shift Right arithmetic Long by variable
9584 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9585   match(Set dst (RShiftL dst shift));
9586   effect(KILL cr);
9587   ins_cost(600);
9588   size(18);
9589   format %{ "TEST   $shift,32\n\t"
9590             "JEQ,s  small\n\t"
9591             "MOV    $dst.lo,$dst.hi\n\t"
9592             "SAR    $dst.hi,31\n"
9593     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9594             "SAR    $dst.hi,$shift" %}
9595   ins_encode( shift_right_arith_long( dst, shift ) );
9596   ins_pipe( pipe_slow );
9597 %}
9598 
9599 
9600 //----------Double Instructions------------------------------------------------
9601 // Double Math
9602 
9603 // Compare & branch
9604 
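// FUCOMIP (like CMOV) first appeared with the P6 core, hence the
// supports_cmov() predicate on the versions below.  An unordered (NaN)
// compare leaves PF set, so the JNP in the fixup falls through to the
// "MOV ah,1 / SAHF" pair, which forces CF and makes the unordered result
// read as 'less than'.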
9605 // P6 version of float compare, sets condition codes in EFLAGS
9606 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9607   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9608   match(Set cr (CmpD src1 src2));
9609   effect(KILL rax);
9610   ins_cost(150);
9611   format %{ "FLD    $src1\n\t"
9612             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9613             "JNP    exit\n\t"
9614             "MOV    ah,1       // saw a NaN, set CF\n\t"
9615             "SAHF\n"
9616      "exit:\tNOP               // avoid branch to branch" %}
9617   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9618   ins_encode( Push_Reg_DPR(src1),
9619               OpcP, RegOpc(src2),
9620               cmpF_P6_fixup );
9621   ins_pipe( pipe_slow );
9622 %}
9623 
9624 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9625   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9626   match(Set cr (CmpD src1 src2));
9627   ins_cost(150);
9628   format %{ "FLD    $src1\n\t"
9629             "FUCOMIP ST,$src2  // P6 instruction" %}
9630   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9631   ins_encode( Push_Reg_DPR(src1),
9632               OpcP, RegOpc(src2));
9633   ins_pipe( pipe_slow );
9634 %}
9635 
9636 // Compare & branch
9637 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9638   predicate(UseSSE<=1);
9639   match(Set cr (CmpD src1 src2));
9640   effect(KILL rax);
9641   ins_cost(200);
9642   format %{ "FLD    $src1\n\t"
9643             "FCOMp  $src2\n\t"
9644             "FNSTSW AX\n\t"
9645             "TEST   AX,0x400\n\t"
9646             "JZ,s   flags\n\t"
9647             "MOV    AH,1\t# unordered treat as LT\n"
9648     "flags:\tSAHF" %}
9649   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9650   ins_encode( Push_Reg_DPR(src1),
9651               OpcP, RegOpc(src2),
9652               fpu_flags);
9653   ins_pipe( pipe_slow );
9654 %}
9655 
9656 // Compare vs zero into -1,0,1
9657 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9658   predicate(UseSSE<=1);
9659   match(Set dst (CmpD3 src1 zero));
9660   effect(KILL cr, KILL rax);
9661   ins_cost(280);
9662   format %{ "FTSTD  $dst,$src1" %}
9663   opcode(0xE4, 0xD9);
9664   ins_encode( Push_Reg_DPR(src1),
9665               OpcS, OpcP, PopFPU,
9666               CmpF_Result(dst));
9667   ins_pipe( pipe_slow );
9668 %}
9669 
9670 // Compare into -1,0,1
9671 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9672   predicate(UseSSE<=1);
9673   match(Set dst (CmpD3 src1 src2));
9674   effect(KILL cr, KILL rax);
9675   ins_cost(300);
9676   format %{ "FCMPD  $dst,$src1,$src2" %}
9677   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9678   ins_encode( Push_Reg_DPR(src1),
9679               OpcP, RegOpc(src2),
9680               CmpF_Result(dst));
9681   ins_pipe( pipe_slow );
9682 %}
9683 
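// UCOMISD/UCOMISS set ZF, PF and CF all to 1 on an unordered (NaN) compare.
// The emit_cmpfp_fixup() sequence shown in the format masks EFLAGS on the
// stack so that only CF survives in that case, i.e. an unordered compare
// reads back as 'below' (NaN treated as less than).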
9684 // float compare and set condition codes in EFLAGS by XMM regs
9685 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9686   predicate(UseSSE>=2);
9687   match(Set cr (CmpD src1 src2));
9688   ins_cost(145);
9689   format %{ "UCOMISD $src1,$src2\n\t"
9690             "JNP,s   exit\n\t"
9691             "PUSHF\t# saw NaN, set CF\n\t"
9692             "AND     [rsp], #0xffffff2b\n\t"
9693             "POPF\n"
9694     "exit:" %}
9695   ins_encode %{
9696     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9697     emit_cmpfp_fixup(_masm);
9698   %}
9699   ins_pipe( pipe_slow );
9700 %}
9701 
9702 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9703   predicate(UseSSE>=2);
9704   match(Set cr (CmpD src1 src2));
9705   ins_cost(100);
9706   format %{ "UCOMISD $src1,$src2" %}
9707   ins_encode %{
9708     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9709   %}
9710   ins_pipe( pipe_slow );
9711 %}
9712 
9713 // float compare and set condition codes in EFLAGS by XMM regs
9714 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9715   predicate(UseSSE>=2);
9716   match(Set cr (CmpD src1 (LoadD src2)));
9717   ins_cost(145);
9718   format %{ "UCOMISD $src1,$src2\n\t"
9719             "JNP,s   exit\n\t"
9720             "PUSHF\t# saw NaN, set CF\n\t"
9721             "AND     [rsp], #0xffffff2b\n\t"
9722             "POPF\n"
9723     "exit:" %}
9724   ins_encode %{
9725     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9726     emit_cmpfp_fixup(_masm);
9727   %}
9728   ins_pipe( pipe_slow );
9729 %}
9730 
9731 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9732   predicate(UseSSE>=2);
9733   match(Set cr (CmpD src1 (LoadD src2)));
9734   ins_cost(100);
9735   format %{ "UCOMISD $src1,$src2" %}
9736   ins_encode %{
9737     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9738   %}
9739   ins_pipe( pipe_slow );
9740 %}
9741 
9742 // Compare into -1,0,1 in XMM
9743 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9744   predicate(UseSSE>=2);
9745   match(Set dst (CmpD3 src1 src2));
9746   effect(KILL cr);
9747   ins_cost(255);
9748   format %{ "UCOMISD $src1, $src2\n\t"
9749             "MOV     $dst, #-1\n\t"
9750             "JP,s    done\n\t"
9751             "JB,s    done\n\t"
9752             "SETNE   $dst\n\t"
9753             "MOVZB   $dst, $dst\n"
9754     "done:" %}
9755   ins_encode %{
9756     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9757     emit_cmpfp3(_masm, $dst$$Register);
9758   %}
9759   ins_pipe( pipe_slow );
9760 %}
9761 
9762 // Compare into -1,0,1 in XMM and memory
9763 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9764   predicate(UseSSE>=2);
9765   match(Set dst (CmpD3 src1 (LoadD src2)));
9766   effect(KILL cr);
9767   ins_cost(275);
9768   format %{ "UCOMISD $src1, $src2\n\t"
9769             "MOV     $dst, #-1\n\t"
9770             "JP,s    done\n\t"
9771             "JB,s    done\n\t"
9772             "SETNE   $dst\n\t"
9773             "MOVZB   $dst, $dst\n"
9774     "done:" %}
9775   ins_encode %{
9776     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9777     emit_cmpfp3(_masm, $dst$$Register);
9778   %}
9779   ins_pipe( pipe_slow );
9780 %}
9781 
9782 
9783 instruct subDPR_reg(regDPR dst, regDPR src) %{
9784   predicate (UseSSE <=1);
9785   match(Set dst (SubD dst src));
9786 
9787   format %{ "FLD    $src\n\t"
9788             "DSUBp  $dst,ST" %}
9789   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9790   ins_cost(150);
9791   ins_encode( Push_Reg_DPR(src),
9792               OpcP, RegOpc(dst) );
9793   ins_pipe( fpu_reg_reg );
9794 %}
9795 
9796 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9797   predicate (UseSSE <=1);
9798   match(Set dst (RoundDouble (SubD src1 src2)));
9799   ins_cost(250);
9800 
9801   format %{ "FLD    $src2\n\t"
9802             "DSUB   ST,$src1\n\t"
9803             "FSTP_D $dst\t# D-round" %}
9804   opcode(0xD8, 0x5);
9805   ins_encode( Push_Reg_DPR(src2),
9806               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9807   ins_pipe( fpu_mem_reg_reg );
9808 %}
9809 
9810 
9811 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9812   predicate (UseSSE <=1);
9813   match(Set dst (SubD dst (LoadD src)));
9814   ins_cost(150);
9815 
9816   format %{ "FLD    $src\n\t"
9817             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9819   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9820               OpcP, RegOpc(dst) );
9821   ins_pipe( fpu_reg_mem );
9822 %}
9823 
9824 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9825   predicate (UseSSE<=1);
9826   match(Set dst (AbsD src));
9827   ins_cost(100);
9828   format %{ "FABS" %}
9829   opcode(0xE1, 0xD9);
9830   ins_encode( OpcS, OpcP );
9831   ins_pipe( fpu_reg_reg );
9832 %}
9833 
9834 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9835   predicate(UseSSE<=1);
9836   match(Set dst (NegD src));
9837   ins_cost(100);
9838   format %{ "FCHS" %}
9839   opcode(0xE0, 0xD9);
9840   ins_encode( OpcS, OpcP );
9841   ins_pipe( fpu_reg_reg );
9842 %}
9843 
9844 instruct addDPR_reg(regDPR dst, regDPR src) %{
9845   predicate(UseSSE<=1);
9846   match(Set dst (AddD dst src));
9847   format %{ "FLD    $src\n\t"
9848             "DADD   $dst,ST" %}
9849   size(4);
9850   ins_cost(150);
9851   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9852   ins_encode( Push_Reg_DPR(src),
9853               OpcP, RegOpc(dst) );
9854   ins_pipe( fpu_reg_reg );
9855 %}
9856 
9857 
9858 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9859   predicate(UseSSE<=1);
9860   match(Set dst (RoundDouble (AddD src1 src2)));
9861   ins_cost(250);
9862 
9863   format %{ "FLD    $src2\n\t"
9864             "DADD   ST,$src1\n\t"
9865             "FSTP_D $dst\t# D-round" %}
9866   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9867   ins_encode( Push_Reg_DPR(src2),
9868               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9869   ins_pipe( fpu_mem_reg_reg );
9870 %}
9871 
9872 
9873 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9874   predicate(UseSSE<=1);
9875   match(Set dst (AddD dst (LoadD src)));
9876   ins_cost(150);
9877 
9878   format %{ "FLD    $src\n\t"
9879             "DADDp  $dst,ST" %}
9880   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9881   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9882               OpcP, RegOpc(dst) );
9883   ins_pipe( fpu_reg_mem );
9884 %}
9885 
9886 // add-to-memory
9887 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9888   predicate(UseSSE<=1);
9889   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9890   ins_cost(150);
9891 
9892   format %{ "FLD_D  $dst\n\t"
9893             "DADD   ST,$src\n\t"
9894             "FST_D  $dst" %}
9895   opcode(0xDD, 0x0);
9896   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9897               Opcode(0xD8), RegOpc(src),
9898               set_instruction_start,
9899               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9900   ins_pipe( fpu_reg_mem );
9901 %}
9902 
9903 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9904   predicate(UseSSE<=1);
9905   match(Set dst (AddD dst con));
9906   ins_cost(125);
9907   format %{ "FLD1\n\t"
9908             "DADDp  $dst,ST" %}
9909   ins_encode %{
9910     __ fld1();
9911     __ faddp($dst$$reg);
9912   %}
9913   ins_pipe(fpu_reg);
9914 %}
9915 
9916 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9917   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9918   match(Set dst (AddD dst con));
9919   ins_cost(200);
9920   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9921             "DADDp  $dst,ST" %}
9922   ins_encode %{
9923     __ fld_d($constantaddress($con));
9924     __ faddp($dst$$reg);
9925   %}
9926   ins_pipe(fpu_reg_mem);
9927 %}
9928 
9929 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9930   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9931   match(Set dst (RoundDouble (AddD src con)));
9932   ins_cost(200);
9933   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9934             "DADD   ST,$src\n\t"
9935             "FSTP_D $dst\t# D-round" %}
9936   ins_encode %{
9937     __ fld_d($constantaddress($con));
9938     __ fadd($src$$reg);
9939     __ fstp_d(Address(rsp, $dst$$disp));
9940   %}
9941   ins_pipe(fpu_mem_reg_con);
9942 %}
9943 
9944 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9945   predicate(UseSSE<=1);
9946   match(Set dst (MulD dst src));
9947   format %{ "FLD    $src\n\t"
9948             "DMULp  $dst,ST" %}
9949   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9950   ins_cost(150);
9951   ins_encode( Push_Reg_DPR(src),
9952               OpcP, RegOpc(dst) );
9953   ins_pipe( fpu_reg_reg );
9954 %}
9955 
9956 // Strict FP instruction biases argument before multiply then
9957 // biases result to avoid double rounding of subnormals.
9958 //
9959 // scale arg1 by multiplying arg1 by 2^(-15360)
9960 // load arg2
9961 // multiply scaled arg1 by arg2
9962 // rescale product by 2^(15360)
9963 //
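// The bias constants 2^(-15360) and 2^(+15360) are the
// StubRoutines::_fpu_subnormal_bias1/_fpu_subnormal_bias2 values shown in the
// format; they are emitted by the strictfp_bias1()/strictfp_bias2() encoding
// classes wrapped around the actual multiply.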
9964 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9965   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9966   match(Set dst (MulD dst src));
9967   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9968 
9969   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9970             "DMULp  $dst,ST\n\t"
9971             "FLD    $src\n\t"
9972             "DMULp  $dst,ST\n\t"
9973             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9974             "DMULp  $dst,ST\n\t" %}
9975   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9976   ins_encode( strictfp_bias1(dst),
9977               Push_Reg_DPR(src),
9978               OpcP, RegOpc(dst),
9979               strictfp_bias2(dst) );
9980   ins_pipe( fpu_reg_reg );
9981 %}
9982 
9983 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9984   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9985   match(Set dst (MulD dst con));
9986   ins_cost(200);
9987   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9988             "DMULp  $dst,ST" %}
9989   ins_encode %{
9990     __ fld_d($constantaddress($con));
9991     __ fmulp($dst$$reg);
9992   %}
9993   ins_pipe(fpu_reg_mem);
9994 %}
9995 
9996 
9997 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9998   predicate( UseSSE<=1 );
9999   match(Set dst (MulD dst (LoadD src)));
10000   ins_cost(200);
10001   format %{ "FLD_D  $src\n\t"
10002             "DMULp  $dst,ST" %}
10003   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
10004   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10005               OpcP, RegOpc(dst) );
10006   ins_pipe( fpu_reg_mem );
10007 %}
10008 
10009 //
10010 // Cisc-alternate to reg-reg multiply
10011 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
10012   predicate( UseSSE<=1 );
10013   match(Set dst (MulD src (LoadD mem)));
10014   ins_cost(250);
10015   format %{ "FLD_D  $mem\n\t"
10016             "DMUL   ST,$src\n\t"
10017             "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
10019   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10020               OpcReg_FPR(src),
10021               Pop_Reg_DPR(dst) );
10022   ins_pipe( fpu_reg_reg_mem );
10023 %}
10024 
10025 
10026 // MACRO3 -- addDPR a mulDPR
10027 // This instruction is a '2-address' instruction in that the result goes
10028 // back to src2.  This eliminates a move from the macro; possibly the
10029 // register allocator will have to add it back (and maybe not).
10030 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10031   predicate( UseSSE<=1 );
10032   match(Set src2 (AddD (MulD src0 src1) src2));
10033   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10034             "DMUL   ST,$src1\n\t"
10035             "DADDp  $src2,ST" %}
10036   ins_cost(250);
10037   opcode(0xDD); /* LoadD DD /0 */
10038   ins_encode( Push_Reg_FPR(src0),
10039               FMul_ST_reg(src1),
10040               FAddP_reg_ST(src2) );
10041   ins_pipe( fpu_reg_reg_reg );
10042 %}
10043 
10044 
10045 // MACRO3 -- subDPR a mulDPR
10046 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10047   predicate( UseSSE<=1 );
10048   match(Set src2 (SubD (MulD src0 src1) src2));
10049   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10050             "DMUL   ST,$src1\n\t"
10051             "DSUBRp $src2,ST" %}
10052   ins_cost(250);
10053   ins_encode( Push_Reg_FPR(src0),
10054               FMul_ST_reg(src1),
10055               Opcode(0xDE), Opc_plus(0xE0,src2));
10056   ins_pipe( fpu_reg_reg_reg );
10057 %}
10058 
10059 
10060 instruct divDPR_reg(regDPR dst, regDPR src) %{
10061   predicate( UseSSE<=1 );
10062   match(Set dst (DivD dst src));
10063 
10064   format %{ "FLD    $src\n\t"
10065             "FDIVp  $dst,ST" %}
10066   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10067   ins_cost(150);
10068   ins_encode( Push_Reg_DPR(src),
10069               OpcP, RegOpc(dst) );
10070   ins_pipe( fpu_reg_reg );
10071 %}
10072 
10073 // Strict FP instruction biases argument before division then
10074 // biases result, to avoid double rounding of subnormals.
10075 //
10076 // scale dividend by multiplying dividend by 2^(-15360)
10077 // load divisor
10078 // divide scaled dividend by divisor
10079 // rescale quotient by 2^(15360)
10080 //
10081 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
10086 
10087   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10088             "DMULp  $dst,ST\n\t"
10089             "FLD    $src\n\t"
10090             "FDIVp  $dst,ST\n\t"
10091             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10092             "DMULp  $dst,ST\n\t" %}
10093   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10094   ins_encode( strictfp_bias1(dst),
10095               Push_Reg_DPR(src),
10096               OpcP, RegOpc(dst),
10097               strictfp_bias2(dst) );
10098   ins_pipe( fpu_reg_reg );
10099 %}
10100 
10101 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10102   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10103   match(Set dst (RoundDouble (DivD src1 src2)));
10104 
10105   format %{ "FLD    $src1\n\t"
10106             "FDIV   ST,$src2\n\t"
10107             "FSTP_D $dst\t# D-round" %}
10108   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10109   ins_encode( Push_Reg_DPR(src1),
10110               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10111   ins_pipe( fpu_mem_reg_reg );
10112 %}
10113 
10114 
10115 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10116   predicate(UseSSE<=1);
10117   match(Set dst (ModD dst src));
10118   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10119 
10120   format %{ "DMOD   $dst,$src" %}
10121   ins_cost(250);
10122   ins_encode(Push_Reg_Mod_DPR(dst, src),
10123               emitModDPR(),
10124               Push_Result_Mod_DPR(src),
10125               Pop_Reg_DPR(dst));
10126   ins_pipe( pipe_slow );
10127 %}
10128 
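// DMOD/FMOD go through the x87 FPREM instruction.  FPREM produces only a
// partial remainder (it reduces the exponent difference by at most 63 bits
// per pass) and sets C2 in the FPU status word while the reduction is
// incomplete; FNSTSW AX followed by SAHF maps C2 onto PF, which is why the
// "JP loop" below repeats FPREM until the remainder is final.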
10129 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10130   predicate(UseSSE>=2);
10131   match(Set dst (ModD src0 src1));
10132   effect(KILL rax, KILL cr);
10133 
10134   format %{ "SUB    ESP,8\t # DMOD\n"
10135           "\tMOVSD  [ESP+0],$src1\n"
10136           "\tFLD_D  [ESP+0]\n"
10137           "\tMOVSD  [ESP+0],$src0\n"
10138           "\tFLD_D  [ESP+0]\n"
10139      "loop:\tFPREM\n"
10140           "\tFWAIT\n"
10141           "\tFNSTSW AX\n"
10142           "\tSAHF\n"
10143           "\tJP     loop\n"
10144           "\tFSTP_D [ESP+0]\n"
10145           "\tMOVSD  $dst,[ESP+0]\n"
10146           "\tADD    ESP,8\n"
10147           "\tFSTP   ST0\t # Restore FPU Stack"
10148     %}
10149   ins_cost(250);
10150   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10151   ins_pipe( pipe_slow );
10152 %}
10153 
10154 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10155   predicate (UseSSE<=1);
10156   match(Set dst(AtanD dst src));
10157   format %{ "DATA   $dst,$src" %}
10158   opcode(0xD9, 0xF3);
10159   ins_encode( Push_Reg_DPR(src),
10160               OpcP, OpcS, RegOpc(dst) );
10161   ins_pipe( pipe_slow );
10162 %}
10163 
10164 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10165   predicate (UseSSE>=2);
10166   match(Set dst(AtanD dst src));
10167   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10168   format %{ "DATA   $dst,$src" %}
10169   opcode(0xD9, 0xF3);
10170   ins_encode( Push_SrcD(src),
10171               OpcP, OpcS, Push_ResultD(dst) );
10172   ins_pipe( pipe_slow );
10173 %}
10174 
10175 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10176   predicate (UseSSE<=1);
10177   match(Set dst (SqrtD src));
10178   format %{ "DSQRT  $dst,$src" %}
10179   opcode(0xFA, 0xD9);
10180   ins_encode( Push_Reg_DPR(src),
10181               OpcS, OpcP, Pop_Reg_DPR(dst) );
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 //-------------Float Instructions-------------------------------
10186 // Float Math
10187 
10188 // Code for float compare:
10189 //     fcompp();
10190 //     fwait(); fnstsw_ax();
10191 //     sahf();
10192 //     movl(dst, unordered_result);
10193 //     jcc(Assembler::parity, exit);
10194 //     movl(dst, less_result);
10195 //     jcc(Assembler::below, exit);
10196 //     movl(dst, equal_result);
10197 //     jcc(Assembler::equal, exit);
10198 //     movl(dst, greater_result);
10199 //   exit:
10200 
10201 // P6 version of float compare, sets condition codes in EFLAGS
10202 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10203   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10204   match(Set cr (CmpF src1 src2));
10205   effect(KILL rax);
10206   ins_cost(150);
10207   format %{ "FLD    $src1\n\t"
10208             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10209             "JNP    exit\n\t"
10210             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10211             "SAHF\n"
10212      "exit:\tNOP               // avoid branch to branch" %}
10213   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10214   ins_encode( Push_Reg_DPR(src1),
10215               OpcP, RegOpc(src2),
10216               cmpF_P6_fixup );
10217   ins_pipe( pipe_slow );
10218 %}
10219 
10220 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10221   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10222   match(Set cr (CmpF src1 src2));
10223   ins_cost(100);
10224   format %{ "FLD    $src1\n\t"
10225             "FUCOMIP ST,$src2  // P6 instruction" %}
10226   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10227   ins_encode( Push_Reg_DPR(src1),
10228               OpcP, RegOpc(src2));
10229   ins_pipe( pipe_slow );
10230 %}
10231 
10232 
10233 // Compare & branch
10234 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10235   predicate(UseSSE == 0);
10236   match(Set cr (CmpF src1 src2));
10237   effect(KILL rax);
10238   ins_cost(200);
10239   format %{ "FLD    $src1\n\t"
10240             "FCOMp  $src2\n\t"
10241             "FNSTSW AX\n\t"
10242             "TEST   AX,0x400\n\t"
10243             "JZ,s   flags\n\t"
10244             "MOV    AH,1\t# unordered treat as LT\n"
10245     "flags:\tSAHF" %}
10246   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10247   ins_encode( Push_Reg_DPR(src1),
10248               OpcP, RegOpc(src2),
10249               fpu_flags);
10250   ins_pipe( pipe_slow );
10251 %}
10252 
10253 // Compare vs zero into -1,0,1
10254 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10255   predicate(UseSSE == 0);
10256   match(Set dst (CmpF3 src1 zero));
10257   effect(KILL cr, KILL rax);
10258   ins_cost(280);
10259   format %{ "FTSTF  $dst,$src1" %}
10260   opcode(0xE4, 0xD9);
10261   ins_encode( Push_Reg_DPR(src1),
10262               OpcS, OpcP, PopFPU,
10263               CmpF_Result(dst));
10264   ins_pipe( pipe_slow );
10265 %}
10266 
10267 // Compare into -1,0,1
10268 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10269   predicate(UseSSE == 0);
10270   match(Set dst (CmpF3 src1 src2));
10271   effect(KILL cr, KILL rax);
10272   ins_cost(300);
10273   format %{ "FCMPF  $dst,$src1,$src2" %}
10274   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10275   ins_encode( Push_Reg_DPR(src1),
10276               OpcP, RegOpc(src2),
10277               CmpF_Result(dst));
10278   ins_pipe( pipe_slow );
10279 %}
10280 
10281 // float compare and set condition codes in EFLAGS by XMM regs
10282 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10283   predicate(UseSSE>=1);
10284   match(Set cr (CmpF src1 src2));
10285   ins_cost(145);
10286   format %{ "UCOMISS $src1,$src2\n\t"
10287             "JNP,s   exit\n\t"
10288             "PUSHF\t# saw NaN, set CF\n\t"
10289             "AND     [rsp], #0xffffff2b\n\t"
10290             "POPF\n"
10291     "exit:" %}
10292   ins_encode %{
10293     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10294     emit_cmpfp_fixup(_masm);
10295   %}
10296   ins_pipe( pipe_slow );
10297 %}
10298 
10299 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10300   predicate(UseSSE>=1);
10301   match(Set cr (CmpF src1 src2));
10302   ins_cost(100);
10303   format %{ "UCOMISS $src1,$src2" %}
10304   ins_encode %{
10305     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10306   %}
10307   ins_pipe( pipe_slow );
10308 %}
10309 
10310 // float compare and set condition codes in EFLAGS by XMM regs
10311 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10312   predicate(UseSSE>=1);
10313   match(Set cr (CmpF src1 (LoadF src2)));
10314   ins_cost(165);
10315   format %{ "UCOMISS $src1,$src2\n\t"
10316             "JNP,s   exit\n\t"
10317             "PUSHF\t# saw NaN, set CF\n\t"
10318             "AND     [rsp], #0xffffff2b\n\t"
10319             "POPF\n"
10320     "exit:" %}
10321   ins_encode %{
10322     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10323     emit_cmpfp_fixup(_masm);
10324   %}
10325   ins_pipe( pipe_slow );
10326 %}
10327 
10328 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10329   predicate(UseSSE>=1);
10330   match(Set cr (CmpF src1 (LoadF src2)));
10331   ins_cost(100);
10332   format %{ "UCOMISS $src1,$src2" %}
10333   ins_encode %{
10334     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10335   %}
10336   ins_pipe( pipe_slow );
10337 %}
10338 
10339 // Compare into -1,0,1 in XMM
10340 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10341   predicate(UseSSE>=1);
10342   match(Set dst (CmpF3 src1 src2));
10343   effect(KILL cr);
10344   ins_cost(255);
10345   format %{ "UCOMISS $src1, $src2\n\t"
10346             "MOV     $dst, #-1\n\t"
10347             "JP,s    done\n\t"
10348             "JB,s    done\n\t"
10349             "SETNE   $dst\n\t"
10350             "MOVZB   $dst, $dst\n"
10351     "done:" %}
10352   ins_encode %{
10353     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10354     emit_cmpfp3(_masm, $dst$$Register);
10355   %}
10356   ins_pipe( pipe_slow );
10357 %}
10358 
10359 // Compare into -1,0,1 in XMM and memory
10360 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10361   predicate(UseSSE>=1);
10362   match(Set dst (CmpF3 src1 (LoadF src2)));
10363   effect(KILL cr);
10364   ins_cost(275);
10365   format %{ "UCOMISS $src1, $src2\n\t"
10366             "MOV     $dst, #-1\n\t"
10367             "JP,s    done\n\t"
10368             "JB,s    done\n\t"
10369             "SETNE   $dst\n\t"
10370             "MOVZB   $dst, $dst\n"
10371     "done:" %}
10372   ins_encode %{
10373     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10374     emit_cmpfp3(_masm, $dst$$Register);
10375   %}
10376   ins_pipe( pipe_slow );
10377 %}
10378 
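// The x87 stack computes in extended precision, so a Java float result must
// still be rounded to a 24-bit mantissa.  The *FPR24* forms below obtain that
// rounding by spilling the result through a 32-bit stack slot (FSTP_S); they
// are selected when Compile::current()->select_24_bit_instr() asks for
// spill-based rounding rather than relying on the FPU running in 24-bit
// precision mode.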
10379 // Spill to obtain 24-bit precision
10380 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10381   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10382   match(Set dst (SubF src1 src2));
10383 
10384   format %{ "FSUB   $dst,$src1 - $src2" %}
10385   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10386   ins_encode( Push_Reg_FPR(src1),
10387               OpcReg_FPR(src2),
10388               Pop_Mem_FPR(dst) );
10389   ins_pipe( fpu_mem_reg_reg );
10390 %}
10391 //
10392 // This instruction does not round to 24-bits
10393 instruct subFPR_reg(regFPR dst, regFPR src) %{
10394   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10395   match(Set dst (SubF dst src));
10396 
10397   format %{ "FSUB   $dst,$src" %}
10398   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10399   ins_encode( Push_Reg_FPR(src),
10400               OpcP, RegOpc(dst) );
10401   ins_pipe( fpu_reg_reg );
10402 %}
10403 
10404 // Spill to obtain 24-bit precision
10405 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10406   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10407   match(Set dst (AddF src1 src2));
10408 
10409   format %{ "FADD   $dst,$src1,$src2" %}
10410   opcode(0xD8, 0x0); /* D8 C0+i */
10411   ins_encode( Push_Reg_FPR(src2),
10412               OpcReg_FPR(src1),
10413               Pop_Mem_FPR(dst) );
10414   ins_pipe( fpu_mem_reg_reg );
10415 %}
10416 //
10417 // This instruction does not round to 24-bits
10418 instruct addFPR_reg(regFPR dst, regFPR src) %{
10419   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10420   match(Set dst (AddF dst src));
10421 
10422   format %{ "FLD    $src\n\t"
10423             "FADDp  $dst,ST" %}
10424   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10425   ins_encode( Push_Reg_FPR(src),
10426               OpcP, RegOpc(dst) );
10427   ins_pipe( fpu_reg_reg );
10428 %}
10429 
10430 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10431   predicate(UseSSE==0);
10432   match(Set dst (AbsF src));
10433   ins_cost(100);
10434   format %{ "FABS" %}
10435   opcode(0xE1, 0xD9);
10436   ins_encode( OpcS, OpcP );
10437   ins_pipe( fpu_reg_reg );
10438 %}
10439 
10440 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10441   predicate(UseSSE==0);
10442   match(Set dst (NegF src));
10443   ins_cost(100);
10444   format %{ "FCHS" %}
10445   opcode(0xE0, 0xD9);
10446   ins_encode( OpcS, OpcP );
10447   ins_pipe( fpu_reg_reg );
10448 %}
10449 
10450 // Cisc-alternate to addFPR_reg
10451 // Spill to obtain 24-bit precision
10452 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10453   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10454   match(Set dst (AddF src1 (LoadF src2)));
10455 
10456   format %{ "FLD    $src2\n\t"
10457             "FADD   ST,$src1\n\t"
10458             "FSTP_S $dst" %}
10459   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10460   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10461               OpcReg_FPR(src1),
10462               Pop_Mem_FPR(dst) );
10463   ins_pipe( fpu_mem_reg_mem );
10464 %}
10465 //
10466 // Cisc-alternate to addFPR_reg
10467 // This instruction does not round to 24-bits
10468 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10469   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10470   match(Set dst (AddF dst (LoadF src)));
10471 
10472   format %{ "FADD   $dst,$src" %}
10473   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10474   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10475               OpcP, RegOpc(dst) );
10476   ins_pipe( fpu_reg_mem );
10477 %}
10478 
// Following two instructions for _222_mpegaudio
10480 // Spill to obtain 24-bit precision
10481 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10482   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10483   match(Set dst (AddF src1 src2));
10484 
10485   format %{ "FADD   $dst,$src1,$src2" %}
10486   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10487   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10488               OpcReg_FPR(src2),
10489               Pop_Mem_FPR(dst) );
10490   ins_pipe( fpu_mem_reg_mem );
10491 %}
10492 
10493 // Cisc-spill variant
10494 // Spill to obtain 24-bit precision
10495 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10496   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10497   match(Set dst (AddF src1 (LoadF src2)));
10498 
10499   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10500   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10501   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10502               set_instruction_start,
10503               OpcP, RMopc_Mem(secondary,src1),
10504               Pop_Mem_FPR(dst) );
10505   ins_pipe( fpu_mem_mem_mem );
10506 %}
10507 
10508 // Spill to obtain 24-bit precision
10509 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10510   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10511   match(Set dst (AddF src1 src2));
10512 
10513   format %{ "FADD   $dst,$src1,$src2" %}
10514   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10515   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10516               set_instruction_start,
10517               OpcP, RMopc_Mem(secondary,src1),
10518               Pop_Mem_FPR(dst) );
10519   ins_pipe( fpu_mem_mem_mem );
10520 %}
10521 
10522 
10523 // Spill to obtain 24-bit precision
10524 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10525   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10526   match(Set dst (AddF src con));
10527   format %{ "FLD    $src\n\t"
10528             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10529             "FSTP_S $dst"  %}
10530   ins_encode %{
10531     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10532     __ fadd_s($constantaddress($con));
10533     __ fstp_s(Address(rsp, $dst$$disp));
10534   %}
10535   ins_pipe(fpu_mem_reg_con);
10536 %}
10537 //
10538 // This instruction does not round to 24-bits
10539 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10540   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10541   match(Set dst (AddF src con));
10542   format %{ "FLD    $src\n\t"
10543             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10544             "FSTP   $dst"  %}
10545   ins_encode %{
10546     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10547     __ fadd_s($constantaddress($con));
10548     __ fstp_d($dst$$reg);
10549   %}
10550   ins_pipe(fpu_reg_reg_con);
10551 %}
10552 
10553 // Spill to obtain 24-bit precision
10554 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10555   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10556   match(Set dst (MulF src1 src2));
10557 
10558   format %{ "FLD    $src1\n\t"
10559             "FMUL   $src2\n\t"
10560             "FSTP_S $dst"  %}
10561   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10562   ins_encode( Push_Reg_FPR(src1),
10563               OpcReg_FPR(src2),
10564               Pop_Mem_FPR(dst) );
10565   ins_pipe( fpu_mem_reg_reg );
10566 %}
10567 //
10568 // This instruction does not round to 24-bits
10569 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10570   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10571   match(Set dst (MulF src1 src2));
10572 
10573   format %{ "FLD    $src1\n\t"
10574             "FMUL   $src2\n\t"
10575             "FSTP_S $dst"  %}
10576   opcode(0xD8, 0x1); /* D8 C8+i */
10577   ins_encode( Push_Reg_FPR(src2),
10578               OpcReg_FPR(src1),
10579               Pop_Reg_FPR(dst) );
10580   ins_pipe( fpu_reg_reg_reg );
10581 %}
10582 
10583 
10584 // Spill to obtain 24-bit precision
10585 // Cisc-alternate to reg-reg multiply
10586 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10587   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10588   match(Set dst (MulF src1 (LoadF src2)));
10589 
10590   format %{ "FLD_S  $src2\n\t"
10591             "FMUL   $src1\n\t"
10592             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10594   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10595               OpcReg_FPR(src1),
10596               Pop_Mem_FPR(dst) );
10597   ins_pipe( fpu_mem_reg_mem );
10598 %}
10599 //
10600 // This instruction does not round to 24-bits
10601 // Cisc-alternate to reg-reg multiply
10602 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10603   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10604   match(Set dst (MulF src1 (LoadF src2)));
10605 
10606   format %{ "FMUL   $dst,$src1,$src2" %}
10607   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10608   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10609               OpcReg_FPR(src1),
10610               Pop_Reg_FPR(dst) );
10611   ins_pipe( fpu_reg_reg_mem );
10612 %}
10613 
10614 // Spill to obtain 24-bit precision
10615 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10616   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10617   match(Set dst (MulF src1 src2));
10618 
10619   format %{ "FMUL   $dst,$src1,$src2" %}
10620   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10621   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10622               set_instruction_start,
10623               OpcP, RMopc_Mem(secondary,src1),
10624               Pop_Mem_FPR(dst) );
10625   ins_pipe( fpu_mem_mem_mem );
10626 %}
10627 
10628 // Spill to obtain 24-bit precision
10629 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10630   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10631   match(Set dst (MulF src con));
10632 
10633   format %{ "FLD    $src\n\t"
10634             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10635             "FSTP_S $dst"  %}
10636   ins_encode %{
10637     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10638     __ fmul_s($constantaddress($con));
10639     __ fstp_s(Address(rsp, $dst$$disp));
10640   %}
10641   ins_pipe(fpu_mem_reg_con);
10642 %}
10643 //
10644 // This instruction does not round to 24-bits
10645 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10646   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10647   match(Set dst (MulF src con));
10648 
10649   format %{ "FLD    $src\n\t"
10650             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10651             "FSTP   $dst"  %}
10652   ins_encode %{
10653     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10654     __ fmul_s($constantaddress($con));
10655     __ fstp_d($dst$$reg);
10656   %}
10657   ins_pipe(fpu_reg_reg_con);
10658 %}
10659 
10660 
10661 //
10662 // MACRO1 -- subsume unshared load into mulFPR
10663 // This instruction does not round to 24-bits
10664 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10665   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10666   match(Set dst (MulF (LoadF mem1) src));
10667 
10668   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10669             "FMUL   ST,$src\n\t"
10670             "FSTP   $dst" %}
10671   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10672   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10673               OpcReg_FPR(src),
10674               Pop_Reg_FPR(dst) );
10675   ins_pipe( fpu_reg_reg_mem );
10676 %}
10677 //
10678 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10679 // This instruction does not round to 24-bits
10680 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10681   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10682   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10683   ins_cost(95);
10684 
10685   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10686             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10687             "FADD   ST,$src2\n\t"
10688             "FSTP   $dst" %}
10689   opcode(0xD9); /* LoadF D9 /0 */
10690   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10691               FMul_ST_reg(src1),
10692               FAdd_ST_reg(src2),
10693               Pop_Reg_FPR(dst) );
10694   ins_pipe( fpu_reg_mem_reg_reg );
10695 %}
10696 
10697 // MACRO3 -- addFPR a mulFPR
10698 // This instruction does not round to 24-bits.  It is a '2-address'
10699 // instruction in that the result goes back to src2.  This eliminates
10700 // a move from the macro; possibly the register allocator will have
10701 // to add it back (and maybe not).
10702 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10703   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10704   match(Set src2 (AddF (MulF src0 src1) src2));
10705 
10706   format %{ "FLD    $src0     ===MACRO3===\n\t"
10707             "FMUL   ST,$src1\n\t"
10708             "FADDP  $src2,ST" %}
10709   opcode(0xD9); /* LoadF D9 /0 */
10710   ins_encode( Push_Reg_FPR(src0),
10711               FMul_ST_reg(src1),
10712               FAddP_reg_ST(src2) );
10713   ins_pipe( fpu_reg_reg_reg );
10714 %}
10715 
10716 // MACRO4 -- divFPR subFPR
10717 // This instruction does not round to 24-bits
10718 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10719   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10720   match(Set dst (DivF (SubF src2 src1) src3));
10721 
10722   format %{ "FLD    $src2   ===MACRO4===\n\t"
10723             "FSUB   ST,$src1\n\t"
10724             "FDIV   ST,$src3\n\t"
10725             "FSTP  $dst" %}
10726   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10727   ins_encode( Push_Reg_FPR(src2),
10728               subFPR_divFPR_encode(src1,src3),
10729               Pop_Reg_FPR(dst) );
10730   ins_pipe( fpu_reg_reg_reg_reg );
10731 %}
10732 
10733 // Spill to obtain 24-bit precision
10734 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10735   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10736   match(Set dst (DivF src1 src2));
10737 
10738   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10740   ins_encode( Push_Reg_FPR(src1),
10741               OpcReg_FPR(src2),
10742               Pop_Mem_FPR(dst) );
10743   ins_pipe( fpu_mem_reg_reg );
10744 %}
10745 //
10746 // This instruction does not round to 24-bits
10747 instruct divFPR_reg(regFPR dst, regFPR src) %{
10748   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10749   match(Set dst (DivF dst src));
10750 
10751   format %{ "FDIV   $dst,$src" %}
10752   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10753   ins_encode( Push_Reg_FPR(src),
10754               OpcP, RegOpc(dst) );
10755   ins_pipe( fpu_reg_reg );
10756 %}
10757 
10758 
10759 // Spill to obtain 24-bit precision
10760 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10761   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10762   match(Set dst (ModF src1 src2));
10763   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10764 
10765   format %{ "FMOD   $dst,$src1,$src2" %}
10766   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10767               emitModDPR(),
10768               Push_Result_Mod_DPR(src2),
10769               Pop_Mem_FPR(dst));
10770   ins_pipe( pipe_slow );
10771 %}
10772 //
10773 // This instruction does not round to 24-bits
10774 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10775   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10776   match(Set dst (ModF dst src));
10777   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10778 
10779   format %{ "FMOD   $dst,$src" %}
10780   ins_encode(Push_Reg_Mod_DPR(dst, src),
10781               emitModDPR(),
10782               Push_Result_Mod_DPR(src),
10783               Pop_Reg_FPR(dst));
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10788   predicate(UseSSE>=1);
10789   match(Set dst (ModF src0 src1));
10790   effect(KILL rax, KILL cr);
10791   format %{ "SUB    ESP,4\t # FMOD\n"
10792           "\tMOVSS  [ESP+0],$src1\n"
10793           "\tFLD_S  [ESP+0]\n"
10794           "\tMOVSS  [ESP+0],$src0\n"
10795           "\tFLD_S  [ESP+0]\n"
10796      "loop:\tFPREM\n"
10797           "\tFWAIT\n"
10798           "\tFNSTSW AX\n"
10799           "\tSAHF\n"
10800           "\tJP     loop\n"
10801           "\tFSTP_S [ESP+0]\n"
10802           "\tMOVSS  $dst,[ESP+0]\n"
10803           "\tADD    ESP,4\n"
10804           "\tFSTP   ST0\t # Restore FPU Stack"
10805     %}
10806   ins_cost(250);
10807   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10808   ins_pipe( pipe_slow );
10809 %}
10810 
10811 
10812 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10814 
10815 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10816   predicate(UseSSE==0);
10817   match(Set dst (RoundFloat src));
10818   ins_cost(125);
10819   format %{ "FST_S  $dst,$src\t# F-round" %}
10820   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10821   ins_pipe( fpu_mem_reg );
10822 %}
10823 
10824 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10825   predicate(UseSSE<=1);
10826   match(Set dst (RoundDouble src));
10827   ins_cost(125);
10828   format %{ "FST_D  $dst,$src\t# D-round" %}
10829   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10830   ins_pipe( fpu_mem_reg );
10831 %}
10832 
// Force rounding to 24-bit precision and 8-bit exponent
10834 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10835   predicate(UseSSE==0);
10836   match(Set dst (ConvD2F src));
10837   format %{ "FST_S  $dst,$src\t# F-round" %}
10838   expand %{
10839     roundFloat_mem_reg(dst,src);
10840   %}
10841 %}
10842 
// Force rounding to 24-bit precision and 8-bit exponent
10844 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10845   predicate(UseSSE==1);
10846   match(Set dst (ConvD2F src));
10847   effect( KILL cr );
10848   format %{ "SUB    ESP,4\n\t"
10849             "FST_S  [ESP],$src\t# F-round\n\t"
10850             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10852   ins_encode %{
10853     __ subptr(rsp, 4);
10854     if ($src$$reg != FPR1L_enc) {
10855       __ fld_s($src$$reg-1);
10856       __ fstp_s(Address(rsp, 0));
10857     } else {
10858       __ fst_s(Address(rsp, 0));
10859     }
10860     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10861     __ addptr(rsp, 4);
10862   %}
10863   ins_pipe( pipe_slow );
10864 %}
10865 
10866 // Force rounding double precision to single precision
10867 instruct convD2F_reg(regF dst, regD src) %{
10868   predicate(UseSSE>=2);
10869   match(Set dst (ConvD2F src));
10870   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10871   ins_encode %{
10872     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10873   %}
10874   ins_pipe( pipe_slow );
10875 %}
10876 
10877 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10878   predicate(UseSSE==0);
10879   match(Set dst (ConvF2D src));
10880   format %{ "FST_S  $dst,$src\t# D-round" %}
10881   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10882   ins_pipe( fpu_reg_reg );
10883 %}
10884 
10885 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10886   predicate(UseSSE==1);
10887   match(Set dst (ConvF2D src));
10888   format %{ "FST_D  $dst,$src\t# D-round" %}
10889   expand %{
10890     roundDouble_mem_reg(dst,src);
10891   %}
10892 %}
10893 
10894 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10895   predicate(UseSSE==1);
10896   match(Set dst (ConvF2D src));
10897   effect( KILL cr );
10898   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10900             "FLD_S  [ESP]\n\t"
10901             "ADD    ESP,4\n\t"
10902             "FSTP   $dst\t# D-round" %}
10903   ins_encode %{
10904     __ subptr(rsp, 4);
10905     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10906     __ fld_s(Address(rsp, 0));
10907     __ addptr(rsp, 4);
10908     __ fstp_d($dst$$reg);
10909   %}
10910   ins_pipe( pipe_slow );
10911 %}
10912 
10913 instruct convF2D_reg(regD dst, regF src) %{
10914   predicate(UseSSE>=2);
10915   match(Set dst (ConvF2D src));
10916   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10917   ins_encode %{
10918     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10919   %}
10920   ins_pipe( pipe_slow );
10921 %}
10922 
10923 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10924 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10925   predicate(UseSSE<=1);
10926   match(Set dst (ConvD2I src));
10927   effect( KILL tmp, KILL cr );
10928   format %{ "FLD    $src\t# Convert double to int \n\t"
10929             "FLDCW  trunc mode\n\t"
10930             "SUB    ESP,4\n\t"
10931             "FISTp  [ESP + #0]\n\t"
10932             "FLDCW  std/24-bit mode\n\t"
10933             "POP    EAX\n\t"
10934             "CMP    EAX,0x80000000\n\t"
10935             "JNE,s  fast\n\t"
10936             "FLD_D  $src\n\t"
10937             "CALL   d2i_wrapper\n"
10938       "fast:" %}
10939   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10940   ins_pipe( pipe_slow );
10941 %}
10942 
10943 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10944 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10945   predicate(UseSSE>=2);
10946   match(Set dst (ConvD2I src));
10947   effect( KILL tmp, KILL cr );
10948   format %{ "CVTTSD2SI $dst, $src\n\t"
10949             "CMP    $dst,0x80000000\n\t"
10950             "JNE,s  fast\n\t"
10951             "SUB    ESP, 8\n\t"
10952             "MOVSD  [ESP], $src\n\t"
10953             "FLD_D  [ESP]\n\t"
10954             "ADD    ESP, 8\n\t"
10955             "CALL   d2i_wrapper\n"
10956       "fast:" %}
10957   ins_encode %{
10958     Label fast;
10959     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10960     __ cmpl($dst$$Register, 0x80000000);
10961     __ jccb(Assembler::notEqual, fast);
10962     __ subptr(rsp, 8);
10963     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10964     __ fld_d(Address(rsp, 0));
10965     __ addptr(rsp, 8);
10966     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10967     __ bind(fast);
10968   %}
10969   ins_pipe( pipe_slow );
10970 %}
10971 
10972 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10973   predicate(UseSSE<=1);
10974   match(Set dst (ConvD2L src));
10975   effect( KILL cr );
10976   format %{ "FLD    $src\t# Convert double to long\n\t"
10977             "FLDCW  trunc mode\n\t"
10978             "SUB    ESP,8\n\t"
10979             "FISTp  [ESP + #0]\n\t"
10980             "FLDCW  std/24-bit mode\n\t"
10981             "POP    EAX\n\t"
10982             "POP    EDX\n\t"
10983             "CMP    EDX,0x80000000\n\t"
10984             "JNE,s  fast\n\t"
10985             "TEST   EAX,EAX\n\t"
10986             "JNE,s  fast\n\t"
10987             "FLD    $src\n\t"
10988             "CALL   d2l_wrapper\n"
10989       "fast:" %}
10990   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10991   ins_pipe( pipe_slow );
10992 %}
10993 
10994 // XMM lacks a float/double->long conversion, so use the old FPU stack.
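// FISTP stores the 64-bit integer indefinite value (0x8000000000000000) when
// the input is a NaN or out of range, so the code below compares EDX against
// 0x80000000 (and EAX against zero) and calls d2l_wrapper on that pattern to
// apply the Java narrowing rules.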
10995 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10996   predicate (UseSSE>=2);
10997   match(Set dst (ConvD2L src));
10998   effect( KILL cr );
10999   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
11000             "MOVSD  [ESP],$src\n\t"
11001             "FLD_D  [ESP]\n\t"
11002             "FLDCW  trunc mode\n\t"
11003             "FISTp  [ESP + #0]\n\t"
11004             "FLDCW  std/24-bit mode\n\t"
11005             "POP    EAX\n\t"
11006             "POP    EDX\n\t"
11007             "CMP    EDX,0x80000000\n\t"
11008             "JNE,s  fast\n\t"
11009             "TEST   EAX,EAX\n\t"
11010             "JNE,s  fast\n\t"
11011             "SUB    ESP,8\n\t"
11012             "MOVSD  [ESP],$src\n\t"
11013             "FLD_D  [ESP]\n\t"
11014             "ADD    ESP,8\n\t"
11015             "CALL   d2l_wrapper\n"
11016       "fast:" %}
11017   ins_encode %{
11018     Label fast;
11019     __ subptr(rsp, 8);
11020     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11021     __ fld_d(Address(rsp, 0));
11022     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11023     __ fistp_d(Address(rsp, 0));
11024     // Restore the rounding mode, mask the exception
11025     if (Compile::current()->in_24_bit_fp_mode()) {
11026       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11027     } else {
11028       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11029     }
11030     // Load the converted long, adjust CPU stack
11031     __ pop(rax);
11032     __ pop(rdx);
11033     __ cmpl(rdx, 0x80000000);
11034     __ jccb(Assembler::notEqual, fast);
11035     __ testl(rax, rax);
11036     __ jccb(Assembler::notEqual, fast);
11037     __ subptr(rsp, 8);
11038     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11039     __ fld_d(Address(rsp, 0));
11040     __ addptr(rsp, 8);
11041     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11042     __ bind(fast);
11043   %}
11044   ins_pipe( pipe_slow );
11045 %}
11046 
// Convert a float or double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
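// For example, the Java rules the wrapper has to implement are:
//   (int)Float.NaN                == 0
//   (int)Float.MAX_VALUE          == Integer.MAX_VALUE
//   (int)Float.NEGATIVE_INFINITY  == Integer.MIN_VALUE
// whereas FIST produces the integer indefinite 0x80000000 for all of these,
// hence the "CMP EAX,0x80000000" / wrapper call in the encodings below.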
11053 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11054   predicate(UseSSE==0);
11055   match(Set dst (ConvF2I src));
11056   effect( KILL tmp, KILL cr );
11057   format %{ "FLD    $src\t# Convert float to int \n\t"
11058             "FLDCW  trunc mode\n\t"
11059             "SUB    ESP,4\n\t"
11060             "FISTp  [ESP + #0]\n\t"
11061             "FLDCW  std/24-bit mode\n\t"
11062             "POP    EAX\n\t"
11063             "CMP    EAX,0x80000000\n\t"
11064             "JNE,s  fast\n\t"
11065             "FLD    $src\n\t"
11066             "CALL   d2i_wrapper\n"
11067       "fast:" %}
11068   // DPR2I_encoding works for FPR2I
11069   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11070   ins_pipe( pipe_slow );
11071 %}
11072 
11073 // Convert a float in xmm to an int reg.
11074 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11075   predicate(UseSSE>=1);
11076   match(Set dst (ConvF2I src));
11077   effect( KILL tmp, KILL cr );
11078   format %{ "CVTTSS2SI $dst, $src\n\t"
11079             "CMP    $dst,0x80000000\n\t"
11080             "JNE,s  fast\n\t"
11081             "SUB    ESP, 4\n\t"
11082             "MOVSS  [ESP], $src\n\t"
11083             "FLD    [ESP]\n\t"
11084             "ADD    ESP, 4\n\t"
11085             "CALL   d2i_wrapper\n"
11086       "fast:" %}
11087   ins_encode %{
11088     Label fast;
11089     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11090     __ cmpl($dst$$Register, 0x80000000);
11091     __ jccb(Assembler::notEqual, fast);
11092     __ subptr(rsp, 4);
11093     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11094     __ fld_s(Address(rsp, 0));
11095     __ addptr(rsp, 4);
11096     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11097     __ bind(fast);
11098   %}
11099   ins_pipe( pipe_slow );
11100 %}
11101 
11102 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11103   predicate(UseSSE==0);
11104   match(Set dst (ConvF2L src));
11105   effect( KILL cr );
11106   format %{ "FLD    $src\t# Convert float to long\n\t"
11107             "FLDCW  trunc mode\n\t"
11108             "SUB    ESP,8\n\t"
11109             "FISTp  [ESP + #0]\n\t"
11110             "FLDCW  std/24-bit mode\n\t"
11111             "POP    EAX\n\t"
11112             "POP    EDX\n\t"
11113             "CMP    EDX,0x80000000\n\t"
11114             "JNE,s  fast\n\t"
11115             "TEST   EAX,EAX\n\t"
11116             "JNE,s  fast\n\t"
11117             "FLD    $src\n\t"
11118             "CALL   d2l_wrapper\n"
11119       "fast:" %}
11120   // DPR2L_encoding works for FPR2L
11121   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11122   ins_pipe( pipe_slow );
11123 %}
11124 
11125 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11126 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11127   predicate (UseSSE>=1);
11128   match(Set dst (ConvF2L src));
11129   effect( KILL cr );
11130   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11131             "MOVSS  [ESP],$src\n\t"
11132             "FLD_S  [ESP]\n\t"
11133             "FLDCW  trunc mode\n\t"
11134             "FISTp  [ESP + #0]\n\t"
11135             "FLDCW  std/24-bit mode\n\t"
11136             "POP    EAX\n\t"
11137             "POP    EDX\n\t"
11138             "CMP    EDX,0x80000000\n\t"
11139             "JNE,s  fast\n\t"
11140             "TEST   EAX,EAX\n\t"
11141             "JNE,s  fast\n\t"
11142             "SUB    ESP,4\t# Convert float to long\n\t"
11143             "MOVSS  [ESP],$src\n\t"
11144             "FLD_S  [ESP]\n\t"
11145             "ADD    ESP,4\n\t"
11146             "CALL   d2l_wrapper\n"
11147       "fast:" %}
11148   ins_encode %{
11149     Label fast;
11150     __ subptr(rsp, 8);
11151     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11152     __ fld_s(Address(rsp, 0));
11153     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11154     __ fistp_d(Address(rsp, 0));
11155     // Restore the rounding mode, mask the exception
11156     if (Compile::current()->in_24_bit_fp_mode()) {
11157       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11158     } else {
11159       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11160     }
11161     // Load the converted long, adjust CPU stack
11162     __ pop(rax);
11163     __ pop(rdx);
11164     __ cmpl(rdx, 0x80000000);
11165     __ jccb(Assembler::notEqual, fast);
11166     __ testl(rax, rax);
11167     __ jccb(Assembler::notEqual, fast);
11168     __ subptr(rsp, 4);
11169     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11170     __ fld_s(Address(rsp, 0));
11171     __ addptr(rsp, 4);
11172     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11173     __ bind(fast);
11174   %}
11175   ins_pipe( pipe_slow );
11176 %}
11177 
11178 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11179   predicate( UseSSE<=1 );
11180   match(Set dst (ConvI2D src));
11181   format %{ "FILD   $src\n\t"
11182             "FSTP   $dst" %}
11183   opcode(0xDB, 0x0);  /* DB /0 */
11184   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11185   ins_pipe( fpu_reg_mem );
11186 %}
11187 
11188 instruct convI2D_reg(regD dst, rRegI src) %{
11189   predicate( UseSSE>=2 && !UseXmmI2D );
11190   match(Set dst (ConvI2D src));
11191   format %{ "CVTSI2SD $dst,$src" %}
11192   ins_encode %{
11193     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11194   %}
11195   ins_pipe( pipe_slow );
11196 %}
11197 
11198 instruct convI2D_mem(regD dst, memory mem) %{
11199   predicate( UseSSE>=2 );
11200   match(Set dst (ConvI2D (LoadI mem)));
11201   format %{ "CVTSI2SD $dst,$mem" %}
11202   ins_encode %{
11203     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11204   %}
11205   ins_pipe( pipe_slow );
11206 %}
11207 
11208 instruct convXI2D_reg(regD dst, rRegI src)
11209 %{
11210   predicate( UseSSE>=2 && UseXmmI2D );
11211   match(Set dst (ConvI2D src));
11212 
11213   format %{ "MOVD  $dst,$src\n\t"
11214             "CVTDQ2PD $dst,$dst\t# i2d" %}
11215   ins_encode %{
11216     __ movdl($dst$$XMMRegister, $src$$Register);
11217     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11218   %}
11219   ins_pipe(pipe_slow); // XXX
11220 %}
11221 
11222 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11223   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11224   match(Set dst (ConvI2D (LoadI mem)));
11225   format %{ "FILD   $mem\n\t"
11226             "FSTP   $dst" %}
11227   opcode(0xDB);      /* DB /0 */
11228   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11229               Pop_Reg_DPR(dst));
11230   ins_pipe( fpu_reg_mem );
11231 %}
11232 
// Convert a byte to a float; no rounding step is needed because every
// value in [0,255] is exactly representable as a float.
11234 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11235   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11236   match(Set dst (ConvI2F src));
11237   format %{ "FILD   $src\n\t"
11238             "FSTP   $dst" %}
11239 
11240   opcode(0xDB, 0x0);  /* DB /0 */
11241   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11242   ins_pipe( fpu_reg_mem );
11243 %}
11244 
11245 // In 24-bit mode, force exponent rounding by storing back out
11246 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11247   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11248   match(Set dst (ConvI2F src));
11249   ins_cost(200);
11250   format %{ "FILD   $src\n\t"
11251             "FSTP_S $dst" %}
11252   opcode(0xDB, 0x0);  /* DB /0 */
11253   ins_encode( Push_Mem_I(src),
11254               Pop_Mem_FPR(dst));
11255   ins_pipe( fpu_mem_mem );
11256 %}
11257 
11258 // In 24-bit mode, force exponent rounding by storing back out
11259 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11260   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11261   match(Set dst (ConvI2F (LoadI mem)));
11262   ins_cost(200);
11263   format %{ "FILD   $mem\n\t"
11264             "FSTP_S $dst" %}
11265   opcode(0xDB);  /* DB /0 */
11266   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11267               Pop_Mem_FPR(dst));
11268   ins_pipe( fpu_mem_mem );
11269 %}
11270 
11271 // This instruction does not round to 24-bits
11272 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11273   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11274   match(Set dst (ConvI2F src));
11275   format %{ "FILD   $src\n\t"
11276             "FSTP   $dst" %}
11277   opcode(0xDB, 0x0);  /* DB /0 */
11278   ins_encode( Push_Mem_I(src),
11279               Pop_Reg_FPR(dst));
11280   ins_pipe( fpu_reg_mem );
11281 %}
11282 
11283 // This instruction does not round to 24-bits
11284 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11285   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11286   match(Set dst (ConvI2F (LoadI mem)));
11287   format %{ "FILD   $mem\n\t"
11288             "FSTP   $dst" %}
11289   opcode(0xDB);      /* DB /0 */
11290   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11291               Pop_Reg_FPR(dst));
11292   ins_pipe( fpu_reg_mem );
11293 %}
11294 
11295 // Convert an int to a float in xmm; no rounding step needed.
11296 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11298   match(Set dst (ConvI2F src));
11299   format %{ "CVTSI2SS $dst, $src" %}
11300   ins_encode %{
11301     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11302   %}
11303   ins_pipe( pipe_slow );
11304 %}
11305 
instruct convXI2F_reg(regF dst, rRegI src)
11307 %{
11308   predicate( UseSSE>=2 && UseXmmI2F );
11309   match(Set dst (ConvI2F src));
11310 
11311   format %{ "MOVD  $dst,$src\n\t"
11312             "CVTDQ2PS $dst,$dst\t# i2f" %}
11313   ins_encode %{
11314     __ movdl($dst$$XMMRegister, $src$$Register);
11315     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11316   %}
11317   ins_pipe(pipe_slow); // XXX
11318 %}
11319 
11320 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11321   match(Set dst (ConvI2L src));
11322   effect(KILL cr);
11323   ins_cost(375);
11324   format %{ "MOV    $dst.lo,$src\n\t"
11325             "MOV    $dst.hi,$src\n\t"
11326             "SAR    $dst.hi,31" %}
11327   ins_encode(convert_int_long(dst,src));
11328   ins_pipe( ialu_reg_reg_long );
11329 %}
11330 
11331 // Zero-extend convert int to long
11332 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11333   match(Set dst (AndL (ConvI2L src) mask) );
11334   effect( KILL flags );
11335   ins_cost(250);
11336   format %{ "MOV    $dst.lo,$src\n\t"
11337             "XOR    $dst.hi,$dst.hi" %}
11338   opcode(0x33); // XOR
11339   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11340   ins_pipe( ialu_reg_reg_long );
11341 %}
11342 
11343 // Zero-extend long
11344 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11345   match(Set dst (AndL src mask) );
11346   effect( KILL flags );
11347   ins_cost(250);
11348   format %{ "MOV    $dst.lo,$src.lo\n\t"
11349             "XOR    $dst.hi,$dst.hi\n\t" %}
11350   opcode(0x33); // XOR
11351   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11352   ins_pipe( ialu_reg_reg_long );
11353 %}
11354 
11355 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11356   predicate (UseSSE<=1);
11357   match(Set dst (ConvL2D src));
11358   effect( KILL cr );
11359   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11360             "PUSH   $src.lo\n\t"
11361             "FILD   ST,[ESP + #0]\n\t"
11362             "ADD    ESP,8\n\t"
11363             "FSTP_D $dst\t# D-round" %}
11364   opcode(0xDF, 0x5);  /* DF /5 */
11365   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11366   ins_pipe( pipe_slow );
11367 %}
11368 
11369 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11370   predicate (UseSSE>=2);
11371   match(Set dst (ConvL2D src));
11372   effect( KILL cr );
11373   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11374             "PUSH   $src.lo\n\t"
11375             "FILD_D [ESP]\n\t"
11376             "FSTP_D [ESP]\n\t"
11377             "MOVSD  $dst,[ESP]\n\t"
11378             "ADD    ESP,8" %}
11379   opcode(0xDF, 0x5);  /* DF /5 */
11380   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11381   ins_pipe( pipe_slow );
11382 %}
11383 
11384 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11385   predicate (UseSSE>=1);
11386   match(Set dst (ConvL2F src));
11387   effect( KILL cr );
11388   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11389             "PUSH   $src.lo\n\t"
11390             "FILD_D [ESP]\n\t"
11391             "FSTP_S [ESP]\n\t"
11392             "MOVSS  $dst,[ESP]\n\t"
11393             "ADD    ESP,8" %}
11394   opcode(0xDF, 0x5);  /* DF /5 */
11395   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11396   ins_pipe( pipe_slow );
11397 %}
11398 
11399 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11400   match(Set dst (ConvL2F src));
11401   effect( KILL cr );
11402   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11403             "PUSH   $src.lo\n\t"
11404             "FILD   ST,[ESP + #0]\n\t"
11405             "ADD    ESP,8\n\t"
11406             "FSTP_S $dst\t# F-round" %}
11407   opcode(0xDF, 0x5);  /* DF /5 */
11408   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11409   ins_pipe( pipe_slow );
11410 %}
11411 
11412 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11413   match(Set dst (ConvL2I src));
11414   effect( DEF dst, USE src );
11415   format %{ "MOV    $dst,$src.lo" %}
11416   ins_encode(enc_CopyL_Lo(dst,src));
11417   ins_pipe( ialu_reg_reg );
11418 %}
11419 
11420 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11421   match(Set dst (MoveF2I src));
11422   effect( DEF dst, USE src );
11423   ins_cost(100);
11424   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11425   ins_encode %{
11426     __ movl($dst$$Register, Address(rsp, $src$$disp));
11427   %}
11428   ins_pipe( ialu_reg_mem );
11429 %}
11430 
11431 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11432   predicate(UseSSE==0);
11433   match(Set dst (MoveF2I src));
11434   effect( DEF dst, USE src );
11435 
11436   ins_cost(125);
11437   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11438   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11439   ins_pipe( fpu_mem_reg );
11440 %}
11441 
11442 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11443   predicate(UseSSE>=1);
11444   match(Set dst (MoveF2I src));
11445   effect( DEF dst, USE src );
11446 
11447   ins_cost(95);
11448   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11449   ins_encode %{
11450     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11451   %}
11452   ins_pipe( pipe_slow );
11453 %}
11454 
11455 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11456   predicate(UseSSE>=2);
11457   match(Set dst (MoveF2I src));
11458   effect( DEF dst, USE src );
11459   ins_cost(85);
11460   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11461   ins_encode %{
11462     __ movdl($dst$$Register, $src$$XMMRegister);
11463   %}
11464   ins_pipe( pipe_slow );
11465 %}
11466 
11467 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11468   match(Set dst (MoveI2F src));
11469   effect( DEF dst, USE src );
11470 
11471   ins_cost(100);
11472   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11473   ins_encode %{
11474     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11475   %}
11476   ins_pipe( ialu_mem_reg );
11477 %}
11478 
11479 
11480 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11481   predicate(UseSSE==0);
11482   match(Set dst (MoveI2F src));
11483   effect(DEF dst, USE src);
11484 
11485   ins_cost(125);
11486   format %{ "FLD_S  $src\n\t"
11487             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11488   opcode(0xD9);               /* D9 /0, FLD m32real */
11489   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11490               Pop_Reg_FPR(dst) );
11491   ins_pipe( fpu_reg_mem );
11492 %}
11493 
11494 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11495   predicate(UseSSE>=1);
11496   match(Set dst (MoveI2F src));
11497   effect( DEF dst, USE src );
11498 
11499   ins_cost(95);
11500   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11501   ins_encode %{
11502     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11503   %}
11504   ins_pipe( pipe_slow );
11505 %}
11506 
11507 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11508   predicate(UseSSE>=2);
11509   match(Set dst (MoveI2F src));
11510   effect( DEF dst, USE src );
11511 
11512   ins_cost(85);
11513   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11514   ins_encode %{
11515     __ movdl($dst$$XMMRegister, $src$$Register);
11516   %}
11517   ins_pipe( pipe_slow );
11518 %}
11519 
11520 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11521   match(Set dst (MoveD2L src));
11522   effect(DEF dst, USE src);
11523 
11524   ins_cost(250);
11525   format %{ "MOV    $dst.lo,$src\n\t"
11526             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11527   opcode(0x8B, 0x8B);
11528   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11529   ins_pipe( ialu_mem_long_reg );
11530 %}
11531 
11532 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11533   predicate(UseSSE<=1);
11534   match(Set dst (MoveD2L src));
11535   effect(DEF dst, USE src);
11536 
11537   ins_cost(125);
11538   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11539   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11540   ins_pipe( fpu_mem_reg );
11541 %}
11542 
11543 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11544   predicate(UseSSE>=2);
11545   match(Set dst (MoveD2L src));
11546   effect(DEF dst, USE src);
11547   ins_cost(95);
11548   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11549   ins_encode %{
11550     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11551   %}
11552   ins_pipe( pipe_slow );
11553 %}
11554 
11555 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11556   predicate(UseSSE>=2);
11557   match(Set dst (MoveD2L src));
11558   effect(DEF dst, USE src, TEMP tmp);
11559   ins_cost(85);
11560   format %{ "MOVD   $dst.lo,$src\n\t"
11561             "PSHUFLW $tmp,$src,0x4E\n\t"
11562             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11563   ins_encode %{
11564     __ movdl($dst$$Register, $src$$XMMRegister);
11565     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11566     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11567   %}
11568   ins_pipe( pipe_slow );
11569 %}
11570 
11571 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11572   match(Set dst (MoveL2D src));
11573   effect(DEF dst, USE src);
11574 
11575   ins_cost(200);
11576   format %{ "MOV    $dst,$src.lo\n\t"
11577             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11578   opcode(0x89, 0x89);
11579   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11580   ins_pipe( ialu_mem_long_reg );
11581 %}
11582 
11583 
11584 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11585   predicate(UseSSE<=1);
11586   match(Set dst (MoveL2D src));
11587   effect(DEF dst, USE src);
11588   ins_cost(125);
11589 
11590   format %{ "FLD_D  $src\n\t"
11591             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11592   opcode(0xDD);               /* DD /0, FLD m64real */
11593   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11594               Pop_Reg_DPR(dst) );
11595   ins_pipe( fpu_reg_mem );
11596 %}
11597 
11598 
11599 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11600   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11601   match(Set dst (MoveL2D src));
11602   effect(DEF dst, USE src);
11603 
11604   ins_cost(95);
11605   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11606   ins_encode %{
11607     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11608   %}
11609   ins_pipe( pipe_slow );
11610 %}
11611 
11612 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11613   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11614   match(Set dst (MoveL2D src));
11615   effect(DEF dst, USE src);
11616 
11617   ins_cost(95);
11618   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11619   ins_encode %{
11620     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11621   %}
11622   ins_pipe( pipe_slow );
11623 %}
11624 
11625 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11626   predicate(UseSSE>=2);
11627   match(Set dst (MoveL2D src));
11628   effect(TEMP dst, USE src, TEMP tmp);
11629   ins_cost(85);
11630   format %{ "MOVD   $dst,$src.lo\n\t"
11631             "MOVD   $tmp,$src.hi\n\t"
11632             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11633   ins_encode %{
11634     __ movdl($dst$$XMMRegister, $src$$Register);
11635     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11636     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 
11642 // =======================================================================
11643 // fast clearing of an array
11644 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11645   predicate(!((ClearArrayNode*)n)->is_large());
11646   match(Set dummy (ClearArray cnt base));
11647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11648 
11649   format %{ $$template
11650     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11651     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11652     $$emit$$"JG     LARGE\n\t"
11653     $$emit$$"SHL    ECX, 1\n\t"
11654     $$emit$$"DEC    ECX\n\t"
11655     $$emit$$"JS     DONE\t# Zero length\n\t"
11656     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11657     $$emit$$"DEC    ECX\n\t"
11658     $$emit$$"JGE    LOOP\n\t"
11659     $$emit$$"JMP    DONE\n\t"
11660     $$emit$$"# LARGE:\n\t"
11661     if (UseFastStosb) {
11662        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11663        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11664     } else if (UseXMMForObjInit) {
11665        $$emit$$"MOV     RDI,RAX\n\t"
11666        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11667        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11668        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11669        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11670        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11671        $$emit$$"ADD     0x40,RAX\n\t"
11672        $$emit$$"# L_zero_64_bytes:\n\t"
11673        $$emit$$"SUB     0x8,RCX\n\t"
11674        $$emit$$"JGE     L_loop\n\t"
11675        $$emit$$"ADD     0x4,RCX\n\t"
11676        $$emit$$"JL      L_tail\n\t"
11677        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11678        $$emit$$"ADD     0x20,RAX\n\t"
11679        $$emit$$"SUB     0x4,RCX\n\t"
11680        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11681        $$emit$$"ADD     0x4,RCX\n\t"
11682        $$emit$$"JLE     L_end\n\t"
11683        $$emit$$"DEC     RCX\n\t"
11684        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11685        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11686        $$emit$$"ADD     0x8,RAX\n\t"
11687        $$emit$$"DEC     RCX\n\t"
11688        $$emit$$"JGE     L_sloop\n\t"
11689        $$emit$$"# L_end:\n\t"
11690     } else {
11691        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11692        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11693     }
11694     $$emit$$"# DONE"
11695   %}
11696   ins_encode %{
11697     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11698                  $tmp$$XMMRegister, false);
11699   %}
11700   ins_pipe( pipe_slow );
11701 %}
11702 
11703 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11704   predicate(((ClearArrayNode*)n)->is_large());
11705   match(Set dummy (ClearArray cnt base));
11706   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11707   format %{ $$template
11708     if (UseFastStosb) {
11709        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11710        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11711        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11712     } else if (UseXMMForObjInit) {
11713        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11714        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11715        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11716        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11717        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11718        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11719        $$emit$$"ADD     0x40,RAX\n\t"
11720        $$emit$$"# L_zero_64_bytes:\n\t"
11721        $$emit$$"SUB     0x8,RCX\n\t"
11722        $$emit$$"JGE     L_loop\n\t"
11723        $$emit$$"ADD     0x4,RCX\n\t"
11724        $$emit$$"JL      L_tail\n\t"
11725        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11726        $$emit$$"ADD     0x20,RAX\n\t"
11727        $$emit$$"SUB     0x4,RCX\n\t"
11728        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11729        $$emit$$"ADD     0x4,RCX\n\t"
11730        $$emit$$"JLE     L_end\n\t"
11731        $$emit$$"DEC     RCX\n\t"
11732        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11733        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11734        $$emit$$"ADD     0x8,RAX\n\t"
11735        $$emit$$"DEC     RCX\n\t"
11736        $$emit$$"JGE     L_sloop\n\t"
11737        $$emit$$"# L_end:\n\t"
11738     } else {
11739        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11740        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11741        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11742     }
11743     $$emit$$"# DONE"
11744   %}
11745   ins_encode %{
11746     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11747                  $tmp$$XMMRegister, true);
11748   %}
11749   ins_pipe( pipe_slow );
11750 %}
11751 
11752 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11753                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11754   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11755   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11756   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11757 
11758   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11759   ins_encode %{
11760     __ string_compare($str1$$Register, $str2$$Register,
11761                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11762                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11763   %}
11764   ins_pipe( pipe_slow );
11765 %}
11766 
11767 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11768                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11769   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11770   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11771   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11772 
11773   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11774   ins_encode %{
11775     __ string_compare($str1$$Register, $str2$$Register,
11776                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11777                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11778   %}
11779   ins_pipe( pipe_slow );
11780 %}
11781 
11782 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11783                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11784   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11785   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11786   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11787 
11788   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11789   ins_encode %{
11790     __ string_compare($str1$$Register, $str2$$Register,
11791                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11792                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11793   %}
11794   ins_pipe( pipe_slow );
11795 %}
11796 
11797 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11798                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11799   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11800   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11801   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11802 
11803   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11804   ins_encode %{
11805     __ string_compare($str2$$Register, $str1$$Register,
11806                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11807                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11808   %}
11809   ins_pipe( pipe_slow );
11810 %}
11811 
11812 // fast string equals
11813 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11814                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11815   match(Set result (StrEquals (Binary str1 str2) cnt));
11816   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11817 
11818   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11819   ins_encode %{
11820     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11821                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11822                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11823   %}
11824 
11825   ins_pipe( pipe_slow );
11826 %}
11827 
11828 // fast search of substring with known size.
11829 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11830                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11831   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11832   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11833   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11834 
11835   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11836   ins_encode %{
11837     int icnt2 = (int)$int_cnt2$$constant;
11838     if (icnt2 >= 16) {
11839       // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
11841       __ string_indexofC8($str1$$Register, $str2$$Register,
11842                           $cnt1$$Register, $cnt2$$Register,
11843                           icnt2, $result$$Register,
11844                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11845     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11847       __ string_indexof($str1$$Register, $str2$$Register,
11848                         $cnt1$$Register, $cnt2$$Register,
11849                         icnt2, $result$$Register,
11850                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11851     }
11852   %}
11853   ins_pipe( pipe_slow );
11854 %}
11855 
11856 // fast search of substring with known size.
11857 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11858                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11859   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11860   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11861   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11862 
11863   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11864   ins_encode %{
11865     int icnt2 = (int)$int_cnt2$$constant;
11866     if (icnt2 >= 8) {
11867       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11869       __ string_indexofC8($str1$$Register, $str2$$Register,
11870                           $cnt1$$Register, $cnt2$$Register,
11871                           icnt2, $result$$Register,
11872                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11873     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11875       __ string_indexof($str1$$Register, $str2$$Register,
11876                         $cnt1$$Register, $cnt2$$Register,
11877                         icnt2, $result$$Register,
11878                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11879     }
11880   %}
11881   ins_pipe( pipe_slow );
11882 %}
11883 
11884 // fast search of substring with known size.
11885 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11886                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11887   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11888   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11889   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11890 
11891   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11892   ins_encode %{
11893     int icnt2 = (int)$int_cnt2$$constant;
11894     if (icnt2 >= 8) {
11895       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11897       __ string_indexofC8($str1$$Register, $str2$$Register,
11898                           $cnt1$$Register, $cnt2$$Register,
11899                           icnt2, $result$$Register,
11900                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11901     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11903       __ string_indexof($str1$$Register, $str2$$Register,
11904                         $cnt1$$Register, $cnt2$$Register,
11905                         icnt2, $result$$Register,
11906                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11907     }
11908   %}
11909   ins_pipe( pipe_slow );
11910 %}
11911 
11912 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11913                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11914   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11915   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11916   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11917 
11918   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11919   ins_encode %{
11920     __ string_indexof($str1$$Register, $str2$$Register,
11921                       $cnt1$$Register, $cnt2$$Register,
11922                       (-1), $result$$Register,
11923                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11924   %}
11925   ins_pipe( pipe_slow );
11926 %}
11927 
11928 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11929                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11930   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11931   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11932   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11933 
11934   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11935   ins_encode %{
11936     __ string_indexof($str1$$Register, $str2$$Register,
11937                       $cnt1$$Register, $cnt2$$Register,
11938                       (-1), $result$$Register,
11939                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11940   %}
11941   ins_pipe( pipe_slow );
11942 %}
11943 
11944 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11945                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11946   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11947   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11948   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11949 
11950   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11951   ins_encode %{
11952     __ string_indexof($str1$$Register, $str2$$Register,
11953                       $cnt1$$Register, $cnt2$$Register,
11954                       (-1), $result$$Register,
11955                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11956   %}
11957   ins_pipe( pipe_slow );
11958 %}
11959 
11960 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11961                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11962   predicate(UseSSE42Intrinsics);
11963   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11964   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11965   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11966   ins_encode %{
11967     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11968                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11969   %}
11970   ins_pipe( pipe_slow );
11971 %}
11972 
11973 // fast array equals
11974 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11975                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11976 %{
11977   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11978   match(Set result (AryEq ary1 ary2));
11979   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11980   //ins_cost(300);
11981 
11982   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11983   ins_encode %{
11984     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11985                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11986                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11987   %}
11988   ins_pipe( pipe_slow );
11989 %}
11990 
11991 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11992                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11993 %{
11994   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11995   match(Set result (AryEq ary1 ary2));
11996   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11997   //ins_cost(300);
11998 
11999   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12000   ins_encode %{
12001     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12002                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12003                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
12004   %}
12005   ins_pipe( pipe_slow );
12006 %}
12007 
12008 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12009                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12010 %{
12011   match(Set result (HasNegatives ary1 len));
12012   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12013 
12014   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12015   ins_encode %{
12016     __ has_negatives($ary1$$Register, $len$$Register,
12017                      $result$$Register, $tmp3$$Register,
12018                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12019   %}
12020   ins_pipe( pipe_slow );
12021 %}
12022 
12023 // fast char[] to byte[] compression
12024 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12025                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12026   match(Set result (StrCompressedCopy src (Binary dst len)));
12027   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12028 
12029   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12030   ins_encode %{
12031     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12032                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12033                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12034   %}
12035   ins_pipe( pipe_slow );
12036 %}
12037 
12038 // fast byte[] to char[] inflation
12039 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12040                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12041   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12042   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12043 
12044   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12045   ins_encode %{
12046     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12047                           $tmp1$$XMMRegister, $tmp2$$Register);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
12052 // encode char[] to byte[] in ISO_8859_1
12053 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12054                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12055                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12056   match(Set result (EncodeISOArray src (Binary dst len)));
12057   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12058 
12059   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12060   ins_encode %{
12061     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12062                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12063                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12064   %}
12065   ins_pipe( pipe_slow );
12066 %}
12067 
12068 
12069 //----------Control Flow Instructions------------------------------------------
12070 // Signed compare Instructions
12071 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12072   match(Set cr (CmpI op1 op2));
12073   effect( DEF cr, USE op1, USE op2 );
12074   format %{ "CMP    $op1,$op2" %}
12075   opcode(0x3B);  /* Opcode 3B /r */
12076   ins_encode( OpcP, RegReg( op1, op2) );
12077   ins_pipe( ialu_cr_reg_reg );
12078 %}
12079 
12080 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12081   match(Set cr (CmpI op1 op2));
12082   effect( DEF cr, USE op1 );
12083   format %{ "CMP    $op1,$op2" %}
12084   opcode(0x81,0x07);  /* Opcode 81 /7 */
12085   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12086   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12087   ins_pipe( ialu_cr_reg_imm );
12088 %}
12089 
12090 // Cisc-spilled version of cmpI_eReg
12091 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12092   match(Set cr (CmpI op1 (LoadI op2)));
12093 
12094   format %{ "CMP    $op1,$op2" %}
12095   ins_cost(500);
12096   opcode(0x3B);  /* Opcode 3B /r */
12097   ins_encode( OpcP, RegMem( op1, op2) );
12098   ins_pipe( ialu_cr_reg_mem );
12099 %}
12100 
12101 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12102   match(Set cr (CmpI src zero));
12103   effect( DEF cr, USE src );
12104 
12105   format %{ "TEST   $src,$src" %}
12106   opcode(0x85);
12107   ins_encode( OpcP, RegReg( src, src ) );
12108   ins_pipe( ialu_cr_reg_imm );
12109 %}
12110 
12111 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12112   match(Set cr (CmpI (AndI src con) zero));
12113 
12114   format %{ "TEST   $src,$con" %}
12115   opcode(0xF7,0x00);
12116   ins_encode( OpcP, RegOpc(src), Con32(con) );
12117   ins_pipe( ialu_cr_reg_imm );
12118 %}
12119 
12120 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12121   match(Set cr (CmpI (AndI src mem) zero));
12122 
12123   format %{ "TEST   $src,$mem" %}
12124   opcode(0x85);
12125   ins_encode( OpcP, RegMem( src, mem ) );
12126   ins_pipe( ialu_cr_reg_mem );
12127 %}
12128 
12129 // Unsigned compare Instructions; really, same as signed except they
12130 // produce an eFlagsRegU instead of eFlagsReg.
12131 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12132   match(Set cr (CmpU op1 op2));
12133 
12134   format %{ "CMPu   $op1,$op2" %}
12135   opcode(0x3B);  /* Opcode 3B /r */
12136   ins_encode( OpcP, RegReg( op1, op2) );
12137   ins_pipe( ialu_cr_reg_reg );
12138 %}
12139 
12140 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12141   match(Set cr (CmpU op1 op2));
12142 
12143   format %{ "CMPu   $op1,$op2" %}
12144   opcode(0x81,0x07);  /* Opcode 81 /7 */
12145   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12146   ins_pipe( ialu_cr_reg_imm );
12147 %}
12148 
12149 // // Cisc-spilled version of cmpU_eReg
12150 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12151   match(Set cr (CmpU op1 (LoadI op2)));
12152 
12153   format %{ "CMPu   $op1,$op2" %}
12154   ins_cost(500);
12155   opcode(0x3B);  /* Opcode 3B /r */
12156   ins_encode( OpcP, RegMem( op1, op2) );
12157   ins_pipe( ialu_cr_reg_mem );
12158 %}
12159 
12160 // // Cisc-spilled version of cmpU_eReg
12161 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12162 //  match(Set cr (CmpU (LoadI op1) op2));
12163 //
12164 //  format %{ "CMPu   $op1,$op2" %}
12165 //  ins_cost(500);
12166 //  opcode(0x39);  /* Opcode 39 /r */
12167 //  ins_encode( OpcP, RegMem( op1, op2) );
12168 //%}
12169 
12170 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12171   match(Set cr (CmpU src zero));
12172 
12173   format %{ "TESTu  $src,$src" %}
12174   opcode(0x85);
12175   ins_encode( OpcP, RegReg( src, src ) );
12176   ins_pipe( ialu_cr_reg_imm );
12177 %}
12178 
12179 // Unsigned pointer compare Instructions
12180 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12181   match(Set cr (CmpP op1 op2));
12182 
12183   format %{ "CMPu   $op1,$op2" %}
12184   opcode(0x3B);  /* Opcode 3B /r */
12185   ins_encode( OpcP, RegReg( op1, op2) );
12186   ins_pipe( ialu_cr_reg_reg );
12187 %}
12188 
12189 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12190   match(Set cr (CmpP op1 op2));
12191 
12192   format %{ "CMPu   $op1,$op2" %}
12193   opcode(0x81,0x07);  /* Opcode 81 /7 */
12194   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12195   ins_pipe( ialu_cr_reg_imm );
12196 %}
12197 
12198 // // Cisc-spilled version of cmpP_eReg
12199 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12200   match(Set cr (CmpP op1 (LoadP op2)));
12201 
12202   format %{ "CMPu   $op1,$op2" %}
12203   ins_cost(500);
12204   opcode(0x3B);  /* Opcode 3B /r */
12205   ins_encode( OpcP, RegMem( op1, op2) );
12206   ins_pipe( ialu_cr_reg_mem );
12207 %}
12208 
12209 // // Cisc-spilled version of cmpP_eReg
12210 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12211 //  match(Set cr (CmpP (LoadP op1) op2));
12212 //
12213 //  format %{ "CMPu   $op1,$op2" %}
12214 //  ins_cost(500);
12215 //  opcode(0x39);  /* Opcode 39 /r */
12216 //  ins_encode( OpcP, RegMem( op1, op2) );
12217 //%}
12218 
12219 // Compare raw pointer (used in out-of-heap check).
12220 // Only works because non-oop pointers must be raw pointers
12221 // and raw pointers have no anti-dependencies.
12222 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12223   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12224   match(Set cr (CmpP op1 (LoadP op2)));
12225 
12226   format %{ "CMPu   $op1,$op2" %}
12227   opcode(0x3B);  /* Opcode 3B /r */
12228   ins_encode( OpcP, RegMem( op1, op2) );
12229   ins_pipe( ialu_cr_reg_mem );
12230 %}
12231 
12232 //
12233 // This will generate a signed flags result. This should be ok
12234 // since any compare to a zero should be eq/neq.
12235 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12236   match(Set cr (CmpP src zero));
12237 
12238   format %{ "TEST   $src,$src" %}
12239   opcode(0x85);
12240   ins_encode( OpcP, RegReg( src, src ) );
12241   ins_pipe( ialu_cr_reg_imm );
12242 %}
12243 
12244 // Cisc-spilled version of testP_reg
12245 // This will generate a signed flags result. This should be ok
12246 // since any compare to a zero should be eq/neq.
12247 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12248   match(Set cr (CmpP (LoadP op) zero));
12249 
12250   format %{ "TEST   $op,0xFFFFFFFF" %}
12251   ins_cost(500);
12252   opcode(0xF7);               /* Opcode F7 /0 */
12253   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12254   ins_pipe( ialu_cr_reg_imm );
12255 %}
12256 
12257 // Yanked all unsigned pointer compare operations.
12258 // Pointer compares are done with CmpP which is already unsigned.
12259 
12260 //----------Max and Min--------------------------------------------------------
12261 // Min Instructions
12262 ////
12263 //   *** Min and Max using the conditional move are slower than the
12264 //   *** branch version on a Pentium III.
12265 // // Conditional move for min
12266 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12267 //  effect( USE_DEF op2, USE op1, USE cr );
12268 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12269 //  opcode(0x4C,0x0F);
12270 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12271 //  ins_pipe( pipe_cmov_reg );
12272 //%}
12273 //
12274 //// Min Register with Register (P6 version)
12275 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12276 //  predicate(VM_Version::supports_cmov() );
12277 //  match(Set op2 (MinI op1 op2));
12278 //  ins_cost(200);
12279 //  expand %{
12280 //    eFlagsReg cr;
12281 //    compI_eReg(cr,op1,op2);
12282 //    cmovI_reg_lt(op2,op1,cr);
12283 //  %}
12284 //%}
12285 
12286 // Min Register with Register (generic version)
12287 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12288   match(Set dst (MinI dst src));
12289   effect(KILL flags);
12290   ins_cost(300);
12291 
12292   format %{ "MIN    $dst,$src" %}
12293   opcode(0xCC);
12294   ins_encode( min_enc(dst,src) );
12295   ins_pipe( pipe_slow );
12296 %}
12297 
12298 // Max Register with Register
12299 //   *** Min and Max using the conditional move are slower than the
12300 //   *** branch version on a Pentium III.
12301 // // Conditional move for max
12302 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12303 //  effect( USE_DEF op2, USE op1, USE cr );
12304 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12305 //  opcode(0x4F,0x0F);
12306 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12307 //  ins_pipe( pipe_cmov_reg );
12308 //%}
12309 //
12310 // // Max Register with Register (P6 version)
12311 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12312 //  predicate(VM_Version::supports_cmov() );
12313 //  match(Set op2 (MaxI op1 op2));
12314 //  ins_cost(200);
12315 //  expand %{
12316 //    eFlagsReg cr;
12317 //    compI_eReg(cr,op1,op2);
12318 //    cmovI_reg_gt(op2,op1,cr);
12319 //  %}
12320 //%}
12321 
12322 // Max Register with Register (generic version)
12323 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12324   match(Set dst (MaxI dst src));
12325   effect(KILL flags);
12326   ins_cost(300);
12327 
12328   format %{ "MAX    $dst,$src" %}
12329   opcode(0xCC);
12330   ins_encode( max_enc(dst,src) );
12331   ins_pipe( pipe_slow );
12332 %}
12333 
12334 // ============================================================================
12335 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
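// Worked example for a positive stride, using the formula encoded below:
// init = 5, limit = 20, stride = 4 gives (20 - 5 + 4 - 1) / 4 = 4 trips,
// so the exact final iterator value is 5 + 4 * 4 = 21.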
12338 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12339   match(Set limit (LoopLimit (Binary init limit) stride));
12340   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12341   ins_cost(300);
12342 
12343   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12344   ins_encode %{
12345     int strd = (int)$stride$$constant;
12346     assert(strd != 1 && strd != -1, "sanity");
12347     int m1 = (strd > 0) ? 1 : -1;
    // Sign-extend limit to a long in EDX:EAX
    __ cdql();
    // Sign-extend init to a long (tmp holds the high word, init the low)
12351     __ movl($tmp$$Register, $init$$Register);
12352     __ sarl($tmp$$Register, 31);
12353     // $limit - $init
12354     __ subl($limit$$Register, $init$$Register);
12355     __ sbbl($limit_hi$$Register, $tmp$$Register);
12356     // + ($stride - 1)
12357     if (strd > 0) {
12358       __ addl($limit$$Register, (strd - 1));
12359       __ adcl($limit_hi$$Register, 0);
12360       __ movl($tmp$$Register, strd);
12361     } else {
12362       __ addl($limit$$Register, (strd + 1));
12363       __ adcl($limit_hi$$Register, -1);
12364       __ lneg($limit_hi$$Register, $limit$$Register);
12365       __ movl($tmp$$Register, -strd);
12366     }
    // signed division: (EDX:EAX) / pos_stride
12368     __ idivl($tmp$$Register);
12369     if (strd < 0) {
12370       // restore sign
12371       __ negl($tmp$$Register);
12372     }
12373     // (EAX) * stride
12374     __ mull($tmp$$Register);
12375     // + init (ignore upper bits)
12376     __ addl($limit$$Register, $init$$Register);
12377   %}
12378   ins_pipe( pipe_slow );
12379 %}
12380 
12381 // ============================================================================
12382 // Branch Instructions
12383 // Jump Table
12384 instruct jumpXtnd(rRegI switch_val) %{
12385   match(Jump switch_val);
12386   ins_cost(350);
12387   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12388   ins_encode %{
12389     // Jump to Address(table_base + switch_reg)
12390     Address index(noreg, $switch_val$$Register, Address::times_1);
12391     __ jump(ArrayAddress($constantaddress, index));
12392   %}
12393   ins_pipe(pipe_jmp);
12394 %}
12395 
12396 // Jump Direct - Label defines a relative address from JMP+1
12397 instruct jmpDir(label labl) %{
12398   match(Goto);
12399   effect(USE labl);
12400 
12401   ins_cost(300);
12402   format %{ "JMP    $labl" %}
12403   size(5);
12404   ins_encode %{
12405     Label* L = $labl$$label;
12406     __ jmp(*L, false); // Always long jump
12407   %}
12408   ins_pipe( pipe_jmp );
12409 %}
12410 
12411 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12412 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12413   match(If cop cr);
12414   effect(USE labl);
12415 
12416   ins_cost(300);
12417   format %{ "J$cop    $labl" %}
12418   size(6);
12419   ins_encode %{
12420     Label* L = $labl$$label;
12421     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12422   %}
12423   ins_pipe( pipe_jcc );
12424 %}
12425 
12426 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12427 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12428   predicate(!n->has_vector_mask_set());
12429   match(CountedLoopEnd cop cr);
12430   effect(USE labl);
12431 
12432   ins_cost(300);
12433   format %{ "J$cop    $labl\t# Loop end" %}
12434   size(6);
12435   ins_encode %{
12436     Label* L = $labl$$label;
12437     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12438   %}
12439   ins_pipe( pipe_jcc );
12440 %}
12441 
12442 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12443 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12444   predicate(!n->has_vector_mask_set());
12445   match(CountedLoopEnd cop cmp);
12446   effect(USE labl);
12447 
12448   ins_cost(300);
12449   format %{ "J$cop,u  $labl\t# Loop end" %}
12450   size(6);
12451   ins_encode %{
12452     Label* L = $labl$$label;
12453     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12454   %}
12455   ins_pipe( pipe_jcc );
12456 %}
12457 
12458 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12459   predicate(!n->has_vector_mask_set());
12460   match(CountedLoopEnd cop cmp);
12461   effect(USE labl);
12462 
12463   ins_cost(200);
12464   format %{ "J$cop,u  $labl\t# Loop end" %}
12465   size(6);
12466   ins_encode %{
12467     Label* L = $labl$$label;
12468     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12469   %}
12470   ins_pipe( pipe_jcc );
12471 %}
12472 
12473 // mask version
12474 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12475 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12476   predicate(n->has_vector_mask_set());
12477   match(CountedLoopEnd cop cr);
12478   effect(USE labl);
12479 
12480   ins_cost(400);
12481   format %{ "J$cop    $labl\t# Loop end\n\t"
12482             "restorevectmask \t# vector mask restore for loops" %}
12483   size(10);
12484   ins_encode %{
12485     Label* L = $labl$$label;
12486     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12487     __ restorevectmask();
12488   %}
12489   ins_pipe( pipe_jcc );
12490 %}
12491 
12492 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12493 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12494   predicate(n->has_vector_mask_set());
12495   match(CountedLoopEnd cop cmp);
12496   effect(USE labl);
12497 
12498   ins_cost(400);
12499   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12500             "restorevectmask \t# vector mask restore for loops" %}
12501   size(10);
12502   ins_encode %{
12503     Label* L = $labl$$label;
12504     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12505     __ restorevectmask();
12506   %}
12507   ins_pipe( pipe_jcc );
12508 %}
12509 
12510 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12511   predicate(n->has_vector_mask_set());
12512   match(CountedLoopEnd cop cmp);
12513   effect(USE labl);
12514 
12515   ins_cost(300);
12516   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12517             "restorevectmask \t# vector mask restore for loops" %}
12518   size(10);
12519   ins_encode %{
12520     Label* L = $labl$$label;
12521     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12522     __ restorevectmask();
12523   %}
12524   ins_pipe( pipe_jcc );
12525 %}
12526 
12527 // Jump Direct Conditional - using unsigned comparison
12528 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12529   match(If cop cmp);
12530   effect(USE labl);
12531 
12532   ins_cost(300);
12533   format %{ "J$cop,u  $labl" %}
12534   size(6);
12535   ins_encode %{
12536     Label* L = $labl$$label;
12537     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12538   %}
12539   ins_pipe(pipe_jcc);
12540 %}
12541 
12542 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12543   match(If cop cmp);
12544   effect(USE labl);
12545 
12546   ins_cost(200);
12547   format %{ "J$cop,u  $labl" %}
12548   size(6);
12549   ins_encode %{
12550     Label* L = $labl$$label;
12551     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12552   %}
12553   ins_pipe(pipe_jcc);
12554 %}
12555 
12556 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12557   match(If cop cmp);
12558   effect(USE labl);
12559 
12560   ins_cost(200);
12561   format %{ $$template
12562     if ($cop$$cmpcode == Assembler::notEqual) {
12563       $$emit$$"JP,u   $labl\n\t"
12564       $$emit$$"J$cop,u   $labl"
12565     } else {
12566       $$emit$$"JP,u   done\n\t"
12567       $$emit$$"J$cop,u   $labl\n\t"
12568       $$emit$$"done:"
12569     }
12570   %}
12571   ins_encode %{
12572     Label* l = $labl$$label;
12573     if ($cop$$cmpcode == Assembler::notEqual) {
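      // Unordered (parity set) means "not equal", so the branch must also be taken on parity.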
12574       __ jcc(Assembler::parity, *l, false);
12575       __ jcc(Assembler::notEqual, *l, false);
12576     } else if ($cop$$cmpcode == Assembler::equal) {
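      // Unordered means "not equal": a short parity branch skips over the JE below.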
12577       Label done;
12578       __ jccb(Assembler::parity, done);
12579       __ jcc(Assembler::equal, *l, false);
12580       __ bind(done);
12581     } else {
12582        ShouldNotReachHere();
12583     }
12584   %}
12585   ins_pipe(pipe_jcc);
12586 %}
12587 
12588 // ============================================================================
12589 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12590 // array for an instance of the superklass.  Set a hidden internal cache on a
12591 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12592 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
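//
// A rough C-style sketch of the scan (illustrative only; field names are
// abbreviated and this is not the code that gets emitted):
//
//   Klass** s   = sub->secondary_supers->data;
//   int     len = sub->secondary_supers->length;
//   for (int i = 0; i < len; i++) {
//     if (s[i] == super) {                   // REPNE SCASD found EAX
//       sub->secondary_super_cache = super;  // remember the hit
//       return 0;                            // zero, flags Z: is a subtype
//     }
//   }
//   return 1;                                // not-zero, flags NZ: miss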
12593 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12594   match(Set result (PartialSubtypeCheck sub super));
12595   effect( KILL rcx, KILL cr );
12596 
12597   ins_cost(1100);  // slightly larger than the next version
12598   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12599             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12600             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12601             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12602             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12603             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12604             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12605      "miss:\t" %}
12606 
12607   opcode(0x1); // Force a XOR of EDI
12608   ins_encode( enc_PartialSubtypeCheck() );
12609   ins_pipe( pipe_slow );
12610 %}
12611 
12612 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12613   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12614   effect( KILL rcx, KILL result );
12615 
12616   ins_cost(1000);
12617   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12618             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12619             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12620             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12621             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12622             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12623      "miss:\t" %}
12624 
12625   opcode(0x0);  // No need to XOR EDI
12626   ins_encode( enc_PartialSubtypeCheck() );
12627   ins_pipe( pipe_slow );
12628 %}
12629 
12630 // ============================================================================
12631 // Branch Instructions -- short offset versions
12632 //
12633 // These instructions are used to replace jumps of a long offset (the default
12634 // match) with jumps of a shorter offset.  These instructions are all tagged
12635 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12636 // match rules in general matching.  Instead, the ADLC generates a conversion
12637 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used (i.e. whether the branch offset fits in the short
// form) via the is_short_branch_offset() predicate in the machine-specific
// code section of the file.
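//
// For reference, the standard IA-32 encodings involved are:
//   long:   JMP rel32 = E9 xx xx xx xx     (5 bytes, the size(5) above)
//           Jcc rel32 = 0F 8x xx xx xx xx  (6 bytes, the size(6) above)
//   short:  JMP rel8  = EB xx              (2 bytes, size(2) below)
//           Jcc rel8  = 7x xx              (2 bytes, size(2) below)
// so every successful replacement saves 3 or 4 bytes of code.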
12641 
12642 // Jump Direct - Label defines a relative address from JMP+1
12643 instruct jmpDir_short(label labl) %{
12644   match(Goto);
12645   effect(USE labl);
12646 
12647   ins_cost(300);
12648   format %{ "JMP,s  $labl" %}
12649   size(2);
12650   ins_encode %{
12651     Label* L = $labl$$label;
12652     __ jmpb(*L);
12653   %}
12654   ins_pipe( pipe_jmp );
12655   ins_short_branch(1);
12656 %}
12657 
12658 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12659 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12660   match(If cop cr);
12661   effect(USE labl);
12662 
12663   ins_cost(300);
12664   format %{ "J$cop,s  $labl" %}
12665   size(2);
12666   ins_encode %{
12667     Label* L = $labl$$label;
12668     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12669   %}
12670   ins_pipe( pipe_jcc );
12671   ins_short_branch(1);
12672 %}
12673 
12674 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12675 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12676   match(CountedLoopEnd cop cr);
12677   effect(USE labl);
12678 
12679   ins_cost(300);
12680   format %{ "J$cop,s  $labl\t# Loop end" %}
12681   size(2);
12682   ins_encode %{
12683     Label* L = $labl$$label;
12684     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12685   %}
12686   ins_pipe( pipe_jcc );
12687   ins_short_branch(1);
12688 %}
12689 
12690 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12691 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12692   match(CountedLoopEnd cop cmp);
12693   effect(USE labl);
12694 
12695   ins_cost(300);
12696   format %{ "J$cop,us $labl\t# Loop end" %}
12697   size(2);
12698   ins_encode %{
12699     Label* L = $labl$$label;
12700     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12701   %}
12702   ins_pipe( pipe_jcc );
12703   ins_short_branch(1);
12704 %}
12705 
12706 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12707   match(CountedLoopEnd cop cmp);
12708   effect(USE labl);
12709 
12710   ins_cost(300);
12711   format %{ "J$cop,us $labl\t# Loop end" %}
12712   size(2);
12713   ins_encode %{
12714     Label* L = $labl$$label;
12715     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12716   %}
12717   ins_pipe( pipe_jcc );
12718   ins_short_branch(1);
12719 %}
12720 
12721 // Jump Direct Conditional - using unsigned comparison
12722 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12723   match(If cop cmp);
12724   effect(USE labl);
12725 
12726   ins_cost(300);
12727   format %{ "J$cop,us $labl" %}
12728   size(2);
12729   ins_encode %{
12730     Label* L = $labl$$label;
12731     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12732   %}
12733   ins_pipe( pipe_jcc );
12734   ins_short_branch(1);
12735 %}
12736 
12737 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12738   match(If cop cmp);
12739   effect(USE labl);
12740 
12741   ins_cost(300);
12742   format %{ "J$cop,us $labl" %}
12743   size(2);
12744   ins_encode %{
12745     Label* L = $labl$$label;
12746     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12747   %}
12748   ins_pipe( pipe_jcc );
12749   ins_short_branch(1);
12750 %}
12751 
12752 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12753   match(If cop cmp);
12754   effect(USE labl);
12755 
12756   ins_cost(300);
12757   format %{ $$template
12758     if ($cop$$cmpcode == Assembler::notEqual) {
12759       $$emit$$"JP,u,s   $labl\n\t"
12760       $$emit$$"J$cop,u,s   $labl"
12761     } else {
12762       $$emit$$"JP,u,s   done\n\t"
12763       $$emit$$"J$cop,u,s  $labl\n\t"
12764       $$emit$$"done:"
12765     }
12766   %}
12767   size(4);
12768   ins_encode %{
12769     Label* l = $labl$$label;
12770     if ($cop$$cmpcode == Assembler::notEqual) {
12771       __ jccb(Assembler::parity, *l);
12772       __ jccb(Assembler::notEqual, *l);
12773     } else if ($cop$$cmpcode == Assembler::equal) {
12774       Label done;
12775       __ jccb(Assembler::parity, done);
12776       __ jccb(Assembler::equal, *l);
12777       __ bind(done);
12778     } else {
12779        ShouldNotReachHere();
12780     }
12781   %}
12782   ins_pipe(pipe_jcc);
12783   ins_short_branch(1);
12784 %}
12785 
12786 // ============================================================================
12787 // Long Compare
12788 //
12789 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12790 // is tricky.  The flavor of compare used depends on whether we are testing
12791 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12792 // The GE test is the negated LT test.  The LE test can be had by commuting
12793 // the operands (yielding a GE test) and then negating; negate again for the
12794 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12795 // NE test is negated from that.
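//
// As an illustrative sketch (the standard two-word compare idiom, shown here
// only as commentary): a signed 64-bit "x < y" using 32-bit registers is
//
//   CMP  x.lo, y.lo     // CF = borrow out of the low halves
//   MOV  tmp, x.hi
//   SBB  tmp, y.hi      // tmp = x.hi - y.hi - CF; SF and OF now reflect the
//                       // full 64-bit subtraction, so JL / JGE test x < y
//
// which is the sequence cmpL_reg_flags_LTGE below emits.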
12796 
12797 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12798 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12799 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12800 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12801 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12802 // foo match ends up with the wrong leaf.  One fix is to not match both
12803 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12804 // both forms beat the trinary form of long-compare and both are very useful
12805 // on Intel which has so few registers.
12806 
12807 // Manifest a CmpL result in an integer register.  Very painful.
12808 // This is the test to avoid.
12809 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12810   match(Set dst (CmpL3 src1 src2));
12811   effect( KILL flags );
12812   ins_cost(1000);
12813   format %{ "XOR    $dst,$dst\n\t"
12814             "CMP    $src1.hi,$src2.hi\n\t"
12815             "JLT,s  m_one\n\t"
12816             "JGT,s  p_one\n\t"
12817             "CMP    $src1.lo,$src2.lo\n\t"
12818             "JB,s   m_one\n\t"
12819             "JEQ,s  done\n"
12820     "p_one:\tINC    $dst\n\t"
12821             "JMP,s  done\n"
12822     "m_one:\tDEC    $dst\n"
12823      "done:" %}
12824   ins_encode %{
12825     Label p_one, m_one, done;
12826     __ xorptr($dst$$Register, $dst$$Register);
12827     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12828     __ jccb(Assembler::less,    m_one);
12829     __ jccb(Assembler::greater, p_one);
12830     __ cmpl($src1$$Register, $src2$$Register);
12831     __ jccb(Assembler::below,   m_one);
12832     __ jccb(Assembler::equal,   done);
12833     __ bind(p_one);
12834     __ incrementl($dst$$Register);
12835     __ jmpb(done);
12836     __ bind(m_one);
12837     __ decrementl($dst$$Register);
12838     __ bind(done);
12839   %}
12840   ins_pipe( pipe_slow );
12841 %}
12842 
12843 //======
12844 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12845 // compares.  Can be used for LE or GT compares by reversing arguments.
12846 // NOT GOOD FOR EQ/NE tests.
12847 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12848   match( Set flags (CmpL src zero ));
12849   ins_cost(100);
12850   format %{ "TEST   $src.hi,$src.hi" %}
12851   opcode(0x85);
12852   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12853   ins_pipe( ialu_cr_reg_reg );
12854 %}
12855 
12856 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12857 // compares.  Can be used for LE or GT compares by reversing arguments.
12858 // NOT GOOD FOR EQ/NE tests.
12859 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12860   match( Set flags (CmpL src1 src2 ));
12861   effect( TEMP tmp );
12862   ins_cost(300);
12863   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12864             "MOV    $tmp,$src1.hi\n\t"
12865             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12866   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12867   ins_pipe( ialu_cr_reg_reg );
12868 %}
12869 
// Long compares reg < zero/reg OR reg >= zero/reg.
12871 // Just a wrapper for a normal branch, plus the predicate test.
12872 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12873   match(If cmp flags);
12874   effect(USE labl);
12875   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12876   expand %{
12877     jmpCon(cmp,flags,labl);    // JLT or JGE...
12878   %}
12879 %}
12880 
12881 //======
12882 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12883 // compares.  Can be used for LE or GT compares by reversing arguments.
12884 // NOT GOOD FOR EQ/NE tests.
12885 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12886   match(Set flags (CmpUL src zero));
12887   ins_cost(100);
12888   format %{ "TEST   $src.hi,$src.hi" %}
12889   opcode(0x85);
12890   ins_encode(OpcP, RegReg_Hi2(src, src));
12891   ins_pipe(ialu_cr_reg_reg);
12892 %}
12893 
12894 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12895 // compares.  Can be used for LE or GT compares by reversing arguments.
12896 // NOT GOOD FOR EQ/NE tests.
12897 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12898   match(Set flags (CmpUL src1 src2));
12899   effect(TEMP tmp);
12900   ins_cost(300);
12901   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12902             "MOV    $tmp,$src1.hi\n\t"
12903             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12904   ins_encode(long_cmp_flags2(src1, src2, tmp));
12905   ins_pipe(ialu_cr_reg_reg);
12906 %}
12907 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12909 // Just a wrapper for a normal branch, plus the predicate test.
12910 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12911   match(If cmp flags);
12912   effect(USE labl);
12913   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12914   expand %{
12915     jmpCon(cmp, flags, labl);    // JLT or JGE...
12916   %}
12917 %}
12918 
12919 // Compare 2 longs and CMOVE longs.
12920 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12921   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12922   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12923   ins_cost(400);
12924   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12925             "CMOV$cmp $dst.hi,$src.hi" %}
12926   opcode(0x0F,0x40);
12927   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12928   ins_pipe( pipe_cmov_reg_long );
12929 %}
12930 
12931 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12932   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12933   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12934   ins_cost(500);
12935   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12936             "CMOV$cmp $dst.hi,$src.hi" %}
12937   opcode(0x0F,0x40);
12938   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12939   ins_pipe( pipe_cmov_reg_long );
12940 %}
12941 
12942 // Compare 2 longs and CMOVE ints.
12943 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12944   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12945   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12946   ins_cost(200);
12947   format %{ "CMOV$cmp $dst,$src" %}
12948   opcode(0x0F,0x40);
12949   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12950   ins_pipe( pipe_cmov_reg );
12951 %}
12952 
12953 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12954   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12955   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12956   ins_cost(250);
12957   format %{ "CMOV$cmp $dst,$src" %}
12958   opcode(0x0F,0x40);
12959   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12960   ins_pipe( pipe_cmov_mem );
12961 %}
12962 
// Compare 2 longs and CMOVE ptrs.
12964 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12965   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12966   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12967   ins_cost(200);
12968   format %{ "CMOV$cmp $dst,$src" %}
12969   opcode(0x0F,0x40);
12970   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12971   ins_pipe( pipe_cmov_reg );
12972 %}
12973 
12974 // Compare 2 longs and CMOVE doubles
12975 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12977   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12978   ins_cost(200);
12979   expand %{
12980     fcmovDPR_regS(cmp,flags,dst,src);
12981   %}
12982 %}
12983 
12984 // Compare 2 longs and CMOVE doubles
12985 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12987   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12988   ins_cost(200);
12989   expand %{
12990     fcmovD_regS(cmp,flags,dst,src);
12991   %}
12992 %}
12993 
12994 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12996   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12997   ins_cost(200);
12998   expand %{
12999     fcmovFPR_regS(cmp,flags,dst,src);
13000   %}
13001 %}
13002 
13003 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13005   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13006   ins_cost(200);
13007   expand %{
13008     fcmovF_regS(cmp,flags,dst,src);
13009   %}
13010 %}
13011 
13012 //======
13013 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13014 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13015   match( Set flags (CmpL src zero ));
13016   effect(TEMP tmp);
13017   ins_cost(200);
13018   format %{ "MOV    $tmp,$src.lo\n\t"
13019             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13020   ins_encode( long_cmp_flags0( src, tmp ) );
13021   ins_pipe( ialu_reg_reg_long );
13022 %}
13023 
13024 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13025 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13026   match( Set flags (CmpL src1 src2 ));
13027   ins_cost(200+300);
13028   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13029             "JNE,s  skip\n\t"
13030             "CMP    $src1.hi,$src2.hi\n\t"
13031      "skip:\t" %}
13032   ins_encode( long_cmp_flags1( src1, src2 ) );
13033   ins_pipe( ialu_cr_reg_reg );
13034 %}
13035 
13036 // Long compare reg == zero/reg OR reg != zero/reg
13037 // Just a wrapper for a normal branch, plus the predicate test.
13038 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13039   match(If cmp flags);
13040   effect(USE labl);
13041   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13042   expand %{
13043     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13044   %}
13045 %}
13046 
13047 //======
13048 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13049 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13050   match(Set flags (CmpUL src zero));
13051   effect(TEMP tmp);
13052   ins_cost(200);
13053   format %{ "MOV    $tmp,$src.lo\n\t"
13054             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13055   ins_encode(long_cmp_flags0(src, tmp));
13056   ins_pipe(ialu_reg_reg_long);
13057 %}
13058 
13059 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13060 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13061   match(Set flags (CmpUL src1 src2));
13062   ins_cost(200+300);
13063   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13064             "JNE,s  skip\n\t"
13065             "CMP    $src1.hi,$src2.hi\n\t"
13066      "skip:\t" %}
13067   ins_encode(long_cmp_flags1(src1, src2));
13068   ins_pipe(ialu_cr_reg_reg);
13069 %}
13070 
13071 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13072 // Just a wrapper for a normal branch, plus the predicate test.
13073 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13074   match(If cmp flags);
13075   effect(USE labl);
13076   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13077   expand %{
13078     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13079   %}
13080 %}
13081 
13082 // Compare 2 longs and CMOVE longs.
13083 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13084   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13085   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13086   ins_cost(400);
13087   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13088             "CMOV$cmp $dst.hi,$src.hi" %}
13089   opcode(0x0F,0x40);
13090   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13091   ins_pipe( pipe_cmov_reg_long );
13092 %}
13093 
13094 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13095   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13096   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13097   ins_cost(500);
13098   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13099             "CMOV$cmp $dst.hi,$src.hi" %}
13100   opcode(0x0F,0x40);
13101   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13102   ins_pipe( pipe_cmov_reg_long );
13103 %}
13104 
13105 // Compare 2 longs and CMOVE ints.
13106 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13107   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13108   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13109   ins_cost(200);
13110   format %{ "CMOV$cmp $dst,$src" %}
13111   opcode(0x0F,0x40);
13112   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13113   ins_pipe( pipe_cmov_reg );
13114 %}
13115 
13116 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13117   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13118   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13119   ins_cost(250);
13120   format %{ "CMOV$cmp $dst,$src" %}
13121   opcode(0x0F,0x40);
13122   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13123   ins_pipe( pipe_cmov_mem );
13124 %}
13125 
// Compare 2 longs and CMOVE ptrs.
13127 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13128   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13129   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13130   ins_cost(200);
13131   format %{ "CMOV$cmp $dst,$src" %}
13132   opcode(0x0F,0x40);
13133   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13134   ins_pipe( pipe_cmov_reg );
13135 %}
13136 
13137 // Compare 2 longs and CMOVE doubles
13138 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13140   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13141   ins_cost(200);
13142   expand %{
13143     fcmovDPR_regS(cmp,flags,dst,src);
13144   %}
13145 %}
13146 
13147 // Compare 2 longs and CMOVE doubles
13148 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13150   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13151   ins_cost(200);
13152   expand %{
13153     fcmovD_regS(cmp,flags,dst,src);
13154   %}
13155 %}
13156 
13157 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13159   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13160   ins_cost(200);
13161   expand %{
13162     fcmovFPR_regS(cmp,flags,dst,src);
13163   %}
13164 %}
13165 
13166 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13168   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13169   ins_cost(200);
13170   expand %{
13171     fcmovF_regS(cmp,flags,dst,src);
13172   %}
13173 %}
13174 
13175 //======
13176 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13177 // Same as cmpL_reg_flags_LEGT except must negate src
13178 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13179   match( Set flags (CmpL src zero ));
13180   effect( TEMP tmp );
13181   ins_cost(300);
13182   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13183             "CMP    $tmp,$src.lo\n\t"
13184             "SBB    $tmp,$src.hi\n\t" %}
13185   ins_encode( long_cmp_flags3(src, tmp) );
13186   ins_pipe( ialu_reg_reg_long );
13187 %}
13188 
13189 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13190 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13191 // requires a commuted test to get the same result.
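//
// Worked example of the commuted test: the 64-bit predicate "x <= y" is the
// same as "y >= x", so the rule below sets flags for the swapped subtraction
// (src2 - src1) and relies on the cmpOp_commute operand to emit the commuted
// condition code (an ideal LE test branches on GE, and GT on LT).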
13192 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13193   match( Set flags (CmpL src1 src2 ));
13194   effect( TEMP tmp );
13195   ins_cost(300);
13196   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13197             "MOV    $tmp,$src2.hi\n\t"
13198             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13199   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13200   ins_pipe( ialu_cr_reg_reg );
13201 %}
13202 
// Long compares reg <= zero/reg OR reg > zero/reg.
13204 // Just a wrapper for a normal branch, plus the predicate test
13205 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13206   match(If cmp flags);
13207   effect(USE labl);
13208   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13209   ins_cost(300);
13210   expand %{
13211     jmpCon(cmp,flags,labl);    // JGT or JLE...
13212   %}
13213 %}
13214 
13215 //======
13216 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13217 // Same as cmpUL_reg_flags_LEGT except must negate src
13218 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13219   match(Set flags (CmpUL src zero));
13220   effect(TEMP tmp);
13221   ins_cost(300);
13222   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13223             "CMP    $tmp,$src.lo\n\t"
13224             "SBB    $tmp,$src.hi\n\t" %}
13225   ins_encode(long_cmp_flags3(src, tmp));
13226   ins_pipe(ialu_reg_reg_long);
13227 %}
13228 
13229 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13230 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13231 // requires a commuted test to get the same result.
13232 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13233   match(Set flags (CmpUL src1 src2));
13234   effect(TEMP tmp);
13235   ins_cost(300);
13236   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13237             "MOV    $tmp,$src2.hi\n\t"
13238             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13239   ins_encode(long_cmp_flags2( src2, src1, tmp));
13240   ins_pipe(ialu_cr_reg_reg);
13241 %}
13242 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13244 // Just a wrapper for a normal branch, plus the predicate test
13245 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13246   match(If cmp flags);
13247   effect(USE labl);
13248   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13249   ins_cost(300);
13250   expand %{
13251     jmpCon(cmp, flags, labl);    // JGT or JLE...
13252   %}
13253 %}
13254 
13255 // Compare 2 longs and CMOVE longs.
13256 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13257   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13258   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13259   ins_cost(400);
13260   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13261             "CMOV$cmp $dst.hi,$src.hi" %}
13262   opcode(0x0F,0x40);
13263   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13264   ins_pipe( pipe_cmov_reg_long );
13265 %}
13266 
13267 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13268   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13269   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13270   ins_cost(500);
13271   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13272             "CMOV$cmp $dst.hi,$src.hi+4" %}
13273   opcode(0x0F,0x40);
13274   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13275   ins_pipe( pipe_cmov_reg_long );
13276 %}
13277 
13278 // Compare 2 longs and CMOVE ints.
13279 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13280   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13281   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13282   ins_cost(200);
13283   format %{ "CMOV$cmp $dst,$src" %}
13284   opcode(0x0F,0x40);
13285   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13286   ins_pipe( pipe_cmov_reg );
13287 %}
13288 
13289 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13290   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13291   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13292   ins_cost(250);
13293   format %{ "CMOV$cmp $dst,$src" %}
13294   opcode(0x0F,0x40);
13295   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13296   ins_pipe( pipe_cmov_mem );
13297 %}
13298 
13299 // Compare 2 longs and CMOVE ptrs.
13300 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13301   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13302   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13303   ins_cost(200);
13304   format %{ "CMOV$cmp $dst,$src" %}
13305   opcode(0x0F,0x40);
13306   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13307   ins_pipe( pipe_cmov_reg );
13308 %}
13309 
13310 // Compare 2 longs and CMOVE doubles
13311 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13313   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13314   ins_cost(200);
13315   expand %{
13316     fcmovDPR_regS(cmp,flags,dst,src);
13317   %}
13318 %}
13319 
13320 // Compare 2 longs and CMOVE doubles
13321 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13323   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13324   ins_cost(200);
13325   expand %{
13326     fcmovD_regS(cmp,flags,dst,src);
13327   %}
13328 %}
13329 
13330 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13332   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13333   ins_cost(200);
13334   expand %{
13335     fcmovFPR_regS(cmp,flags,dst,src);
13336   %}
13337 %}
13338 
13339 
13340 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13342   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13343   ins_cost(200);
13344   expand %{
13345     fcmovF_regS(cmp,flags,dst,src);
13346   %}
13347 %}
13348 
13349 
13350 // ============================================================================
13351 // Procedure Call/Return Instructions
13352 // Call Java Static Instruction
13353 // Note: If this code changes, the corresponding ret_addr_offset() and
13354 //       compute_padding() functions will have to be adjusted.
13355 instruct CallStaticJavaDirect(method meth) %{
13356   match(CallStaticJava);
13357   effect(USE meth);
13358 
13359   ins_cost(300);
13360   format %{ "CALL,static " %}
13361   opcode(0xE8); /* E8 cd */
13362   ins_encode( pre_call_resets,
13363               Java_Static_Call( meth ),
13364               call_epilog,
13365               post_call_FPU );
13366   ins_pipe( pipe_slow );
13367   ins_alignment(4);
13368 %}
13369 
13370 // Call Java Dynamic Instruction
13371 // Note: If this code changes, the corresponding ret_addr_offset() and
13372 //       compute_padding() functions will have to be adjusted.
13373 instruct CallDynamicJavaDirect(method meth) %{
13374   match(CallDynamicJava);
13375   effect(USE meth);
13376 
13377   ins_cost(300);
13378   format %{ "MOV    EAX,(oop)-1\n\t"
13379             "CALL,dynamic" %}
13380   opcode(0xE8); /* E8 cd */
13381   ins_encode( pre_call_resets,
13382               Java_Dynamic_Call( meth ),
13383               call_epilog,
13384               post_call_FPU );
13385   ins_pipe( pipe_slow );
13386   ins_alignment(4);
13387 %}
13388 
13389 // Call Runtime Instruction
13390 instruct CallRuntimeDirect(method meth) %{
13391   match(CallRuntime );
13392   effect(USE meth);
13393 
13394   ins_cost(300);
13395   format %{ "CALL,runtime " %}
13396   opcode(0xE8); /* E8 cd */
13397   // Use FFREEs to clear entries in float stack
13398   ins_encode( pre_call_resets,
13399               FFree_Float_Stack_All,
13400               Java_To_Runtime( meth ),
13401               post_call_FPU );
13402   ins_pipe( pipe_slow );
13403 %}
13404 
13405 // Call runtime without safepoint
13406 instruct CallLeafDirect(method meth) %{
13407   match(CallLeaf);
13408   effect(USE meth);
13409 
13410   ins_cost(300);
13411   format %{ "CALL_LEAF,runtime " %}
13412   opcode(0xE8); /* E8 cd */
13413   ins_encode( pre_call_resets,
13414               FFree_Float_Stack_All,
13415               Java_To_Runtime( meth ),
13416               Verify_FPU_For_Leaf, post_call_FPU );
13417   ins_pipe( pipe_slow );
13418 %}
13419 
13420 instruct CallLeafNoFPDirect(method meth) %{
13421   match(CallLeafNoFP);
13422   effect(USE meth);
13423 
13424   ins_cost(300);
13425   format %{ "CALL_LEAF_NOFP,runtime " %}
13426   opcode(0xE8); /* E8 cd */
13427   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13428   ins_pipe( pipe_slow );
13429 %}
13430 
13431 
13432 // Return Instruction
13433 // Remove the return address & jump to it.
13434 instruct Ret() %{
13435   match(Return);
13436   format %{ "RET" %}
13437   opcode(0xC3);
13438   ins_encode(OpcP);
13439   ins_pipe( pipe_jmp );
13440 %}
13441 
13442 // Tail Call; Jump from runtime stub to Java code.
13443 // Also known as an 'interprocedural jump'.
13444 // Target of jump will eventually return to caller.
13445 // TailJump below removes the return address.
13446 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13447   match(TailCall jump_target method_oop );
13448   ins_cost(300);
13449   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13450   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13451   ins_encode( OpcP, RegOpc(jump_target) );
13452   ins_pipe( pipe_jmp );
13453 %}
13454 
13455 
13456 // Tail Jump; remove the return address; jump to target.
13457 // TailCall above leaves the return address around.
13458 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13459   match( TailJump jump_target ex_oop );
13460   ins_cost(300);
13461   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13462             "JMP    $jump_target " %}
13463   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13464   ins_encode( enc_pop_rdx,
13465               OpcP, RegOpc(jump_target) );
13466   ins_pipe( pipe_jmp );
13467 %}
13468 
13469 // Create exception oop: created by stack-crawling runtime code.
13470 // Created exception is now available to this handler, and is setup
13471 // just prior to jumping to this handler.  No code emitted.
13472 instruct CreateException( eAXRegP ex_oop )
13473 %{
13474   match(Set ex_oop (CreateEx));
13475 
13476   size(0);
13477   // use the following format syntax
13478   format %{ "# exception oop is in EAX; no code emitted" %}
13479   ins_encode();
13480   ins_pipe( empty );
13481 %}
13482 
13483 
13484 // Rethrow exception:
13485 // The exception oop will come in the first argument position.
13486 // Then JUMP (not call) to the rethrow stub code.
13487 instruct RethrowException()
13488 %{
13489   match(Rethrow);
13490 
13491   // use the following format syntax
13492   format %{ "JMP    rethrow_stub" %}
13493   ins_encode(enc_rethrow);
13494   ins_pipe( pipe_jmp );
13495 %}
13496 
13497 // inlined locking and unlocking
13498 
13499 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13500   predicate(Compile::current()->use_rtm());
13501   match(Set cr (FastLock object box));
13502   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13503   ins_cost(300);
13504   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13505   ins_encode %{
13506     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13507                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13508                  _counters, _rtm_counters, _stack_rtm_counters,
13509                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13510                  true, ra_->C->profile_rtm());
13511   %}
13512   ins_pipe(pipe_slow);
13513 %}
13514 
13515 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13516   predicate(!Compile::current()->use_rtm());
13517   match(Set cr (FastLock object box));
13518   effect(TEMP tmp, TEMP scr, USE_KILL box);
13519   ins_cost(300);
13520   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13521   ins_encode %{
13522     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13523                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13524   %}
13525   ins_pipe(pipe_slow);
13526 %}
13527 
13528 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13529   match(Set cr (FastUnlock object box));
13530   effect(TEMP tmp, USE_KILL box);
13531   ins_cost(300);
13532   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13533   ins_encode %{
13534     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13535   %}
13536   ins_pipe(pipe_slow);
13537 %}
13538 
13539 
13540 
13541 // ============================================================================
13542 // Safepoint Instruction
13543 instruct safePoint_poll(eFlagsReg cr) %{
13544   predicate(SafepointMechanism::uses_global_page_poll());
13545   match(SafePoint);
13546   effect(KILL cr);
13547 
13548   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13549   // On SPARC that might be acceptable as we can generate the address with
13550   // just a sethi, saving an or.  By polling at offset 0 we can end up
13551   // putting additional pressure on the index-0 in the D$.  Because of
13552   // alignment (just like the situation at hand) the lower indices tend
13553   // to see more traffic.  It'd be better to change the polling address
13554   // to offset 0 of the last $line in the polling page.
13555 
13556   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13557   ins_cost(125);
13558   size(6) ;
13559   ins_encode( Safepoint_Poll() );
13560   ins_pipe( ialu_reg_mem );
13561 %}
13562 
13563 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13564   predicate(SafepointMechanism::uses_thread_local_poll());
13565   match(SafePoint poll);
13566   effect(KILL cr, USE poll);
13567 
13568   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13569   ins_cost(125);
13570   // EBP would need size(3)
13571   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13572   ins_encode %{
13573     __ relocate(relocInfo::poll_type);
13574     address pre_pc = __ pc();
13575     __ testl(rax, Address($poll$$Register, 0));
13576     address post_pc = __ pc();
13577     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13578   %}
13579   ins_pipe(ialu_reg_mem);
13580 %}
13581 
13582 
13583 // ============================================================================
13584 // This name is KNOWN by the ADLC and cannot be changed.
13585 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13586 // for this guy.
13587 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13588   match(Set dst (ThreadLocal));
13589   effect(DEF dst, KILL cr);
13590 
13591   format %{ "MOV    $dst, Thread::current()" %}
13592   ins_encode %{
13593     Register dstReg = as_Register($dst$$reg);
13594     __ get_thread(dstReg);
13595   %}
13596   ins_pipe( ialu_reg_fat );
13597 %}
13598 
13599 
13600 
13601 //----------PEEPHOLE RULES-----------------------------------------------------
13602 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13604 //
13605 // peepmatch ( root_instr_name [preceding_instruction]* );
13606 //
13607 // peepconstraint %{
13608 // (instruction_number.operand_name relational_op instruction_number.operand_name
13609 //  [, ...] );
13610 // // instruction numbers are zero-based using left to right order in peepmatch
13611 //
13612 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13613 // // provide an instruction_number.operand_name for each operand that appears
13614 // // in the replacement instruction's match rule
13615 //
13616 // ---------VM FLAGS---------------------------------------------------------
13617 //
13618 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13619 //
13620 // Each peephole rule is given an identifying number starting with zero and
13621 // increasing by one in the order seen by the parser.  An individual peephole
13622 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13623 // on the command-line.
13624 //
13625 // ---------CURRENT LIMITATIONS----------------------------------------------
13626 //
13627 // Only match adjacent instructions in same basic block
13628 // Only equality constraints
13629 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13630 // Only one replacement instruction
13631 //
13632 // ---------EXAMPLE----------------------------------------------------------
13633 //
13634 // // pertinent parts of existing instructions in architecture description
13635 // instruct movI(rRegI dst, rRegI src) %{
13636 //   match(Set dst (CopyI src));
13637 // %}
13638 //
13639 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13640 //   match(Set dst (AddI dst src));
13641 //   effect(KILL cr);
13642 // %}
13643 //
13644 // // Change (inc mov) to lea
13645 // peephole %{
//   // increment preceded by register-register move
13647 //   peepmatch ( incI_eReg movI );
13648 //   // require that the destination register of the increment
13649 //   // match the destination register of the move
13650 //   peepconstraint ( 0.dst == 1.dst );
13651 //   // construct a replacement instruction that sets
13652 //   // the destination to ( move's source register + one )
13653 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13654 // %}
13655 //
// The implementation no longer uses movX instructions since the
13657 // machine-independent system no longer uses CopyX nodes.
13658 //
13659 // peephole %{
13660 //   peepmatch ( incI_eReg movI );
13661 //   peepconstraint ( 0.dst == 1.dst );
13662 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13663 // %}
13664 //
13665 // peephole %{
13666 //   peepmatch ( decI_eReg movI );
13667 //   peepconstraint ( 0.dst == 1.dst );
13668 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13669 // %}
13670 //
13671 // peephole %{
13672 //   peepmatch ( addI_eReg_imm movI );
13673 //   peepconstraint ( 0.dst == 1.dst );
13674 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13675 // %}
13676 //
13677 // peephole %{
13678 //   peepmatch ( addP_eReg_imm movP );
13679 //   peepconstraint ( 0.dst == 1.dst );
13680 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13681 // %}
13682 
13683 // // Change load of spilled value to only a spill
13684 // instruct storeI(memory mem, rRegI src) %{
13685 //   match(Set mem (StoreI mem src));
13686 // %}
13687 //
13688 // instruct loadI(rRegI dst, memory mem) %{
13689 //   match(Set dst (LoadI mem));
13690 // %}
13691 //
13692 peephole %{
13693   peepmatch ( loadI storeI );
13694   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13695   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13696 %}
13697 
13698 //----------SMARTSPILL RULES---------------------------------------------------
13699 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.