1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encodings to play games with the
// register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between the register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
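
// These pools back the fast AbsF/AbsD and NegF/NegD forms noted above: abs
// masks off the sign bit with ANDPS/ANDPD against the sign-mask constants,
// while neg flips it with XORPS/XORPD against the sign-flip constants.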
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
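
// (Size accounting: an FLDCW from a 32-bit absolute address encodes in 6 bytes
// and VZEROUPPER in 3 bytes, which is what the constants above account for.)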
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
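
// The 10 bytes above are a 5-byte MOV that pre-loads the inline cache register
// (EAX on this port) followed by the 5-byte CALL; the return address points
// just past the CALL.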
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
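
// For example, with an 8-bit displacement the bytes emitted above are:
//   <opcode> <ModRM: mod=01, reg=rm_field, rm=100> <SIB: 00 100 100 (ESP)> <disp8>
// The 32-bit form differs only in using mod=10 and a 4-byte displacement.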
 414 
// Helper behind the (rRegI ereg, memory mem) encodings -- emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
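
// Summary of the ModRM/SIB selection above: mod=00 means no displacement
// (except with an EBP base, which requires one), mod=01 means disp8, and
// mod=10 means disp32.  rm=100 indicates that a SIB byte follows, and
// mod=00 with rm=101 selects a 32-bit absolute address (the base == -1 case).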
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
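// Produce a three-way integer result from the preceding floating-point compare:
// dst is -1 if the operands were unordered (NaN) or the first was below the
// second, 0 if they were equal, and +1 if the first was above the second.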
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users might be
 628     // emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp,
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673 
 674   if (C->max_vector_size() > 16) {
 675     // Clear upper bits of YMM registers when current compiled code uses
 676     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 677     MacroAssembler masm(&cbuf);
 678     masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     MacroAssembler masm(&cbuf);
 683     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 684   }
 685 
 686   int framesize = C->frame_size_in_bytes();
 687   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 688   // Remove two words for return addr and rbp,
 689   framesize -= 2*wordSize;
 690 
 691   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 692 
 693   if (framesize >= 128) {
 694     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 695     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 696     emit_d32(cbuf, framesize);
 697   } else if (framesize) {
 698     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 699     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 700     emit_d8(cbuf, framesize);
 701   }
 702 
 703   emit_opcode(cbuf, 0x58 | EBP_enc);
 704 
 705   if (do_polling() && C->is_method_compilation()) {
 706     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 707     emit_opcode(cbuf,0x85);
 708     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 709     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 710   }
 711 }
 712 
 713 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 714   Compile *C = ra_->C;
 715   // If method set FPU control word, restore to standard control word
 716   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 717   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 718   if (do_polling() && C->is_method_compilation()) size += 6;
 719 
 720   int framesize = C->frame_size_in_bytes();
 721   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 722   // Remove two words for return addr and rbp,
 723   framesize -= 2*wordSize;
 724 
 725   size++; // popl rbp,
 726 
 727   if (framesize >= 128) {
 728     size += 6;
 729   } else {
 730     size += framesize ? 3 : 0;
 731   }
 732   return size;
 733 }
 734 
 735 int MachEpilogNode::reloc() const {
 736   return 0; // a large enough number
 737 }
 738 
 739 const Pipeline * MachEpilogNode::pipeline() const {
 740   return MachNode::pipeline_class();
 741 }
 742 
 743 int MachEpilogNode::safepoint_offset() const { return 0; }
 744 
 745 //=============================================================================
 746 
 747 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 748 static enum RC rc_class( OptoReg::Name reg ) {
 749 
 750   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 751   if (OptoReg::is_stack(reg)) return rc_stack;
 752 
 753   VMReg r = OptoReg::as_VMReg(reg);
 754   if (r->is_Register()) return rc_int;
 755   if (r->is_FloatRegister()) {
 756     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 757     return rc_float;
 758   }
 759   assert(r->is_XMMRegister(), "must be");
 760   return rc_xmm;
 761 }
 762 
 763 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 764                         int opcode, const char *op_str, int size, outputStream* st ) {
 765   if( cbuf ) {
 766     emit_opcode  (*cbuf, opcode );
 767     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 768 #ifndef PRODUCT
 769   } else if( !do_size ) {
 770     if( size != 0 ) st->print("\n\t");
 771     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 772       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 773       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 774     } else { // FLD, FST, PUSH, POP
 775       st->print("%s [ESP + #%d]",op_str,offset);
 776     }
 777 #endif
 778   }
 779   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 780   return size+3+offset_size;
 781 }
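
// The "+3" above is opcode + ModRM + SIB: an ESP-based address always requires
// a SIB byte, and the displacement adds 0, 1, or 4 bytes depending on its size.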
 782 
 783 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 784 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 785                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 786   int in_size_in_bits = Assembler::EVEX_32bit;
 787   int evex_encoding = 0;
 788   if (reg_lo+1 == reg_hi) {
 789     in_size_in_bits = Assembler::EVEX_64bit;
 790     evex_encoding = Assembler::VEX_W;
 791   }
 792   if (cbuf) {
 793     MacroAssembler _masm(cbuf);
 794     if (reg_lo+1 == reg_hi) { // double move?
 795       if (is_load) {
 796         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 797       } else {
 798         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 799       }
 800     } else {
 801       if (is_load) {
 802         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 803       } else {
 804         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 805       }
 806     }
 807 #ifndef PRODUCT
 808   } else if (!do_size) {
 809     if (size != 0) st->print("\n\t");
 810     if (reg_lo+1 == reg_hi) { // double move?
 811       if (is_load) st->print("%s %s,[ESP + #%d]",
 812                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 813                               Matcher::regName[reg_lo], offset);
 814       else         st->print("MOVSD  [ESP + #%d],%s",
 815                               offset, Matcher::regName[reg_lo]);
 816     } else {
 817       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 818                               Matcher::regName[reg_lo], offset);
 819       else         st->print("MOVSS  [ESP + #%d],%s",
 820                               offset, Matcher::regName[reg_lo]);
 821     }
 822 #endif
 823   }
 824   bool is_single_byte = false;
 825   if ((UseAVX > 2) && (offset != 0)) {
 826     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 827   }
 828   int offset_size = 0;
 829   if (UseAVX > 2 ) {
 830     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 831   } else {
 832     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 833   }
 834   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 835   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 836   return size+5+offset_size;
 837 }
 838 
 839 
 840 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 841                             int src_hi, int dst_hi, int size, outputStream* st ) {
 842   if (cbuf) {
 843     MacroAssembler _masm(cbuf);
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 883              as_Register(Matcher::_regEncode[src_lo]));
 884 #ifndef PRODUCT
 885   } else if (!do_size) {
 886     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 887 #endif
 888   }
 889   return (UseAVX> 2) ? 6 : 4;
 890 }
 891 
 892 
 893 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 894                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 895   // 32-bit
 896   if (cbuf) {
 897     MacroAssembler _masm(cbuf);
 898     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 899              as_XMMRegister(Matcher::_regEncode[src_lo]));
 900 #ifndef PRODUCT
 901   } else if (!do_size) {
 902     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 903 #endif
 904   }
 905   return (UseAVX> 2) ? 6 : 4;
 906 }
 907 
 908 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 909   if( cbuf ) {
 910     emit_opcode(*cbuf, 0x8B );
 911     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 912 #ifndef PRODUCT
 913   } else if( !do_size ) {
 914     if( size != 0 ) st->print("\n\t");
 915     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 916 #endif
 917   }
 918   return size+2;
 919 }
 920 
 921 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 922                                  int offset, int size, outputStream* st ) {
 923   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 924     if( cbuf ) {
 925       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 926       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 927 #ifndef PRODUCT
 928     } else if( !do_size ) {
 929       if( size != 0 ) st->print("\n\t");
 930       st->print("FLD    %s",Matcher::regName[src_lo]);
 931 #endif
 932     }
 933     size += 2;
 934   }
 935 
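  // Note: st_op is really the ModRM 'reg' (opcode-extension) field handed to
  // impl_helper below: EBX_num encodes /3 (FSTP mem) and EDX_num encodes /2
  // (FST mem) for the 0xD9/0xDD opcodes chosen underneath.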
 936   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 937   const char *op_str;
 938   int op;
 939   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 940     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 941     op = 0xDD;
 942   } else {                   // 32-bit store
 943     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 944     op = 0xD9;
 945     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 946   }
 947 
 948   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 949 }
 950 
 951 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 952 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 953                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 954 
 955 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 956                             int stack_offset, int reg, uint ireg, outputStream* st);
 957 
 958 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 959                                      int dst_offset, uint ireg, outputStream* st) {
 960   int calc_size = 0;
 961   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 962   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 963   switch (ireg) {
 964   case Op_VecS:
 965     calc_size = 3+src_offset_size + 3+dst_offset_size;
 966     break;
 967   case Op_VecD:
 968     calc_size = 3+src_offset_size + 3+dst_offset_size;
 969     src_offset += 4;
 970     dst_offset += 4;
 971     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 972     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 973     calc_size += 3+src_offset_size + 3+dst_offset_size;
 974     break;
 975   case Op_VecX:
 976   case Op_VecY:
 977   case Op_VecZ:
 978     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 979     break;
 980   default:
 981     ShouldNotReachHere();
 982   }
 983   if (cbuf) {
 984     MacroAssembler _masm(cbuf);
 985     int offset = __ offset();
 986     switch (ireg) {
 987     case Op_VecS:
 988       __ pushl(Address(rsp, src_offset));
 989       __ popl (Address(rsp, dst_offset));
 990       break;
 991     case Op_VecD:
 992       __ pushl(Address(rsp, src_offset));
 993       __ popl (Address(rsp, dst_offset));
 994       __ pushl(Address(rsp, src_offset+4));
 995       __ popl (Address(rsp, dst_offset+4));
 996       break;
 997     case Op_VecX:
 998       __ movdqu(Address(rsp, -16), xmm0);
 999       __ movdqu(xmm0, Address(rsp, src_offset));
1000       __ movdqu(Address(rsp, dst_offset), xmm0);
1001       __ movdqu(xmm0, Address(rsp, -16));
1002       break;
1003     case Op_VecY:
1004       __ vmovdqu(Address(rsp, -32), xmm0);
1005       __ vmovdqu(xmm0, Address(rsp, src_offset));
1006       __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
1009       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1010       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1011       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1012       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1013       break;
1014     default:
1015       ShouldNotReachHere();
1016     }
1017     int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
1019     return size;
1020 #ifndef PRODUCT
1021   } else if (!do_size) {
1022     switch (ireg) {
1023     case Op_VecS:
1024       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1025                 "popl    [rsp + #%d]",
1026                 src_offset, dst_offset);
1027       break;
1028     case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1033                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1034       break;
1035      case Op_VecX:
1036       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1037                 "movdqu  xmm0, [rsp + #%d]\n\t"
1038                 "movdqu  [rsp + #%d], xmm0\n\t"
1039                 "movdqu  xmm0, [rsp - #16]",
1040                 src_offset, dst_offset);
1041       break;
1042     case Op_VecY:
1043       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1044                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1045                 "vmovdqu [rsp + #%d], xmm0\n\t"
1046                 "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
1049       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #64]",
1053                 src_offset, dst_offset);
1054       break;
1055     default:
1056       ShouldNotReachHere();
1057     }
1058 #endif
1059   }
1060   return calc_size;
1061 }
1062 
1063 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1064   // Get registers to move
1065   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1066   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1067   OptoReg::Name dst_second = ra_->get_reg_second(this );
1068   OptoReg::Name dst_first = ra_->get_reg_first(this );
1069 
1070   enum RC src_second_rc = rc_class(src_second);
1071   enum RC src_first_rc = rc_class(src_first);
1072   enum RC dst_second_rc = rc_class(dst_second);
1073   enum RC dst_first_rc = rc_class(dst_first);
1074 
1075   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1076 
1077   // Generate spill code!
1078   int size = 0;
1079 
1080   if( src_first == dst_first && src_second == dst_second )
1081     return size;            // Self copy, no move
1082 
1083   if (bottom_type()->isa_vect() != NULL) {
1084     uint ireg = ideal_reg();
1085     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1086     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1087     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1088     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1089       // mem -> mem
1090       int src_offset = ra_->reg2offset(src_first);
1091       int dst_offset = ra_->reg2offset(dst_first);
1092       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1093     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1094       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1095     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1096       int stack_offset = ra_->reg2offset(dst_first);
1097       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1098     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1099       int stack_offset = ra_->reg2offset(src_first);
1100       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1101     } else {
1102       ShouldNotReachHere();
1103     }
1104   }
1105 
1106   // --------------------------------------
1107   // Check for mem-mem move.  push/pop to move.
1108   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1109     if( src_second == dst_first ) { // overlapping stack copy ranges
1110       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1111       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1112       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1113       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1114     }
1115     // move low bits
1116     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1117     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1118     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1119       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1120       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1121     }
1122     return size;
1123   }
1124 
1125   // --------------------------------------
1126   // Check for integer reg-reg copy
1127   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1128     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1129 
1130   // Check for integer store
1131   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1132     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1133 
1134   // Check for integer load
1135   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1136     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1137 
1138   // Check for integer reg-xmm reg copy
1139   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1140     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1141             "no 64 bit integer-float reg moves" );
1142     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1143   }
1144   // --------------------------------------
1145   // Check for float reg-reg copy
1146   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1148             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1149     if( cbuf ) {
1150 
1151       // Note the mucking with the register encode to compensate for the 0/1
1152       // indexing issue mentioned in a comment in the reg_def sections
1153       // for FPR registers many lines above here.
1154 
1155       if( src_first != FPR1L_num ) {
1156         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1157         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1158         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1159         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1160      } else {
1161         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1162         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1163      }
1164 #ifndef PRODUCT
1165     } else if( !do_size ) {
1166       if( size != 0 ) st->print("\n\t");
1167       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1168       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1169 #endif
1170     }
1171     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1172   }
1173 
1174   // Check for float store
1175   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1176     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1177   }
1178 
1179   // Check for float load
1180   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1181     int offset = ra_->reg2offset(src_first);
1182     const char *op_str;
1183     int op;
1184     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1185       op_str = "FLD_D";
1186       op = 0xDD;
1187     } else {                   // 32-bit load
1188       op_str = "FLD_S";
1189       op = 0xD9;
1190       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1191     }
1192     if( cbuf ) {
1193       emit_opcode  (*cbuf, op );
1194       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1195       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1196       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1197 #ifndef PRODUCT
1198     } else if( !do_size ) {
1199       if( size != 0 ) st->print("\n\t");
1200       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1201 #endif
1202     }
1203     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1204     return size + 3+offset_size+2;
1205   }
1206 
1207   // Check for xmm reg-reg copy
1208   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1209     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1210             (src_first+1 == src_second && dst_first+1 == dst_second),
1211             "no non-adjacent float-moves" );
1212     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1213   }
1214 
1215   // Check for xmm reg-integer reg copy
1216   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1217     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1218             "no 64 bit float-integer reg moves" );
1219     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm store
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1224     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1225   }
1226 
1227   // Check for float xmm load
1228   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1229     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1230   }
1231 
1232   // Copy from float reg to xmm reg
1233   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1234     // copy to the top of stack from floating point reg
1235     // and use LEA to preserve flags
1236     if( cbuf ) {
1237       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1238       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1239       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1240       emit_d8(*cbuf,0xF8);
1241 #ifndef PRODUCT
1242     } else if( !do_size ) {
1243       if( size != 0 ) st->print("\n\t");
1244       st->print("LEA    ESP,[ESP-8]");
1245 #endif
1246     }
1247     size += 4;
1248 
1249     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1250 
1251     // Copy from the temp memory to the xmm reg.
1252     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1253 
1254     if( cbuf ) {
1255       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1256       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1257       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1258       emit_d8(*cbuf,0x08);
1259 #ifndef PRODUCT
1260     } else if( !do_size ) {
1261       if( size != 0 ) st->print("\n\t");
1262       st->print("LEA    ESP,[ESP+8]");
1263 #endif
1264     }
1265     size += 4;
1266     return size;
1267   }
1268 
1269   assert( size > 0, "missed a case" );
1270 
1271   // --------------------------------------------------------------------
1272   // Check for second bits still needing moving.
1273   if( src_second == dst_second )
1274     return size;               // Self copy; no move
1275   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1276 
1277   // Check for second word int-int move
1278   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1279     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1280 
1281   // Check for second word integer store
1282   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1283     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1284 
1285   // Check for second word integer load
1286   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1287     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1288 
1289 
1290   Unimplemented();
1291   return 0; // Mute compiler
1292 }
1293 
1294 #ifndef PRODUCT
1295 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1296   implementation( NULL, ra_, false, st );
1297 }
1298 #endif
1299 
1300 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1301   implementation( &cbuf, ra_, false, NULL );
1302 }
1303 
1304 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1305   return implementation( NULL, ra_, true, NULL );
1306 }
1307 
1308 
1309 //=============================================================================
1310 #ifndef PRODUCT
1311 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1312   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1313   int reg = ra_->get_reg_first(this);
1314   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1315 }
1316 #endif
1317 
1318 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_encode(this);
1321   if( offset >= 128 ) {
1322     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1323     emit_rm(cbuf, 0x2, reg, 0x04);
1324     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1325     emit_d32(cbuf, offset);
1326   }
1327   else {
1328     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1329     emit_rm(cbuf, 0x1, reg, 0x04);
1330     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1331     emit_d8(cbuf, offset);
1332   }
1333 }
1334 
1335 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1337   if( offset >= 128 ) {
1338     return 7;
1339   }
1340   else {
1341     return 4;
1342   }
1343 }
1344 
1345 //=============================================================================
1346 #ifndef PRODUCT
1347 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1348   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1349   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1350   st->print_cr("\tNOP");
1351   st->print_cr("\tNOP");
1352   if( !OptoBreakpoint )
1353     st->print_cr("\tNOP");
1354 }
1355 #endif
1356 
1357 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1358   MacroAssembler masm(&cbuf);
1359 #ifdef ASSERT
1360   uint insts_size = cbuf.insts_size();
1361 #endif
1362   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1363   masm.jump_cc(Assembler::notEqual,
1364                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1365   /* WARNING these NOPs are critical so that verified entry point is properly
1366      aligned for patching by NativeJump::patch_verified_entry() */
1367   int nops_cnt = 2;
1368   if( !OptoBreakpoint ) // Leave space for int3
1369      nops_cnt += 1;
1370   masm.nop(nops_cnt);
1371 
1372   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1373 }
1374 
1375 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1376   return OptoBreakpoint ? 11 : 12;
1377 }
1378 
1379 
1380 //=============================================================================
1381 
1382 int Matcher::regnum_to_fpu_offset(int regnum) {
1383   return regnum - 32; // The FP registers are in the second chunk
1384 }
1385 
// This was UltraSparc-specific; returning true just means we have a fast l2f conversion
1387 const bool Matcher::convL2FSupported(void) {
1388   return true;
1389 }
1390 
1391 // Is this branch offset short enough that a short branch can be used?
1392 //
1393 // NOTE: If the platform does not provide any short branch variants, then
1394 //       this method should return false for offset 0.
1395 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1399   offset -= br_size;
1400 
1401   // the short version of jmpConUCF2 contains multiple branches,
1402   // making the reach slightly less
1403   if (rule == jmpConUCF2_rule)
1404     return (-126 <= offset && offset <= 125);
1405   return (-128 <= offset && offset <= 127);
1406 }
1407 
1408 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1410   return false;
1411 }
1412 
1413 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1414 const bool Matcher::init_array_count_is_in_bytes = false;
1415 
1416 // Threshold size for cleararray.
1417 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1418 
1419 // Needs 2 CMOV's for longs.
1420 const int Matcher::long_cmove_cost() { return 1; }
1421 
1422 // No CMOVF/CMOVD with SSE/SSE2
1423 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1424 
1425 // Does the CPU require late expand (see block.cpp for description of late expand)?
1426 const bool Matcher::require_postalloc_expand = false;
1427 
1428 // Should the Matcher clone shifts on addressing modes, expecting them to
1429 // be subsumed into complex addressing expressions or compute them into
1430 // registers?  True for Intel but false for most RISCs
1431 const bool Matcher::clone_shift_expressions = true;
1432 
1433 // Do we need to mask the count passed to shift instructions or does
1434 // the cpu only look at the lower 5/6 bits anyway?
1435 const bool Matcher::need_masked_shift_count = false;
1436 
1437 bool Matcher::narrow_oop_use_complex_address() {
1438   ShouldNotCallThis();
1439   return true;
1440 }
1441 
1442 bool Matcher::narrow_klass_use_complex_address() {
1443   ShouldNotCallThis();
1444   return true;
1445 }
1446 
1447 
1448 // Is it better to copy float constants, or load them directly from memory?
1449 // Intel can load a float constant from a direct address, requiring no
1450 // extra registers.  Most RISCs will have to materialize an address into a
1451 // register first, so they would do better to copy the constant from stack.
1452 const bool Matcher::rematerialize_float_constants = true;
1453 
1454 // If CPU can load and store mis-aligned doubles directly then no fixup is
1455 // needed.  Else we split the double into 2 integer pieces and move it
1456 // piece-by-piece.  Only happens when passing doubles into C code as the
1457 // Java calling convention forces doubles to be aligned.
1458 const bool Matcher::misaligned_doubles_ok = true;
1459 
1460 
1461 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1462   // Get the memory operand from the node
1463   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1464   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1465   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1466   uint opcnt     = 1;                 // First operand
1467   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1468   while( idx >= skipped+num_edges ) {
1469     skipped += num_edges;
1470     opcnt++;                          // Bump operand count
1471     assert( opcnt < numopnds, "Accessing non-existent operand" );
1472     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1473   }
1474 
1475   MachOper *memory = node->_opnds[opcnt];
1476   MachOper *new_memory = NULL;
1477   switch (memory->opcode()) {
1478   case DIRECT:
1479   case INDOFFSET32X:
1480     // No transformation necessary.
1481     return;
1482   case INDIRECT:
1483     new_memory = new indirect_win95_safeOper( );
1484     break;
1485   case INDOFFSET8:
1486     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1487     break;
1488   case INDOFFSET32:
1489     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1490     break;
1491   case INDINDEXOFFSET:
1492     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1493     break;
1494   case INDINDEXSCALE:
1495     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1496     break;
1497   case INDINDEXSCALEOFFSET:
1498     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1499     break;
1500   case LOAD_LONG_INDIRECT:
1501   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses { EDX, EBX, EDI, ESI }
1503     return;
1504   default:
1505     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1506     return;
1507   }
1508   node->_opnds[opcnt] = new_memory;
1509 }
1510 
1511 // Advertise here if the CPU requires explicit rounding operations
1512 // to implement the UseStrictFP mode.
1513 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1514 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x32 they are stored with conversion only when the FPU is used for floats.
1517 bool Matcher::float_in_double() { return (UseSSE == 0); }
1518 
1519 // Do ints take an entire long register or just half?
1520 const bool Matcher::int_in_long = false;
1521 
1522 // Return whether or not this register is ever used as an argument.  This
1523 // function is used on startup to build the trampoline stubs in generateOptoStub.
1524 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1526 bool Matcher::can_be_java_arg( int reg ) {
1527   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1528   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1529   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1530   return false;
1531 }
1532 
1533 bool Matcher::is_spillable_arg( int reg ) {
1534   return can_be_java_arg(reg);
1535 }
1536 
1537 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply,
  // and only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
1543   return VM_Version::has_fast_idiv() &&
1544          (divisor == (int)divisor && divisor != min_jint);
1545 }
1546 
1547 // Register for DIVI projection of divmodI
1548 RegMask Matcher::divI_proj_mask() {
1549   return EAX_REG_mask();
1550 }
1551 
1552 // Register for MODI projection of divmodI
1553 RegMask Matcher::modI_proj_mask() {
1554   return EDX_REG_mask();
1555 }
1556 
1557 // Register for DIVL projection of divmodL
1558 RegMask Matcher::divL_proj_mask() {
1559   ShouldNotReachHere();
1560   return RegMask();
1561 }
1562 
1563 // Register for MODL projection of divmodL
1564 RegMask Matcher::modL_proj_mask() {
1565   ShouldNotReachHere();
1566   return RegMask();
1567 }
1568 
1569 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1570   return NO_REG_mask();
1571 }
1572 
// Returns true if the high 32 bits of the value are known to be zero.
1574 bool is_operand_hi32_zero(Node* n) {
1575   int opc = n->Opcode();
1576   if (opc == Op_AndL) {
1577     Node* o2 = n->in(2);
1578     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1579       return true;
1580     }
1581   }
1582   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1583     return true;
1584   }
1585   return false;
1586 }
1587 
1588 %}
1589 
1590 //----------ENCODING BLOCK-----------------------------------------------------
1591 // This block specifies the encoding classes used by the compiler to output
1592 // byte streams.  Encoding classes generate functions which are called by
1593 // Machine Instruction Nodes in order to generate the bit encoding of the
1594 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
1597 // operand to generate a function which returns its register number when
1598 // queried.   CONST_INTER causes an operand to generate a function which
1599 // returns the value of the constant when queried.  MEMORY_INTER causes an
1600 // operand to generate four functions which return the Base Register, the
1601 // Index Register, the Scale Value, and the Offset Value of the operand when
1602 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1604 // associated with each basic boolean condition for a conditional instruction.
1605 // Instructions specify two basic values for encoding.  They use the
1606 // ins_encode keyword to specify their encoding class (which must be one of
1607 // the class names specified in the encoding block), and they use the
1608 // opcode keyword to specify, in order, their primary, secondary, and
1609 // tertiary opcode.  Only the opcode sections which a particular instruction
1610 // needs for encoding need to be specified.
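//
// As an illustrative sketch (not an actual rule in this file), an instruct
// typically pairs its opcode() values with enc_classes from the block below:
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                         // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) );  // emit primary opcode, then mod/rm byte
//     ins_pipe( ialu_reg_reg );             // pipe class name is illustrative
//   %}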
1611 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in order,
  // so that the adlc can build the emit functions automagically.
1618 
1619   // Emit primary opcode
1620   enc_class OpcP %{
1621     emit_opcode(cbuf, $primary);
1622   %}
1623 
1624   // Emit secondary opcode
1625   enc_class OpcS %{
1626     emit_opcode(cbuf, $secondary);
1627   %}
1628 
1629   // Emit opcode directly
1630   enc_class Opcode(immI d8) %{
1631     emit_opcode(cbuf, $d8$$constant);
1632   %}
1633 
1634   enc_class SizePrefix %{
1635     emit_opcode(cbuf,0x66);
1636   %}
1637 
1638   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1639     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1640   %}
1641 
1642   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1643     emit_opcode(cbuf,$opcode$$constant);
1644     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1645   %}
1646 
1647   enc_class mov_r32_imm0( rRegI dst ) %{
1648     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1649     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1650   %}
1651 
1652   enc_class cdq_enc %{
1653     // Full implementation of Java idiv and irem; checks for
1654     // special case as described in JVM spec., p.243 & p.271.
1655     //
1656     //         normal case                           special case
1657     //
    // input : eax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)       min_int
    //         edx: remainder (= eax irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
1674     //                  done:
1675     //
1676     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1677     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1679     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1680     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1681     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
1684     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1685     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1687     // normal_case:
1688     emit_opcode(cbuf,0x99);                                         // cdq
1689     // idiv (note: must be emitted by the user of this rule)
1690     // normal:
1691   %}
1692 
1693   // Dense encoding for older common ops
1694   enc_class Opc_plus(immI opcode, rRegI reg) %{
1695     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1696   %}
1697 
1698 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1700   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1701     // Check for 8-bit immediate, and set sign extend bit in opcode
1702     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1703       emit_opcode(cbuf, $primary | 0x02);
1704     }
1705     else {                          // If 32-bit immediate
1706       emit_opcode(cbuf, $primary);
1707     }
1708   %}
1709 
1710   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1711     // Emit primary opcode and set sign-extend bit
1712     // Check for 8-bit immediate, and set sign extend bit in opcode
1713     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1715     else {                          // If 32-bit immediate
1716       emit_opcode(cbuf, $primary);
1717     }
1718     // Emit r/m byte with secondary opcode, after primary opcode.
1719     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1720   %}
1721 
1722   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate; emit as 8 bits if it fits, else as 32 bits
1724     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1725       $$$emit8$imm$$constant;
1726     }
1727     else {                          // If 32-bit immediate
1728       // Output immediate
1729       $$$emit32$imm$$constant;
1730     }
1731   %}
1732 
1733   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1734     // Emit primary opcode and set sign-extend bit
1735     // Check for 8-bit immediate, and set sign extend bit in opcode
1736     int con = (int)$imm$$constant; // Throw away top bits
1737     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1738     // Emit r/m byte with secondary opcode, after primary opcode.
1739     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1740     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1741     else                               emit_d32(cbuf,con);
1742   %}
1743 
1744   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1745     // Emit primary opcode and set sign-extend bit
1746     // Check for 8-bit immediate, and set sign extend bit in opcode
1747     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1748     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1749     // Emit r/m byte with tertiary opcode, after primary opcode.
1750     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1751     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1752     else                               emit_d32(cbuf,con);
1753   %}
1754 
1755   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1756     emit_cc(cbuf, $secondary, $dst$$reg );
1757   %}
1758 
1759   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1760     int destlo = $dst$$reg;
1761     int desthi = HIGH_FROM_LOW(destlo);
1762     // bswap lo
1763     emit_opcode(cbuf, 0x0F);
1764     emit_cc(cbuf, 0xC8, destlo);
1765     // bswap hi
1766     emit_opcode(cbuf, 0x0F);
1767     emit_cc(cbuf, 0xC8, desthi);
1768     // xchg lo and hi
1769     emit_opcode(cbuf, 0x87);
1770     emit_rm(cbuf, 0x3, destlo, desthi);
1771   %}
1772 
1773   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1774     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1775   %}
1776 
1777   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1778     $$$emit8$primary;
1779     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1780   %}
1781 
1782   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1783     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1784     emit_d8(cbuf, op >> 8 );
1785     emit_d8(cbuf, op & 255);
1786   %}
1787 
1788   // emulate a CMOV with a conditional branch around a MOV
1789   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1790     // Invert sense of branch from sense of CMOV
1791     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1792     emit_d8( cbuf, $brOffs$$constant );
1793   %}
1794 
1795   enc_class enc_PartialSubtypeCheck( ) %{
1796     Register Redi = as_Register(EDI_enc); // result register
1797     Register Reax = as_Register(EAX_enc); // super class
1798     Register Recx = as_Register(ECX_enc); // killed
1799     Register Resi = as_Register(ESI_enc); // sub class
1800     Label miss;
1801 
1802     MacroAssembler _masm(&cbuf);
1803     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1804                                      NULL, &miss,
1805                                      /*set_cond_codes:*/ true);
1806     if ($primary) {
1807       __ xorptr(Redi, Redi);
1808     }
1809     __ bind(miss);
1810   %}
1811 
1812   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1813     MacroAssembler masm(&cbuf);
1814     int start = masm.offset();
1815     if (UseSSE >= 2) {
1816       if (VerifyFPU) {
1817         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1818       }
1819     } else {
1820       // External c_calling_convention expects the FPU stack to be 'clean'.
1821       // Compiled code leaves it dirty.  Do cleanup now.
1822       masm.empty_FPU_stack();
1823     }
1824     if (sizeof_FFree_Float_Stack_All == -1) {
1825       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1826     } else {
1827       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1828     }
1829   %}
1830 
1831   enc_class Verify_FPU_For_Leaf %{
1832     if( VerifyFPU ) {
1833       MacroAssembler masm(&cbuf);
1834       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1835     }
1836   %}
1837 
1838   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1839     // This is the instruction starting address for relocation info.
1840     cbuf.set_insts_mark();
1841     $$$emit8$primary;
1842     // CALL directly to the runtime
1843     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1844                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1845 
1846     if (UseSSE >= 2) {
1847       MacroAssembler _masm(&cbuf);
1848       BasicType rt = tf()->return_type();
1849 
1850       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1851         // A C runtime call where the return value is unused.  In SSE2+
1852         // mode the result needs to be removed from the FPU stack.  It's
1853         // likely that this function call could be removed by the
1854         // optimizer if the C function is a pure function.
1855         __ ffree(0);
1856       } else if (rt == T_FLOAT) {
1857         __ lea(rsp, Address(rsp, -4));
1858         __ fstp_s(Address(rsp, 0));
1859         __ movflt(xmm0, Address(rsp, 0));
1860         __ lea(rsp, Address(rsp,  4));
1861       } else if (rt == T_DOUBLE) {
1862         __ lea(rsp, Address(rsp, -8));
1863         __ fstp_d(Address(rsp, 0));
1864         __ movdbl(xmm0, Address(rsp, 0));
1865         __ lea(rsp, Address(rsp,  8));
1866       }
1867     }
1868   %}
1869 
1870 
1871   enc_class pre_call_resets %{
1872     // If method sets FPU control word restore it here
1873     debug_only(int off0 = cbuf.insts_size());
1874     if (ra_->C->in_24_bit_fp_mode()) {
1875       MacroAssembler _masm(&cbuf);
1876       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1877     }
1878     if (ra_->C->max_vector_size() > 16) {
1879       // Clear upper bits of YMM registers when current compiled code uses
1880       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1881       MacroAssembler _masm(&cbuf);
1882       __ vzeroupper();
1883     }
1884     debug_only(int off1 = cbuf.insts_size());
1885     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1886   %}
1887 
1888   enc_class post_call_FPU %{
1889     // If method sets FPU control word do it here also
1890     if (Compile::current()->in_24_bit_fp_mode()) {
1891       MacroAssembler masm(&cbuf);
1892       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1893     }
1894   %}
1895 
1896   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1897     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1898     // who we intended to call.
1899     cbuf.set_insts_mark();
1900     $$$emit8$primary;
1901 
1902     if (!_method) {
1903       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1904                      runtime_call_Relocation::spec(),
1905                      RELOC_IMM32);
1906     } else {
1907       int method_index = resolved_method_index(cbuf);
1908       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1909                                                   : static_call_Relocation::spec(method_index);
1910       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1911                      rspec, RELOC_DISP32);
1912       // Emit stubs for static call.
1913       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1914       if (stub == NULL) {
1915         ciEnv::current()->record_failure("CodeCache is full");
1916         return;
1917       }
1918     }
1919   %}
1920 
1921   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1922     MacroAssembler _masm(&cbuf);
1923     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1924   %}
1925 
1926   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1927     int disp = in_bytes(Method::from_compiled_offset());
1928     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1929 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1931     cbuf.set_insts_mark();
1932     $$$emit8$primary;
1933     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1934     emit_d8(cbuf, disp);             // Displacement
1935 
1936   %}
1937 
1938 //   Following encoding is no longer used, but may be restored if calling
1939 //   convention changes significantly.
1940 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1941 //
1942 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1943 //     // int ic_reg     = Matcher::inline_cache_reg();
1944 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1945 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1946 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1947 //
1948 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1949 //     // // so we load it immediately before the call
1950 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1951 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1952 //
1953 //     // xor rbp,ebp
1954 //     emit_opcode(cbuf, 0x33);
1955 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1956 //
1957 //     // CALL to interpreter.
1958 //     cbuf.set_insts_mark();
1959 //     $$$emit8$primary;
1960 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1961 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1962 //   %}
1963 
1964   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1965     $$$emit8$primary;
1966     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1967     $$$emit8$shift$$constant;
1968   %}
1969 
1970   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1971     // Load immediate does not have a zero or sign extended version
1972     // for 8-bit immediates
1973     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1974     $$$emit32$src$$constant;
1975   %}
1976 
1977   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1978     // Load immediate does not have a zero or sign extended version
1979     // for 8-bit immediates
1980     emit_opcode(cbuf, $primary + $dst$$reg);
1981     $$$emit32$src$$constant;
1982   %}
1983 
1984   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1985     // Load immediate does not have a zero or sign extended version
1986     // for 8-bit immediates
1987     int dst_enc = $dst$$reg;
1988     int src_con = $src$$constant & 0x0FFFFFFFFL;
1989     if (src_con == 0) {
1990       // xor dst, dst
1991       emit_opcode(cbuf, 0x33);
1992       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1993     } else {
1994       emit_opcode(cbuf, $primary + dst_enc);
1995       emit_d32(cbuf, src_con);
1996     }
1997   %}
1998 
1999   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2000     // Load immediate does not have a zero or sign extended version
2001     // for 8-bit immediates
2002     int dst_enc = $dst$$reg + 2;
2003     int src_con = ((julong)($src$$constant)) >> 32;
2004     if (src_con == 0) {
2005       // xor dst, dst
2006       emit_opcode(cbuf, 0x33);
2007       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2008     } else {
2009       emit_opcode(cbuf, $primary + dst_enc);
2010       emit_d32(cbuf, src_con);
2011     }
2012   %}
2013 
2014 
2015   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2016   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2017     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2018   %}
2019 
2020   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2021     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2022   %}
2023 
2024   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2025     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2026   %}
2027 
2028   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2029     $$$emit8$primary;
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$secondary;
2035     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2036   %}
2037 
2038   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2040   %}
2041 
2042   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2043     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2044   %}
2045 
2046   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2047     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2048   %}
2049 
2050   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2051     // Output immediate
2052     $$$emit32$src$$constant;
2053   %}
2054 
2055   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2056     // Output Float immediate bits
2057     jfloat jf = $src$$constant;
2058     int    jf_as_bits = jint_cast( jf );
2059     emit_d32(cbuf, jf_as_bits);
2060   %}
2061 
2062   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2063     // Output Float immediate bits
2064     jfloat jf = $src$$constant;
2065     int    jf_as_bits = jint_cast( jf );
2066     emit_d32(cbuf, jf_as_bits);
2067   %}
2068 
2069   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2070     // Output immediate
2071     $$$emit16$src$$constant;
2072   %}
2073 
2074   enc_class Con_d32(immI src) %{
2075     emit_d32(cbuf,$src$$constant);
2076   %}
2077 
2078   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2079     // Output immediate memory reference
2080     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2081     emit_d32(cbuf, 0x00);
2082   %}
2083 
2084   enc_class lock_prefix( ) %{
2085     if( os::is_MP() )
2086       emit_opcode(cbuf,0xF0);         // [Lock]
2087   %}
2088 
2089   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store, but
  //       our register encoding uses ebx.
2094   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2095 
    // XCHG  ebx,ecx
2097     emit_opcode(cbuf,0x87);
2098     emit_opcode(cbuf,0xD9);
2099     // [Lock]
2100     if( os::is_MP() )
2101       emit_opcode(cbuf,0xF0);
2102     // CMPXCHG8 [Eptr]
2103     emit_opcode(cbuf,0x0F);
2104     emit_opcode(cbuf,0xC7);
2105     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
2107     emit_opcode(cbuf,0x87);
2108     emit_opcode(cbuf,0xD9);
2109   %}
2110 
2111   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2112     // [Lock]
2113     if( os::is_MP() )
2114       emit_opcode(cbuf,0xF0);
2115 
2116     // CMPXCHG [Eptr]
2117     emit_opcode(cbuf,0x0F);
2118     emit_opcode(cbuf,0xB1);
2119     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2120   %}
2121 
2122   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2123     int res_encoding = $res$$reg;
2124 
2125     // MOV  res,0
2126     emit_opcode( cbuf, 0xB8 + res_encoding);
2127     emit_d32( cbuf, 0 );
2128     // JNE,s  fail
2129     emit_opcode(cbuf,0x75);
2130     emit_d8(cbuf, 5 );
2131     // MOV  res,1
2132     emit_opcode( cbuf, 0xB8 + res_encoding);
2133     emit_d32( cbuf, 1 );
2134     // fail:
2135   %}
2136 
2137   enc_class set_instruction_start( ) %{
2138     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2139   %}
2140 
2141   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2142     int reg_encoding = $ereg$$reg;
2143     int base  = $mem$$base;
2144     int index = $mem$$index;
2145     int scale = $mem$$scale;
2146     int displace = $mem$$disp;
2147     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2148     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2149   %}
2150 
2151   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2152     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2153     int base  = $mem$$base;
2154     int index = $mem$$index;
2155     int scale = $mem$$scale;
2156     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2157     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2158     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2159   %}
2160 
2161   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2162     int r1, r2;
2163     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2164     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2165     emit_opcode(cbuf,0x0F);
2166     emit_opcode(cbuf,$tertiary);
2167     emit_rm(cbuf, 0x3, r1, r2);
2168     emit_d8(cbuf,$cnt$$constant);
2169     emit_d8(cbuf,$primary);
2170     emit_rm(cbuf, 0x3, $secondary, r1);
2171     emit_d8(cbuf,$cnt$$constant);
2172   %}
2173 
2174   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2175     emit_opcode( cbuf, 0x8B ); // Move
2176     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2177     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2178       emit_d8(cbuf,$primary);
2179       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2180       emit_d8(cbuf,$cnt$$constant-32);
2181     }
2182     emit_d8(cbuf,$primary);
2183     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2184     emit_d8(cbuf,31);
2185   %}
2186 
2187   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2188     int r1, r2;
2189     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2190     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2191 
2192     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2193     emit_rm(cbuf, 0x3, r1, r2);
2194     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2195       emit_opcode(cbuf,$primary);
2196       emit_rm(cbuf, 0x3, $secondary, r1);
2197       emit_d8(cbuf,$cnt$$constant-32);
2198     }
2199     emit_opcode(cbuf,0x33);  // XOR r2,r2
2200     emit_rm(cbuf, 0x3, r2, r2);
2201   %}
2202 
2203   // Clone of RegMem but accepts an extra parameter to access each
2204   // half of a double in memory; it never needs relocation info.
2205   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2206     emit_opcode(cbuf,$opcode$$constant);
2207     int reg_encoding = $rm_reg$$reg;
2208     int base     = $mem$$base;
2209     int index    = $mem$$index;
2210     int scale    = $mem$$scale;
2211     int displace = $mem$$disp + $disp_for_half$$constant;
2212     relocInfo::relocType disp_reloc = relocInfo::none;
2213     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2214   %}
2215 
2216   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2217   //
2218   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2219   // and it never needs relocation information.
2220   // Frequently used to move data between FPU's Stack Top and memory.
2221   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2222     int rm_byte_opcode = $rm_opcode$$constant;
2223     int base     = $mem$$base;
2224     int index    = $mem$$index;
2225     int scale    = $mem$$scale;
2226     int displace = $mem$$disp;
2227     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2228     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2229   %}
2230 
2231   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2232     int rm_byte_opcode = $rm_opcode$$constant;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp;
2237     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2238     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2242     int reg_encoding = $dst$$reg;
2243     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2244     int index        = 0x04;            // 0x04 indicates no index
2245     int scale        = 0x00;            // 0x00 indicates no scale
2246     int displace     = $src1$$constant; // 0x00 indicates no displacement
2247     relocInfo::relocType disp_reloc = relocInfo::none;
2248     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2249   %}
2250 
2251   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2252     // Compare dst,src
2253     emit_opcode(cbuf,0x3B);
2254     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2255     // jmp dst < src around move
2256     emit_opcode(cbuf,0x7C);
2257     emit_d8(cbuf,2);
2258     // move dst,src
2259     emit_opcode(cbuf,0x8B);
2260     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2261   %}
2262 
2263   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2264     // Compare dst,src
2265     emit_opcode(cbuf,0x3B);
2266     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2267     // jmp dst > src around move
2268     emit_opcode(cbuf,0x7F);
2269     emit_d8(cbuf,2);
2270     // move dst,src
2271     emit_opcode(cbuf,0x8B);
2272     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2273   %}
2274 
2275   enc_class enc_FPR_store(memory mem, regDPR src) %{
2276     // If src is FPR1, we can just FST to store it.
2277     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2278     int reg_encoding = 0x2; // Just store
2279     int base  = $mem$$base;
2280     int index = $mem$$index;
2281     int scale = $mem$$scale;
2282     int displace = $mem$$disp;
2283     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2284     if( $src$$reg != FPR1L_enc ) {
2285       reg_encoding = 0x3;  // Store & pop
2286       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2287       emit_d8( cbuf, 0xC0-1+$src$$reg );
2288     }
2289     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2290     emit_opcode(cbuf,$primary);
2291     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2292   %}
2293 
2294   enc_class neg_reg(rRegI dst) %{
2295     // NEG $dst
2296     emit_opcode(cbuf,0xF7);
2297     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2298   %}
2299 
2300   enc_class setLT_reg(eCXRegI dst) %{
2301     // SETLT $dst
2302     emit_opcode(cbuf,0x0F);
2303     emit_opcode(cbuf,0x9C);
2304     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2305   %}
2306 
2307   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2308     int tmpReg = $tmp$$reg;
2309 
2310     // SUB $p,$q
2311     emit_opcode(cbuf,0x2B);
2312     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2313     // SBB $tmp,$tmp
2314     emit_opcode(cbuf,0x1B);
2315     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2316     // AND $tmp,$y
2317     emit_opcode(cbuf,0x23);
2318     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2319     // ADD $p,$tmp
2320     emit_opcode(cbuf,0x03);
2321     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2322   %}
2323 
2324   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2325     // TEST shift,32
2326     emit_opcode(cbuf,0xF7);
2327     emit_rm(cbuf, 0x3, 0, ECX_enc);
2328     emit_d32(cbuf,0x20);
2329     // JEQ,s small
2330     emit_opcode(cbuf, 0x74);
2331     emit_d8(cbuf, 0x04);
2332     // MOV    $dst.hi,$dst.lo
2333     emit_opcode( cbuf, 0x8B );
2334     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2335     // CLR    $dst.lo
2336     emit_opcode(cbuf, 0x33);
2337     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2338 // small:
2339     // SHLD   $dst.hi,$dst.lo,$shift
2340     emit_opcode(cbuf,0x0F);
2341     emit_opcode(cbuf,0xA5);
2342     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2343     // SHL    $dst.lo,$shift"
2344     emit_opcode(cbuf,0xD3);
2345     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2346   %}
2347 
2348   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2349     // TEST shift,32
2350     emit_opcode(cbuf,0xF7);
2351     emit_rm(cbuf, 0x3, 0, ECX_enc);
2352     emit_d32(cbuf,0x20);
2353     // JEQ,s small
2354     emit_opcode(cbuf, 0x74);
2355     emit_d8(cbuf, 0x04);
2356     // MOV    $dst.lo,$dst.hi
2357     emit_opcode( cbuf, 0x8B );
2358     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2359     // CLR    $dst.hi
2360     emit_opcode(cbuf, 0x33);
2361     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2362 // small:
2363     // SHRD   $dst.lo,$dst.hi,$shift
2364     emit_opcode(cbuf,0x0F);
2365     emit_opcode(cbuf,0xAD);
2366     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2367     // SHR    $dst.hi,$shift"
2368     emit_opcode(cbuf,0xD3);
2369     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2370   %}
2371 
2372   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2373     // TEST shift,32
2374     emit_opcode(cbuf,0xF7);
2375     emit_rm(cbuf, 0x3, 0, ECX_enc);
2376     emit_d32(cbuf,0x20);
2377     // JEQ,s small
2378     emit_opcode(cbuf, 0x74);
2379     emit_d8(cbuf, 0x05);
2380     // MOV    $dst.lo,$dst.hi
2381     emit_opcode( cbuf, 0x8B );
2382     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2383     // SAR    $dst.hi,31
2384     emit_opcode(cbuf, 0xC1);
2385     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2386     emit_d8(cbuf, 0x1F );
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392     // SAR    $dst.hi,$shift"
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397 
2398   // ----------------- Encodings for floating point unit -----------------
2399   // May leave result in FPU-TOS or FPU reg depending on opcodes
2400   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2401     $$$emit8$primary;
2402     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2403   %}
2404 
2405   // Pop argument in FPR0 with FSTP ST(0)
2406   enc_class PopFPU() %{
2407     emit_opcode( cbuf, 0xDD );
2408     emit_d8( cbuf, 0xD8 );
2409   %}
2410 
2411   // !!!!! equivalent to Pop_Reg_F
2412   enc_class Pop_Reg_DPR( regDPR dst ) %{
2413     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2414     emit_d8( cbuf, 0xD8+$dst$$reg );
2415   %}
2416 
2417   enc_class Push_Reg_DPR( regDPR dst ) %{
2418     emit_opcode( cbuf, 0xD9 );
2419     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2420   %}
2421 
2422   enc_class strictfp_bias1( regDPR dst ) %{
2423     emit_opcode( cbuf, 0xDB );           // FLD m80real
2424     emit_opcode( cbuf, 0x2D );
2425     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2426     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2427     emit_opcode( cbuf, 0xC8+$dst$$reg );
2428   %}
2429 
2430   enc_class strictfp_bias2( regDPR dst ) %{
2431     emit_opcode( cbuf, 0xDB );           // FLD m80real
2432     emit_opcode( cbuf, 0x2D );
2433     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2434     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2435     emit_opcode( cbuf, 0xC8+$dst$$reg );
2436   %}
2437 
2438   // Special case for moving an integer register to a stack slot.
2439   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2440     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2441   %}
2442 
2443   // Special case for moving a register to a stack slot.
2444   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2445     // Opcode already emitted
2446     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2447     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2448     emit_d32(cbuf, $dst$$disp);   // Displacement
2449   %}
2450 
2451   // Push the integer in stackSlot 'src' onto FP-stack
2452   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2453     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2454   %}
2455 
2456   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2457   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2458     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2459   %}
2460 
2461   // Same as Pop_Mem_F except for opcode
2462   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2463   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2464     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2465   %}
2466 
2467   enc_class Pop_Reg_FPR( regFPR dst ) %{
2468     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2469     emit_d8( cbuf, 0xD8+$dst$$reg );
2470   %}
2471 
2472   enc_class Push_Reg_FPR( regFPR dst ) %{
2473     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2474     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2475   %}
2476 
2477   // Push FPU's float to a stack-slot, and pop FPU-stack
2478   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2479     int pop = 0x02;
2480     if ($src$$reg != FPR1L_enc) {
2481       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2482       emit_d8( cbuf, 0xC0-1+$src$$reg );
2483       pop = 0x03;
2484     }
2485     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2486   %}
2487 
2488   // Push FPU's double to a stack-slot, and pop FPU-stack
2489   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2490     int pop = 0x02;
2491     if ($src$$reg != FPR1L_enc) {
2492       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2493       emit_d8( cbuf, 0xC0-1+$src$$reg );
2494       pop = 0x03;
2495     }
2496     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2497   %}
2498 
2499   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2500   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2501     int pop = 0xD0 - 1; // -1 since we skip FLD
2502     if ($src$$reg != FPR1L_enc) {
2503       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2504       emit_d8( cbuf, 0xC0-1+$src$$reg );
2505       pop = 0xD8;
2506     }
2507     emit_opcode( cbuf, 0xDD );
2508     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2509   %}
2510 
2511 
2512   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2513     // load dst in FPR0
2514     emit_opcode( cbuf, 0xD9 );
2515     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2516     if ($src$$reg != FPR1L_enc) {
2517       // fincstp
2518       emit_opcode (cbuf, 0xD9);
2519       emit_opcode (cbuf, 0xF7);
2520       // swap src with FPR1:
2521       // FXCH FPR1 with src
2522       emit_opcode(cbuf, 0xD9);
2523       emit_d8(cbuf, 0xC8-1+$src$$reg );
2524       // fdecstp
2525       emit_opcode (cbuf, 0xD9);
2526       emit_opcode (cbuf, 0xF6);
2527     }
2528   %}
2529 
2530   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2531     MacroAssembler _masm(&cbuf);
2532     __ subptr(rsp, 8);
2533     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2534     __ fld_d(Address(rsp, 0));
2535     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2536     __ fld_d(Address(rsp, 0));
2537   %}
2538 
2539   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2540     MacroAssembler _masm(&cbuf);
2541     __ subptr(rsp, 4);
2542     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2543     __ fld_s(Address(rsp, 0));
2544     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2545     __ fld_s(Address(rsp, 0));
2546   %}
2547 
2548   enc_class Push_ResultD(regD dst) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ fstp_d(Address(rsp, 0));
2551     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2552     __ addptr(rsp, 8);
2553   %}
2554 
2555   enc_class Push_ResultF(regF dst, immI d8) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ fstp_s(Address(rsp, 0));
2558     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2559     __ addptr(rsp, $d8$$constant);
2560   %}
2561 
2562   enc_class Push_SrcD(regD src) %{
2563     MacroAssembler _masm(&cbuf);
2564     __ subptr(rsp, 8);
2565     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2566     __ fld_d(Address(rsp, 0));
2567   %}
2568 
2569   enc_class push_stack_temp_qword() %{
2570     MacroAssembler _masm(&cbuf);
2571     __ subptr(rsp, 8);
2572   %}
2573 
2574   enc_class pop_stack_temp_qword() %{
2575     MacroAssembler _masm(&cbuf);
2576     __ addptr(rsp, 8);
2577   %}
2578 
2579   enc_class push_xmm_to_fpr1(regD src) %{
2580     MacroAssembler _masm(&cbuf);
2581     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2582     __ fld_d(Address(rsp, 0));
2583   %}
2584 
2585   enc_class Push_Result_Mod_DPR( regDPR src) %{
2586     if ($src$$reg != FPR1L_enc) {
2587       // fincstp
2588       emit_opcode (cbuf, 0xD9);
2589       emit_opcode (cbuf, 0xF7);
2590       // FXCH FPR1 with src
2591       emit_opcode(cbuf, 0xD9);
2592       emit_d8(cbuf, 0xC8-1+$src$$reg );
2593       // fdecstp
2594       emit_opcode (cbuf, 0xD9);
2595       emit_opcode (cbuf, 0xF6);
2596     }
2597     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2598     // // FSTP   FPR$dst$$reg
2599     // emit_opcode( cbuf, 0xDD );
2600     // emit_d8( cbuf, 0xD8+$dst$$reg );
2601   %}
2602 
2603   enc_class fnstsw_sahf_skip_parity() %{
2604     // fnstsw ax
2605     emit_opcode( cbuf, 0xDF );
2606     emit_opcode( cbuf, 0xE0 );
2607     // sahf
2608     emit_opcode( cbuf, 0x9E );
2609     // jnp  ::skip
2610     emit_opcode( cbuf, 0x7B );
2611     emit_opcode( cbuf, 0x05 );
2612   %}
2613 
2614   enc_class emitModDPR() %{
2615     // fprem must be iterative
2616     // :: loop
2617     // fprem
2618     emit_opcode( cbuf, 0xD9 );
2619     emit_opcode( cbuf, 0xF8 );
2620     // wait
2621     emit_opcode( cbuf, 0x9b );
2622     // fnstsw ax
2623     emit_opcode( cbuf, 0xDF );
2624     emit_opcode( cbuf, 0xE0 );
2625     // sahf
2626     emit_opcode( cbuf, 0x9E );
2627     // jp  ::loop
2628     emit_opcode( cbuf, 0x0F );
2629     emit_opcode( cbuf, 0x8A );
2630     emit_opcode( cbuf, 0xF4 );
2631     emit_opcode( cbuf, 0xFF );
2632     emit_opcode( cbuf, 0xFF );
2633     emit_opcode( cbuf, 0xFF );
2634   %}
2635 
2636   enc_class fpu_flags() %{
2637     // fnstsw_ax
2638     emit_opcode( cbuf, 0xDF);
2639     emit_opcode( cbuf, 0xE0);
2640     // test ax,0x0400
2641     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2642     emit_opcode( cbuf, 0xA9 );
2643     emit_d16   ( cbuf, 0x0400 );
2644     // // // This sequence works, but stalls for 12-16 cycles on PPro
2645     // // test rax,0x0400
2646     // emit_opcode( cbuf, 0xA9 );
2647     // emit_d32   ( cbuf, 0x00000400 );
2648     //
2649     // jz exit (no unordered comparison)
2650     emit_opcode( cbuf, 0x74 );
2651     emit_d8    ( cbuf, 0x02 );
2652     // mov ah,1 - treat as LT case (set carry flag)
2653     emit_opcode( cbuf, 0xB4 );
2654     emit_d8    ( cbuf, 0x01 );
2655     // sahf
2656     emit_opcode( cbuf, 0x9E);
2657   %}
2658 
2659   enc_class cmpF_P6_fixup() %{
2660     // Fixup the integer flags in case comparison involved a NaN
2661     //
2662     // JNP exit (no unordered comparison, P-flag is set by NaN)
2663     emit_opcode( cbuf, 0x7B );
2664     emit_d8    ( cbuf, 0x03 );
2665     // MOV AH,1 - treat as LT case (set carry flag)
2666     emit_opcode( cbuf, 0xB4 );
2667     emit_d8    ( cbuf, 0x01 );
2668     // SAHF
2669     emit_opcode( cbuf, 0x9E);
2670     // NOP     // target for branch to avoid branch to branch
2671     emit_opcode( cbuf, 0x90);
2672   %}
2673 
2674 //     fnstsw_ax();
2675 //     sahf();
2676 //     movl(dst, nan_result);
2677 //     jcc(Assembler::parity, exit);
2678 //     movl(dst, less_result);
2679 //     jcc(Assembler::below, exit);
2680 //     movl(dst, equal_result);
2681 //     jcc(Assembler::equal, exit);
2682 //     movl(dst, greater_result);
2683 
2684 // less_result     =  1;
2685 // greater_result  = -1;
2686 // equal_result    = 0;
2687 // nan_result      = -1;
2688 
2689   enc_class CmpF_Result(rRegI dst) %{
2690     // fnstsw_ax();
2691     emit_opcode( cbuf, 0xDF);
2692     emit_opcode( cbuf, 0xE0);
2693     // sahf
2694     emit_opcode( cbuf, 0x9E);
2695     // movl(dst, nan_result);
2696     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2697     emit_d32( cbuf, -1 );
2698     // jcc(Assembler::parity, exit);
2699     emit_opcode( cbuf, 0x7A );
2700     emit_d8    ( cbuf, 0x13 );
2701     // movl(dst, less_result);
2702     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2703     emit_d32( cbuf, -1 );
2704     // jcc(Assembler::below, exit);
2705     emit_opcode( cbuf, 0x72 );
2706     emit_d8    ( cbuf, 0x0C );
2707     // movl(dst, equal_result);
2708     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2709     emit_d32( cbuf, 0 );
2710     // jcc(Assembler::equal, exit);
2711     emit_opcode( cbuf, 0x74 );
2712     emit_d8    ( cbuf, 0x05 );
2713     // movl(dst, greater_result);
2714     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2715     emit_d32( cbuf, 1 );
2716   %}
2717 
2718 
2719   // Compare the longs and set flags
2720   // BROKEN!  Do Not use as-is
2721   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2722     // CMP    $src1.hi,$src2.hi
2723     emit_opcode( cbuf, 0x3B );
2724     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2725     // JNE,s  done
2726     emit_opcode(cbuf,0x75);
2727     emit_d8(cbuf, 2 );
2728     // CMP    $src1.lo,$src2.lo
2729     emit_opcode( cbuf, 0x3B );
2730     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2731 // done:
2732   %}
2733 
2734   enc_class convert_int_long( regL dst, rRegI src ) %{
2735     // mov $dst.lo,$src
2736     int dst_encoding = $dst$$reg;
2737     int src_encoding = $src$$reg;
2738     encode_Copy( cbuf, dst_encoding  , src_encoding );
2739     // mov $dst.hi,$src
2740     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2741     // sar $dst.hi,31
2742     emit_opcode( cbuf, 0xC1 );
2743     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2744     emit_d8(cbuf, 0x1F );
2745   %}
2746 
2747   enc_class convert_long_double( eRegL src ) %{
2748     // push $src.hi
2749     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2750     // push $src.lo
2751     emit_opcode(cbuf, 0x50+$src$$reg  );
2752     // fild 64-bits at [SP]
2753     emit_opcode(cbuf,0xdf);
2754     emit_d8(cbuf, 0x6C);
2755     emit_d8(cbuf, 0x24);
2756     emit_d8(cbuf, 0x00);
2757     // pop stack
2758     emit_opcode(cbuf, 0x83); // add  SP, #8
2759     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2760     emit_d8(cbuf, 0x8);
2761   %}
2762 
2763   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2764     // IMUL   EDX:EAX,$src1
2765     emit_opcode( cbuf, 0xF7 );
2766     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2767     // SAR    EDX,$cnt-32
2768     int shift_count = ((int)$cnt$$constant) - 32;
2769     if (shift_count > 0) {
2770       emit_opcode(cbuf, 0xC1);
2771       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2772       emit_d8(cbuf, shift_count);
2773     }
2774   %}
2775 
2776   // This version doesn't have the trailing ADD SP,8
2777   enc_class convert_long_double2( eRegL src ) %{
2778     // push $src.hi
2779     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2780     // push $src.lo
2781     emit_opcode(cbuf, 0x50+$src$$reg  );
2782     // fild 64-bits at [SP]
2783     emit_opcode(cbuf,0xdf);
2784     emit_d8(cbuf, 0x6C);
2785     emit_d8(cbuf, 0x24);
2786     emit_d8(cbuf, 0x00);
2787   %}
2788 
2789   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2790     // Basic idea: long = (long)int * (long)int
2791     // IMUL EDX:EAX, src
2792     emit_opcode( cbuf, 0xF7 );
2793     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2794   %}
2795 
2796   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2797     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2798     // MUL EDX:EAX, src
2799     emit_opcode( cbuf, 0xF7 );
2800     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2801   %}
2802 
2803   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2804     // Basic idea: lo(result) = lo(x_lo * y_lo)
2805     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
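         // Illustrative identity behind the formulas above (not emitted code):
         // writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo, the low 64 bits
         // of x*y are  x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi),
         // since the x_hi*y_hi term lies entirely above bit 63.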
2806     // MOV    $tmp,$src.lo
2807     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2808     // IMUL   $tmp,EDX
2809     emit_opcode( cbuf, 0x0F );
2810     emit_opcode( cbuf, 0xAF );
2811     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2812     // MOV    EDX,$src.hi
2813     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2814     // IMUL   EDX,EAX
2815     emit_opcode( cbuf, 0x0F );
2816     emit_opcode( cbuf, 0xAF );
2817     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2818     // ADD    $tmp,EDX
2819     emit_opcode( cbuf, 0x03 );
2820     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2821     // MUL   EDX:EAX,$src.lo
2822     emit_opcode( cbuf, 0xF7 );
2823     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2824     // ADD    EDX,$tmp
2825     emit_opcode( cbuf, 0x03 );
2826     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2827   %}
2828 
2829   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2830     // Basic idea: lo(result) = lo(src * y_lo)
2831     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
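         // (Same identity as in long_multiply above, specialized to x_hi == 0 and
         //  x_lo == $src, so only src*y_lo and src*y_hi reach the low 64 bits.)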
2832     // IMUL   $tmp,EDX,$src
2833     emit_opcode( cbuf, 0x6B );
2834     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2835     emit_d8( cbuf, (int)$src$$constant );
2836     // MOV    EDX,$src
2837     emit_opcode(cbuf, 0xB8 + EDX_enc);
2838     emit_d32( cbuf, (int)$src$$constant );
2839     // MUL   EDX:EAX,EDX
2840     emit_opcode( cbuf, 0xF7 );
2841     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2842     // ADD    EDX,$tmp
2843     emit_opcode( cbuf, 0x03 );
2844     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2845   %}
2846 
2847   enc_class long_div( eRegL src1, eRegL src2 ) %{
2848     // PUSH src1.hi
2849     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2850     // PUSH src1.lo
2851     emit_opcode(cbuf,               0x50+$src1$$reg  );
2852     // PUSH src2.hi
2853     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2854     // PUSH src2.lo
2855     emit_opcode(cbuf,               0x50+$src2$$reg  );
2856     // CALL directly to the runtime
2857     cbuf.set_insts_mark();
2858     emit_opcode(cbuf,0xE8);       // Call into runtime
2859     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2860     // Restore stack
2861     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2862     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2863     emit_d8(cbuf, 4*4);
2864   %}
2865 
2866   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2867     // PUSH src1.hi
2868     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2869     // PUSH src1.lo
2870     emit_opcode(cbuf,               0x50+$src1$$reg  );
2871     // PUSH src2.hi
2872     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2873     // PUSH src2.lo
2874     emit_opcode(cbuf,               0x50+$src2$$reg  );
2875     // CALL directly to the runtime
2876     cbuf.set_insts_mark();
2877     emit_opcode(cbuf,0xE8);       // Call into runtime
2878     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2879     // Restore stack
2880     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2881     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2882     emit_d8(cbuf, 4*4);
2883   %}
2884 
2885   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2886     // MOV   $tmp,$src.lo
2887     emit_opcode(cbuf, 0x8B);
2888     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2889     // OR    $tmp,$src.hi
2890     emit_opcode(cbuf, 0x0B);
2891     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2892   %}
2893 
2894   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2895     // CMP    $src1.lo,$src2.lo
2896     emit_opcode( cbuf, 0x3B );
2897     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2898     // JNE,s  skip
2899     emit_cc(cbuf, 0x70, 0x5);
2900     emit_d8(cbuf,2);
2901     // CMP    $src1.hi,$src2.hi
2902     emit_opcode( cbuf, 0x3B );
2903     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2904   %}
2905 
2906   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2907     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2908     emit_opcode( cbuf, 0x3B );
2909     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2910     // MOV    $tmp,$src1.hi
2911     emit_opcode( cbuf, 0x8B );
2912     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2913     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2914     emit_opcode( cbuf, 0x1B );
2915     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2916   %}
2917 
2918   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2919     // XOR    $tmp,$tmp
2920     emit_opcode(cbuf,0x33);  // XOR
2921     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2922     // CMP    $tmp,$src.lo
2923     emit_opcode( cbuf, 0x3B );
2924     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2925     // SBB    $tmp,$src.hi
2926     emit_opcode( cbuf, 0x1B );
2927     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2928   %}
2929 
2930   // Sniff, sniff... smells like GNU Superoptimizer
2931   enc_class neg_long( eRegL dst ) %{
2932     emit_opcode(cbuf,0xF7);    // NEG hi
2933     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2934     emit_opcode(cbuf,0xF7);    // NEG lo
2935     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2936     emit_opcode(cbuf,0x83);    // SBB hi,0
2937     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2938     emit_d8    (cbuf,0 );
2939   %}
2940 
2941   enc_class enc_pop_rdx() %{
2942     emit_opcode(cbuf,0x5A);
2943   %}
2944 
2945   enc_class enc_rethrow() %{
2946     cbuf.set_insts_mark();
2947     emit_opcode(cbuf, 0xE9);        // jmp    entry
2948     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2949                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2950   %}
2951 
2952 
2953   // Convert a double to an int.  Java semantics require we do complex
2954   // manipulations in the corner cases.  So we set the rounding mode to
2955   // 'zero', store the darned double down as an int, and reset the
2956   // rounding mode to 'nearest'.  The hardware throws an exception which
2957   // patches up the correct value directly to the stack.
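       // For reference, the Java (int) cast rules being implemented are roughly:
       //   (int)NaN                 -> 0
       //   (int)d  for d >=  2^31   -> Integer.MAX_VALUE
       //   (int)d  for d <= -2^31   -> Integer.MIN_VALUE
       //   otherwise                -> d truncated toward zero
       // With exceptions masked, FIST stores the 'integer indefinite' value
       // 0x80000000 for the corner cases, which is why the encoding below
       // re-checks that value and falls back to the d2i wrapper.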
2958   enc_class DPR2I_encoding( regDPR src ) %{
2959     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2960     // exceptions here, so that a NaN or other corner-case value will
2961     // throw an exception (but normal values get converted at full speed).
2962     // However, I2C adapters and other float-stack manglers leave pending
2963     // invalid-op exceptions hanging.  We would have to clear them before
2964     // enabling them and that is more expensive than just testing for the
2965     // invalid value Intel stores down in the corner cases.
2966     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2967     emit_opcode(cbuf,0x2D);
2968     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2969     // Allocate a word
2970     emit_opcode(cbuf,0x83);            // SUB ESP,4
2971     emit_opcode(cbuf,0xEC);
2972     emit_d8(cbuf,0x04);
2973     // Encoding assumes a double has been pushed into FPR0.
2974     // Store down the double as an int, popping the FPU stack
2975     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2976     emit_opcode(cbuf,0x1C);
2977     emit_d8(cbuf,0x24);
2978     // Restore the rounding mode; mask the exception
2979     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2980     emit_opcode(cbuf,0x2D);
2981     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2982         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2983         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2984 
2985     // Load the converted int; adjust CPU stack
2986     emit_opcode(cbuf,0x58);       // POP EAX
2987     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2988     emit_d32   (cbuf,0x80000000); //         0x80000000
2989     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2990     emit_d8    (cbuf,0x07);       // Size of slow_call
2991     // Push src onto stack slow-path
2992     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2993     emit_d8    (cbuf,0xC0-1+$src$$reg );
2994     // CALL directly to the runtime
2995     cbuf.set_insts_mark();
2996     emit_opcode(cbuf,0xE8);       // Call into runtime
2997     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2998     // Carry on here...
2999   %}
3000 
3001   enc_class DPR2L_encoding( regDPR src ) %{
3002     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3003     emit_opcode(cbuf,0x2D);
3004     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3005     // Allocate a word
3006     emit_opcode(cbuf,0x83);            // SUB ESP,8
3007     emit_opcode(cbuf,0xEC);
3008     emit_d8(cbuf,0x08);
3009     // Encoding assumes a double has been pushed into FPR0.
3010     // Store down the double as a long, popping the FPU stack
3011     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3012     emit_opcode(cbuf,0x3C);
3013     emit_d8(cbuf,0x24);
3014     // Restore the rounding mode; mask the exception
3015     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3016     emit_opcode(cbuf,0x2D);
3017     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3018         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3019         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3020 
3021     // Load the converted long; adjust CPU stack
3022     emit_opcode(cbuf,0x58);       // POP EAX
3023     emit_opcode(cbuf,0x5A);       // POP EDX
3024     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3025     emit_d8    (cbuf,0xFA);       // ModRM: /7, EDX
3026     emit_d32   (cbuf,0x80000000); //         0x80000000
3027     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3028     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3029     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3030     emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3031     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3032     emit_d8    (cbuf,0x07);       // Size of slow_call
3033     // Push src onto stack slow-path
3034     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3035     emit_d8    (cbuf,0xC0-1+$src$$reg );
3036     // CALL directly to the runtime
3037     cbuf.set_insts_mark();
3038     emit_opcode(cbuf,0xE8);       // Call into runtime
3039     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3040     // Carry on here...
3041   %}
3042 
3043   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3044     // Operand was loaded from memory into fp ST (stack top)
3045     // FMUL   ST,$src  /* D8 C8+i */
3046     emit_opcode(cbuf, 0xD8);
3047     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3048   %}
3049 
3050   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3051     // FADD   ST,src2  /* D8 C0+i */
3052     emit_opcode(cbuf, 0xD8);
3053     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3054     // Could use FADDP  src2,fpST  /* DE C0+i */
3055   %}
3056 
3057   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3058     // FADDP  src2,ST  /* DE C0+i */
3059     emit_opcode(cbuf, 0xDE);
3060     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3061   %}
3062 
3063   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3064     // Operand has been loaded into fp ST (stack top)
3065     // FSUB   ST,$src1
3066     emit_opcode(cbuf, 0xD8);
3067     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3068 
3069     // FDIV   ST,$src2
3070     emit_opcode(cbuf, 0xD8);
3071     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3072   %}
3073 
3074   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3075     // Operand was loaded from memory into fp ST (stack top)
3076     // FADD   ST,$src  /* D8 C0+i */
3077     emit_opcode(cbuf, 0xD8);
3078     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3079 
3080     // FMUL   ST,src2  /* D8 C8+i */
3081     emit_opcode(cbuf, 0xD8);
3082     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3083   %}
3084 
3085 
3086   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3087     // Operand was loaded from memory into fp ST (stack top)
3088     // FADD   ST,$src  /* D8 C0+i */
3089     emit_opcode(cbuf, 0xD8);
3090     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3091 
3092     // FMULP  src2,ST  /* DE C8+i */
3093     emit_opcode(cbuf, 0xDE);
3094     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3095   %}
3096 
3097   // Atomically load the volatile long
3098   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3099     emit_opcode(cbuf,0xDF);
3100     int rm_byte_opcode = 0x05;
3101     int base     = $mem$$base;
3102     int index    = $mem$$index;
3103     int scale    = $mem$$scale;
3104     int displace = $mem$$disp;
3105     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3106     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3107     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3108   %}
3109 
3110   // Volatile Store Long.  Must be atomic, so move it into
3111   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3112   // target address before the store (for null-ptr checks)
3113   // so the memory operand is used twice in the encoding.
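       // Roughly, the sequence emitted below is (illustrative, not literal output):
       //   FILD  QWORD PTR [ESP + src_disp]   ; pull the long onto the FPU stack
       //   FISTP QWORD PTR [mem]              ; store all 64 bits in one access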
3114   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3115     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3116     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3117     emit_opcode(cbuf,0xDF);
3118     int rm_byte_opcode = 0x07;
3119     int base     = $mem$$base;
3120     int index    = $mem$$index;
3121     int scale    = $mem$$scale;
3122     int displace = $mem$$disp;
3123     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3124     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3125   %}
3126 
3127   // Safepoint Poll.  This polls the safepoint page, and causes an
3128   // exception if it is not readable. Unfortunately, it kills the condition code
3129   // in the process.
3130   // We currently use TESTL [spp],EDI
3131   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
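       // Decoded, the bytes emitted below amount to roughly:
       //   TEST DWORD PTR [polling_page], EDI   ; 0x85 with ModRM 0x3D (mod=00, reg=EDI, rm=disp32)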
3132 
3133   enc_class Safepoint_Poll() %{
3134     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3135     emit_opcode(cbuf,0x85);
3136     emit_rm (cbuf, 0x0, 0x7, 0x5);
3137     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3138   %}
3139 %}
3140 
3141 
3142 //----------FRAME--------------------------------------------------------------
3143 // Definition of frame structure and management information.
3144 //
3145 //  S T A C K   L A Y O U T    Allocators stack-slot number
3146 //                             |   (to get allocators register number
3147 //  G  Owned by    |        |  v    add OptoReg::stack0())
3148 //  r   CALLER     |        |
3149 //  o     |        +--------+      pad to even-align allocators stack-slot
3150 //  w     V        |  pad0  |        numbers; owned by CALLER
3151 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3152 //  h     ^        |   in   |  5
3153 //        |        |  args  |  4   Holes in incoming args owned by SELF
3154 //  |     |        |        |  3
3155 //  |     |        +--------+
3156 //  V     |        | old out|      Empty on Intel, window on Sparc
3157 //        |    old |preserve|      Must be even aligned.
3158 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3159 //        |        |   in   |  3   area for Intel ret address
3160 //     Owned by    |preserve|      Empty on Sparc.
3161 //       SELF      +--------+
3162 //        |        |  pad2  |  2   pad to align old SP
3163 //        |        +--------+  1
3164 //        |        | locks  |  0
3165 //        |        +--------+----> OptoReg::stack0(), even aligned
3166 //        |        |  pad1  | 11   pad to align new SP
3167 //        |        +--------+
3168 //        |        |        | 10
3169 //        |        | spills |  9   spills
3170 //        V        |        |  8   (pad0 slot for callee)
3171 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3172 //        ^        |  out   |  7
3173 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3174 //     Owned by    +--------+
3175 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3176 //        |    new |preserve|      Must be even-aligned.
3177 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3178 //        |        |        |
3179 //
3180 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3181 //         known from SELF's arguments and the Java calling convention.
3182 //         Region 6-7 is determined per call site.
3183 // Note 2: If the calling convention leaves holes in the incoming argument
3184 //         area, those holes are owned by SELF.  Holes in the outgoing area
3185 //         are owned by the CALLEE.  Holes should not be necessary in the
3186 //         incoming area, as the Java calling convention is completely under
3187 //         the control of the AD file.  Doubles can be sorted and packed to
3188 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3189 //         varargs C calling conventions.
3190 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3191 //         even aligned with pad0 as needed.
3192 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3193 //         region 6-11 is even aligned; it may be padded out more so that
3194 //         the region from SP to FP meets the minimum stack alignment.
3195 
3196 frame %{
3197   // What direction does stack grow in (assumed to be same for C & Java)
3198   stack_direction(TOWARDS_LOW);
3199 
3200   // These registers define part of the calling convention
3201   // between compiled code and the interpreter.
3202   inline_cache_reg(EAX);                // Inline Cache Register
3203   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3204 
3205   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3206   cisc_spilling_operand_name(indOffset32);
3207 
3208   // Number of stack slots consumed by locking an object
3209   sync_stack_slots(1);
3210 
3211   // Compiled code's Frame Pointer
3212   frame_pointer(ESP);
3213   // Interpreter stores its frame pointer in a register which is
3214   // stored to the stack by I2CAdaptors.
3215   // I2CAdaptors convert from interpreted Java to compiled Java.
3216   interpreter_frame_pointer(EBP);
3217 
3218   // Stack alignment requirement
3219   // Alignment size in bytes (128-bit -> 16 bytes)
3220   stack_alignment(StackAlignmentInBytes);
3221 
3222   // Number of stack slots between incoming argument block and the start of
3223   // a new frame.  The PROLOG must add this many slots to the stack.  The
3224   // EPILOG must remove this many slots.  Intel needs one slot for
3225   // return address and one for EBP (must save EBP)
3226   in_preserve_stack_slots(2+VerifyStackAtCalls);
3227 
3228   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3229   // for calls to C.  Supports the var-args backing area for register parms.
3230   varargs_C_out_slots_killed(0);
3231 
3232   // The after-PROLOG location of the return address.  Location of
3233   // return address specifies a type (REG or STACK) and a number
3234   // representing the register number (i.e., use a register name) or
3235   // stack slot.
3236   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3237   // Otherwise, it is above the locks and verification slot and alignment word
3238   return_addr(STACK - 1 +
3239               round_to((Compile::current()->in_preserve_stack_slots() +
3240                         Compile::current()->fixed_slots()),
3241                        stack_alignment_in_slots()));
3242 
3243   // Body of function which returns an integer array locating
3244   // arguments either in registers or in stack slots.  Passed an array
3245   // of ideal registers called "sig" and a "length" count.  Stack-slot
3246   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3247   // arguments for a CALLEE.  Incoming stack arguments are
3248   // automatically biased by the preserve_stack_slots field above.
3249   calling_convention %{
3250     // No difference between ingoing/outgoing, just pass false
3251     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3252   %}
3253 
3254 
3255   // Body of function which returns an integer array locating
3256   // arguments either in registers or in stack slots.  Passed an array
3257   // of ideal registers called "sig" and a "length" count.  Stack-slot
3258   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3259   // arguments for a CALLEE.  Incoming stack arguments are
3260   // automatically biased by the preserve_stack_slots field above.
3261   c_calling_convention %{
3262     // This is obviously always outgoing
3263     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3264   %}
3265 
3266   // Location of C & interpreter return values
3267   c_return_value %{
3268     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3269     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3270     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3271 
3272     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3273     // that C functions return float and double results in XMM0.
3274     if( ideal_reg == Op_RegD && UseSSE>=2 )
3275       return OptoRegPair(XMM0b_num,XMM0_num);
3276     if( ideal_reg == Op_RegF && UseSSE>=2 )
3277       return OptoRegPair(OptoReg::Bad,XMM0_num);
3278 
3279     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3280   %}
3281 
3282   // Location of return values
3283   return_value %{
3284     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3285     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3286     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3287     if( ideal_reg == Op_RegD && UseSSE>=2 )
3288       return OptoRegPair(XMM0b_num,XMM0_num);
3289     if( ideal_reg == Op_RegF && UseSSE>=1 )
3290       return OptoRegPair(OptoReg::Bad,XMM0_num);
3291     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3292   %}
3293 
3294 %}
3295 
3296 //----------ATTRIBUTES---------------------------------------------------------
3297 //----------Operand Attributes-------------------------------------------------
3298 op_attrib op_cost(0);        // Required cost attribute
3299 
3300 //----------Instruction Attributes---------------------------------------------
3301 ins_attrib ins_cost(100);       // Required cost attribute
3302 ins_attrib ins_size(8);         // Required size attribute (in bits)
3303 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3304                                 // non-matching short branch variant of some
3305                                 // long branch?
3306 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3307                                 // specifies the alignment that some part of the instruction (not
3308                                 // necessarily the start) requires.  If > 1, a compute_padding()
3309                                 // function must be provided for the instruction
3310 
3311 //----------OPERANDS-----------------------------------------------------------
3312 // Operand definitions must precede instruction definitions for correct parsing
3313 // in the ADLC because operands constitute user defined types which are used in
3314 // instruction definitions.
3315 
3316 //----------Simple Operands----------------------------------------------------
3317 // Immediate Operands
3318 // Integer Immediate
3319 operand immI() %{
3320   match(ConI);
3321 
3322   op_cost(10);
3323   format %{ %}
3324   interface(CONST_INTER);
3325 %}
3326 
3327 // Constant for test vs zero
3328 operand immI0() %{
3329   predicate(n->get_int() == 0);
3330   match(ConI);
3331 
3332   op_cost(0);
3333   format %{ %}
3334   interface(CONST_INTER);
3335 %}
3336 
3337 // Constant for increment
3338 operand immI1() %{
3339   predicate(n->get_int() == 1);
3340   match(ConI);
3341 
3342   op_cost(0);
3343   format %{ %}
3344   interface(CONST_INTER);
3345 %}
3346 
3347 // Constant for decrement
3348 operand immI_M1() %{
3349   predicate(n->get_int() == -1);
3350   match(ConI);
3351 
3352   op_cost(0);
3353   format %{ %}
3354   interface(CONST_INTER);
3355 %}
3356 
3357 // Valid scale values for addressing modes
3358 operand immI2() %{
3359   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3360   match(ConI);
3361 
3362   format %{ %}
3363   interface(CONST_INTER);
3364 %}
3365 
3366 operand immI8() %{
3367   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3368   match(ConI);
3369 
3370   op_cost(5);
3371   format %{ %}
3372   interface(CONST_INTER);
3373 %}
3374 
3375 operand immI16() %{
3376   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3377   match(ConI);
3378 
3379   op_cost(10);
3380   format %{ %}
3381   interface(CONST_INTER);
3382 %}
3383 
3384 // Int Immediate non-negative
3385 operand immU31()
3386 %{
3387   predicate(n->get_int() >= 0);
3388   match(ConI);
3389 
3390   op_cost(0);
3391   format %{ %}
3392   interface(CONST_INTER);
3393 %}
3394 
3395 // Constant for long shifts
3396 operand immI_32() %{
3397   predicate( n->get_int() == 32 );
3398   match(ConI);
3399 
3400   op_cost(0);
3401   format %{ %}
3402   interface(CONST_INTER);
3403 %}
3404 
3405 operand immI_1_31() %{
3406   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3407   match(ConI);
3408 
3409   op_cost(0);
3410   format %{ %}
3411   interface(CONST_INTER);
3412 %}
3413 
3414 operand immI_32_63() %{
3415   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3416   match(ConI);
3417   op_cost(0);
3418 
3419   format %{ %}
3420   interface(CONST_INTER);
3421 %}
3422 
3423 operand immI_1() %{
3424   predicate( n->get_int() == 1 );
3425   match(ConI);
3426 
3427   op_cost(0);
3428   format %{ %}
3429   interface(CONST_INTER);
3430 %}
3431 
3432 operand immI_2() %{
3433   predicate( n->get_int() == 2 );
3434   match(ConI);
3435 
3436   op_cost(0);
3437   format %{ %}
3438   interface(CONST_INTER);
3439 %}
3440 
3441 operand immI_3() %{
3442   predicate( n->get_int() == 3 );
3443   match(ConI);
3444 
3445   op_cost(0);
3446   format %{ %}
3447   interface(CONST_INTER);
3448 %}
3449 
3450 // Pointer Immediate
3451 operand immP() %{
3452   match(ConP);
3453 
3454   op_cost(10);
3455   format %{ %}
3456   interface(CONST_INTER);
3457 %}
3458 
3459 // NULL Pointer Immediate
3460 operand immP0() %{
3461   predicate( n->get_ptr() == 0 );
3462   match(ConP);
3463   op_cost(0);
3464 
3465   format %{ %}
3466   interface(CONST_INTER);
3467 %}
3468 
3469 // Long Immediate
3470 operand immL() %{
3471   match(ConL);
3472 
3473   op_cost(20);
3474   format %{ %}
3475   interface(CONST_INTER);
3476 %}
3477 
3478 // Long Immediate zero
3479 operand immL0() %{
3480   predicate( n->get_long() == 0L );
3481   match(ConL);
3482   op_cost(0);
3483 
3484   format %{ %}
3485   interface(CONST_INTER);
3486 %}
3487 
3488 // Long Immediate -1
3489 operand immL_M1() %{
3490   predicate( n->get_long() == -1L );
3491   match(ConL);
3492   op_cost(0);
3493 
3494   format %{ %}
3495   interface(CONST_INTER);
3496 %}
3497 
3498 // Long immediate from 0 to 127.
3499 // Used for a shorter form of long mul by 10.
3500 operand immL_127() %{
3501   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3502   match(ConL);
3503   op_cost(0);
3504 
3505   format %{ %}
3506   interface(CONST_INTER);
3507 %}
3508 
3509 // Long Immediate: low 32-bit mask
3510 operand immL_32bits() %{
3511   predicate(n->get_long() == 0xFFFFFFFFL);
3512   match(ConL);
3513   op_cost(0);
3514 
3515   format %{ %}
3516   interface(CONST_INTER);
3517 %}
3518 
3519 // Long Immediate: 32-bit signed value
3520 operand immL32() %{
3521   predicate(n->get_long() == (int)(n->get_long()));
3522   match(ConL);
3523   op_cost(20);
3524 
3525   format %{ %}
3526   interface(CONST_INTER);
3527 %}
3528 
3529 // Double Immediate zero
3530 operand immDPR0() %{
3531   // Do additional (and counter-intuitive) test against NaN to work around VC++
3532   // bug that generates code such that NaNs compare equal to 0.0
3533   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3534   match(ConD);
3535 
3536   op_cost(5);
3537   format %{ %}
3538   interface(CONST_INTER);
3539 %}
3540 
3541 // Double Immediate one
3542 operand immDPR1() %{
3543   predicate( UseSSE<=1 && n->getd() == 1.0 );
3544   match(ConD);
3545 
3546   op_cost(5);
3547   format %{ %}
3548   interface(CONST_INTER);
3549 %}
3550 
3551 // Double Immediate
3552 operand immDPR() %{
3553   predicate(UseSSE<=1);
3554   match(ConD);
3555 
3556   op_cost(5);
3557   format %{ %}
3558   interface(CONST_INTER);
3559 %}
3560 
3561 operand immD() %{
3562   predicate(UseSSE>=2);
3563   match(ConD);
3564 
3565   op_cost(5);
3566   format %{ %}
3567   interface(CONST_INTER);
3568 %}
3569 
3570 // Double Immediate zero
3571 operand immD0() %{
3572   // Do additional (and counter-intuitive) test against NaN to work around VC++
3573   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3574   // compare equal to -0.0.
3575   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3576   match(ConD);
3577 
3578   format %{ %}
3579   interface(CONST_INTER);
3580 %}
3581 
3582 // Float Immediate zero
3583 operand immFPR0() %{
3584   predicate(UseSSE == 0 && n->getf() == 0.0F);
3585   match(ConF);
3586 
3587   op_cost(5);
3588   format %{ %}
3589   interface(CONST_INTER);
3590 %}
3591 
3592 // Float Immediate one
3593 operand immFPR1() %{
3594   predicate(UseSSE == 0 && n->getf() == 1.0F);
3595   match(ConF);
3596 
3597   op_cost(5);
3598   format %{ %}
3599   interface(CONST_INTER);
3600 %}
3601 
3602 // Float Immediate
3603 operand immFPR() %{
3604   predicate( UseSSE == 0 );
3605   match(ConF);
3606 
3607   op_cost(5);
3608   format %{ %}
3609   interface(CONST_INTER);
3610 %}
3611 
3612 // Float Immediate
3613 operand immF() %{
3614   predicate(UseSSE >= 1);
3615   match(ConF);
3616 
3617   op_cost(5);
3618   format %{ %}
3619   interface(CONST_INTER);
3620 %}
3621 
3622 // Float Immediate zero.  Zero and not -0.0
3623 operand immF0() %{
3624   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3625   match(ConF);
3626 
3627   op_cost(5);
3628   format %{ %}
3629   interface(CONST_INTER);
3630 %}
3631 
3632 // Immediates for special shifts (sign extend)
3633 
3634 // Shift counts used for sign extension
3635 operand immI_16() %{
3636   predicate( n->get_int() == 16 );
3637   match(ConI);
3638 
3639   format %{ %}
3640   interface(CONST_INTER);
3641 %}
3642 
3643 operand immI_24() %{
3644   predicate( n->get_int() == 24 );
3645   match(ConI);
3646 
3647   format %{ %}
3648   interface(CONST_INTER);
3649 %}
3650 
3651 // Constant for byte-wide masking
3652 operand immI_255() %{
3653   predicate( n->get_int() == 255 );
3654   match(ConI);
3655 
3656   format %{ %}
3657   interface(CONST_INTER);
3658 %}
3659 
3660 // Constant for short-wide masking
3661 operand immI_65535() %{
3662   predicate(n->get_int() == 65535);
3663   match(ConI);
3664 
3665   format %{ %}
3666   interface(CONST_INTER);
3667 %}
3668 
3669 // Register Operands
3670 // Integer Register
3671 operand rRegI() %{
3672   constraint(ALLOC_IN_RC(int_reg));
3673   match(RegI);
3674   match(xRegI);
3675   match(eAXRegI);
3676   match(eBXRegI);
3677   match(eCXRegI);
3678   match(eDXRegI);
3679   match(eDIRegI);
3680   match(eSIRegI);
3681 
3682   format %{ %}
3683   interface(REG_INTER);
3684 %}
3685 
3686 // Subset of Integer Register
3687 operand xRegI(rRegI reg) %{
3688   constraint(ALLOC_IN_RC(int_x_reg));
3689   match(reg);
3690   match(eAXRegI);
3691   match(eBXRegI);
3692   match(eCXRegI);
3693   match(eDXRegI);
3694 
3695   format %{ %}
3696   interface(REG_INTER);
3697 %}
3698 
3699 // Special Registers
3700 operand eAXRegI(xRegI reg) %{
3701   constraint(ALLOC_IN_RC(eax_reg));
3702   match(reg);
3703   match(rRegI);
3704 
3705   format %{ "EAX" %}
3706   interface(REG_INTER);
3707 %}
3708 
3709 // Special Registers
3710 operand eBXRegI(xRegI reg) %{
3711   constraint(ALLOC_IN_RC(ebx_reg));
3712   match(reg);
3713   match(rRegI);
3714 
3715   format %{ "EBX" %}
3716   interface(REG_INTER);
3717 %}
3718 
3719 operand eCXRegI(xRegI reg) %{
3720   constraint(ALLOC_IN_RC(ecx_reg));
3721   match(reg);
3722   match(rRegI);
3723 
3724   format %{ "ECX" %}
3725   interface(REG_INTER);
3726 %}
3727 
3728 operand eDXRegI(xRegI reg) %{
3729   constraint(ALLOC_IN_RC(edx_reg));
3730   match(reg);
3731   match(rRegI);
3732 
3733   format %{ "EDX" %}
3734   interface(REG_INTER);
3735 %}
3736 
3737 operand eDIRegI(xRegI reg) %{
3738   constraint(ALLOC_IN_RC(edi_reg));
3739   match(reg);
3740   match(rRegI);
3741 
3742   format %{ "EDI" %}
3743   interface(REG_INTER);
3744 %}
3745 
3746 operand naxRegI() %{
3747   constraint(ALLOC_IN_RC(nax_reg));
3748   match(RegI);
3749   match(eCXRegI);
3750   match(eDXRegI);
3751   match(eSIRegI);
3752   match(eDIRegI);
3753 
3754   format %{ %}
3755   interface(REG_INTER);
3756 %}
3757 
3758 operand nadxRegI() %{
3759   constraint(ALLOC_IN_RC(nadx_reg));
3760   match(RegI);
3761   match(eBXRegI);
3762   match(eCXRegI);
3763   match(eSIRegI);
3764   match(eDIRegI);
3765 
3766   format %{ %}
3767   interface(REG_INTER);
3768 %}
3769 
3770 operand ncxRegI() %{
3771   constraint(ALLOC_IN_RC(ncx_reg));
3772   match(RegI);
3773   match(eAXRegI);
3774   match(eDXRegI);
3775   match(eSIRegI);
3776   match(eDIRegI);
3777 
3778   format %{ %}
3779   interface(REG_INTER);
3780 %}
3781 
3782 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3784 operand eSIRegI(xRegI reg) %{
3785    constraint(ALLOC_IN_RC(esi_reg));
3786    match(reg);
3787    match(rRegI);
3788 
3789    format %{ "ESI" %}
3790    interface(REG_INTER);
3791 %}
3792 
3793 // Pointer Register
3794 operand anyRegP() %{
3795   constraint(ALLOC_IN_RC(any_reg));
3796   match(RegP);
3797   match(eAXRegP);
3798   match(eBXRegP);
3799   match(eCXRegP);
3800   match(eDIRegP);
3801   match(eRegP);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 operand eRegP() %{
3808   constraint(ALLOC_IN_RC(int_reg));
3809   match(RegP);
3810   match(eAXRegP);
3811   match(eBXRegP);
3812   match(eCXRegP);
3813   match(eDIRegP);
3814 
3815   format %{ %}
3816   interface(REG_INTER);
3817 %}
3818 
3819 // On Windows 95, EBP is not safe to use for implicit null tests.
3820 operand eRegP_no_EBP() %{
3821   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3822   match(RegP);
3823   match(eAXRegP);
3824   match(eBXRegP);
3825   match(eCXRegP);
3826   match(eDIRegP);
3827 
3828   op_cost(100);
3829   format %{ %}
3830   interface(REG_INTER);
3831 %}
3832 
3833 operand naxRegP() %{
3834   constraint(ALLOC_IN_RC(nax_reg));
3835   match(RegP);
3836   match(eBXRegP);
3837   match(eDXRegP);
3838   match(eCXRegP);
3839   match(eSIRegP);
3840   match(eDIRegP);
3841 
3842   format %{ %}
3843   interface(REG_INTER);
3844 %}
3845 
3846 operand nabxRegP() %{
3847   constraint(ALLOC_IN_RC(nabx_reg));
3848   match(RegP);
3849   match(eCXRegP);
3850   match(eDXRegP);
3851   match(eSIRegP);
3852   match(eDIRegP);
3853 
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand pRegP() %{
3859   constraint(ALLOC_IN_RC(p_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eSIRegP);
3864   match(eDIRegP);
3865 
3866   format %{ %}
3867   interface(REG_INTER);
3868 %}
3869 
3870 // Special Registers
3871 // Return a pointer value
3872 operand eAXRegP(eRegP reg) %{
3873   constraint(ALLOC_IN_RC(eax_reg));
3874   match(reg);
3875   format %{ "EAX" %}
3876   interface(REG_INTER);
3877 %}
3878 
3879 // Used in AtomicAdd
3880 operand eBXRegP(eRegP reg) %{
3881   constraint(ALLOC_IN_RC(ebx_reg));
3882   match(reg);
3883   format %{ "EBX" %}
3884   interface(REG_INTER);
3885 %}
3886 
3887 // Tail-call (interprocedural jump) to interpreter
3888 operand eCXRegP(eRegP reg) %{
3889   constraint(ALLOC_IN_RC(ecx_reg));
3890   match(reg);
3891   format %{ "ECX" %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 operand eSIRegP(eRegP reg) %{
3896   constraint(ALLOC_IN_RC(esi_reg));
3897   match(reg);
3898   format %{ "ESI" %}
3899   interface(REG_INTER);
3900 %}
3901 
3902 // Used in rep stosw
3903 operand eDIRegP(eRegP reg) %{
3904   constraint(ALLOC_IN_RC(edi_reg));
3905   match(reg);
3906   format %{ "EDI" %}
3907   interface(REG_INTER);
3908 %}
3909 
3910 operand eRegL() %{
3911   constraint(ALLOC_IN_RC(long_reg));
3912   match(RegL);
3913   match(eADXRegL);
3914 
3915   format %{ %}
3916   interface(REG_INTER);
3917 %}
3918 
3919 operand eADXRegL( eRegL reg ) %{
3920   constraint(ALLOC_IN_RC(eadx_reg));
3921   match(reg);
3922 
3923   format %{ "EDX:EAX" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 operand eBCXRegL( eRegL reg ) %{
3928   constraint(ALLOC_IN_RC(ebcx_reg));
3929   match(reg);
3930 
3931   format %{ "EBX:ECX" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 // Special case for integer high multiply
3936 operand eADXRegL_low_only() %{
3937   constraint(ALLOC_IN_RC(eadx_reg));
3938   match(RegL);
3939 
3940   format %{ "EAX" %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 // Flags register, used as output of compare instructions
3945 operand eFlagsReg() %{
3946   constraint(ALLOC_IN_RC(int_flags));
3947   match(RegFlags);
3948 
3949   format %{ "EFLAGS" %}
3950   interface(REG_INTER);
3951 %}
3952 
3953 // Flags register, used as output of FLOATING POINT compare instructions
3954 operand eFlagsRegU() %{
3955   constraint(ALLOC_IN_RC(int_flags));
3956   match(RegFlags);
3957 
3958   format %{ "EFLAGS_U" %}
3959   interface(REG_INTER);
3960 %}
3961 
3962 operand eFlagsRegUCF() %{
3963   constraint(ALLOC_IN_RC(int_flags));
3964   match(RegFlags);
3965   predicate(false);
3966 
3967   format %{ "EFLAGS_U_CF" %}
3968   interface(REG_INTER);
3969 %}
3970 
3971 // Condition Code Register used by long compare
3972 operand flagsReg_long_LTGE() %{
3973   constraint(ALLOC_IN_RC(int_flags));
3974   match(RegFlags);
3975   format %{ "FLAGS_LTGE" %}
3976   interface(REG_INTER);
3977 %}
3978 operand flagsReg_long_EQNE() %{
3979   constraint(ALLOC_IN_RC(int_flags));
3980   match(RegFlags);
3981   format %{ "FLAGS_EQNE" %}
3982   interface(REG_INTER);
3983 %}
3984 operand flagsReg_long_LEGT() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987   format %{ "FLAGS_LEGT" %}
3988   interface(REG_INTER);
3989 %}
3990 
3991 // Float register operands
3992 operand regDPR() %{
3993   predicate( UseSSE < 2 );
3994   constraint(ALLOC_IN_RC(fp_dbl_reg));
3995   match(RegD);
3996   match(regDPR1);
3997   match(regDPR2);
3998   format %{ %}
3999   interface(REG_INTER);
4000 %}
4001 
4002 operand regDPR1(regDPR reg) %{
4003   predicate( UseSSE < 2 );
4004   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4005   match(reg);
4006   format %{ "FPR1" %}
4007   interface(REG_INTER);
4008 %}
4009 
4010 operand regDPR2(regDPR reg) %{
4011   predicate( UseSSE < 2 );
4012   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4013   match(reg);
4014   format %{ "FPR2" %}
4015   interface(REG_INTER);
4016 %}
4017 
4018 operand regnotDPR1(regDPR reg) %{
4019   predicate( UseSSE < 2 );
4020   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4021   match(reg);
4022   format %{ %}
4023   interface(REG_INTER);
4024 %}
4025 
4026 // Float register operands
4027 operand regFPR() %{
4028   predicate( UseSSE < 2 );
4029   constraint(ALLOC_IN_RC(fp_flt_reg));
4030   match(RegF);
4031   match(regFPR1);
4032   format %{ %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Float register operands
4037 operand regFPR1(regFPR reg) %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_flt_reg0));
4040   match(reg);
4041   format %{ "FPR1" %}
4042   interface(REG_INTER);
4043 %}
4044 
4045 // XMM Float register operands
4046 operand regF() %{
4047   predicate( UseSSE>=1 );
4048   constraint(ALLOC_IN_RC(float_reg_legacy));
4049   match(RegF);
4050   format %{ %}
4051   interface(REG_INTER);
4052 %}
4053 
4054 // XMM Double register operands
4055 operand regD() %{
4056   predicate( UseSSE>=2 );
4057   constraint(ALLOC_IN_RC(double_reg_legacy));
4058   match(RegD);
4059   format %{ %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 // Vectors: note that we use legacy registers to avoid extra (unneeded in the 32-bit VM)
4064 // runtime code generation via reg_class_dynamic.
4065 operand vecS() %{
4066   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4067   match(VecS);
4068 
4069   format %{ %}
4070   interface(REG_INTER);
4071 %}
4072 
4073 operand vecD() %{
4074   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4075   match(VecD);
4076 
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 operand vecX() %{
4082   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4083   match(VecX);
4084 
4085   format %{ %}
4086   interface(REG_INTER);
4087 %}
4088 
4089 operand vecY() %{
4090   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4091   match(VecY);
4092 
4093   format %{ %}
4094   interface(REG_INTER);
4095 %}
4096 
4097 //----------Memory Operands----------------------------------------------------
4098 // Direct Memory Operand
4099 operand direct(immP addr) %{
4100   match(addr);
4101 
4102   format %{ "[$addr]" %}
4103   interface(MEMORY_INTER) %{
4104     base(0xFFFFFFFF);
4105     index(0x4);
4106     scale(0x0);
4107     disp($addr);
4108   %}
4109 %}
4110 
4111 // Indirect Memory Operand
4112 operand indirect(eRegP reg) %{
4113   constraint(ALLOC_IN_RC(int_reg));
4114   match(reg);
4115 
4116   format %{ "[$reg]" %}
4117   interface(MEMORY_INTER) %{
4118     base($reg);
4119     index(0x4);
4120     scale(0x0);
4121     disp(0x0);
4122   %}
4123 %}
4124 
4125 // Indirect Memory Plus Short Offset Operand
4126 operand indOffset8(eRegP reg, immI8 off) %{
4127   match(AddP reg off);
4128 
4129   format %{ "[$reg + $off]" %}
4130   interface(MEMORY_INTER) %{
4131     base($reg);
4132     index(0x4);
4133     scale(0x0);
4134     disp($off);
4135   %}
4136 %}
4137 
4138 // Indirect Memory Plus Long Offset Operand
4139 operand indOffset32(eRegP reg, immI off) %{
4140   match(AddP reg off);
4141 
4142   format %{ "[$reg + $off]" %}
4143   interface(MEMORY_INTER) %{
4144     base($reg);
4145     index(0x4);
4146     scale(0x0);
4147     disp($off);
4148   %}
4149 %}
4150 
4151 // Indirect Memory Plus Long Offset Operand
4152 operand indOffset32X(rRegI reg, immP off) %{
4153   match(AddP off reg);
4154 
4155   format %{ "[$reg + $off]" %}
4156   interface(MEMORY_INTER) %{
4157     base($reg);
4158     index(0x4);
4159     scale(0x0);
4160     disp($off);
4161   %}
4162 %}
4163 
4164 // Indirect Memory Plus Index Register Plus Offset Operand
4165 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4166   match(AddP (AddP reg ireg) off);
4167 
4168   op_cost(10);
4169   format %{"[$reg + $off + $ireg]" %}
4170   interface(MEMORY_INTER) %{
4171     base($reg);
4172     index($ireg);
4173     scale(0x0);
4174     disp($off);
4175   %}
4176 %}
4177 
4178 // Indirect Memory Plus Index Register Plus Offset Operand
4179 operand indIndex(eRegP reg, rRegI ireg) %{
4180   match(AddP reg ireg);
4181 
4182   op_cost(10);
4183   format %{"[$reg + $ireg]" %}
4184   interface(MEMORY_INTER) %{
4185     base($reg);
4186     index($ireg);
4187     scale(0x0);
4188     disp(0x0);
4189   %}
4190 %}
4191 
4192 // // -------------------------------------------------------------------------
4193 // // 486 architecture doesn't support "scale * index + offset" without a base
4194 // // -------------------------------------------------------------------------
4195 // // Scaled Memory Operands
4196 // // Indirect Memory Times Scale Plus Offset Operand
4197 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4198 //   match(AddP off (LShiftI ireg scale));
4199 //
4200 //   op_cost(10);
4201 //   format %{"[$off + $ireg << $scale]" %}
4202 //   interface(MEMORY_INTER) %{
4203 //     base(0x4);
4204 //     index($ireg);
4205 //     scale($scale);
4206 //     disp($off);
4207 //   %}
4208 // %}
4209 
4210 // Indirect Memory Times Scale Plus Index Register
4211 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4212   match(AddP reg (LShiftI ireg scale));
4213 
4214   op_cost(10);
4215   format %{"[$reg + $ireg << $scale]" %}
4216   interface(MEMORY_INTER) %{
4217     base($reg);
4218     index($ireg);
4219     scale($scale);
4220     disp(0x0);
4221   %}
4222 %}
4223 
4224 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4225 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4226   match(AddP (AddP reg (LShiftI ireg scale)) off);
4227 
4228   op_cost(10);
4229   format %{"[$reg + $off + $ireg << $scale]" %}
4230   interface(MEMORY_INTER) %{
4231     base($reg);
4232     index($ireg);
4233     scale($scale);
4234     disp($off);
4235   %}
4236 %}
4237 
4238 //----------Load Long Memory Operands------------------------------------------
4239 // The load-long idiom will use its address expression again after loading
4240 // the first word of the long.  If the load-long destination overlaps with
4241 // registers used in the addressing expression, the 2nd half will be loaded
4242 // from a clobbered address.  Fix this by requiring that load-long use
4243 // address registers that do not overlap with the load-long target.
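     // Illustrative example of the hazard (register choice is hypothetical):
     //   MOV EAX, [EAX]       ; first half of the long clobbers the base register
     //   MOV EDX, [EAX + 4]   ; second half now loads through a garbage address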
4244 
4245 // load-long support
4246 operand load_long_RegP() %{
4247   constraint(ALLOC_IN_RC(esi_reg));
4248   match(RegP);
4249   match(eSIRegP);
4250   op_cost(100);
4251   format %{  %}
4252   interface(REG_INTER);
4253 %}
4254 
4255 // Indirect Memory Operand Long
4256 operand load_long_indirect(load_long_RegP reg) %{
4257   constraint(ALLOC_IN_RC(esi_reg));
4258   match(reg);
4259 
4260   format %{ "[$reg]" %}
4261   interface(MEMORY_INTER) %{
4262     base($reg);
4263     index(0x4);
4264     scale(0x0);
4265     disp(0x0);
4266   %}
4267 %}
4268 
4269 // Indirect Memory Plus Long Offset Operand
4270 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4271   match(AddP reg off);
4272 
4273   format %{ "[$reg + $off]" %}
4274   interface(MEMORY_INTER) %{
4275     base($reg);
4276     index(0x4);
4277     scale(0x0);
4278     disp($off);
4279   %}
4280 %}
4281 
4282 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4283 
4284 
4285 //----------Special Memory Operands--------------------------------------------
4286 // Stack Slot Operand - This operand is used for loading and storing temporary
4287 //                      values on the stack where a match requires a value to
4288 //                      flow through memory.
4289 operand stackSlotP(sRegP reg) %{
4290   constraint(ALLOC_IN_RC(stack_slots));
4291   // No match rule because this operand is only generated in matching
4292   format %{ "[$reg]" %}
4293   interface(MEMORY_INTER) %{
4294     base(0x4);   // ESP
4295     index(0x4);  // No Index
4296     scale(0x0);  // No Scale
4297     disp($reg);  // Stack Offset
4298   %}
4299 %}
4300 
4301 operand stackSlotI(sRegI reg) %{
4302   constraint(ALLOC_IN_RC(stack_slots));
4303   // No match rule because this operand is only generated in matching
4304   format %{ "[$reg]" %}
4305   interface(MEMORY_INTER) %{
4306     base(0x4);   // ESP
4307     index(0x4);  // No Index
4308     scale(0x0);  // No Scale
4309     disp($reg);  // Stack Offset
4310   %}
4311 %}
4312 
4313 operand stackSlotF(sRegF reg) %{
4314   constraint(ALLOC_IN_RC(stack_slots));
4315   // No match rule because this operand is only generated in matching
4316   format %{ "[$reg]" %}
4317   interface(MEMORY_INTER) %{
4318     base(0x4);   // ESP
4319     index(0x4);  // No Index
4320     scale(0x0);  // No Scale
4321     disp($reg);  // Stack Offset
4322   %}
4323 %}
4324 
4325 operand stackSlotD(sRegD reg) %{
4326   constraint(ALLOC_IN_RC(stack_slots));
4327   // No match rule because this operand is only generated in matching
4328   format %{ "[$reg]" %}
4329   interface(MEMORY_INTER) %{
4330     base(0x4);   // ESP
4331     index(0x4);  // No Index
4332     scale(0x0);  // No Scale
4333     disp($reg);  // Stack Offset
4334   %}
4335 %}
4336 
4337 operand stackSlotL(sRegL reg) %{
4338   constraint(ALLOC_IN_RC(stack_slots));
4339   // No match rule because this operand is only generated in matching
4340   format %{ "[$reg]" %}
4341   interface(MEMORY_INTER) %{
4342     base(0x4);   // ESP
4343     index(0x4);  // No Index
4344     scale(0x0);  // No Scale
4345     disp($reg);  // Stack Offset
4346   %}
4347 %}
4348 
4349 //----------Memory Operands - Win95 Implicit Null Variants----------------
4350 // Indirect Memory Operand
4351 operand indirect_win95_safe(eRegP_no_EBP reg)
4352 %{
4353   constraint(ALLOC_IN_RC(int_reg));
4354   match(reg);
4355 
4356   op_cost(100);
4357   format %{ "[$reg]" %}
4358   interface(MEMORY_INTER) %{
4359     base($reg);
4360     index(0x4);
4361     scale(0x0);
4362     disp(0x0);
4363   %}
4364 %}
4365 
4366 // Indirect Memory Plus Short Offset Operand
4367 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4368 %{
4369   match(AddP reg off);
4370 
4371   op_cost(100);
4372   format %{ "[$reg + $off]" %}
4373   interface(MEMORY_INTER) %{
4374     base($reg);
4375     index(0x4);
4376     scale(0x0);
4377     disp($off);
4378   %}
4379 %}
4380 
4381 // Indirect Memory Plus Long Offset Operand
4382 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4383 %{
4384   match(AddP reg off);
4385 
4386   op_cost(100);
4387   format %{ "[$reg + $off]" %}
4388   interface(MEMORY_INTER) %{
4389     base($reg);
4390     index(0x4);
4391     scale(0x0);
4392     disp($off);
4393   %}
4394 %}
4395 
4396 // Indirect Memory Plus Index Register Plus Offset Operand
4397 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4398 %{
4399   match(AddP (AddP reg ireg) off);
4400 
4401   op_cost(100);
4402   format %{"[$reg + $off + $ireg]" %}
4403   interface(MEMORY_INTER) %{
4404     base($reg);
4405     index($ireg);
4406     scale(0x0);
4407     disp($off);
4408   %}
4409 %}
4410 
4411 // Indirect Memory Times Scale Plus Index Register
4412 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4413 %{
4414   match(AddP reg (LShiftI ireg scale));
4415 
4416   op_cost(100);
4417   format %{"[$reg + $ireg << $scale]" %}
4418   interface(MEMORY_INTER) %{
4419     base($reg);
4420     index($ireg);
4421     scale($scale);
4422     disp(0x0);
4423   %}
4424 %}
4425 
4426 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4427 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4428 %{
4429   match(AddP (AddP reg (LShiftI ireg scale)) off);
4430 
4431   op_cost(100);
4432   format %{"[$reg + $off + $ireg << $scale]" %}
4433   interface(MEMORY_INTER) %{
4434     base($reg);
4435     index($ireg);
4436     scale($scale);
4437     disp($off);
4438   %}
4439 %}
4440 
4441 //----------Conditional Branch Operands----------------------------------------
4442 // Comparison Op  - This is the operation of the comparison, and is limited to
4443 //                  the following set of codes:
4444 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4445 //
4446 // Other attributes of the comparison, such as unsignedness, are specified
4447 // by the comparison instruction that sets a condition code flags register.
4448 // That result is represented by a flags operand whose subtype is appropriate
4449 // to the unsignedness (etc.) of the comparison.
4450 //
4451 // Later, the instruction which matches both the Comparison Op (a Bool) and
4452 // the flags (produced by the Cmp) specifies the coding of the comparison op
4453 // by matching a specific subtype of Bool operand below, such as cmpOpU.
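     // As a hypothetical example: for an ideal subtree  If (Bool (CmpI x y) lt),
     // the CmpI is matched into a flag-setting compare, and the Bool is matched
     // against a cmpOp operand below, whose less() entry (0xC, "l") supplies the
     // condition code used by the conditional-branch instruction.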
4454 
4455 // Comparison Code
4456 operand cmpOp() %{
4457   match(Bool);
4458 
4459   format %{ "" %}
4460   interface(COND_INTER) %{
4461     equal(0x4, "e");
4462     not_equal(0x5, "ne");
4463     less(0xC, "l");
4464     greater_equal(0xD, "ge");
4465     less_equal(0xE, "le");
4466     greater(0xF, "g");
4467     overflow(0x0, "o");
4468     no_overflow(0x1, "no");
4469   %}
4470 %}
4471 
4472 // Comparison Code, unsigned compare.  Used by FP also, with
4473 // C2 (unordered) turned into GT or LT already.  The other bits
4474 // C0 and C3 are turned into Carry & Zero flags.
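     // (For reference: after FNSTSW AX / SAHF, C0 lands in CF, C2 in PF and C3 in ZF,
     //  which is why the unsigned condition codes below apply to FP compare results.)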
4475 operand cmpOpU() %{
4476   match(Bool);
4477 
4478   format %{ "" %}
4479   interface(COND_INTER) %{
4480     equal(0x4, "e");
4481     not_equal(0x5, "ne");
4482     less(0x2, "b");
4483     greater_equal(0x3, "nb");
4484     less_equal(0x6, "be");
4485     greater(0x7, "nbe");
4486     overflow(0x0, "o");
4487     no_overflow(0x1, "no");
4488   %}
4489 %}
4490 
4491 // Floating comparisons that don't require any fixup for the unordered case
4492 operand cmpOpUCF() %{
4493   match(Bool);
4494   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4495             n->as_Bool()->_test._test == BoolTest::ge ||
4496             n->as_Bool()->_test._test == BoolTest::le ||
4497             n->as_Bool()->_test._test == BoolTest::gt);
4498   format %{ "" %}
4499   interface(COND_INTER) %{
4500     equal(0x4, "e");
4501     not_equal(0x5, "ne");
4502     less(0x2, "b");
4503     greater_equal(0x3, "nb");
4504     less_equal(0x6, "be");
4505     greater(0x7, "nbe");
4506     overflow(0x0, "o");
4507     no_overflow(0x1, "no");
4508   %}
4509 %}
4510 
4511 
4512 // Floating comparisons that can be fixed up with extra conditional jumps
4513 operand cmpOpUCF2() %{
4514   match(Bool);
4515   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4516             n->as_Bool()->_test._test == BoolTest::eq);
4517   format %{ "" %}
4518   interface(COND_INTER) %{
4519     equal(0x4, "e");
4520     not_equal(0x5, "ne");
4521     less(0x2, "b");
4522     greater_equal(0x3, "nb");
4523     less_equal(0x6, "be");
4524     greater(0x7, "nbe");
4525     overflow(0x0, "o");
4526     no_overflow(0x1, "no");
4527   %}
4528 %}
4529 
4530 // Comparison Code for FP conditional move
4531 operand cmpOp_fcmov() %{
4532   match(Bool);
4533 
4534   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4535             n->as_Bool()->_test._test != BoolTest::no_overflow);
4536   format %{ "" %}
4537   interface(COND_INTER) %{
4538     equal        (0x0C8);
4539     not_equal    (0x1C8);
4540     less         (0x0C0);
4541     greater_equal(0x1C0);
4542     less_equal   (0x0D0);
4543     greater      (0x1D0);
4544     overflow(0x0, "o"); // not really supported by the instruction
4545     no_overflow(0x1, "no"); // not really supported by the instruction
4546   %}
4547 %}
4548 
4549 // Comparison Code used in long compares
4550 operand cmpOp_commute() %{
4551   match(Bool);
4552 
4553   format %{ "" %}
4554   interface(COND_INTER) %{
4555     equal(0x4, "e");
4556     not_equal(0x5, "ne");
4557     less(0xF, "g");
4558     greater_equal(0xE, "le");
4559     less_equal(0xD, "ge");
4560     greater(0xC, "l");
4561     overflow(0x0, "o");
4562     no_overflow(0x1, "no");
4563   %}
4564 %}
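
     // Illustrative note: cmpOp_commute appears to be intended for rules that
     // compare the operands in swapped order; when "a < b" is evaluated as
     // "b cmp a", the branch must test the opposite direction, so "less"
     // carries the "g" (0xF) encoding and "less_equal" carries "ge", while the
     // symmetric "equal"/"not_equal" encodings are unchanged.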
4565 
4566 //----------OPERAND CLASSES----------------------------------------------------
4567 // Operand Classes are groups of operands that are used to simplify
4568 // instruction definitions by not requiring the AD writer to specify separate
4569 // instructions for every form of operand when the instruction accepts
4570 // multiple operand types with the same basic encoding and format.  The classic
4571 // case of this is memory operands.
4572 
4573 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4574                indIndex, indIndexScale, indIndexScaleOffset);
4575 
4576 // Long memory operations are encoded as 2 instructions, the second using a +4 offset.
4577 // This means some kind of offset is always required, and you cannot use
4578 // an oop as the offset (as is done when working on static globals).
4579 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4580                     indIndex, indIndexScale, indIndexScaleOffset);
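
     // Usage sketch (illustrative): because "memory" above covers everything
     // from a bare [reg] up to [reg + idx << scale + disp], a single rule such
     // as loadI(rRegI dst, memory mem) further below matches all of those
     // addressing shapes instead of needing one instruct per operand form.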
4581 
4582 
4583 //----------PIPELINE-----------------------------------------------------------
4584 // Rules which define the behavior of the target architecture's pipeline.
4585 pipeline %{
4586 
4587 //----------ATTRIBUTES---------------------------------------------------------
4588 attributes %{
4589   variable_size_instructions;        // Variable size instructions
4590   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4591   instruction_unit_size = 1;         // An instruction unit is 1 byte
4592   instruction_fetch_unit_size = 16;  // The processor fetches one line
4593   instruction_fetch_units = 1;       // of 16 bytes
4594 
4595   // List of nop instructions
4596   nops( MachNop );
4597 %}
4598 
4599 //----------RESOURCES----------------------------------------------------------
4600 // Resources are the functional units available to the machine
4601 
4602 // Generic P2/P3 pipeline
4603 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4604 // 3 instructions decoded per cycle.
4605 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4606 // 2 ALU ops; only ALU0 handles mul/div instructions.
4607 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4608            MS0, MS1, MEM = MS0 | MS1,
4609            BR, FPU,
4610            ALU0, ALU1, ALU = ALU0 | ALU1 );
4611 
4612 //----------PIPELINE DESCRIPTION-----------------------------------------------
4613 // Pipeline Description specifies the stages in the machine's pipeline
4614 
4615 // Generic P2/P3 pipeline
4616 pipe_desc(S0, S1, S2, S3, S4, S5);
4617 
4618 //----------PIPELINE CLASSES---------------------------------------------------
4619 // Pipeline Classes describe the stages in which input and output are
4620 // referenced by the hardware pipeline.
4621 
4622 // Naming convention: ialu or fpu
4623 // Then: _reg
4624 // Then: _reg if there is a 2nd register
4625 // Then: _long if it's a pair of instructions implementing a long
4626 // Then: _fat if it requires the big decoder
4627 //   Or: _mem if it requires the big decoder and a memory unit.
4628 
4629 // Integer ALU reg operation
4630 pipe_class ialu_reg(rRegI dst) %{
4631     single_instruction;
4632     dst    : S4(write);
4633     dst    : S3(read);
4634     DECODE : S0;        // any decoder
4635     ALU    : S3;        // any alu
4636 %}
4637 
4638 // Long ALU reg operation
4639 pipe_class ialu_reg_long(eRegL dst) %{
4640     instruction_count(2);
4641     dst    : S4(write);
4642     dst    : S3(read);
4643     DECODE : S0(2);     // any 2 decoders
4644     ALU    : S3(2);     // both alus
4645 %}
4646 
4647 // Integer ALU reg operation using big decoder
4648 pipe_class ialu_reg_fat(rRegI dst) %{
4649     single_instruction;
4650     dst    : S4(write);
4651     dst    : S3(read);
4652     D0     : S0;        // big decoder only
4653     ALU    : S3;        // any alu
4654 %}
4655 
4656 // Long ALU reg operation using big decoder
4657 pipe_class ialu_reg_long_fat(eRegL dst) %{
4658     instruction_count(2);
4659     dst    : S4(write);
4660     dst    : S3(read);
4661     D0     : S0(2);     // big decoder only; twice
4662     ALU    : S3(2);     // any 2 alus
4663 %}
4664 
4665 // Integer ALU reg-reg operation
4666 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4667     single_instruction;
4668     dst    : S4(write);
4669     src    : S3(read);
4670     DECODE : S0;        // any decoder
4671     ALU    : S3;        // any alu
4672 %}
4673 
4674 // Long ALU reg-reg operation
4675 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4676     instruction_count(2);
4677     dst    : S4(write);
4678     src    : S3(read);
4679     DECODE : S0(2);     // any 2 decoders
4680     ALU    : S3(2);     // both alus
4681 %}
4682 
4683 // Integer ALU reg-reg operation
4684 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4685     single_instruction;
4686     dst    : S4(write);
4687     src    : S3(read);
4688     D0     : S0;        // big decoder only
4689     ALU    : S3;        // any alu
4690 %}
4691 
4692 // Long ALU reg-reg operation
4693 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4694     instruction_count(2);
4695     dst    : S4(write);
4696     src    : S3(read);
4697     D0     : S0(2);     // big decoder only; twice
4698     ALU    : S3(2);     // both alus
4699 %}
4700 
4701 // Integer ALU reg-mem operation
4702 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4703     single_instruction;
4704     dst    : S5(write);
4705     mem    : S3(read);
4706     D0     : S0;        // big decoder only
4707     ALU    : S4;        // any alu
4708     MEM    : S3;        // any mem
4709 %}
4710 
4711 // Long ALU reg-mem operation
4712 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4713     instruction_count(2);
4714     dst    : S5(write);
4715     mem    : S3(read);
4716     D0     : S0(2);     // big decoder only; twice
4717     ALU    : S4(2);     // any 2 alus
4718     MEM    : S3(2);     // both mems
4719 %}
4720 
4721 // Integer mem operation (prefetch)
4722 pipe_class ialu_mem(memory mem)
4723 %{
4724     single_instruction;
4725     mem    : S3(read);
4726     D0     : S0;        // big decoder only
4727     MEM    : S3;        // any mem
4728 %}
4729 
4730 // Integer Store to Memory
4731 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4732     single_instruction;
4733     mem    : S3(read);
4734     src    : S5(read);
4735     D0     : S0;        // big decoder only
4736     ALU    : S4;        // any alu
4737     MEM    : S3;
4738 %}
4739 
4740 // Long Store to Memory
4741 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4742     instruction_count(2);
4743     mem    : S3(read);
4744     src    : S5(read);
4745     D0     : S0(2);     // big decoder only; twice
4746     ALU    : S4(2);     // any 2 alus
4747     MEM    : S3(2);     // Both mems
4748 %}
4749 
4750 // Integer Store to Memory
4751 pipe_class ialu_mem_imm(memory mem) %{
4752     single_instruction;
4753     mem    : S3(read);
4754     D0     : S0;        // big decoder only
4755     ALU    : S4;        // any alu
4756     MEM    : S3;
4757 %}
4758 
4759 // Integer ALU0 reg-reg operation
4760 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4761     single_instruction;
4762     dst    : S4(write);
4763     src    : S3(read);
4764     D0     : S0;        // Big decoder only
4765     ALU0   : S3;        // only alu0
4766 %}
4767 
4768 // Integer ALU0 reg-mem operation
4769 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4770     single_instruction;
4771     dst    : S5(write);
4772     mem    : S3(read);
4773     D0     : S0;        // big decoder only
4774     ALU0   : S4;        // ALU0 only
4775     MEM    : S3;        // any mem
4776 %}
4777 
4778 // Integer ALU reg-reg operation
4779 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4780     single_instruction;
4781     cr     : S4(write);
4782     src1   : S3(read);
4783     src2   : S3(read);
4784     DECODE : S0;        // any decoder
4785     ALU    : S3;        // any alu
4786 %}
4787 
4788 // Integer ALU reg-imm operation
4789 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4790     single_instruction;
4791     cr     : S4(write);
4792     src1   : S3(read);
4793     DECODE : S0;        // any decoder
4794     ALU    : S3;        // any alu
4795 %}
4796 
4797 // Integer ALU reg-mem operation
4798 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4799     single_instruction;
4800     cr     : S4(write);
4801     src1   : S3(read);
4802     src2   : S3(read);
4803     D0     : S0;        // big decoder only
4804     ALU    : S4;        // any alu
4805     MEM    : S3;
4806 %}
4807 
4808 // Conditional move reg-reg
4809 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4810     instruction_count(4);
4811     y      : S4(read);
4812     q      : S3(read);
4813     p      : S3(read);
4814     DECODE : S0(4);     // any decoder
4815 %}
4816 
4817 // Conditional move reg-reg
4818 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4819     single_instruction;
4820     dst    : S4(write);
4821     src    : S3(read);
4822     cr     : S3(read);
4823     DECODE : S0;        // any decoder
4824 %}
4825 
4826 // Conditional move reg-mem
4827 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4828     single_instruction;
4829     dst    : S4(write);
4830     src    : S3(read);
4831     cr     : S3(read);
4832     DECODE : S0;        // any decoder
4833     MEM    : S3;
4834 %}
4835 
4836 // Conditional move reg-reg long
4837 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4838     single_instruction;
4839     dst    : S4(write);
4840     src    : S3(read);
4841     cr     : S3(read);
4842     DECODE : S0(2);     // any 2 decoders
4843 %}
4844 
4845 // Conditional move double reg-reg
4846 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4847     single_instruction;
4848     dst    : S4(write);
4849     src    : S3(read);
4850     cr     : S3(read);
4851     DECODE : S0;        // any decoder
4852 %}
4853 
4854 // Float reg-reg operation
4855 pipe_class fpu_reg(regDPR dst) %{
4856     instruction_count(2);
4857     dst    : S3(read);
4858     DECODE : S0(2);     // any 2 decoders
4859     FPU    : S3;
4860 %}
4861 
4862 // Float reg-reg operation
4863 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4864     instruction_count(2);
4865     dst    : S4(write);
4866     src    : S3(read);
4867     DECODE : S0(2);     // any 2 decoders
4868     FPU    : S3;
4869 %}
4870 
4871 // Float reg-reg operation
4872 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4873     instruction_count(3);
4874     dst    : S4(write);
4875     src1   : S3(read);
4876     src2   : S3(read);
4877     DECODE : S0(3);     // any 3 decoders
4878     FPU    : S3(2);
4879 %}
4880 
4881 // Float reg-reg operation
4882 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4883     instruction_count(4);
4884     dst    : S4(write);
4885     src1   : S3(read);
4886     src2   : S3(read);
4887     src3   : S3(read);
4888     DECODE : S0(4);     // any 4 decoders
4889     FPU    : S3(2);
4890 %}
4891 
4892 // Float reg-reg operation
4893 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4894     instruction_count(4);
4895     dst    : S4(write);
4896     src1   : S3(read);
4897     src2   : S3(read);
4898     src3   : S3(read);
4899     DECODE : S1(3);     // any 3 decoders
4900     D0     : S0;        // Big decoder only
4901     FPU    : S3(2);
4902     MEM    : S3;
4903 %}
4904 
4905 // Float reg-mem operation
4906 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4907     instruction_count(2);
4908     dst    : S5(write);
4909     mem    : S3(read);
4910     D0     : S0;        // big decoder only
4911     DECODE : S1;        // any decoder for FPU POP
4912     FPU    : S4;
4913     MEM    : S3;        // any mem
4914 %}
4915 
4916 // Float reg-mem operation
4917 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4918     instruction_count(3);
4919     dst    : S5(write);
4920     src1   : S3(read);
4921     mem    : S3(read);
4922     D0     : S0;        // big decoder only
4923     DECODE : S1(2);     // any decoder for FPU POP
4924     FPU    : S4;
4925     MEM    : S3;        // any mem
4926 %}
4927 
4928 // Float mem-reg operation
4929 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4930     instruction_count(2);
4931     src    : S5(read);
4932     mem    : S3(read);
4933     DECODE : S0;        // any decoder for FPU PUSH
4934     D0     : S1;        // big decoder only
4935     FPU    : S4;
4936     MEM    : S3;        // any mem
4937 %}
4938 
4939 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4940     instruction_count(3);
4941     src1   : S3(read);
4942     src2   : S3(read);
4943     mem    : S3(read);
4944     DECODE : S0(2);     // any decoder for FPU PUSH
4945     D0     : S1;        // big decoder only
4946     FPU    : S4;
4947     MEM    : S3;        // any mem
4948 %}
4949 
4950 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4951     instruction_count(3);
4952     src1   : S3(read);
4953     src2   : S3(read);
4954     mem    : S4(read);
4955     DECODE : S0;        // any decoder for FPU PUSH
4956     D0     : S0(2);     // big decoder only
4957     FPU    : S4;
4958     MEM    : S3(2);     // any mem
4959 %}
4960 
4961 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4962     instruction_count(2);
4963     src1   : S3(read);
4964     dst    : S4(read);
4965     D0     : S0(2);     // big decoder only
4966     MEM    : S3(2);     // any mem
4967 %}
4968 
4969 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4970     instruction_count(3);
4971     src1   : S3(read);
4972     src2   : S3(read);
4973     dst    : S4(read);
4974     D0     : S0(3);     // big decoder only
4975     FPU    : S4;
4976     MEM    : S3(3);     // any mem
4977 %}
4978 
4979 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4980     instruction_count(3);
4981     src1   : S4(read);
4982     mem    : S4(read);
4983     DECODE : S0;        // any decoder for FPU PUSH
4984     D0     : S0(2);     // big decoder only
4985     FPU    : S4;
4986     MEM    : S3(2);     // any mem
4987 %}
4988 
4989 // Float load constant
4990 pipe_class fpu_reg_con(regDPR dst) %{
4991     instruction_count(2);
4992     dst    : S5(write);
4993     D0     : S0;        // big decoder only for the load
4994     DECODE : S1;        // any decoder for FPU POP
4995     FPU    : S4;
4996     MEM    : S3;        // any mem
4997 %}
4998 
4999 // Float load constant
5000 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5001     instruction_count(3);
5002     dst    : S5(write);
5003     src    : S3(read);
5004     D0     : S0;        // big decoder only for the load
5005     DECODE : S1(2);     // any decoder for FPU POP
5006     FPU    : S4;
5007     MEM    : S3;        // any mem
5008 %}
5009 
5010 // Unconditional branch
5011 pipe_class pipe_jmp( label labl ) %{
5012     single_instruction;
5013     BR   : S3;
5014 %}
5015 
5016 // Conditional branch
5017 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5018     single_instruction;
5019     cr    : S1(read);
5020     BR    : S3;
5021 %}
5022 
5023 // Allocation idiom
5024 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5025     instruction_count(1); force_serialization;
5026     fixed_latency(6);
5027     heap_ptr : S3(read);
5028     DECODE   : S0(3);
5029     D0       : S2;
5030     MEM      : S3;
5031     ALU      : S3(2);
5032     dst      : S5(write);
5033     BR       : S5;
5034 %}
5035 
5036 // Generic big/slow expanded idiom
5037 pipe_class pipe_slow(  ) %{
5038     instruction_count(10); multiple_bundles; force_serialization;
5039     fixed_latency(100);
5040     D0  : S0(2);
5041     MEM : S3(2);
5042 %}
5043 
5044 // The real do-nothing guy
5045 pipe_class empty( ) %{
5046     instruction_count(0);
5047 %}
5048 
5049 // Define the class for the Nop node
5050 define %{
5051    MachNop = empty;
5052 %}
5053 
5054 %}
5055 
5056 //----------INSTRUCTIONS-------------------------------------------------------
5057 //
5058 // match      -- States which machine-independent subtree may be replaced
5059 //               by this instruction.
5060 // ins_cost   -- The estimated cost of this instruction is used by instruction
5061 //               selection to identify a minimum cost tree of machine
5062 //               instructions that matches a tree of machine-independent
5063 //               instructions.
5064 // format     -- A string providing the disassembly for this instruction.
5065 //               The value of an instruction's operand may be inserted
5066 //               by referring to it with a '$' prefix.
5067 // opcode     -- Three instruction opcodes may be provided.  These are referred
5068 //               to within an encode class as $primary, $secondary, and $tertiary
5069 //               respectively.  The primary opcode is commonly used to
5070 //               indicate the type of machine instruction, while secondary
5071 //               and tertiary are often used for prefix options or addressing
5072 //               modes.
5073 // ins_encode -- A list of encode classes with parameters. The encode class
5074 //               name must have been defined in an 'enc_class' specification
5075 //               in the encode section of the architecture description.
5076 
5077 //----------BSWAP-Instruction--------------------------------------------------
5078 instruct bytes_reverse_int(rRegI dst) %{
5079   match(Set dst (ReverseBytesI dst));
5080 
5081   format %{ "BSWAP  $dst" %}
5082   opcode(0x0F, 0xC8);
5083   ins_encode( OpcP, OpcSReg(dst) );
5084   ins_pipe( ialu_reg );
5085 %}
5086 
5087 instruct bytes_reverse_long(eRegL dst) %{
5088   match(Set dst (ReverseBytesL dst));
5089 
5090   format %{ "BSWAP  $dst.lo\n\t"
5091             "BSWAP  $dst.hi\n\t"
5092             "XCHG   $dst.lo $dst.hi" %}
5093 
5094   ins_cost(125);
5095   ins_encode( bswap_long_bytes(dst) );
5096   ins_pipe( ialu_reg_reg);
5097 %}
5098 
5099 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5100   match(Set dst (ReverseBytesUS dst));
5101   effect(KILL cr);
5102 
5103   format %{ "BSWAP  $dst\n\t"
5104             "SHR    $dst,16\n\t" %}
5105   ins_encode %{
5106     __ bswapl($dst$$Register);
5107     __ shrl($dst$$Register, 16);
5108   %}
5109   ins_pipe( ialu_reg );
5110 %}
5111 
5112 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5113   match(Set dst (ReverseBytesS dst));
5114   effect(KILL cr);
5115 
5116   format %{ "BSWAP  $dst\n\t"
5117             "SAR    $dst,16\n\t" %}
5118   ins_encode %{
5119     __ bswapl($dst$$Register);
5120     __ sarl($dst$$Register, 16);
5121   %}
5122   ins_pipe( ialu_reg );
5123 %}
5124 
5125 
5126 //---------- Zeros Count Instructions ------------------------------------------
5127 
5128 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5129   predicate(UseCountLeadingZerosInstruction);
5130   match(Set dst (CountLeadingZerosI src));
5131   effect(KILL cr);
5132 
5133   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5134   ins_encode %{
5135     __ lzcntl($dst$$Register, $src$$Register);
5136   %}
5137   ins_pipe(ialu_reg);
5138 %}
5139 
5140 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5141   predicate(!UseCountLeadingZerosInstruction);
5142   match(Set dst (CountLeadingZerosI src));
5143   effect(KILL cr);
5144 
5145   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5146             "JNZ    skip\n\t"
5147             "MOV    $dst, -1\n"
5148       "skip:\n\t"
5149             "NEG    $dst\n\t"
5150             "ADD    $dst, 31" %}
5151   ins_encode %{
5152     Register Rdst = $dst$$Register;
5153     Register Rsrc = $src$$Register;
5154     Label skip;
5155     __ bsrl(Rdst, Rsrc);
5156     __ jccb(Assembler::notZero, skip);
5157     __ movl(Rdst, -1);
5158     __ bind(skip);
5159     __ negl(Rdst);
5160     __ addl(Rdst, BitsPerInt - 1);
5161   %}
5162   ins_pipe(ialu_reg);
5163 %}
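
     // Worked example (illustrative): BSR writes the index p of the highest set
     // bit (0..31) and sets ZF only for a zero source, so NEG/ADD 31 yields
     // 31 - p -- e.g. src = 0x00010000 gives p = 16 and a result of 15 -- while
     // a zero source takes the MOV $dst,-1 path so that 31 - (-1) = 32 is
     // returned, matching Integer.numberOfLeadingZeros.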
5164 
5165 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5166   predicate(UseCountLeadingZerosInstruction);
5167   match(Set dst (CountLeadingZerosL src));
5168   effect(TEMP dst, KILL cr);
5169 
5170   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5171             "JNC    done\n\t"
5172             "LZCNT  $dst, $src.lo\n\t"
5173             "ADD    $dst, 32\n"
5174       "done:" %}
5175   ins_encode %{
5176     Register Rdst = $dst$$Register;
5177     Register Rsrc = $src$$Register;
5178     Label done;
5179     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5180     __ jccb(Assembler::carryClear, done);
5181     __ lzcntl(Rdst, Rsrc);
5182     __ addl(Rdst, BitsPerInt);
5183     __ bind(done);
5184   %}
5185   ins_pipe(ialu_reg);
5186 %}
5187 
5188 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5189   predicate(!UseCountLeadingZerosInstruction);
5190   match(Set dst (CountLeadingZerosL src));
5191   effect(TEMP dst, KILL cr);
5192 
5193   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5194             "JZ     msw_is_zero\n\t"
5195             "ADD    $dst, 32\n\t"
5196             "JMP    not_zero\n"
5197       "msw_is_zero:\n\t"
5198             "BSR    $dst, $src.lo\n\t"
5199             "JNZ    not_zero\n\t"
5200             "MOV    $dst, -1\n"
5201       "not_zero:\n\t"
5202             "NEG    $dst\n\t"
5203             "ADD    $dst, 63\n" %}
5204   ins_encode %{
5205     Register Rdst = $dst$$Register;
5206     Register Rsrc = $src$$Register;
5207     Label msw_is_zero;
5208     Label not_zero;
5209     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5210     __ jccb(Assembler::zero, msw_is_zero);
5211     __ addl(Rdst, BitsPerInt);
5212     __ jmpb(not_zero);
5213     __ bind(msw_is_zero);
5214     __ bsrl(Rdst, Rsrc);
5215     __ jccb(Assembler::notZero, not_zero);
5216     __ movl(Rdst, -1);
5217     __ bind(not_zero);
5218     __ negl(Rdst);
5219     __ addl(Rdst, BitsPerLong - 1);
5220   %}
5221   ins_pipe(ialu_reg);
5222 %}
5223 
5224 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5225   predicate(UseCountTrailingZerosInstruction);
5226   match(Set dst (CountTrailingZerosI src));
5227   effect(KILL cr);
5228 
5229   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5230   ins_encode %{
5231     __ tzcntl($dst$$Register, $src$$Register);
5232   %}
5233   ins_pipe(ialu_reg);
5234 %}
5235 
5236 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5237   predicate(!UseCountTrailingZerosInstruction);
5238   match(Set dst (CountTrailingZerosI src));
5239   effect(KILL cr);
5240 
5241   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5242             "JNZ    done\n\t"
5243             "MOV    $dst, 32\n"
5244       "done:" %}
5245   ins_encode %{
5246     Register Rdst = $dst$$Register;
5247     Label done;
5248     __ bsfl(Rdst, $src$$Register);
5249     __ jccb(Assembler::notZero, done);
5250     __ movl(Rdst, BitsPerInt);
5251     __ bind(done);
5252   %}
5253   ins_pipe(ialu_reg);
5254 %}
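
     // Worked example (illustrative): BSF returns the index of the lowest set
     // bit, which is already the trailing-zero count (e.g. src = 0x00010000
     // yields 16 directly), so only a zero source -- which sets ZF and leaves
     // $dst undefined -- needs the MOV $dst,32 fix-up above.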
5255 
5256 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5257   predicate(UseCountTrailingZerosInstruction);
5258   match(Set dst (CountTrailingZerosL src));
5259   effect(TEMP dst, KILL cr);
5260 
5261   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
5262             "JNC    done\n\t"
5263             "TZCNT  $dst, $src.hi\n\t"
5264             "ADD    $dst, 32\n"
5265       "done:" %}
5266   ins_encode %{
5267     Register Rdst = $dst$$Register;
5268     Register Rsrc = $src$$Register;
5269     Label done;
5270     __ tzcntl(Rdst, Rsrc);
5271     __ jccb(Assembler::carryClear, done);
5272     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5273     __ addl(Rdst, BitsPerInt);
5274     __ bind(done);
5275   %}
5276   ins_pipe(ialu_reg);
5277 %}
5278 
5279 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5280   predicate(!UseCountTrailingZerosInstruction);
5281   match(Set dst (CountTrailingZerosL src));
5282   effect(TEMP dst, KILL cr);
5283 
5284   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5285             "JNZ    done\n\t"
5286             "BSF    $dst, $src.hi\n\t"
5287             "JNZ    msw_not_zero\n\t"
5288             "MOV    $dst, 32\n"
5289       "msw_not_zero:\n\t"
5290             "ADD    $dst, 32\n"
5291       "done:" %}
5292   ins_encode %{
5293     Register Rdst = $dst$$Register;
5294     Register Rsrc = $src$$Register;
5295     Label msw_not_zero;
5296     Label done;
5297     __ bsfl(Rdst, Rsrc);
5298     __ jccb(Assembler::notZero, done);
5299     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5300     __ jccb(Assembler::notZero, msw_not_zero);
5301     __ movl(Rdst, BitsPerInt);
5302     __ bind(msw_not_zero);
5303     __ addl(Rdst, BitsPerInt);
5304     __ bind(done);
5305   %}
5306   ins_pipe(ialu_reg);
5307 %}
5308 
5309 
5310 //---------- Population Count Instructions -------------------------------------
5311 
5312 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5313   predicate(UsePopCountInstruction);
5314   match(Set dst (PopCountI src));
5315   effect(KILL cr);
5316 
5317   format %{ "POPCNT $dst, $src" %}
5318   ins_encode %{
5319     __ popcntl($dst$$Register, $src$$Register);
5320   %}
5321   ins_pipe(ialu_reg);
5322 %}
5323 
5324 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5325   predicate(UsePopCountInstruction);
5326   match(Set dst (PopCountI (LoadI mem)));
5327   effect(KILL cr);
5328 
5329   format %{ "POPCNT $dst, $mem" %}
5330   ins_encode %{
5331     __ popcntl($dst$$Register, $mem$$Address);
5332   %}
5333   ins_pipe(ialu_reg);
5334 %}
5335 
5336 // Note: Long.bitCount(long) returns an int.
5337 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5338   predicate(UsePopCountInstruction);
5339   match(Set dst (PopCountL src));
5340   effect(KILL cr, TEMP tmp, TEMP dst);
5341 
5342   format %{ "POPCNT $dst, $src.lo\n\t"
5343             "POPCNT $tmp, $src.hi\n\t"
5344             "ADD    $dst, $tmp" %}
5345   ins_encode %{
5346     __ popcntl($dst$$Register, $src$$Register);
5347     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5348     __ addl($dst$$Register, $tmp$$Register);
5349   %}
5350   ins_pipe(ialu_reg);
5351 %}
5352 
5353 // Note: Long.bitCount(long) returns an int.
5354 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5355   predicate(UsePopCountInstruction);
5356   match(Set dst (PopCountL (LoadL mem)));
5357   effect(KILL cr, TEMP tmp, TEMP dst);
5358 
5359   format %{ "POPCNT $dst, $mem\n\t"
5360             "POPCNT $tmp, $mem+4\n\t"
5361             "ADD    $dst, $tmp" %}
5362   ins_encode %{
5363     //__ popcntl($dst$$Register, $mem$$Address$$first);
5364     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5365     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5366     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5367     __ addl($dst$$Register, $tmp$$Register);
5368   %}
5369   ins_pipe(ialu_reg);
5370 %}
5371 
5372 
5373 //----------Load/Store/Move Instructions---------------------------------------
5374 //----------Load Instructions--------------------------------------------------
5375 // Load Byte (8bit signed)
5376 instruct loadB(xRegI dst, memory mem) %{
5377   match(Set dst (LoadB mem));
5378 
5379   ins_cost(125);
5380   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5381 
5382   ins_encode %{
5383     __ movsbl($dst$$Register, $mem$$Address);
5384   %}
5385 
5386   ins_pipe(ialu_reg_mem);
5387 %}
5388 
5389 // Load Byte (8bit signed) into Long Register
5390 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5391   match(Set dst (ConvI2L (LoadB mem)));
5392   effect(KILL cr);
5393 
5394   ins_cost(375);
5395   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5396             "MOV    $dst.hi,$dst.lo\n\t"
5397             "SAR    $dst.hi,7" %}
5398 
5399   ins_encode %{
5400     __ movsbl($dst$$Register, $mem$$Address);
5401     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5402     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5403   %}
5404 
5405   ins_pipe(ialu_reg_mem);
5406 %}
5407 
5408 // Load Unsigned Byte (8bit UNsigned)
5409 instruct loadUB(xRegI dst, memory mem) %{
5410   match(Set dst (LoadUB mem));
5411 
5412   ins_cost(125);
5413   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5414 
5415   ins_encode %{
5416     __ movzbl($dst$$Register, $mem$$Address);
5417   %}
5418 
5419   ins_pipe(ialu_reg_mem);
5420 %}
5421 
5422 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5423 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5424   match(Set dst (ConvI2L (LoadUB mem)));
5425   effect(KILL cr);
5426 
5427   ins_cost(250);
5428   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5429             "XOR    $dst.hi,$dst.hi" %}
5430 
5431   ins_encode %{
5432     Register Rdst = $dst$$Register;
5433     __ movzbl(Rdst, $mem$$Address);
5434     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5435   %}
5436 
5437   ins_pipe(ialu_reg_mem);
5438 %}
5439 
5440 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5441 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5442   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5443   effect(KILL cr);
5444 
5445   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5446             "XOR    $dst.hi,$dst.hi\n\t"
5447             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5448   ins_encode %{
5449     Register Rdst = $dst$$Register;
5450     __ movzbl(Rdst, $mem$$Address);
5451     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5452     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5453   %}
5454   ins_pipe(ialu_reg_mem);
5455 %}
5456 
5457 // Load Short (16bit signed)
5458 instruct loadS(rRegI dst, memory mem) %{
5459   match(Set dst (LoadS mem));
5460 
5461   ins_cost(125);
5462   format %{ "MOVSX  $dst,$mem\t# short" %}
5463 
5464   ins_encode %{
5465     __ movswl($dst$$Register, $mem$$Address);
5466   %}
5467 
5468   ins_pipe(ialu_reg_mem);
5469 %}
5470 
5471 // Load Short (16 bit signed) to Byte (8 bit signed)
5472 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5473   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5474 
5475   ins_cost(125);
5476   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5477   ins_encode %{
5478     __ movsbl($dst$$Register, $mem$$Address);
5479   %}
5480   ins_pipe(ialu_reg_mem);
5481 %}
5482 
5483 // Load Short (16bit signed) into Long Register
5484 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5485   match(Set dst (ConvI2L (LoadS mem)));
5486   effect(KILL cr);
5487 
5488   ins_cost(375);
5489   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5490             "MOV    $dst.hi,$dst.lo\n\t"
5491             "SAR    $dst.hi,15" %}
5492 
5493   ins_encode %{
5494     __ movswl($dst$$Register, $mem$$Address);
5495     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5496     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5497   %}
5498 
5499   ins_pipe(ialu_reg_mem);
5500 %}
5501 
5502 // Load Unsigned Short/Char (16bit unsigned)
5503 instruct loadUS(rRegI dst, memory mem) %{
5504   match(Set dst (LoadUS mem));
5505 
5506   ins_cost(125);
5507   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5508 
5509   ins_encode %{
5510     __ movzwl($dst$$Register, $mem$$Address);
5511   %}
5512 
5513   ins_pipe(ialu_reg_mem);
5514 %}
5515 
5516 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5517 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5518   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5519 
5520   ins_cost(125);
5521   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5522   ins_encode %{
5523     __ movsbl($dst$$Register, $mem$$Address);
5524   %}
5525   ins_pipe(ialu_reg_mem);
5526 %}
5527 
5528 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5529 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5530   match(Set dst (ConvI2L (LoadUS mem)));
5531   effect(KILL cr);
5532 
5533   ins_cost(250);
5534   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5535             "XOR    $dst.hi,$dst.hi" %}
5536 
5537   ins_encode %{
5538     __ movzwl($dst$$Register, $mem$$Address);
5539     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5540   %}
5541 
5542   ins_pipe(ialu_reg_mem);
5543 %}
5544 
5545 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5546 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5547   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5548   effect(KILL cr);
5549 
5550   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5551             "XOR    $dst.hi,$dst.hi" %}
5552   ins_encode %{
5553     Register Rdst = $dst$$Register;
5554     __ movzbl(Rdst, $mem$$Address);
5555     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5556   %}
5557   ins_pipe(ialu_reg_mem);
5558 %}
5559 
5560 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5561 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5562   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5563   effect(KILL cr);
5564 
5565   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5566             "XOR    $dst.hi,$dst.hi\n\t"
5567             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5568   ins_encode %{
5569     Register Rdst = $dst$$Register;
5570     __ movzwl(Rdst, $mem$$Address);
5571     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5572     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5573   %}
5574   ins_pipe(ialu_reg_mem);
5575 %}
5576 
5577 // Load Integer
5578 instruct loadI(rRegI dst, memory mem) %{
5579   match(Set dst (LoadI mem));
5580 
5581   ins_cost(125);
5582   format %{ "MOV    $dst,$mem\t# int" %}
5583 
5584   ins_encode %{
5585     __ movl($dst$$Register, $mem$$Address);
5586   %}
5587 
5588   ins_pipe(ialu_reg_mem);
5589 %}
5590 
5591 // Load Integer (32 bit signed) to Byte (8 bit signed)
5592 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5593   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5594 
5595   ins_cost(125);
5596   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5597   ins_encode %{
5598     __ movsbl($dst$$Register, $mem$$Address);
5599   %}
5600   ins_pipe(ialu_reg_mem);
5601 %}
5602 
5603 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5604 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5605   match(Set dst (AndI (LoadI mem) mask));
5606 
5607   ins_cost(125);
5608   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5609   ins_encode %{
5610     __ movzbl($dst$$Register, $mem$$Address);
5611   %}
5612   ins_pipe(ialu_reg_mem);
5613 %}
5614 
5615 // Load Integer (32 bit signed) to Short (16 bit signed)
5616 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5617   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5618 
5619   ins_cost(125);
5620   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5621   ins_encode %{
5622     __ movswl($dst$$Register, $mem$$Address);
5623   %}
5624   ins_pipe(ialu_reg_mem);
5625 %}
5626 
5627 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5628 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5629   match(Set dst (AndI (LoadI mem) mask));
5630 
5631   ins_cost(125);
5632   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5633   ins_encode %{
5634     __ movzwl($dst$$Register, $mem$$Address);
5635   %}
5636   ins_pipe(ialu_reg_mem);
5637 %}
5638 
5639 // Load Integer into Long Register
5640 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5641   match(Set dst (ConvI2L (LoadI mem)));
5642   effect(KILL cr);
5643 
5644   ins_cost(375);
5645   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5646             "MOV    $dst.hi,$dst.lo\n\t"
5647             "SAR    $dst.hi,31" %}
5648 
5649   ins_encode %{
5650     __ movl($dst$$Register, $mem$$Address);
5651     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5652     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5653   %}
5654 
5655   ins_pipe(ialu_reg_mem);
5656 %}
5657 
5658 // Load Integer with mask 0xFF into Long Register
5659 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5660   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5661   effect(KILL cr);
5662 
5663   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5664             "XOR    $dst.hi,$dst.hi" %}
5665   ins_encode %{
5666     Register Rdst = $dst$$Register;
5667     __ movzbl(Rdst, $mem$$Address);
5668     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5669   %}
5670   ins_pipe(ialu_reg_mem);
5671 %}
5672 
5673 // Load Integer with mask 0xFFFF into Long Register
5674 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5675   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5676   effect(KILL cr);
5677 
5678   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5679             "XOR    $dst.hi,$dst.hi" %}
5680   ins_encode %{
5681     Register Rdst = $dst$$Register;
5682     __ movzwl(Rdst, $mem$$Address);
5683     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5684   %}
5685   ins_pipe(ialu_reg_mem);
5686 %}
5687 
5688 // Load Integer with 31-bit mask into Long Register
5689 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5690   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5691   effect(KILL cr);
5692 
5693   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5694             "XOR    $dst.hi,$dst.hi\n\t"
5695             "AND    $dst.lo,$mask" %}
5696   ins_encode %{
5697     Register Rdst = $dst$$Register;
5698     __ movl(Rdst, $mem$$Address);
5699     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5700     __ andl(Rdst, $mask$$constant);
5701   %}
5702   ins_pipe(ialu_reg_mem);
5703 %}
5704 
5705 // Load Unsigned Integer into Long Register
5706 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5707   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5708   effect(KILL cr);
5709 
5710   ins_cost(250);
5711   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5712             "XOR    $dst.hi,$dst.hi" %}
5713 
5714   ins_encode %{
5715     __ movl($dst$$Register, $mem$$Address);
5716     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5717   %}
5718 
5719   ins_pipe(ialu_reg_mem);
5720 %}
5721 
5722 // Load Long.  Cannot clobber address while loading, so restrict address
5723 // register to ESI
5724 instruct loadL(eRegL dst, load_long_memory mem) %{
5725   predicate(!((LoadLNode*)n)->require_atomic_access());
5726   match(Set dst (LoadL mem));
5727 
5728   ins_cost(250);
5729   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5730             "MOV    $dst.hi,$mem+4" %}
5731 
5732   ins_encode %{
5733     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5734     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5735     __ movl($dst$$Register, Amemlo);
5736     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5737   %}
5738 
5739   ins_pipe(ialu_reg_long_mem);
5740 %}
5741 
5742 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5743 // then store it down to the stack and reload on the int
5744 // side.
5745 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5746   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5747   match(Set dst (LoadL mem));
5748 
5749   ins_cost(200);
5750   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5751             "FISTp  $dst" %}
5752   ins_encode(enc_loadL_volatile(mem,dst));
5753   ins_pipe( fpu_reg_mem );
5754 %}
5755 
5756 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5757   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5758   match(Set dst (LoadL mem));
5759   effect(TEMP tmp);
5760   ins_cost(180);
5761   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5762             "MOVSD  $dst,$tmp" %}
5763   ins_encode %{
5764     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5765     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5766   %}
5767   ins_pipe( pipe_slow );
5768 %}
5769 
5770 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5771   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5772   match(Set dst (LoadL mem));
5773   effect(TEMP tmp);
5774   ins_cost(160);
5775   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5776             "MOVD   $dst.lo,$tmp\n\t"
5777             "PSRLQ  $tmp,32\n\t"
5778             "MOVD   $dst.hi,$tmp" %}
5779   ins_encode %{
5780     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5781     __ movdl($dst$$Register, $tmp$$XMMRegister);
5782     __ psrlq($tmp$$XMMRegister, 32);
5783     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5784   %}
5785   ins_pipe( pipe_slow );
5786 %}
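
     // Note (an assumption about the rationale, not taken from the sources):
     // the SSE2 variants above presumably satisfy require_atomic_access()
     // because MOVSD performs the 64-bit memory read as one access, after
     // which the integer register pair is filled from the XMM register
     // without touching memory again, unlike a pair of 32-bit MOVs.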
5787 
5788 // Load Range
5789 instruct loadRange(rRegI dst, memory mem) %{
5790   match(Set dst (LoadRange mem));
5791 
5792   ins_cost(125);
5793   format %{ "MOV    $dst,$mem" %}
5794   opcode(0x8B);
5795   ins_encode( OpcP, RegMem(dst,mem));
5796   ins_pipe( ialu_reg_mem );
5797 %}
5798 
5799 
5800 // Load Pointer
5801 instruct loadP(eRegP dst, memory mem) %{
5802   match(Set dst (LoadP mem));
5803 
5804   ins_cost(125);
5805   format %{ "MOV    $dst,$mem" %}
5806   opcode(0x8B);
5807   ins_encode( OpcP, RegMem(dst,mem));
5808   ins_pipe( ialu_reg_mem );
5809 %}
5810 
5811 // Load Klass Pointer
5812 instruct loadKlass(eRegP dst, memory mem) %{
5813   match(Set dst (LoadKlass mem));
5814 
5815   ins_cost(125);
5816   format %{ "MOV    $dst,$mem" %}
5817   opcode(0x8B);
5818   ins_encode( OpcP, RegMem(dst,mem));
5819   ins_pipe( ialu_reg_mem );
5820 %}
5821 
5822 // Load Double
5823 instruct loadDPR(regDPR dst, memory mem) %{
5824   predicate(UseSSE<=1);
5825   match(Set dst (LoadD mem));
5826 
5827   ins_cost(150);
5828   format %{ "FLD_D  ST,$mem\n\t"
5829             "FSTP   $dst" %}
5830   opcode(0xDD);               /* DD /0 */
5831   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5832               Pop_Reg_DPR(dst) );
5833   ins_pipe( fpu_reg_mem );
5834 %}
5835 
5836 // Load Double to XMM
5837 instruct loadD(regD dst, memory mem) %{
5838   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5839   match(Set dst (LoadD mem));
5840   ins_cost(145);
5841   format %{ "MOVSD  $dst,$mem" %}
5842   ins_encode %{
5843     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5844   %}
5845   ins_pipe( pipe_slow );
5846 %}
5847 
5848 instruct loadD_partial(regD dst, memory mem) %{
5849   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5850   match(Set dst (LoadD mem));
5851   ins_cost(145);
5852   format %{ "MOVLPD $dst,$mem" %}
5853   ins_encode %{
5854     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5855   %}
5856   ins_pipe( pipe_slow );
5857 %}
5858 
5859 // Load to XMM register (single-precision floating point)
5860 // MOVSS instruction
5861 instruct loadF(regF dst, memory mem) %{
5862   predicate(UseSSE>=1);
5863   match(Set dst (LoadF mem));
5864   ins_cost(145);
5865   format %{ "MOVSS  $dst,$mem" %}
5866   ins_encode %{
5867     __ movflt ($dst$$XMMRegister, $mem$$Address);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 // Load Float
5873 instruct loadFPR(regFPR dst, memory mem) %{
5874   predicate(UseSSE==0);
5875   match(Set dst (LoadF mem));
5876 
5877   ins_cost(150);
5878   format %{ "FLD_S  ST,$mem\n\t"
5879             "FSTP   $dst" %}
5880   opcode(0xD9);               /* D9 /0 */
5881   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5882               Pop_Reg_FPR(dst) );
5883   ins_pipe( fpu_reg_mem );
5884 %}
5885 
5886 // Load Effective Address
5887 instruct leaP8(eRegP dst, indOffset8 mem) %{
5888   match(Set dst mem);
5889 
5890   ins_cost(110);
5891   format %{ "LEA    $dst,$mem" %}
5892   opcode(0x8D);
5893   ins_encode( OpcP, RegMem(dst,mem));
5894   ins_pipe( ialu_reg_reg_fat );
5895 %}
5896 
5897 instruct leaP32(eRegP dst, indOffset32 mem) %{
5898   match(Set dst mem);
5899 
5900   ins_cost(110);
5901   format %{ "LEA    $dst,$mem" %}
5902   opcode(0x8D);
5903   ins_encode( OpcP, RegMem(dst,mem));
5904   ins_pipe( ialu_reg_reg_fat );
5905 %}
5906 
5907 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5908   match(Set dst mem);
5909 
5910   ins_cost(110);
5911   format %{ "LEA    $dst,$mem" %}
5912   opcode(0x8D);
5913   ins_encode( OpcP, RegMem(dst,mem));
5914   ins_pipe( ialu_reg_reg_fat );
5915 %}
5916 
5917 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5918   match(Set dst mem);
5919 
5920   ins_cost(110);
5921   format %{ "LEA    $dst,$mem" %}
5922   opcode(0x8D);
5923   ins_encode( OpcP, RegMem(dst,mem));
5924   ins_pipe( ialu_reg_reg_fat );
5925 %}
5926 
5927 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5928   match(Set dst mem);
5929 
5930   ins_cost(110);
5931   format %{ "LEA    $dst,$mem" %}
5932   opcode(0x8D);
5933   ins_encode( OpcP, RegMem(dst,mem));
5934   ins_pipe( ialu_reg_reg_fat );
5935 %}
5936 
5937 // Load Constant
5938 instruct loadConI(rRegI dst, immI src) %{
5939   match(Set dst src);
5940 
5941   format %{ "MOV    $dst,$src" %}
5942   ins_encode( LdImmI(dst, src) );
5943   ins_pipe( ialu_reg_fat );
5944 %}
5945 
5946 // Load Constant zero
5947 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5948   match(Set dst src);
5949   effect(KILL cr);
5950 
5951   ins_cost(50);
5952   format %{ "XOR    $dst,$dst" %}
5953   opcode(0x33);  /* + rd */
5954   ins_encode( OpcP, RegReg( dst, dst ) );
5955   ins_pipe( ialu_reg );
5956 %}
5957 
5958 instruct loadConP(eRegP dst, immP src) %{
5959   match(Set dst src);
5960 
5961   format %{ "MOV    $dst,$src" %}
5962   opcode(0xB8);  /* + rd */
5963   ins_encode( LdImmP(dst, src) );
5964   ins_pipe( ialu_reg_fat );
5965 %}
5966 
5967 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5968   match(Set dst src);
5969   effect(KILL cr);
5970   ins_cost(200);
5971   format %{ "MOV    $dst.lo,$src.lo\n\t"
5972             "MOV    $dst.hi,$src.hi" %}
5973   opcode(0xB8);
5974   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5975   ins_pipe( ialu_reg_long_fat );
5976 %}
5977 
5978 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5979   match(Set dst src);
5980   effect(KILL cr);
5981   ins_cost(150);
5982   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5983             "XOR    $dst.hi,$dst.hi" %}
5984   opcode(0x33,0x33);
5985   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5986   ins_pipe( ialu_reg_long );
5987 %}
5988 
5989 // The instruction usage is guarded by predicate in operand immFPR().
5990 instruct loadConFPR(regFPR dst, immFPR con) %{
5991   match(Set dst con);
5992   ins_cost(125);
5993   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5994             "FSTP   $dst" %}
5995   ins_encode %{
5996     __ fld_s($constantaddress($con));
5997     __ fstp_d($dst$$reg);
5998   %}
5999   ins_pipe(fpu_reg_con);
6000 %}
6001 
6002 // The instruction usage is guarded by predicate in operand immFPR0().
6003 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6004   match(Set dst con);
6005   ins_cost(125);
6006   format %{ "FLDZ   ST\n\t"
6007             "FSTP   $dst" %}
6008   ins_encode %{
6009     __ fldz();
6010     __ fstp_d($dst$$reg);
6011   %}
6012   ins_pipe(fpu_reg_con);
6013 %}
6014 
6015 // The instruction usage is guarded by predicate in operand immFPR1().
6016 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6017   match(Set dst con);
6018   ins_cost(125);
6019   format %{ "FLD1   ST\n\t"
6020             "FSTP   $dst" %}
6021   ins_encode %{
6022     __ fld1();
6023     __ fstp_d($dst$$reg);
6024   %}
6025   ins_pipe(fpu_reg_con);
6026 %}
6027 
6028 // The instruction usage is guarded by predicate in operand immF().
6029 instruct loadConF(regF dst, immF con) %{
6030   match(Set dst con);
6031   ins_cost(125);
6032   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6033   ins_encode %{
6034     __ movflt($dst$$XMMRegister, $constantaddress($con));
6035   %}
6036   ins_pipe(pipe_slow);
6037 %}
6038 
6039 // The instruction usage is guarded by predicate in operand immF0().
6040 instruct loadConF0(regF dst, immF0 src) %{
6041   match(Set dst src);
6042   ins_cost(100);
6043   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6044   ins_encode %{
6045     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6046   %}
6047   ins_pipe(pipe_slow);
6048 %}
6049 
6050 // The instruction usage is guarded by predicate in operand immDPR().
6051 instruct loadConDPR(regDPR dst, immDPR con) %{
6052   match(Set dst con);
6053   ins_cost(125);
6054 
6055   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6056             "FSTP   $dst" %}
6057   ins_encode %{
6058     __ fld_d($constantaddress($con));
6059     __ fstp_d($dst$$reg);
6060   %}
6061   ins_pipe(fpu_reg_con);
6062 %}
6063 
6064 // The instruction usage is guarded by predicate in operand immDPR0().
6065 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6066   match(Set dst con);
6067   ins_cost(125);
6068 
6069   format %{ "FLDZ   ST\n\t"
6070             "FSTP   $dst" %}
6071   ins_encode %{
6072     __ fldz();
6073     __ fstp_d($dst$$reg);
6074   %}
6075   ins_pipe(fpu_reg_con);
6076 %}
6077 
6078 // The instruction usage is guarded by predicate in operand immDPR1().
6079 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6080   match(Set dst con);
6081   ins_cost(125);
6082 
6083   format %{ "FLD1   ST\n\t"
6084             "FSTP   $dst" %}
6085   ins_encode %{
6086     __ fld1();
6087     __ fstp_d($dst$$reg);
6088   %}
6089   ins_pipe(fpu_reg_con);
6090 %}
6091 
6092 // The instruction usage is guarded by predicate in operand immD().
6093 instruct loadConD(regD dst, immD con) %{
6094   match(Set dst con);
6095   ins_cost(125);
6096   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6097   ins_encode %{
6098     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6099   %}
6100   ins_pipe(pipe_slow);
6101 %}
6102 
6103 // The instruction usage is guarded by predicate in operand immD0().
6104 instruct loadConD0(regD dst, immD0 src) %{
6105   match(Set dst src);
6106   ins_cost(100);
6107   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6108   ins_encode %{
6109     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6110   %}
6111   ins_pipe( pipe_slow );
6112 %}
6113 
6114 // Load Stack Slot
6115 instruct loadSSI(rRegI dst, stackSlotI src) %{
6116   match(Set dst src);
6117   ins_cost(125);
6118 
6119   format %{ "MOV    $dst,$src" %}
6120   opcode(0x8B);
6121   ins_encode( OpcP, RegMem(dst,src));
6122   ins_pipe( ialu_reg_mem );
6123 %}
6124 
6125 instruct loadSSL(eRegL dst, stackSlotL src) %{
6126   match(Set dst src);
6127 
6128   ins_cost(200);
6129   format %{ "MOV    $dst,$src.lo\n\t"
6130             "MOV    $dst+4,$src.hi" %}
6131   opcode(0x8B, 0x8B);
6132   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6133   ins_pipe( ialu_mem_long_reg );
6134 %}
6135 
6136 // Load Stack Slot
6137 instruct loadSSP(eRegP dst, stackSlotP src) %{
6138   match(Set dst src);
6139   ins_cost(125);
6140 
6141   format %{ "MOV    $dst,$src" %}
6142   opcode(0x8B);
6143   ins_encode( OpcP, RegMem(dst,src));
6144   ins_pipe( ialu_reg_mem );
6145 %}
6146 
6147 // Load Stack Slot
6148 instruct loadSSF(regFPR dst, stackSlotF src) %{
6149   match(Set dst src);
6150   ins_cost(125);
6151 
6152   format %{ "FLD_S  $src\n\t"
6153             "FSTP   $dst" %}
6154   opcode(0xD9);               /* D9 /0, FLD m32real */
6155   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6156               Pop_Reg_FPR(dst) );
6157   ins_pipe( fpu_reg_mem );
6158 %}
6159 
6160 // Load Stack Slot
6161 instruct loadSSD(regDPR dst, stackSlotD src) %{
6162   match(Set dst src);
6163   ins_cost(125);
6164 
6165   format %{ "FLD_D  $src\n\t"
6166             "FSTP   $dst" %}
6167   opcode(0xDD);               /* DD /0, FLD m64real */
6168   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6169               Pop_Reg_DPR(dst) );
6170   ins_pipe( fpu_reg_mem );
6171 %}
6172 
6173 // Prefetch instructions for allocation.
6174 // Must be safe to execute with invalid address (cannot fault).
6175 
6176 instruct prefetchAlloc0( memory mem ) %{
6177   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6178   match(PrefetchAllocation mem);
6179   ins_cost(0);
6180   size(0);
6181   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6182   ins_encode();
6183   ins_pipe(empty);
6184 %}
6185 
6186 instruct prefetchAlloc( memory mem ) %{
6187   predicate(AllocatePrefetchInstr==3);
6188   match( PrefetchAllocation mem );
6189   ins_cost(100);
6190 
6191   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6192   ins_encode %{
6193     __ prefetchw($mem$$Address);
6194   %}
6195   ins_pipe(ialu_mem);
6196 %}
6197 
6198 instruct prefetchAllocNTA( memory mem ) %{
6199   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6200   match(PrefetchAllocation mem);
6201   ins_cost(100);
6202 
6203   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6204   ins_encode %{
6205     __ prefetchnta($mem$$Address);
6206   %}
6207   ins_pipe(ialu_mem);
6208 %}
6209 
6210 instruct prefetchAllocT0( memory mem ) %{
6211   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6212   match(PrefetchAllocation mem);
6213   ins_cost(100);
6214 
6215   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6216   ins_encode %{
6217     __ prefetcht0($mem$$Address);
6218   %}
6219   ins_pipe(ialu_mem);
6220 %}
6221 
6222 instruct prefetchAllocT2( memory mem ) %{
6223   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6224   match(PrefetchAllocation mem);
6225   ins_cost(100);
6226 
6227   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6228   ins_encode %{
6229     __ prefetcht2($mem$$Address);
6230   %}
6231   ins_pipe(ialu_mem);
6232 %}
6233 
6234 //----------Store Instructions-------------------------------------------------
6235 
6236 // Store Byte
6237 instruct storeB(memory mem, xRegI src) %{
6238   match(Set mem (StoreB mem src));
6239 
6240   ins_cost(125);
6241   format %{ "MOV8   $mem,$src" %}
6242   opcode(0x88);
6243   ins_encode( OpcP, RegMem( src, mem ) );
6244   ins_pipe( ialu_mem_reg );
6245 %}
6246 
6247 // Store Char/Short
6248 instruct storeC(memory mem, rRegI src) %{
6249   match(Set mem (StoreC mem src));
6250 
6251   ins_cost(125);
6252   format %{ "MOV16  $mem,$src" %}
6253   opcode(0x89, 0x66);
6254   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6255   ins_pipe( ialu_mem_reg );
6256 %}
6257 
6258 // Store Integer
6259 instruct storeI(memory mem, rRegI src) %{
6260   match(Set mem (StoreI mem src));
6261 
6262   ins_cost(125);
6263   format %{ "MOV    $mem,$src" %}
6264   opcode(0x89);
6265   ins_encode( OpcP, RegMem( src, mem ) );
6266   ins_pipe( ialu_mem_reg );
6267 %}
6268 
6269 // Store Long
6270 instruct storeL(long_memory mem, eRegL src) %{
6271   predicate(!((StoreLNode*)n)->require_atomic_access());
6272   match(Set mem (StoreL mem src));
6273 
6274   ins_cost(200);
6275   format %{ "MOV    $mem,$src.lo\n\t"
6276             "MOV    $mem+4,$src.hi" %}
6277   opcode(0x89, 0x89);
6278   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6279   ins_pipe( ialu_mem_long_reg );
6280 %}
6281 
6282 // Store Long to Integer
6283 instruct storeL2I(memory mem, eRegL src) %{
6284   match(Set mem (StoreI mem (ConvL2I src)));
6285 
6286   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6287   ins_encode %{
6288     __ movl($mem$$Address, $src$$Register);
6289   %}
6290   ins_pipe(ialu_mem_reg);
6291 %}
6292 
6293 // Volatile Store Long.  Must be atomic, so move it into
6294 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6295 // target address before the store (for null-ptr checks)
6296 // so the memory operand is used twice in the encoding.
6297 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6298   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6299   match(Set mem (StoreL mem src));
6300   effect( KILL cr );
6301   ins_cost(400);
6302   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6303             "FILD   $src\n\t"
6304             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6305   opcode(0x3B);
6306   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6307   ins_pipe( fpu_reg_mem );
6308 %}
6309 
6310 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6311   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6312   match(Set mem (StoreL mem src));
6313   effect( TEMP tmp, KILL cr );
6314   ins_cost(380);
6315   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6316             "MOVSD  $tmp,$src\n\t"
6317             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6318   ins_encode %{
6319     __ cmpl(rax, $mem$$Address);
6320     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6321     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6322   %}
6323   ins_pipe( pipe_slow );
6324 %}
6325 
6326 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6327   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6328   match(Set mem (StoreL mem src));
6329   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6330   ins_cost(360);
6331   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6332             "MOVD   $tmp,$src.lo\n\t"
6333             "MOVD   $tmp2,$src.hi\n\t"
6334             "PUNPCKLDQ $tmp,$tmp2\n\t"
6335             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6336   ins_encode %{
6337     __ cmpl(rax, $mem$$Address);
6338     __ movdl($tmp$$XMMRegister, $src$$Register);
6339     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6340     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6341     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6342   %}
6343   ins_pipe( pipe_slow );
6344 %}
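     // The three forms above implement the same contract -- an atomic 64-bit
     // store on a 32-bit machine -- either through the x87 stack (FILD/FISTP,
     // UseSSE<=1) or through an XMM register (UseSSE>=2).  As a rough C sketch
     // (illustrative only, not generated code; hi, lo and mem are stand-in names):
     //
     //   uint64_t v = ((uint64_t)hi << 32) | lo;   // MOVD + MOVD + PUNPCKLDQ
     //   *(volatile uint64_t*)mem = v;             // one MOVSD, hence atomic
     //
     // The leading CMP against EAX only reads the target address, so an
     // implicit null check can fire before any part of the value is written.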
6345 
6346 // Store Pointer; for storing unknown oops and raw pointers
6347 instruct storeP(memory mem, anyRegP src) %{
6348   match(Set mem (StoreP mem src));
6349 
6350   ins_cost(125);
6351   format %{ "MOV    $mem,$src" %}
6352   opcode(0x89);
6353   ins_encode( OpcP, RegMem( src, mem ) );
6354   ins_pipe( ialu_mem_reg );
6355 %}
6356 
6357 // Store Integer Immediate
6358 instruct storeImmI(memory mem, immI src) %{
6359   match(Set mem (StoreI mem src));
6360 
6361   ins_cost(150);
6362   format %{ "MOV    $mem,$src" %}
6363   opcode(0xC7);               /* C7 /0 */
6364   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6365   ins_pipe( ialu_mem_imm );
6366 %}
6367 
6368 // Store Short/Char Immediate
6369 instruct storeImmI16(memory mem, immI16 src) %{
6370   predicate(UseStoreImmI16);
6371   match(Set mem (StoreC mem src));
6372 
6373   ins_cost(150);
6374   format %{ "MOV16  $mem,$src" %}
6375   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6376   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6377   ins_pipe( ialu_mem_imm );
6378 %}
6379 
6380 // Store Pointer Immediate; null pointers or constant oops that do not
6381 // need card-mark barriers.
6382 instruct storeImmP(memory mem, immP src) %{
6383   match(Set mem (StoreP mem src));
6384 
6385   ins_cost(150);
6386   format %{ "MOV    $mem,$src" %}
6387   opcode(0xC7);               /* C7 /0 */
6388   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6389   ins_pipe( ialu_mem_imm );
6390 %}
6391 
6392 // Store Byte Immediate
6393 instruct storeImmB(memory mem, immI8 src) %{
6394   match(Set mem (StoreB mem src));
6395 
6396   ins_cost(150);
6397   format %{ "MOV8   $mem,$src" %}
6398   opcode(0xC6);               /* C6 /0 */
6399   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6400   ins_pipe( ialu_mem_imm );
6401 %}
6402 
6403 // Store CMS card-mark Immediate
6404 instruct storeImmCM(memory mem, immI8 src) %{
6405   match(Set mem (StoreCM mem src));
6406 
6407   ins_cost(150);
6408   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6409   opcode(0xC6);               /* C6 /0 */
6410   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6411   ins_pipe( ialu_mem_imm );
6412 %}
6413 
6414 // Store Double
6415 instruct storeDPR( memory mem, regDPR1 src) %{
6416   predicate(UseSSE<=1);
6417   match(Set mem (StoreD mem src));
6418 
6419   ins_cost(100);
6420   format %{ "FST_D  $mem,$src" %}
6421   opcode(0xDD);       /* DD /2 */
6422   ins_encode( enc_FPR_store(mem,src) );
6423   ins_pipe( fpu_mem_reg );
6424 %}
6425 
6426 // Store double does rounding on x86
6427 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6428   predicate(UseSSE<=1);
6429   match(Set mem (StoreD mem (RoundDouble src)));
6430 
6431   ins_cost(100);
6432   format %{ "FST_D  $mem,$src\t# round" %}
6433   opcode(0xDD);       /* DD /2 */
6434   ins_encode( enc_FPR_store(mem,src) );
6435   ins_pipe( fpu_mem_reg );
6436 %}
6437 
6438 // Store XMM register to memory (double-precision floating point)
6439 // MOVSD instruction
6440 instruct storeD(memory mem, regD src) %{
6441   predicate(UseSSE>=2);
6442   match(Set mem (StoreD mem src));
6443   ins_cost(95);
6444   format %{ "MOVSD  $mem,$src" %}
6445   ins_encode %{
6446     __ movdbl($mem$$Address, $src$$XMMRegister);
6447   %}
6448   ins_pipe( pipe_slow );
6449 %}
6450 
6451 // Store XMM register to memory (single-precision floating point)
6452 // MOVSS instruction
6453 instruct storeF(memory mem, regF src) %{
6454   predicate(UseSSE>=1);
6455   match(Set mem (StoreF mem src));
6456   ins_cost(95);
6457   format %{ "MOVSS  $mem,$src" %}
6458   ins_encode %{
6459     __ movflt($mem$$Address, $src$$XMMRegister);
6460   %}
6461   ins_pipe( pipe_slow );
6462 %}
6463 
6464 // Store Float
6465 instruct storeFPR( memory mem, regFPR1 src) %{
6466   predicate(UseSSE==0);
6467   match(Set mem (StoreF mem src));
6468 
6469   ins_cost(100);
6470   format %{ "FST_S  $mem,$src" %}
6471   opcode(0xD9);       /* D9 /2 */
6472   ins_encode( enc_FPR_store(mem,src) );
6473   ins_pipe( fpu_mem_reg );
6474 %}
6475 
6476 // Store Float does rounding on x86
6477 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6478   predicate(UseSSE==0);
6479   match(Set mem (StoreF mem (RoundFloat src)));
6480 
6481   ins_cost(100);
6482   format %{ "FST_S  $mem,$src\t# round" %}
6483   opcode(0xD9);       /* D9 /2 */
6484   ins_encode( enc_FPR_store(mem,src) );
6485   ins_pipe( fpu_mem_reg );
6486 %}
6487 
6488 // Store Float with double-to-float rounding on x86
6489 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6490   predicate(UseSSE<=1);
6491   match(Set mem (StoreF mem (ConvD2F src)));
6492 
6493   ins_cost(100);
6494   format %{ "FST_S  $mem,$src\t# D-round" %}
6495   opcode(0xD9);       /* D9 /2 */
6496   ins_encode( enc_FPR_store(mem,src) );
6497   ins_pipe( fpu_mem_reg );
6498 %}
6499 
6500 // Store immediate Float value (it is faster than store from FPU register)
6501 // The instruction usage is guarded by predicate in operand immFPR().
6502 instruct storeFPR_imm( memory mem, immFPR src) %{
6503   match(Set mem (StoreF mem src));
6504 
6505   ins_cost(50);
6506   format %{ "MOV    $mem,$src\t# store float" %}
6507   opcode(0xC7);               /* C7 /0 */
6508   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6509   ins_pipe( ialu_mem_imm );
6510 %}
6511 
6512 // Store immediate Float value (it is faster than store from XMM register)
6513 // The instruction usage is guarded by predicate in operand immF().
6514 instruct storeF_imm( memory mem, immF src) %{
6515   match(Set mem (StoreF mem src));
6516 
6517   ins_cost(50);
6518   format %{ "MOV    $mem,$src\t# store float" %}
6519   opcode(0xC7);               /* C7 /0 */
6520   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6521   ins_pipe( ialu_mem_imm );
6522 %}
6523 
6524 // Store Integer to stack slot
6525 instruct storeSSI(stackSlotI dst, rRegI src) %{
6526   match(Set dst src);
6527 
6528   ins_cost(100);
6529   format %{ "MOV    $dst,$src" %}
6530   opcode(0x89);
6531   ins_encode( OpcPRegSS( dst, src ) );
6532   ins_pipe( ialu_mem_reg );
6533 %}
6534 
6535 // Store Pointer to stack slot
6536 instruct storeSSP(stackSlotP dst, eRegP src) %{
6537   match(Set dst src);
6538 
6539   ins_cost(100);
6540   format %{ "MOV    $dst,$src" %}
6541   opcode(0x89);
6542   ins_encode( OpcPRegSS( dst, src ) );
6543   ins_pipe( ialu_mem_reg );
6544 %}
6545 
6546 // Store Long to stack slot
6547 instruct storeSSL(stackSlotL dst, eRegL src) %{
6548   match(Set dst src);
6549 
6550   ins_cost(200);
6551   format %{ "MOV    $dst,$src.lo\n\t"
6552             "MOV    $dst+4,$src.hi" %}
6553   opcode(0x89, 0x89);
6554   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6555   ins_pipe( ialu_mem_long_reg );
6556 %}
6557 
6558 //----------MemBar Instructions-----------------------------------------------
6559 // Memory barrier flavors
6560 
6561 instruct membar_acquire() %{
6562   match(MemBarAcquire);
6563   match(LoadFence);
6564   ins_cost(400);
6565 
6566   size(0);
6567   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6568   ins_encode();
6569   ins_pipe(empty);
6570 %}
6571 
6572 instruct membar_acquire_lock() %{
6573   match(MemBarAcquireLock);
6574   ins_cost(0);
6575 
6576   size(0);
6577   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6578   ins_encode( );
6579   ins_pipe(empty);
6580 %}
6581 
6582 instruct membar_release() %{
6583   match(MemBarRelease);
6584   match(StoreFence);
6585   ins_cost(400);
6586 
6587   size(0);
6588   format %{ "MEMBAR-release ! (empty encoding)" %}
6589   ins_encode( );
6590   ins_pipe(empty);
6591 %}
6592 
6593 instruct membar_release_lock() %{
6594   match(MemBarReleaseLock);
6595   ins_cost(0);
6596 
6597   size(0);
6598   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6599   ins_encode( );
6600   ins_pipe(empty);
6601 %}
6602 
6603 instruct membar_volatile(eFlagsReg cr) %{
6604   match(MemBarVolatile);
6605   effect(KILL cr);
6606   ins_cost(400);
6607 
6608   format %{
6609     $$template
6610     if (os::is_MP()) {
6611       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6612     } else {
6613       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6614     }
6615   %}
6616   ins_encode %{
6617     __ membar(Assembler::StoreLoad);
6618   %}
6619   ins_pipe(pipe_slow);
6620 %}
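     // On MP systems the StoreLoad barrier above is realized as a locked
     // read-modify-write of a harmless location (ADD of 0 to the top of the
     // stack, per the format string), which on x86 acts as a full fence and is
     // generally cheaper than MFENCE; on uniprocessors nothing is emitted.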
6621 
6622 instruct unnecessary_membar_volatile() %{
6623   match(MemBarVolatile);
6624   predicate(Matcher::post_store_load_barrier(n));
6625   ins_cost(0);
6626 
6627   size(0);
6628   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6629   ins_encode( );
6630   ins_pipe(empty);
6631 %}
6632 
6633 instruct membar_storestore() %{
6634   match(MemBarStoreStore);
6635   ins_cost(0);
6636 
6637   size(0);
6638   format %{ "MEMBAR-storestore (empty encoding)" %}
6639   ins_encode( );
6640   ins_pipe(empty);
6641 %}
6642 
6643 //----------Move Instructions--------------------------------------------------
6644 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6645   match(Set dst (CastX2P src));
6646   format %{ "# X2P  $dst, $src" %}
6647   ins_encode( /*empty encoding*/ );
6648   ins_cost(0);
6649   ins_pipe(empty);
6650 %}
6651 
6652 instruct castP2X(rRegI dst, eRegP src ) %{
6653   match(Set dst (CastP2X src));
6654   ins_cost(50);
6655   format %{ "MOV    $dst, $src\t# CastP2X" %}
6656   ins_encode( enc_Copy( dst, src) );
6657   ins_pipe( ialu_reg_reg );
6658 %}
6659 
6660 //----------Conditional Move---------------------------------------------------
6661 // Conditional move
6662 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6663   predicate(!VM_Version::supports_cmov() );
6664   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6665   ins_cost(200);
6666   format %{ "J$cop,us skip\t# signed cmove\n\t"
6667             "MOV    $dst,$src\n"
6668       "skip:" %}
6669   ins_encode %{
6670     Label Lskip;
6671     // Invert sense of branch from sense of CMOV
6672     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6673     __ movl($dst$$Register, $src$$Register);
6674     __ bind(Lskip);
6675   %}
6676   ins_pipe( pipe_cmov_reg );
6677 %}
6678 
6679 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6680   predicate(!VM_Version::supports_cmov() );
6681   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6682   ins_cost(200);
6683   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6684             "MOV    $dst,$src\n"
6685       "skip:" %}
6686   ins_encode %{
6687     Label Lskip;
6688     // Invert sense of branch from sense of CMOV
6689     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6690     __ movl($dst$$Register, $src$$Register);
6691     __ bind(Lskip);
6692   %}
6693   ins_pipe( pipe_cmov_reg );
6694 %}
6695 
6696 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6697   predicate(VM_Version::supports_cmov() );
6698   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6699   ins_cost(200);
6700   format %{ "CMOV$cop $dst,$src" %}
6701   opcode(0x0F,0x40);
6702   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6703   ins_pipe( pipe_cmov_reg );
6704 %}
6705 
6706 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6707   predicate(VM_Version::supports_cmov() );
6708   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6709   ins_cost(200);
6710   format %{ "CMOV$cop $dst,$src" %}
6711   opcode(0x0F,0x40);
6712   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6713   ins_pipe( pipe_cmov_reg );
6714 %}
6715 
6716 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6717   predicate(VM_Version::supports_cmov() );
6718   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6719   ins_cost(200);
6720   expand %{
6721     cmovI_regU(cop, cr, dst, src);
6722   %}
6723 %}
6724 
6725 // Conditional move
6726 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6727   predicate(VM_Version::supports_cmov() );
6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6729   ins_cost(250);
6730   format %{ "CMOV$cop $dst,$src" %}
6731   opcode(0x0F,0x40);
6732   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6733   ins_pipe( pipe_cmov_mem );
6734 %}
6735 
6736 // Conditional move
6737 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6738   predicate(VM_Version::supports_cmov() );
6739   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6740   ins_cost(250);
6741   format %{ "CMOV$cop $dst,$src" %}
6742   opcode(0x0F,0x40);
6743   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6744   ins_pipe( pipe_cmov_mem );
6745 %}
6746 
6747 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6748   predicate(VM_Version::supports_cmov() );
6749   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6750   ins_cost(250);
6751   expand %{
6752     cmovI_memU(cop, cr, dst, src);
6753   %}
6754 %}
6755 
6756 // Conditional move
6757 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6758   predicate(VM_Version::supports_cmov() );
6759   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6760   ins_cost(200);
6761   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6762   opcode(0x0F,0x40);
6763   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6764   ins_pipe( pipe_cmov_reg );
6765 %}
6766 
6767 // Conditional move (non-P6 version)
6768 // Note:  a CMoveP is generated for  stubs and native wrappers
6769 //        regardless of whether we are on a P6, so we
6770 //        emulate a cmov here
6771 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6772   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6773   ins_cost(300);
6774   format %{ "Jn$cop   skip\n\t"
6775           "MOV    $dst,$src\t# pointer\n"
6776       "skip:" %}
6777   opcode(0x8b);
6778   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6779   ins_pipe( pipe_cmov_reg );
6780 %}
6781 
6782 // Conditional move
6783 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6784   predicate(VM_Version::supports_cmov() );
6785   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6786   ins_cost(200);
6787   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6788   opcode(0x0F,0x40);
6789   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6790   ins_pipe( pipe_cmov_reg );
6791 %}
6792 
6793 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6794   predicate(VM_Version::supports_cmov() );
6795   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6796   ins_cost(200);
6797   expand %{
6798     cmovP_regU(cop, cr, dst, src);
6799   %}
6800 %}
6801 
6802 // DISABLED: Requires the ADLC to emit a bottom_type call that
6803 // correctly meets the two pointer arguments; one is an incoming
6804 // register but the other is a memory operand.  ALSO appears to
6805 // be buggy with implicit null checks.
6806 //
6807 //// Conditional move
6808 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6809 //  predicate(VM_Version::supports_cmov() );
6810 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6811 //  ins_cost(250);
6812 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6813 //  opcode(0x0F,0x40);
6814 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6815 //  ins_pipe( pipe_cmov_mem );
6816 //%}
6817 //
6818 //// Conditional move
6819 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6820 //  predicate(VM_Version::supports_cmov() );
6821 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6822 //  ins_cost(250);
6823 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6824 //  opcode(0x0F,0x40);
6825 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6826 //  ins_pipe( pipe_cmov_mem );
6827 //%}
6828 
6829 // Conditional move
6830 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6831   predicate(UseSSE<=1);
6832   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6833   ins_cost(200);
6834   format %{ "FCMOV$cop $dst,$src\t# double" %}
6835   opcode(0xDA);
6836   ins_encode( enc_cmov_dpr(cop,src) );
6837   ins_pipe( pipe_cmovDPR_reg );
6838 %}
6839 
6840 // Conditional move
6841 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6842   predicate(UseSSE==0);
6843   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6844   ins_cost(200);
6845   format %{ "FCMOV$cop $dst,$src\t# float" %}
6846   opcode(0xDA);
6847   ins_encode( enc_cmov_dpr(cop,src) );
6848   ins_pipe( pipe_cmovDPR_reg );
6849 %}
6850 
6851 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6852 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6853   predicate(UseSSE<=1);
6854   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6855   ins_cost(200);
6856   format %{ "Jn$cop   skip\n\t"
6857             "MOV    $dst,$src\t# double\n"
6858       "skip:" %}
6859   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6860   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6861   ins_pipe( pipe_cmovDPR_reg );
6862 %}
6863 
6864 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6865 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6866   predicate(UseSSE==0);
6867   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6868   ins_cost(200);
6869   format %{ "Jn$cop    skip\n\t"
6870             "MOV    $dst,$src\t# float\n"
6871       "skip:" %}
6872   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6873   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6874   ins_pipe( pipe_cmovDPR_reg );
6875 %}
6876 
6877 // No CMOVE with SSE/SSE2: there is no FP conditional move, so branch around a MOVSS
6878 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6879   predicate (UseSSE>=1);
6880   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6881   ins_cost(200);
6882   format %{ "Jn$cop   skip\n\t"
6883             "MOVSS  $dst,$src\t# float\n"
6884       "skip:" %}
6885   ins_encode %{
6886     Label skip;
6887     // Invert sense of branch from sense of CMOV
6888     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6889     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6890     __ bind(skip);
6891   %}
6892   ins_pipe( pipe_slow );
6893 %}
6894 
6895 // No CMOVE with SSE/SSE2: there is no FP conditional move, so branch around a MOVSD
6896 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6897   predicate (UseSSE>=2);
6898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6899   ins_cost(200);
6900   format %{ "Jn$cop   skip\n\t"
6901             "MOVSD  $dst,$src\t# double\n"
6902       "skip:" %}
6903   ins_encode %{
6904     Label skip;
6905     // Invert sense of branch from sense of CMOV
6906     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6907     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6908     __ bind(skip);
6909   %}
6910   ins_pipe( pipe_slow );
6911 %}
6912 
6913 // unsigned version
6914 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6915   predicate (UseSSE>=1);
6916   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6917   ins_cost(200);
6918   format %{ "Jn$cop   skip\n\t"
6919             "MOVSS  $dst,$src\t# float\n"
6920       "skip:" %}
6921   ins_encode %{
6922     Label skip;
6923     // Invert sense of branch from sense of CMOV
6924     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6925     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6926     __ bind(skip);
6927   %}
6928   ins_pipe( pipe_slow );
6929 %}
6930 
6931 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6932   predicate (UseSSE>=1);
6933   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6934   ins_cost(200);
6935   expand %{
6936     fcmovF_regU(cop, cr, dst, src);
6937   %}
6938 %}
6939 
6940 // unsigned version
6941 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6942   predicate (UseSSE>=2);
6943   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   format %{ "Jn$cop   skip\n\t"
6946             "MOVSD  $dst,$src\t# double\n"
6947       "skip:" %}
6948   ins_encode %{
6949     Label skip;
6950     // Invert sense of branch from sense of CMOV
6951     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6952     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6953     __ bind(skip);
6954   %}
6955   ins_pipe( pipe_slow );
6956 %}
6957 
6958 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6959   predicate (UseSSE>=2);
6960   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6961   ins_cost(200);
6962   expand %{
6963     fcmovD_regU(cop, cr, dst, src);
6964   %}
6965 %}
6966 
6967 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6968   predicate(VM_Version::supports_cmov() );
6969   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6970   ins_cost(200);
6971   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6972             "CMOV$cop $dst.hi,$src.hi" %}
6973   opcode(0x0F,0x40);
6974   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6975   ins_pipe( pipe_cmov_reg_long );
6976 %}
6977 
6978 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6979   predicate(VM_Version::supports_cmov() );
6980   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6981   ins_cost(200);
6982   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6983             "CMOV$cop $dst.hi,$src.hi" %}
6984   opcode(0x0F,0x40);
6985   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6986   ins_pipe( pipe_cmov_reg_long );
6987 %}
6988 
6989 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6990   predicate(VM_Version::supports_cmov() );
6991   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   expand %{
6994     cmovL_regU(cop, cr, dst, src);
6995   %}
6996 %}
6997 
6998 //----------Arithmetic Instructions--------------------------------------------
6999 //----------Addition Instructions----------------------------------------------
7000 
7001 // Integer Addition Instructions
7002 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7003   match(Set dst (AddI dst src));
7004   effect(KILL cr);
7005 
7006   size(2);
7007   format %{ "ADD    $dst,$src" %}
7008   opcode(0x03);
7009   ins_encode( OpcP, RegReg( dst, src) );
7010   ins_pipe( ialu_reg_reg );
7011 %}
7012 
7013 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7014   match(Set dst (AddI dst src));
7015   effect(KILL cr);
7016 
7017   format %{ "ADD    $dst,$src" %}
7018   opcode(0x81, 0x00); /* /0 id */
7019   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7020   ins_pipe( ialu_reg );
7021 %}
7022 
7023 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7024   predicate(UseIncDec);
7025   match(Set dst (AddI dst src));
7026   effect(KILL cr);
7027 
7028   size(1);
7029   format %{ "INC    $dst" %}
7030   opcode(0x40); /* 40 +rd => INC r32 */
7031   ins_encode( Opc_plus( primary, dst ) );
7032   ins_pipe( ialu_reg );
7033 %}
7034 
7035 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7036   match(Set dst (AddI src0 src1));
7037   ins_cost(110);
7038 
7039   format %{ "LEA    $dst,[$src0 + $src1]" %}
7040   opcode(0x8D); /* 0x8D /r */
7041   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7042   ins_pipe( ialu_reg_reg );
7043 %}
7044 
7045 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7046   match(Set dst (AddP src0 src1));
7047   ins_cost(110);
7048 
7049   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7050   opcode(0x8D); /* 0x8D /r */
7051   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7052   ins_pipe( ialu_reg_reg );
7053 %}
7054 
7055 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7056   predicate(UseIncDec);
7057   match(Set dst (AddI dst src));
7058   effect(KILL cr);
7059 
7060   size(1);
7061   format %{ "DEC    $dst" %}
7062   opcode(0x48); /* 48 +rd => DEC r32 */
7063   ins_encode( Opc_plus( primary, dst ) );
7064   ins_pipe( ialu_reg );
7065 %}
7066 
7067 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7068   match(Set dst (AddP dst src));
7069   effect(KILL cr);
7070 
7071   size(2);
7072   format %{ "ADD    $dst,$src" %}
7073   opcode(0x03);
7074   ins_encode( OpcP, RegReg( dst, src) );
7075   ins_pipe( ialu_reg_reg );
7076 %}
7077 
7078 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7079   match(Set dst (AddP dst src));
7080   effect(KILL cr);
7081 
7082   format %{ "ADD    $dst,$src" %}
7083   opcode(0x81,0x00); /* Opcode 81 /0 id */
7084   // ins_encode( RegImm( dst, src) );
7085   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7086   ins_pipe( ialu_reg );
7087 %}
7088 
7089 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7090   match(Set dst (AddI dst (LoadI src)));
7091   effect(KILL cr);
7092 
7093   ins_cost(125);
7094   format %{ "ADD    $dst,$src" %}
7095   opcode(0x03);
7096   ins_encode( OpcP, RegMem( dst, src) );
7097   ins_pipe( ialu_reg_mem );
7098 %}
7099 
7100 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7101   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7102   effect(KILL cr);
7103 
7104   ins_cost(150);
7105   format %{ "ADD    $dst,$src" %}
7106   opcode(0x01);  /* Opcode 01 /r */
7107   ins_encode( OpcP, RegMem( src, dst ) );
7108   ins_pipe( ialu_mem_reg );
7109 %}
7110 
7111 // Add Memory with Immediate
7112 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7113   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7114   effect(KILL cr);
7115 
7116   ins_cost(125);
7117   format %{ "ADD    $dst,$src" %}
7118   opcode(0x81);               /* Opcode 81 /0 id */
7119   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7120   ins_pipe( ialu_mem_imm );
7121 %}
7122 
7123 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7124   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7125   effect(KILL cr);
7126 
7127   ins_cost(125);
7128   format %{ "INC    $dst" %}
7129   opcode(0xFF);               /* Opcode FF /0 */
7130   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7131   ins_pipe( ialu_mem_imm );
7132 %}
7133 
7134 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7135   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7136   effect(KILL cr);
7137 
7138   ins_cost(125);
7139   format %{ "DEC    $dst" %}
7140   opcode(0xFF);               /* Opcode FF /1 */
7141   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7142   ins_pipe( ialu_mem_imm );
7143 %}
7144 
7145 
7146 instruct checkCastPP( eRegP dst ) %{
7147   match(Set dst (CheckCastPP dst));
7148 
7149   size(0);
7150   format %{ "#checkcastPP of $dst" %}
7151   ins_encode( /*empty encoding*/ );
7152   ins_pipe( empty );
7153 %}
7154 
7155 instruct castPP( eRegP dst ) %{
7156   match(Set dst (CastPP dst));
7157   format %{ "#castPP of $dst" %}
7158   ins_encode( /*empty encoding*/ );
7159   ins_pipe( empty );
7160 %}
7161 
7162 instruct castII( rRegI dst ) %{
7163   match(Set dst (CastII dst));
7164   format %{ "#castII of $dst" %}
7165   ins_encode( /*empty encoding*/ );
7166   ins_cost(0);
7167   ins_pipe( empty );
7168 %}
7169 
7170 
7171 // Load-locked - same as a regular pointer load when used with compare-swap
7172 instruct loadPLocked(eRegP dst, memory mem) %{
7173   match(Set dst (LoadPLocked mem));
7174 
7175   ins_cost(125);
7176   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7177   opcode(0x8B);
7178   ins_encode( OpcP, RegMem(dst,mem));
7179   ins_pipe( ialu_reg_mem );
7180 %}
7181 
7182 // Conditional-store of the updated heap-top.
7183 // Used during allocation of the shared heap.
7184 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7185 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7186   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7187   // EAX is killed if there is contention, but then it's also unused.
7188   // In the common case of no contention, EAX holds the new oop address.
7189   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7190   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7191   ins_pipe( pipe_cmpxchg );
7192 %}
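     // Illustrative sketch (C-like pseudocode with hypothetical names, not
     // generated code) of what loadPLocked + storePConditional accomplish for
     // bump-pointer allocation in the shared heap:
     //
     //   old_top = heap_top;                        // LoadPLocked
     //   new_top = old_top + size;
     //   if (CMPXCHG(&heap_top, old_top, new_top))  // EAX == heap_top ?
     //     /* ZF set: the block [old_top, new_top) is ours */;
     //   else
     //     /* another thread raced ahead; retry or take the slow path */;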
7193 
7194 // Conditional-store of an int value.
7195 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7196 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7197   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7198   effect(KILL oldval);
7199   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7200   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7201   ins_pipe( pipe_cmpxchg );
7202 %}
7203 
7204 // Conditional-store of a long value.
7205 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7206 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7207   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7208   effect(KILL oldval);
7209   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7210             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7211             "XCHG   EBX,ECX"
7212   %}
7213   ins_encode %{
7214     // Note: we need to swap rbx and rcx before and after the
7215     //       cmpxchg8 instruction because the instruction uses
7216     //       rcx as the high-order word of the new value to store but
7217     //       our register encoding uses rbx.
7218     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7219     if( os::is_MP() )
7220       __ lock();
7221     __ cmpxchg8($mem$$Address);
7222     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7223   %}
7224   ins_pipe( pipe_cmpxchg );
7225 %}
7226 
7227 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7228 
7229 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7230   predicate(VM_Version::supports_cx8());
7231   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7232   effect(KILL cr, KILL oldval);
7233   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7234             "MOV    $res,0\n\t"
7235             "JNE,s  fail\n\t"
7236             "MOV    $res,1\n"
7237           "fail:" %}
7238   ins_encode( enc_cmpxchg8(mem_ptr),
7239               enc_flags_ne_to_boolean(res) );
7240   ins_pipe( pipe_cmpxchg );
7241 %}
7242 
7243 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7244   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7245   effect(KILL cr, KILL oldval);
7246   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7247             "MOV    $res,0\n\t"
7248             "JNE,s  fail\n\t"
7249             "MOV    $res,1\n"
7250           "fail:" %}
7251   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7252   ins_pipe( pipe_cmpxchg );
7253 %}
7254 
7255 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7256   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7257   effect(KILL cr, KILL oldval);
7258   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7259             "MOV    $res,0\n\t"
7260             "JNE,s  fail\n\t"
7261             "MOV    $res,1\n"
7262           "fail:" %}
7263   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7264   ins_pipe( pipe_cmpxchg );
7265 %}
7266 
7267 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7268   predicate(n->as_LoadStore()->result_not_used());
7269   match(Set dummy (GetAndAddI mem add));
7270   effect(KILL cr);
7271   format %{ "ADDL  [$mem],$add" %}
7272   ins_encode %{
7273     if (os::is_MP()) { __ lock(); }
7274     __ addl($mem$$Address, $add$$constant);
7275   %}
7276   ins_pipe( pipe_cmpxchg );
7277 %}
7278 
7279 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7280   match(Set newval (GetAndAddI mem newval));
7281   effect(KILL cr);
7282   format %{ "XADDL  [$mem],$newval" %}
7283   ins_encode %{
7284     if (os::is_MP()) { __ lock(); }
7285     __ xaddl($mem$$Address, $newval$$Register);
7286   %}
7287   ins_pipe( pipe_cmpxchg );
7288 %}
7289 
7290 instruct xchgI( memory mem, rRegI newval) %{
7291   match(Set newval (GetAndSetI mem newval));
7292   format %{ "XCHGL  $newval,[$mem]" %}
7293   ins_encode %{
7294     __ xchgl($newval$$Register, $mem$$Address);
7295   %}
7296   ins_pipe( pipe_cmpxchg );
7297 %}
7298 
7299 instruct xchgP( memory mem, pRegP newval) %{
7300   match(Set newval (GetAndSetP mem newval));
7301   format %{ "XCHGL  $newval,[$mem]" %}
7302   ins_encode %{
7303     __ xchgl($newval$$Register, $mem$$Address);
7304   %}
7305   ins_pipe( pipe_cmpxchg );
7306 %}
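     // Note on the atomics above: XADD needs an explicit LOCK prefix on MP
     // systems (hence the os::is_MP() check), whereas XCHG with a memory
     // operand is locked implicitly by the processor, which is why xchgI and
     // xchgP emit no lock() call.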
7307 
7308 //----------Subtraction Instructions-------------------------------------------
7309 
7310 // Integer Subtraction Instructions
7311 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7312   match(Set dst (SubI dst src));
7313   effect(KILL cr);
7314 
7315   size(2);
7316   format %{ "SUB    $dst,$src" %}
7317   opcode(0x2B);
7318   ins_encode( OpcP, RegReg( dst, src) );
7319   ins_pipe( ialu_reg_reg );
7320 %}
7321 
7322 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7323   match(Set dst (SubI dst src));
7324   effect(KILL cr);
7325 
7326   format %{ "SUB    $dst,$src" %}
7327   opcode(0x81,0x05);  /* Opcode 81 /5 */
7328   // ins_encode( RegImm( dst, src) );
7329   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7330   ins_pipe( ialu_reg );
7331 %}
7332 
7333 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7334   match(Set dst (SubI dst (LoadI src)));
7335   effect(KILL cr);
7336 
7337   ins_cost(125);
7338   format %{ "SUB    $dst,$src" %}
7339   opcode(0x2B);
7340   ins_encode( OpcP, RegMem( dst, src) );
7341   ins_pipe( ialu_reg_mem );
7342 %}
7343 
7344 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7345   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7346   effect(KILL cr);
7347 
7348   ins_cost(150);
7349   format %{ "SUB    $dst,$src" %}
7350   opcode(0x29);  /* Opcode 29 /r */
7351   ins_encode( OpcP, RegMem( src, dst ) );
7352   ins_pipe( ialu_mem_reg );
7353 %}
7354 
7355 // Subtract from a pointer
7356 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7357   match(Set dst (AddP dst (SubI zero src)));
7358   effect(KILL cr);
7359 
7360   size(2);
7361   format %{ "SUB    $dst,$src" %}
7362   opcode(0x2B);
7363   ins_encode( OpcP, RegReg( dst, src) );
7364   ins_pipe( ialu_reg_reg );
7365 %}
7366 
7367 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7368   match(Set dst (SubI zero dst));
7369   effect(KILL cr);
7370 
7371   size(2);
7372   format %{ "NEG    $dst" %}
7373   opcode(0xF7,0x03);  // Opcode F7 /3
7374   ins_encode( OpcP, RegOpc( dst ) );
7375   ins_pipe( ialu_reg );
7376 %}
7377 
7378 //----------Multiplication/Division Instructions-------------------------------
7379 // Integer Multiplication Instructions
7380 // Multiply Register
7381 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7382   match(Set dst (MulI dst src));
7383   effect(KILL cr);
7384 
7385   size(3);
7386   ins_cost(300);
7387   format %{ "IMUL   $dst,$src" %}
7388   opcode(0xAF, 0x0F);
7389   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7390   ins_pipe( ialu_reg_reg_alu0 );
7391 %}
7392 
7393 // Multiply 32-bit Immediate
7394 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7395   match(Set dst (MulI src imm));
7396   effect(KILL cr);
7397 
7398   ins_cost(300);
7399   format %{ "IMUL   $dst,$src,$imm" %}
7400   opcode(0x69);  /* 69 /r id */
7401   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7402   ins_pipe( ialu_reg_reg_alu0 );
7403 %}
7404 
7405 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7406   match(Set dst src);
7407   effect(KILL cr);
7408 
7409   // Note that this is artificially increased to make it more expensive than loadConL
7410   ins_cost(250);
7411   format %{ "MOV    EAX,$src\t// low word only" %}
7412   opcode(0xB8);
7413   ins_encode( LdImmL_Lo(dst, src) );
7414   ins_pipe( ialu_reg_fat );
7415 %}
7416 
7417 // Multiply by 32-bit Immediate, taking the shifted high order results
7418 //  (special case for shift by 32)
7419 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7420   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7421   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7422              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7423              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7424   effect(USE src1, KILL cr);
7425 
7426   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7427   ins_cost(0*100 + 1*400 - 150);
7428   format %{ "IMUL   EDX:EAX,$src1" %}
7429   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7430   ins_pipe( pipe_slow );
7431 %}
7432 
7433 // Multiply by 32-bit Immediate, taking the shifted high order results
7434 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7435   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7436   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7437              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7438              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7439   effect(USE src1, KILL cr);
7440 
7441   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7442   ins_cost(1*100 + 1*400 - 150);
7443   format %{ "IMUL   EDX:EAX,$src1\n\t"
7444             "SAR    EDX,$cnt-32" %}
7445   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7446   ins_pipe( pipe_slow );
7447 %}
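     // The two "shifted high order result" forms above correspond to source
     // shapes like the following (illustrative C; C and s stand for the matched
     // 32-bit constant and a shift count in [32,63]):
     //
     //   int r = (int)(((long long)x * C) >> s);
     //
     // as typically produced by strength-reduced division by a constant.  The
     // one-operand IMUL leaves the full 64-bit product in EDX:EAX, shifting
     // right by 32 is free because EDX already holds the high half, and only
     // the remaining (s - 32) bits need an explicit SAR.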
7448 
7449 // Multiply Memory 32-bit Immediate
7450 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7451   match(Set dst (MulI (LoadI src) imm));
7452   effect(KILL cr);
7453 
7454   ins_cost(300);
7455   format %{ "IMUL   $dst,$src,$imm" %}
7456   opcode(0x69);  /* 69 /r id */
7457   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7458   ins_pipe( ialu_reg_mem_alu0 );
7459 %}
7460 
7461 // Multiply Memory
7462 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7463   match(Set dst (MulI dst (LoadI src)));
7464   effect(KILL cr);
7465 
7466   ins_cost(350);
7467   format %{ "IMUL   $dst,$src" %}
7468   opcode(0xAF, 0x0F);
7469   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7470   ins_pipe( ialu_reg_mem_alu0 );
7471 %}
7472 
7473 // Multiply Register Int to Long
7474 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7475   // Basic Idea: long = (long)int * (long)int
7476   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7477   effect(DEF dst, USE src, USE src1, KILL flags);
7478 
7479   ins_cost(300);
7480   format %{ "IMUL   $dst,$src1" %}
7481 
7482   ins_encode( long_int_multiply( dst, src1 ) );
7483   ins_pipe( ialu_reg_reg_alu0 );
7484 %}
7485 
7486 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7487   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7488   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7489   effect(KILL flags);
7490 
7491   ins_cost(300);
7492   format %{ "MUL    $dst,$src1" %}
7493 
7494   ins_encode( long_uint_multiply(dst, src1) );
7495   ins_pipe( ialu_reg_reg_alu0 );
7496 %}
7497 
7498 // Multiply Register Long
7499 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7500   match(Set dst (MulL dst src));
7501   effect(KILL cr, TEMP tmp);
7502   ins_cost(4*100+3*400);
7503 // Basic idea: lo(result) = lo(x_lo * y_lo)
7504 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7505   format %{ "MOV    $tmp,$src.lo\n\t"
7506             "IMUL   $tmp,EDX\n\t"
7507             "MOV    EDX,$src.hi\n\t"
7508             "IMUL   EDX,EAX\n\t"
7509             "ADD    $tmp,EDX\n\t"
7510             "MUL    EDX:EAX,$src.lo\n\t"
7511             "ADD    EDX,$tmp" %}
7512   ins_encode( long_multiply( dst, src, tmp ) );
7513   ins_pipe( pipe_slow );
7514 %}
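     // A C-level sketch of the decomposition used above (illustrative only,
     // C99 stdint types; the x_hi*y_hi term only affects bits >= 64):
     //
     //   uint64_t mul64(uint32_t x_lo, uint32_t x_hi,
     //                  uint32_t y_lo, uint32_t y_hi) {
     //     uint64_t p  = (uint64_t)x_lo * y_lo;              // one-operand MUL
     //     uint32_t hi = (uint32_t)(p >> 32)
     //                 + x_hi * y_lo + x_lo * y_hi;          // two IMULs + ADDs
     //     return ((uint64_t)hi << 32) | (uint32_t)p;
     //   }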
7515 
7516 // Multiply Register Long where the left operand's high 32 bits are zero
7517 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7518   predicate(is_operand_hi32_zero(n->in(1)));
7519   match(Set dst (MulL dst src));
7520   effect(KILL cr, TEMP tmp);
7521   ins_cost(2*100+2*400);
7522 // Basic idea: lo(result) = lo(x_lo * y_lo)
7523 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7524   format %{ "MOV    $tmp,$src.hi\n\t"
7525             "IMUL   $tmp,EAX\n\t"
7526             "MUL    EDX:EAX,$src.lo\n\t"
7527             "ADD    EDX,$tmp" %}
7528   ins_encode %{
7529     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7530     __ imull($tmp$$Register, rax);
7531     __ mull($src$$Register);
7532     __ addl(rdx, $tmp$$Register);
7533   %}
7534   ins_pipe( pipe_slow );
7535 %}
7536 
7537 // Multiply Register Long where the right operand's high 32 bits are zero
7538 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7539   predicate(is_operand_hi32_zero(n->in(2)));
7540   match(Set dst (MulL dst src));
7541   effect(KILL cr, TEMP tmp);
7542   ins_cost(2*100+2*400);
7543 // Basic idea: lo(result) = lo(x_lo * y_lo)
7544 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7545   format %{ "MOV    $tmp,$src.lo\n\t"
7546             "IMUL   $tmp,EDX\n\t"
7547             "MUL    EDX:EAX,$src.lo\n\t"
7548             "ADD    EDX,$tmp" %}
7549   ins_encode %{
7550     __ movl($tmp$$Register, $src$$Register);
7551     __ imull($tmp$$Register, rdx);
7552     __ mull($src$$Register);
7553     __ addl(rdx, $tmp$$Register);
7554   %}
7555   ins_pipe( pipe_slow );
7556 %}
7557 
7558 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7559 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7560   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7561   match(Set dst (MulL dst src));
7562   effect(KILL cr);
7563   ins_cost(1*400);
7564 // Basic idea: lo(result) = lo(x_lo * y_lo)
7565 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7566   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7567   ins_encode %{
7568     __ mull($src$$Register);
7569   %}
7570   ins_pipe( pipe_slow );
7571 %}
7572 
7573 // Multiply Register Long by small constant
7574 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7575   match(Set dst (MulL dst src));
7576   effect(KILL cr, TEMP tmp);
7577   ins_cost(2*100+2*400);
7578   size(12);
7579 // Basic idea: lo(result) = lo(src * EAX)
7580 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7581   format %{ "IMUL   $tmp,EDX,$src\n\t"
7582             "MOV    EDX,$src\n\t"
7583             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7584             "ADD    EDX,$tmp" %}
7585   ins_encode( long_multiply_con( dst, src, tmp ) );
7586   ins_pipe( pipe_slow );
7587 %}
7588 
7589 // Integer DIV with Register
7590 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7591   match(Set rax (DivI rax div));
7592   effect(KILL rdx, KILL cr);
7593   size(26);
7594   ins_cost(30*100+10*100);
7595   format %{ "CMP    EAX,0x80000000\n\t"
7596             "JNE,s  normal\n\t"
7597             "XOR    EDX,EDX\n\t"
7598             "CMP    ECX,-1\n\t"
7599             "JE,s   done\n"
7600     "normal: CDQ\n\t"
7601             "IDIV   $div\n\t"
7602     "done:"        %}
7603   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7604   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7605   ins_pipe( ialu_reg_reg_alu0 );
7606 %}
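     // The compare against 0x80000000 above guards the one case where IDIV
     // itself would trap: min_jint / -1 overflows (the true quotient 2^31 is
     // not representable), so the pattern skips IDIV and yields quotient
     // min_jint with remainder 0.  In C terms (illustrative only):
     //
     //   if (x == INT_MIN && y == -1) { q = x; r = 0; }
     //   else                         { q = x / y; r = x % y; }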
7607 
7608 // Divide Register Long
7609 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7610   match(Set dst (DivL src1 src2));
7611   effect( KILL cr, KILL cx, KILL bx );
7612   ins_cost(10000);
7613   format %{ "PUSH   $src1.hi\n\t"
7614             "PUSH   $src1.lo\n\t"
7615             "PUSH   $src2.hi\n\t"
7616             "PUSH   $src2.lo\n\t"
7617             "CALL   SharedRuntime::ldiv\n\t"
7618             "ADD    ESP,16" %}
7619   ins_encode( long_div(src1,src2) );
7620   ins_pipe( pipe_slow );
7621 %}
7622 
7623 // Integer DIVMOD with Register, both quotient and mod results
7624 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7625   match(DivModI rax div);
7626   effect(KILL cr);
7627   size(26);
7628   ins_cost(30*100+10*100);
7629   format %{ "CMP    EAX,0x80000000\n\t"
7630             "JNE,s  normal\n\t"
7631             "XOR    EDX,EDX\n\t"
7632             "CMP    ECX,-1\n\t"
7633             "JE,s   done\n"
7634     "normal: CDQ\n\t"
7635             "IDIV   $div\n\t"
7636     "done:"        %}
7637   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7638   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7639   ins_pipe( pipe_slow );
7640 %}
7641 
7642 // Integer MOD with Register
7643 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7644   match(Set rdx (ModI rax div));
7645   effect(KILL rax, KILL cr);
7646 
7647   size(26);
7648   ins_cost(300);
7649   format %{ "CDQ\n\t"
7650             "IDIV   $div" %}
7651   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7652   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7653   ins_pipe( ialu_reg_reg_alu0 );
7654 %}
7655 
7656 // Remainder Register Long
7657 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7658   match(Set dst (ModL src1 src2));
7659   effect( KILL cr, KILL cx, KILL bx );
7660   ins_cost(10000);
7661   format %{ "PUSH   $src1.hi\n\t"
7662             "PUSH   $src1.lo\n\t"
7663             "PUSH   $src2.hi\n\t"
7664             "PUSH   $src2.lo\n\t"
7665             "CALL   SharedRuntime::lrem\n\t"
7666             "ADD    ESP,16" %}
7667   ins_encode( long_mod(src1,src2) );
7668   ins_pipe( pipe_slow );
7669 %}
7670 
7671 // Divide Register Long (no special case since divisor != -1)
7672 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7673   match(Set dst (DivL dst imm));
7674   effect( TEMP tmp, TEMP tmp2, KILL cr );
7675   ins_cost(1000);
7676   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7677             "XOR    $tmp2,$tmp2\n\t"
7678             "CMP    $tmp,EDX\n\t"
7679             "JA,s   fast\n\t"
7680             "MOV    $tmp2,EAX\n\t"
7681             "MOV    EAX,EDX\n\t"
7682             "MOV    EDX,0\n\t"
7683             "JLE,s  pos\n\t"
7684             "LNEG   EAX : $tmp2\n\t"
7685             "DIV    $tmp # unsigned division\n\t"
7686             "XCHG   EAX,$tmp2\n\t"
7687             "DIV    $tmp\n\t"
7688             "LNEG   $tmp2 : EAX\n\t"
7689             "JMP,s  done\n"
7690     "pos:\n\t"
7691             "DIV    $tmp\n\t"
7692             "XCHG   EAX,$tmp2\n"
7693     "fast:\n\t"
7694             "DIV    $tmp\n"
7695     "done:\n\t"
7696             "MOV    EDX,$tmp2\n\t"
7697             "NEG    EDX:EAX # if $imm < 0" %}
7698   ins_encode %{
7699     int con = (int)$imm$$constant;
7700     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7701     int pcon = (con > 0) ? con : -con;
7702     Label Lfast, Lpos, Ldone;
7703 
7704     __ movl($tmp$$Register, pcon);
7705     __ xorl($tmp2$$Register,$tmp2$$Register);
7706     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7707     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7708 
7709     __ movl($tmp2$$Register, $dst$$Register); // save
7710     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7711     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7712     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7713 
7714     // Negative dividend.
7715     // convert value to positive to use unsigned division
7716     __ lneg($dst$$Register, $tmp2$$Register);
7717     __ divl($tmp$$Register);
7718     __ xchgl($dst$$Register, $tmp2$$Register);
7719     __ divl($tmp$$Register);
7720     // revert result back to negative
7721     __ lneg($tmp2$$Register, $dst$$Register);
7722     __ jmpb(Ldone);
7723 
7724     __ bind(Lpos);
7725     __ divl($tmp$$Register); // Use unsigned division
7726     __ xchgl($dst$$Register, $tmp2$$Register);
7727     // Fall through to the final divide; tmp2 has the 32-bit hi result
7728 
7729     __ bind(Lfast);
7730     // fast path: src is positive
7731     __ divl($tmp$$Register); // Use unsigned division
7732 
7733     __ bind(Ldone);
7734     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7735     if (con < 0) {
7736       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7737     }
7738   %}
7739   ins_pipe( pipe_slow );
7740 %}
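     // The encoding above divides a 64-bit value by a 32-bit constant with two
     // 32-bit DIVs, schoolbook style.  For a non-negative dividend hi:lo and
     // pcon = |imm| (illustrative C, unsigned arithmetic):
     //
     //   uint32_t q_hi = hi / pcon;                            // first DIV
     //   uint32_t r    = hi % pcon;                            // left in EDX
     //   uint32_t q_lo = (((uint64_t)r << 32) | lo) / pcon;    // second DIV
     //   // quotient = q_hi:q_lo
     //
     // A negative dividend is negated first (LNEG), divided unsigned, and the
     // result negated back; the final NEG handles a negative immediate.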
7741 
7742 // Remainder Register Long (remainder fits into 32 bits)
7743 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7744   match(Set dst (ModL dst imm));
7745   effect( TEMP tmp, TEMP tmp2, KILL cr );
7746   ins_cost(1000);
7747   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7748             "CMP    $tmp,EDX\n\t"
7749             "JA,s   fast\n\t"
7750             "MOV    $tmp2,EAX\n\t"
7751             "MOV    EAX,EDX\n\t"
7752             "MOV    EDX,0\n\t"
7753             "JLE,s  pos\n\t"
7754             "LNEG   EAX : $tmp2\n\t"
7755             "DIV    $tmp # unsigned division\n\t"
7756             "MOV    EAX,$tmp2\n\t"
7757             "DIV    $tmp\n\t"
7758             "NEG    EDX\n\t"
7759             "JMP,s  done\n"
7760     "pos:\n\t"
7761             "DIV    $tmp\n\t"
7762             "MOV    EAX,$tmp2\n"
7763     "fast:\n\t"
7764             "DIV    $tmp\n"
7765     "done:\n\t"
7766             "MOV    EAX,EDX\n\t"
7767             "SAR    EDX,31\n\t" %}
7768   ins_encode %{
7769     int con = (int)$imm$$constant;
7770     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7771     int pcon = (con > 0) ? con : -con;
7772     Label  Lfast, Lpos, Ldone;
7773 
7774     __ movl($tmp$$Register, pcon);
7775     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7776     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7777 
7778     __ movl($tmp2$$Register, $dst$$Register); // save
7779     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7780     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7781     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7782 
7783     // Negative dividend.
7784     // convert value to positive to use unsigned division
7785     __ lneg($dst$$Register, $tmp2$$Register);
7786     __ divl($tmp$$Register);
7787     __ movl($dst$$Register, $tmp2$$Register);
7788     __ divl($tmp$$Register);
7789     // revert remainder back to negative
7790     __ negl(HIGH_FROM_LOW($dst$$Register));
7791     __ jmpb(Ldone);
7792 
7793     __ bind(Lpos);
7794     __ divl($tmp$$Register);
7795     __ movl($dst$$Register, $tmp2$$Register);
7796 
7797     __ bind(Lfast);
7798     // fast path: src is positive
7799     __ divl($tmp$$Register);
7800 
7801     __ bind(Ldone);
7802     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7803     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7804 
7805   %}
7806   ins_pipe( pipe_slow );
7807 %}
7808 
7809 // Integer Shift Instructions
7810 // Shift Left by one
7811 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7812   match(Set dst (LShiftI dst shift));
7813   effect(KILL cr);
7814 
7815   size(2);
7816   format %{ "SHL    $dst,$shift" %}
7817   opcode(0xD1, 0x4);  /* D1 /4 */
7818   ins_encode( OpcP, RegOpc( dst ) );
7819   ins_pipe( ialu_reg );
7820 %}
7821 
7822 // Shift Left by 8-bit immediate
7823 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7824   match(Set dst (LShiftI dst shift));
7825   effect(KILL cr);
7826 
7827   size(3);
7828   format %{ "SHL    $dst,$shift" %}
7829   opcode(0xC1, 0x4);  /* C1 /4 ib */
7830   ins_encode( RegOpcImm( dst, shift) );
7831   ins_pipe( ialu_reg );
7832 %}
7833 
7834 // Shift Left by variable
7835 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7836   match(Set dst (LShiftI dst shift));
7837   effect(KILL cr);
7838 
7839   size(2);
7840   format %{ "SHL    $dst,$shift" %}
7841   opcode(0xD3, 0x4);  /* D3 /4 */
7842   ins_encode( OpcP, RegOpc( dst ) );
7843   ins_pipe( ialu_reg_reg );
7844 %}
7845 
7846 // Arithmetic shift right by one
7847 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7848   match(Set dst (RShiftI dst shift));
7849   effect(KILL cr);
7850 
7851   size(2);
7852   format %{ "SAR    $dst,$shift" %}
7853   opcode(0xD1, 0x7);  /* D1 /7 */
7854   ins_encode( OpcP, RegOpc( dst ) );
7855   ins_pipe( ialu_reg );
7856 %}
7857 
7858 // Arithmetic shift right by one (memory operand)
7859 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7860   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7861   effect(KILL cr);
7862   format %{ "SAR    $dst,$shift" %}
7863   opcode(0xD1, 0x7);  /* D1 /7 */
7864   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7865   ins_pipe( ialu_mem_imm );
7866 %}
7867 
7868 // Arithmetic Shift Right by 8-bit immediate
7869 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7870   match(Set dst (RShiftI dst shift));
7871   effect(KILL cr);
7872 
7873   size(3);
7874   format %{ "SAR    $dst,$shift" %}
7875   opcode(0xC1, 0x7);  /* C1 /7 ib */
7876   ins_encode( RegOpcImm( dst, shift ) );
7877   ins_pipe( ialu_mem_imm );
7878 %}
7879 
7880 // Arithmetic Shift Right by 8-bit immediate (memory operand)
7881 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7882   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7883   effect(KILL cr);
7884 
7885   format %{ "SAR    $dst,$shift" %}
7886   opcode(0xC1, 0x7);  /* C1 /7 ib */
7887   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7888   ins_pipe( ialu_mem_imm );
7889 %}
7890 
7891 // Arithmetic Shift Right by variable
7892 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7893   match(Set dst (RShiftI dst shift));
7894   effect(KILL cr);
7895 
7896   size(2);
7897   format %{ "SAR    $dst,$shift" %}
7898   opcode(0xD3, 0x7);  /* D3 /7 */
7899   ins_encode( OpcP, RegOpc( dst ) );
7900   ins_pipe( ialu_reg_reg );
7901 %}
7902 
7903 // Logical shift right by one
7904 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7905   match(Set dst (URShiftI dst shift));
7906   effect(KILL cr);
7907 
7908   size(2);
7909   format %{ "SHR    $dst,$shift" %}
7910   opcode(0xD1, 0x5);  /* D1 /5 */
7911   ins_encode( OpcP, RegOpc( dst ) );
7912   ins_pipe( ialu_reg );
7913 %}
7914 
7915 // Logical Shift Right by 8-bit immediate
7916 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7917   match(Set dst (URShiftI dst shift));
7918   effect(KILL cr);
7919 
7920   size(3);
7921   format %{ "SHR    $dst,$shift" %}
7922   opcode(0xC1, 0x5);  /* C1 /5 ib */
7923   ins_encode( RegOpcImm( dst, shift) );
7924   ins_pipe( ialu_reg );
7925 %}
7926 
7927 
7928 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7929 // This idiom is used by the compiler for the i2b bytecode.
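// For example, (x << 24) >> 24 sign-extends the low byte of x, which is what
// MOVSX with a byte-register source does in a single instruction.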
7930 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7931   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7932 
7933   size(3);
7934   format %{ "MOVSX  $dst,$src :8" %}
7935   ins_encode %{
7936     __ movsbl($dst$$Register, $src$$Register);
7937   %}
7938   ins_pipe(ialu_reg_reg);
7939 %}
7940 
7941 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7942 // This idiom is used by the compiler for the i2s bytecode.
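// For example, (x << 16) >> 16 sign-extends the low 16 bits of x, which is
// what MOVSX with a word-register source does in a single instruction.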
7943 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7944   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7945 
7946   size(3);
7947   format %{ "MOVSX  $dst,$src :16" %}
7948   ins_encode %{
7949     __ movswl($dst$$Register, $src$$Register);
7950   %}
7951   ins_pipe(ialu_reg_reg);
7952 %}
7953 
7954 
7955 // Logical Shift Right by variable
7956 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7957   match(Set dst (URShiftI dst shift));
7958   effect(KILL cr);
7959 
7960   size(2);
7961   format %{ "SHR    $dst,$shift" %}
7962   opcode(0xD3, 0x5);  /* D3 /5 */
7963   ins_encode( OpcP, RegOpc( dst ) );
7964   ins_pipe( ialu_reg_reg );
7965 %}
7966 
7967 
7968 //----------Logical Instructions-----------------------------------------------
7969 //----------Integer Logical Instructions---------------------------------------
7970 // And Instructions
7971 // And Register with Register
7972 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7973   match(Set dst (AndI dst src));
7974   effect(KILL cr);
7975 
7976   size(2);
7977   format %{ "AND    $dst,$src" %}
7978   opcode(0x23);
7979   ins_encode( OpcP, RegReg( dst, src) );
7980   ins_pipe( ialu_reg_reg );
7981 %}
7982 
7983 // And Register with Immediate
7984 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7985   match(Set dst (AndI dst src));
7986   effect(KILL cr);
7987 
7988   format %{ "AND    $dst,$src" %}
7989   opcode(0x81,0x04);  /* Opcode 81 /4 */
7990   // ins_encode( RegImm( dst, src) );
7991   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7992   ins_pipe( ialu_reg );
7993 %}
7994 
7995 // And Register with Memory
7996 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7997   match(Set dst (AndI dst (LoadI src)));
7998   effect(KILL cr);
7999 
8000   ins_cost(125);
8001   format %{ "AND    $dst,$src" %}
8002   opcode(0x23);
8003   ins_encode( OpcP, RegMem( dst, src) );
8004   ins_pipe( ialu_reg_mem );
8005 %}
8006 
8007 // And Memory with Register
8008 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8009   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8010   effect(KILL cr);
8011 
8012   ins_cost(150);
8013   format %{ "AND    $dst,$src" %}
8014   opcode(0x21);  /* Opcode 21 /r */
8015   ins_encode( OpcP, RegMem( src, dst ) );
8016   ins_pipe( ialu_mem_reg );
8017 %}
8018 
8019 // And Memory with Immediate
8020 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8021   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8022   effect(KILL cr);
8023 
8024   ins_cost(125);
8025   format %{ "AND    $dst,$src" %}
8026   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8027   // ins_encode( MemImm( dst, src) );
8028   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8029   ins_pipe( ialu_mem_imm );
8030 %}
8031 
8032 // BMI1 instructions
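// The match rules below recognize the canonical bit-trick shapes for the BMI1
// instructions:
//   ANDN:   ~src1 & src2
//   BLSI:   src & -src        (isolate lowest set bit)
//   BLSMSK: src ^ (src - 1)   (mask up to and including lowest set bit)
//   BLSR:   src & (src - 1)   (clear lowest set bit)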
8033 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8034   match(Set dst (AndI (XorI src1 minus_1) src2));
8035   predicate(UseBMI1Instructions);
8036   effect(KILL cr);
8037 
8038   format %{ "ANDNL  $dst, $src1, $src2" %}
8039 
8040   ins_encode %{
8041     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8042   %}
8043   ins_pipe(ialu_reg);
8044 %}
8045 
8046 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8047   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8048   predicate(UseBMI1Instructions);
8049   effect(KILL cr);
8050 
8051   ins_cost(125);
8052   format %{ "ANDNL  $dst, $src1, $src2" %}
8053 
8054   ins_encode %{
8055     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8056   %}
8057   ins_pipe(ialu_reg_mem);
8058 %}
8059 
8060 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8061   match(Set dst (AndI (SubI imm_zero src) src));
8062   predicate(UseBMI1Instructions);
8063   effect(KILL cr);
8064 
8065   format %{ "BLSIL  $dst, $src" %}
8066 
8067   ins_encode %{
8068     __ blsil($dst$$Register, $src$$Register);
8069   %}
8070   ins_pipe(ialu_reg);
8071 %}
8072 
8073 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8074   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8075   predicate(UseBMI1Instructions);
8076   effect(KILL cr);
8077 
8078   ins_cost(125);
8079   format %{ "BLSIL  $dst, $src" %}
8080 
8081   ins_encode %{
8082     __ blsil($dst$$Register, $src$$Address);
8083   %}
8084   ins_pipe(ialu_reg_mem);
8085 %}
8086 
8087 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8088 %{
8089   match(Set dst (XorI (AddI src minus_1) src));
8090   predicate(UseBMI1Instructions);
8091   effect(KILL cr);
8092 
8093   format %{ "BLSMSKL $dst, $src" %}
8094 
8095   ins_encode %{
8096     __ blsmskl($dst$$Register, $src$$Register);
8097   %}
8098 
8099   ins_pipe(ialu_reg);
8100 %}
8101 
8102 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8103 %{
8104   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8105   predicate(UseBMI1Instructions);
8106   effect(KILL cr);
8107 
8108   ins_cost(125);
8109   format %{ "BLSMSKL $dst, $src" %}
8110 
8111   ins_encode %{
8112     __ blsmskl($dst$$Register, $src$$Address);
8113   %}
8114 
8115   ins_pipe(ialu_reg_mem);
8116 %}
8117 
8118 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8119 %{
8120   match(Set dst (AndI (AddI src minus_1) src) );
8121   predicate(UseBMI1Instructions);
8122   effect(KILL cr);
8123 
8124   format %{ "BLSRL  $dst, $src" %}
8125 
8126   ins_encode %{
8127     __ blsrl($dst$$Register, $src$$Register);
8128   %}
8129 
8130   ins_pipe(ialu_reg);
8131 %}
8132 
8133 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8134 %{
8135   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8136   predicate(UseBMI1Instructions);
8137   effect(KILL cr);
8138 
8139   ins_cost(125);
8140   format %{ "BLSRL  $dst, $src" %}
8141 
8142   ins_encode %{
8143     __ blsrl($dst$$Register, $src$$Address);
8144   %}
8145 
8146   ins_pipe(ialu_reg_mem);
8147 %}
8148 
8149 // Or Instructions
8150 // Or Register with Register
8151 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8152   match(Set dst (OrI dst src));
8153   effect(KILL cr);
8154 
8155   size(2);
8156   format %{ "OR     $dst,$src" %}
8157   opcode(0x0B);
8158   ins_encode( OpcP, RegReg( dst, src) );
8159   ins_pipe( ialu_reg_reg );
8160 %}
8161 
8162 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8163   match(Set dst (OrI dst (CastP2X src)));
8164   effect(KILL cr);
8165 
8166   size(2);
8167   format %{ "OR     $dst,$src" %}
8168   opcode(0x0B);
8169   ins_encode( OpcP, RegReg( dst, src) );
8170   ins_pipe( ialu_reg_reg );
8171 %}
8172 
8173 
8174 // Or Register with Immediate
8175 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8176   match(Set dst (OrI dst src));
8177   effect(KILL cr);
8178 
8179   format %{ "OR     $dst,$src" %}
8180   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8181   // ins_encode( RegImm( dst, src) );
8182   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8183   ins_pipe( ialu_reg );
8184 %}
8185 
8186 // Or Register with Memory
8187 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8188   match(Set dst (OrI dst (LoadI src)));
8189   effect(KILL cr);
8190 
8191   ins_cost(125);
8192   format %{ "OR     $dst,$src" %}
8193   opcode(0x0B);
8194   ins_encode( OpcP, RegMem( dst, src) );
8195   ins_pipe( ialu_reg_mem );
8196 %}
8197 
8198 // Or Memory with Register
8199 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8200   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8201   effect(KILL cr);
8202 
8203   ins_cost(150);
8204   format %{ "OR     $dst,$src" %}
8205   opcode(0x09);  /* Opcode 09 /r */
8206   ins_encode( OpcP, RegMem( src, dst ) );
8207   ins_pipe( ialu_mem_reg );
8208 %}
8209 
8210 // Or Memory with Immediate
8211 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8212   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8213   effect(KILL cr);
8214 
8215   ins_cost(125);
8216   format %{ "OR     $dst,$src" %}
8217   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8218   // ins_encode( MemImm( dst, src) );
8219   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8220   ins_pipe( ialu_mem_imm );
8221 %}
8222 
8223 // ROL/ROR
8224 // ROL expand
8225 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8226   effect(USE_DEF dst, USE shift, KILL cr);
8227 
8228   format %{ "ROL    $dst, $shift" %}
8229   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8230   ins_encode( OpcP, RegOpc( dst ));
8231   ins_pipe( ialu_reg );
8232 %}
8233 
8234 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8235   effect(USE_DEF dst, USE shift, KILL cr);
8236 
8237   format %{ "ROL    $dst, $shift" %}
8238   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8239   ins_encode( RegOpcImm(dst, shift) );
8240   ins_pipe(ialu_reg);
8241 %}
8242 
8243 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8244   effect(USE_DEF dst, USE shift, KILL cr);
8245 
8246   format %{ "ROL    $dst, $shift" %}
8247   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8248   ins_encode(OpcP, RegOpc(dst));
8249   ins_pipe( ialu_reg_reg );
8250 %}
8251 // end of ROL expand
8252 
8253 // ROL 32bit by one once
8254 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8255   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8256 
8257   expand %{
8258     rolI_eReg_imm1(dst, lshift, cr);
8259   %}
8260 %}
8261 
8262 // ROL 32bit var by imm8 once
8263 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8264   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8265   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8266 
8267   expand %{
8268     rolI_eReg_imm8(dst, lshift, cr);
8269   %}
8270 %}
8271 
8272 // ROL 32bit var by var once
8273 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8274   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8275 
8276   expand %{
8277     rolI_eReg_CL(dst, shift, cr);
8278   %}
8279 %}
8280 
8281 // ROL 32bit var by var once
8282 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8283   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8284 
8285   expand %{
8286     rolI_eReg_CL(dst, shift, cr);
8287   %}
8288 %}
8289 
8290 // ROR expand
8291 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8292   effect(USE_DEF dst, USE shift, KILL cr);
8293 
8294   format %{ "ROR    $dst, $shift" %}
8295   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8296   ins_encode( OpcP, RegOpc( dst ) );
8297   ins_pipe( ialu_reg );
8298 %}
8299 
8300 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8301   effect (USE_DEF dst, USE shift, KILL cr);
8302 
8303   format %{ "ROR    $dst, $shift" %}
8304   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8305   ins_encode( RegOpcImm(dst, shift) );
8306   ins_pipe( ialu_reg );
8307 %}
8308 
8309 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8310   effect(USE_DEF dst, USE shift, KILL cr);
8311 
8312   format %{ "ROR    $dst, $shift" %}
8313   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8314   ins_encode(OpcP, RegOpc(dst));
8315   ins_pipe( ialu_reg_reg );
8316 %}
8317 // end of ROR expand
8318 
8319 // ROR right once
8320 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8321   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8322 
8323   expand %{
8324     rorI_eReg_imm1(dst, rshift, cr);
8325   %}
8326 %}
8327 
8328 // ROR 32bit by immI8 once
8329 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8330   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8331   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8332 
8333   expand %{
8334     rorI_eReg_imm8(dst, rshift, cr);
8335   %}
8336 %}
8337 
8338 // ROR 32bit var by var once
8339 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8340   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8341 
8342   expand %{
8343     rorI_eReg_CL(dst, shift, cr);
8344   %}
8345 %}
8346 
8347 // ROR 32bit var by var once
8348 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8349   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8350 
8351   expand %{
8352     rorI_eReg_CL(dst, shift, cr);
8353   %}
8354 %}
8355 
8356 // Xor Instructions
8357 // Xor Register with Register
8358 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8359   match(Set dst (XorI dst src));
8360   effect(KILL cr);
8361 
8362   size(2);
8363   format %{ "XOR    $dst,$src" %}
8364   opcode(0x33);
8365   ins_encode( OpcP, RegReg( dst, src) );
8366   ins_pipe( ialu_reg_reg );
8367 %}
8368 
8369 // Xor Register with Immediate -1
8370 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8371   match(Set dst (XorI dst imm));
8372 
8373   size(2);
8374   format %{ "NOT    $dst" %}
8375   ins_encode %{
8376      __ notl($dst$$Register);
8377   %}
8378   ins_pipe( ialu_reg );
8379 %}
8380 
8381 // Xor Register with Immediate
8382 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8383   match(Set dst (XorI dst src));
8384   effect(KILL cr);
8385 
8386   format %{ "XOR    $dst,$src" %}
8387   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8388   // ins_encode( RegImm( dst, src) );
8389   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8390   ins_pipe( ialu_reg );
8391 %}
8392 
8393 // Xor Register with Memory
8394 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8395   match(Set dst (XorI dst (LoadI src)));
8396   effect(KILL cr);
8397 
8398   ins_cost(125);
8399   format %{ "XOR    $dst,$src" %}
8400   opcode(0x33);
8401   ins_encode( OpcP, RegMem(dst, src) );
8402   ins_pipe( ialu_reg_mem );
8403 %}
8404 
8405 // Xor Memory with Register
8406 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8407   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8408   effect(KILL cr);
8409 
8410   ins_cost(150);
8411   format %{ "XOR    $dst,$src" %}
8412   opcode(0x31);  /* Opcode 31 /r */
8413   ins_encode( OpcP, RegMem( src, dst ) );
8414   ins_pipe( ialu_mem_reg );
8415 %}
8416 
8417 // Xor Memory with Immediate
8418 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8419   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8420   effect(KILL cr);
8421 
8422   ins_cost(125);
8423   format %{ "XOR    $dst,$src" %}
8424   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8425   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8426   ins_pipe( ialu_mem_imm );
8427 %}
8428 
8429 //----------Convert Int to Boolean---------------------------------------------
8430 
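// The NEG/ADC idiom below computes (src != 0) ? 1 : 0 without a branch: with
// dst initialized to a copy of src, NEG dst sets CF exactly when src != 0, and
// ADC dst,src then leaves -src + src + CF == CF in dst.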
8431 instruct movI_nocopy(rRegI dst, rRegI src) %{
8432   effect( DEF dst, USE src );
8433   format %{ "MOV    $dst,$src" %}
8434   ins_encode( enc_Copy( dst, src) );
8435   ins_pipe( ialu_reg_reg );
8436 %}
8437 
8438 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8439   effect( USE_DEF dst, USE src, KILL cr );
8440 
8441   size(4);
8442   format %{ "NEG    $dst\n\t"
8443             "ADC    $dst,$src" %}
8444   ins_encode( neg_reg(dst),
8445               OpcRegReg(0x13,dst,src) );
8446   ins_pipe( ialu_reg_reg_long );
8447 %}
8448 
8449 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8450   match(Set dst (Conv2B src));
8451 
8452   expand %{
8453     movI_nocopy(dst,src);
8454     ci2b(dst,src,cr);
8455   %}
8456 %}
8457 
8458 instruct movP_nocopy(rRegI dst, eRegP src) %{
8459   effect( DEF dst, USE src );
8460   format %{ "MOV    $dst,$src" %}
8461   ins_encode( enc_Copy( dst, src) );
8462   ins_pipe( ialu_reg_reg );
8463 %}
8464 
8465 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8466   effect( USE_DEF dst, USE src, KILL cr );
8467   format %{ "NEG    $dst\n\t"
8468             "ADC    $dst,$src" %}
8469   ins_encode( neg_reg(dst),
8470               OpcRegReg(0x13,dst,src) );
8471   ins_pipe( ialu_reg_reg_long );
8472 %}
8473 
8474 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8475   match(Set dst (Conv2B src));
8476 
8477   expand %{
8478     movP_nocopy(dst,src);
8479     cp2b(dst,src,cr);
8480   %}
8481 %}
8482 
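// CmpLTMask produces an all-ones mask (-1) when p < q (signed) and 0
// otherwise; SETlt materializes 0 or 1 and the NEG turns the 1 into -1.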
8483 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8484   match(Set dst (CmpLTMask p q));
8485   effect(KILL cr);
8486   ins_cost(400);
8487 
8488   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8489   format %{ "XOR    $dst,$dst\n\t"
8490             "CMP    $p,$q\n\t"
8491             "SETlt  $dst\n\t"
8492             "NEG    $dst" %}
8493   ins_encode %{
8494     Register Rp = $p$$Register;
8495     Register Rq = $q$$Register;
8496     Register Rd = $dst$$Register;
8497     Label done;
8498     __ xorl(Rd, Rd);
8499     __ cmpl(Rp, Rq);
8500     __ setb(Assembler::less, Rd);
8501     __ negl(Rd);
8502   %}
8503 
8504   ins_pipe(pipe_slow);
8505 %}
8506 
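// With q == 0 the mask is simply the sign bit of dst replicated, so a single
// arithmetic shift right by 31 suffices.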
8507 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8508   match(Set dst (CmpLTMask dst zero));
8509   effect(DEF dst, KILL cr);
8510   ins_cost(100);
8511 
8512   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8513   ins_encode %{
8514   __ sarl($dst$$Register, 31);
8515   %}
8516   ins_pipe(ialu_reg);
8517 %}
8518 
8519 /* better to save a register than avoid a branch */
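// This matches (p - q) + ((p < q) ? y : 0): subtract q from p and, if the
// subtraction went negative, add y back in; this costs one short branch but
// avoids materializing the CmpLTMask in an extra register.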
8520 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8521   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8522   effect(KILL cr);
8523   ins_cost(400);
8524   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8525             "JGE    done\n\t"
8526             "ADD    $p,$y\n"
8527             "done:  " %}
8528   ins_encode %{
8529     Register Rp = $p$$Register;
8530     Register Rq = $q$$Register;
8531     Register Ry = $y$$Register;
8532     Label done;
8533     __ subl(Rp, Rq);
8534     __ jccb(Assembler::greaterEqual, done);
8535     __ addl(Rp, Ry);
8536     __ bind(done);
8537   %}
8538 
8539   ins_pipe(pipe_cmplt);
8540 %}
8541 
8542 /* better to save a register than avoid a branch */
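// This matches ((p < q) ? y : 0) stored back into y: keep y when p < q,
// otherwise clear it, again trading a short branch for a register.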
8543 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8544   match(Set y (AndI (CmpLTMask p q) y));
8545   effect(KILL cr);
8546 
8547   ins_cost(300);
8548 
8549   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8550             "JLT      done\n\t"
8551             "XORL     $y, $y\n"
8552             "done:  " %}
8553   ins_encode %{
8554     Register Rp = $p$$Register;
8555     Register Rq = $q$$Register;
8556     Register Ry = $y$$Register;
8557     Label done;
8558     __ cmpl(Rp, Rq);
8559     __ jccb(Assembler::less, done);
8560     __ xorl(Ry, Ry);
8561     __ bind(done);
8562   %}
8563 
8564   ins_pipe(pipe_cmplt);
8565 %}
8566 
8567 /* If I enable this, I encourage spilling in the inner loop of compress.
8568 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8569   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8570 */
8571 //----------Overflow Math Instructions-----------------------------------------
8572 
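// These instructions produce their result in the condition codes: the ADD,
// CMP, NEG or IMUL below is emitted solely so that a following branch or
// cmove on the overflow condition can consume OF.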
8573 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8574 %{
8575   match(Set cr (OverflowAddI op1 op2));
8576   effect(DEF cr, USE_KILL op1, USE op2);
8577 
8578   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8579 
8580   ins_encode %{
8581     __ addl($op1$$Register, $op2$$Register);
8582   %}
8583   ins_pipe(ialu_reg_reg);
8584 %}
8585 
8586 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8587 %{
8588   match(Set cr (OverflowAddI op1 op2));
8589   effect(DEF cr, USE_KILL op1, USE op2);
8590 
8591   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8592 
8593   ins_encode %{
8594     __ addl($op1$$Register, $op2$$constant);
8595   %}
8596   ins_pipe(ialu_reg_reg);
8597 %}
8598 
8599 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8600 %{
8601   match(Set cr (OverflowSubI op1 op2));
8602 
8603   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8604   ins_encode %{
8605     __ cmpl($op1$$Register, $op2$$Register);
8606   %}
8607   ins_pipe(ialu_reg_reg);
8608 %}
8609 
8610 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8611 %{
8612   match(Set cr (OverflowSubI op1 op2));
8613 
8614   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8615   ins_encode %{
8616     __ cmpl($op1$$Register, $op2$$constant);
8617   %}
8618   ins_pipe(ialu_reg_reg);
8619 %}
8620 
8621 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8622 %{
8623   match(Set cr (OverflowSubI zero op2));
8624   effect(DEF cr, USE_KILL op2);
8625 
8626   format %{ "NEG    $op2\t# overflow check int" %}
8627   ins_encode %{
8628     __ negl($op2$$Register);
8629   %}
8630   ins_pipe(ialu_reg_reg);
8631 %}
8632 
8633 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8634 %{
8635   match(Set cr (OverflowMulI op1 op2));
8636   effect(DEF cr, USE_KILL op1, USE op2);
8637 
8638   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8639   ins_encode %{
8640     __ imull($op1$$Register, $op2$$Register);
8641   %}
8642   ins_pipe(ialu_reg_reg_alu0);
8643 %}
8644 
8645 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8646 %{
8647   match(Set cr (OverflowMulI op1 op2));
8648   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8649 
8650   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8651   ins_encode %{
8652     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8653   %}
8654   ins_pipe(ialu_reg_reg_alu0);
8655 %}
8656 
8657 //----------Long Instructions------------------------------------------------
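// On 32-bit x86 a Java long lives in a register pair ($dst.lo/$dst.hi); add
// and subtract use ADD/ADC and SUB/SBB so the carry or borrow propagates from
// the low word into the high word.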
8658 // Add Long Register with Register
8659 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8660   match(Set dst (AddL dst src));
8661   effect(KILL cr);
8662   ins_cost(200);
8663   format %{ "ADD    $dst.lo,$src.lo\n\t"
8664             "ADC    $dst.hi,$src.hi" %}
8665   opcode(0x03, 0x13);
8666   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8667   ins_pipe( ialu_reg_reg_long );
8668 %}
8669 
8670 // Add Long Register with Immediate
8671 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8672   match(Set dst (AddL dst src));
8673   effect(KILL cr);
8674   format %{ "ADD    $dst.lo,$src.lo\n\t"
8675             "ADC    $dst.hi,$src.hi" %}
8676   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8677   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8678   ins_pipe( ialu_reg_long );
8679 %}
8680 
8681 // Add Long Register with Memory
8682 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8683   match(Set dst (AddL dst (LoadL mem)));
8684   effect(KILL cr);
8685   ins_cost(125);
8686   format %{ "ADD    $dst.lo,$mem\n\t"
8687             "ADC    $dst.hi,$mem+4" %}
8688   opcode(0x03, 0x13);
8689   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8690   ins_pipe( ialu_reg_long_mem );
8691 %}
8692 
8693 // Subtract Long Register with Register.
8694 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8695   match(Set dst (SubL dst src));
8696   effect(KILL cr);
8697   ins_cost(200);
8698   format %{ "SUB    $dst.lo,$src.lo\n\t"
8699             "SBB    $dst.hi,$src.hi" %}
8700   opcode(0x2B, 0x1B);
8701   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8702   ins_pipe( ialu_reg_reg_long );
8703 %}
8704 
8705 // Subtract Long Register with Immediate
8706 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8707   match(Set dst (SubL dst src));
8708   effect(KILL cr);
8709   format %{ "SUB    $dst.lo,$src.lo\n\t"
8710             "SBB    $dst.hi,$src.hi" %}
8711   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8712   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8713   ins_pipe( ialu_reg_long );
8714 %}
8715 
8716 // Subtract Long Register with Memory
8717 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8718   match(Set dst (SubL dst (LoadL mem)));
8719   effect(KILL cr);
8720   ins_cost(125);
8721   format %{ "SUB    $dst.lo,$mem\n\t"
8722             "SBB    $dst.hi,$mem+4" %}
8723   opcode(0x2B, 0x1B);
8724   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8725   ins_pipe( ialu_reg_long_mem );
8726 %}
8727 
8728 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8729   match(Set dst (SubL zero dst));
8730   effect(KILL cr);
8731   ins_cost(300);
8732   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8733   ins_encode( neg_long(dst) );
8734   ins_pipe( ialu_reg_reg_long );
8735 %}
8736 
8737 // And Long Register with Register
8738 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8739   match(Set dst (AndL dst src));
8740   effect(KILL cr);
8741   format %{ "AND    $dst.lo,$src.lo\n\t"
8742             "AND    $dst.hi,$src.hi" %}
8743   opcode(0x23,0x23);
8744   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8745   ins_pipe( ialu_reg_reg_long );
8746 %}
8747 
8748 // And Long Register with Immediate
8749 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8750   match(Set dst (AndL dst src));
8751   effect(KILL cr);
8752   format %{ "AND    $dst.lo,$src.lo\n\t"
8753             "AND    $dst.hi,$src.hi" %}
8754   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8755   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8756   ins_pipe( ialu_reg_long );
8757 %}
8758 
8759 // And Long Register with Memory
8760 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8761   match(Set dst (AndL dst (LoadL mem)));
8762   effect(KILL cr);
8763   ins_cost(125);
8764   format %{ "AND    $dst.lo,$mem\n\t"
8765             "AND    $dst.hi,$mem+4" %}
8766   opcode(0x23, 0x23);
8767   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8768   ins_pipe( ialu_reg_long_mem );
8769 %}
8770 
8771 // BMI1 instructions
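// The 64-bit forms apply the 32-bit BMI1 instruction to each half of the
// register pair; for BLSI, BLSMSK and BLSR a conditional branch on the
// low-half result (its zero or carry outcome) decides whether the high half
// still needs the operation.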
8772 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8773   match(Set dst (AndL (XorL src1 minus_1) src2));
8774   predicate(UseBMI1Instructions);
8775   effect(KILL cr, TEMP dst);
8776 
8777   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8778             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8779          %}
8780 
8781   ins_encode %{
8782     Register Rdst = $dst$$Register;
8783     Register Rsrc1 = $src1$$Register;
8784     Register Rsrc2 = $src2$$Register;
8785     __ andnl(Rdst, Rsrc1, Rsrc2);
8786     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8787   %}
8788   ins_pipe(ialu_reg_reg_long);
8789 %}
8790 
8791 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8792   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8793   predicate(UseBMI1Instructions);
8794   effect(KILL cr, TEMP dst);
8795 
8796   ins_cost(125);
8797   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8798             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8799          %}
8800 
8801   ins_encode %{
8802     Register Rdst = $dst$$Register;
8803     Register Rsrc1 = $src1$$Register;
8804     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8805 
8806     __ andnl(Rdst, Rsrc1, $src2$$Address);
8807     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8808   %}
8809   ins_pipe(ialu_reg_mem);
8810 %}
8811 
8812 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8813   match(Set dst (AndL (SubL imm_zero src) src));
8814   predicate(UseBMI1Instructions);
8815   effect(KILL cr, TEMP dst);
8816 
8817   format %{ "MOVL   $dst.hi, 0\n\t"
8818             "BLSIL  $dst.lo, $src.lo\n\t"
8819             "JNZ    done\n\t"
8820             "BLSIL  $dst.hi, $src.hi\n"
8821             "done:"
8822          %}
8823 
8824   ins_encode %{
8825     Label done;
8826     Register Rdst = $dst$$Register;
8827     Register Rsrc = $src$$Register;
8828     __ movl(HIGH_FROM_LOW(Rdst), 0);
8829     __ blsil(Rdst, Rsrc);
8830     __ jccb(Assembler::notZero, done);
8831     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8832     __ bind(done);
8833   %}
8834   ins_pipe(ialu_reg);
8835 %}
8836 
8837 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8838   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8839   predicate(UseBMI1Instructions);
8840   effect(KILL cr, TEMP dst);
8841 
8842   ins_cost(125);
8843   format %{ "MOVL   $dst.hi, 0\n\t"
8844             "BLSIL  $dst.lo, $src\n\t"
8845             "JNZ    done\n\t"
8846             "BLSIL  $dst.hi, $src+4\n"
8847             "done:"
8848          %}
8849 
8850   ins_encode %{
8851     Label done;
8852     Register Rdst = $dst$$Register;
8853     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8854 
8855     __ movl(HIGH_FROM_LOW(Rdst), 0);
8856     __ blsil(Rdst, $src$$Address);
8857     __ jccb(Assembler::notZero, done);
8858     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8859     __ bind(done);
8860   %}
8861   ins_pipe(ialu_reg_mem);
8862 %}
8863 
8864 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8865 %{
8866   match(Set dst (XorL (AddL src minus_1) src));
8867   predicate(UseBMI1Instructions);
8868   effect(KILL cr, TEMP dst);
8869 
8870   format %{ "MOVL    $dst.hi, 0\n\t"
8871             "BLSMSKL $dst.lo, $src.lo\n\t"
8872             "JNC     done\n\t"
8873             "BLSMSKL $dst.hi, $src.hi\n"
8874             "done:"
8875          %}
8876 
8877   ins_encode %{
8878     Label done;
8879     Register Rdst = $dst$$Register;
8880     Register Rsrc = $src$$Register;
8881     __ movl(HIGH_FROM_LOW(Rdst), 0);
8882     __ blsmskl(Rdst, Rsrc);
8883     __ jccb(Assembler::carryClear, done);
8884     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8885     __ bind(done);
8886   %}
8887 
8888   ins_pipe(ialu_reg);
8889 %}
8890 
8891 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8892 %{
8893   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8894   predicate(UseBMI1Instructions);
8895   effect(KILL cr, TEMP dst);
8896 
8897   ins_cost(125);
8898   format %{ "MOVL    $dst.hi, 0\n\t"
8899             "BLSMSKL $dst.lo, $src\n\t"
8900             "JNC     done\n\t"
8901             "BLSMSKL $dst.hi, $src+4\n"
8902             "done:"
8903          %}
8904 
8905   ins_encode %{
8906     Label done;
8907     Register Rdst = $dst$$Register;
8908     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8909 
8910     __ movl(HIGH_FROM_LOW(Rdst), 0);
8911     __ blsmskl(Rdst, $src$$Address);
8912     __ jccb(Assembler::carryClear, done);
8913     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8914     __ bind(done);
8915   %}
8916 
8917   ins_pipe(ialu_reg_mem);
8918 %}
8919 
8920 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8921 %{
8922   match(Set dst (AndL (AddL src minus_1) src) );
8923   predicate(UseBMI1Instructions);
8924   effect(KILL cr, TEMP dst);
8925 
8926   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8927             "BLSRL  $dst.lo, $src.lo\n\t"
8928             "JNC    done\n\t"
8929             "BLSRL  $dst.hi, $src.hi\n"
8930             "done:"
8931   %}
8932 
8933   ins_encode %{
8934     Label done;
8935     Register Rdst = $dst$$Register;
8936     Register Rsrc = $src$$Register;
8937     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8938     __ blsrl(Rdst, Rsrc);
8939     __ jccb(Assembler::carryClear, done);
8940     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8941     __ bind(done);
8942   %}
8943 
8944   ins_pipe(ialu_reg);
8945 %}
8946 
8947 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8948 %{
8949   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8950   predicate(UseBMI1Instructions);
8951   effect(KILL cr, TEMP dst);
8952 
8953   ins_cost(125);
8954   format %{ "MOVL   $dst.hi, $src+4\n\t"
8955             "BLSRL  $dst.lo, $src\n\t"
8956             "JNC    done\n\t"
8957             "BLSRL  $dst.hi, $src+4\n"
8958             "done:"
8959   %}
8960 
8961   ins_encode %{
8962     Label done;
8963     Register Rdst = $dst$$Register;
8964     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8965     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8966     __ blsrl(Rdst, $src$$Address);
8967     __ jccb(Assembler::carryClear, done);
8968     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8969     __ bind(done);
8970   %}
8971 
8972   ins_pipe(ialu_reg_mem);
8973 %}
8974 
8975 // Or Long Register with Register
8976 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8977   match(Set dst (OrL dst src));
8978   effect(KILL cr);
8979   format %{ "OR     $dst.lo,$src.lo\n\t"
8980             "OR     $dst.hi,$src.hi" %}
8981   opcode(0x0B,0x0B);
8982   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8983   ins_pipe( ialu_reg_reg_long );
8984 %}
8985 
8986 // Or Long Register with Immediate
8987 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8988   match(Set dst (OrL dst src));
8989   effect(KILL cr);
8990   format %{ "OR     $dst.lo,$src.lo\n\t"
8991             "OR     $dst.hi,$src.hi" %}
8992   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8993   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8994   ins_pipe( ialu_reg_long );
8995 %}
8996 
8997 // Or Long Register with Memory
8998 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8999   match(Set dst (OrL dst (LoadL mem)));
9000   effect(KILL cr);
9001   ins_cost(125);
9002   format %{ "OR     $dst.lo,$mem\n\t"
9003             "OR     $dst.hi,$mem+4" %}
9004   opcode(0x0B,0x0B);
9005   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9006   ins_pipe( ialu_reg_long_mem );
9007 %}
9008 
9009 // Xor Long Register with Register
9010 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9011   match(Set dst (XorL dst src));
9012   effect(KILL cr);
9013   format %{ "XOR    $dst.lo,$src.lo\n\t"
9014             "XOR    $dst.hi,$src.hi" %}
9015   opcode(0x33,0x33);
9016   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9017   ins_pipe( ialu_reg_reg_long );
9018 %}
9019 
9020 // Xor Long Register with Immediate -1
9021 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9022   match(Set dst (XorL dst imm));
9023   format %{ "NOT    $dst.lo\n\t"
9024             "NOT    $dst.hi" %}
9025   ins_encode %{
9026      __ notl($dst$$Register);
9027      __ notl(HIGH_FROM_LOW($dst$$Register));
9028   %}
9029   ins_pipe( ialu_reg_long );
9030 %}
9031 
9032 // Xor Long Register with Immediate
9033 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9034   match(Set dst (XorL dst src));
9035   effect(KILL cr);
9036   format %{ "XOR    $dst.lo,$src.lo\n\t"
9037             "XOR    $dst.hi,$src.hi" %}
9038   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9039   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9040   ins_pipe( ialu_reg_long );
9041 %}
9042 
9043 // Xor Long Register with Memory
9044 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9045   match(Set dst (XorL dst (LoadL mem)));
9046   effect(KILL cr);
9047   ins_cost(125);
9048   format %{ "XOR    $dst.lo,$mem\n\t"
9049             "XOR    $dst.hi,$mem+4" %}
9050   opcode(0x33,0x33);
9051   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9052   ins_pipe( ialu_reg_long_mem );
9053 %}
9054 
9055 // Shift Left Long by 1
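// Shifts by 1, 2 and 3 bits are open-coded as repeated doubling: each
// ADD lo,lo / ADC hi,hi pair moves the whole 64-bit value left by one bit.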
9056 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9057   predicate(UseNewLongLShift);
9058   match(Set dst (LShiftL dst cnt));
9059   effect(KILL cr);
9060   ins_cost(100);
9061   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9062             "ADC    $dst.hi,$dst.hi" %}
9063   ins_encode %{
9064     __ addl($dst$$Register,$dst$$Register);
9065     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9066   %}
9067   ins_pipe( ialu_reg_long );
9068 %}
9069 
9070 // Shift Left Long by 2
9071 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9072   predicate(UseNewLongLShift);
9073   match(Set dst (LShiftL dst cnt));
9074   effect(KILL cr);
9075   ins_cost(100);
9076   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9077             "ADC    $dst.hi,$dst.hi\n\t"
9078             "ADD    $dst.lo,$dst.lo\n\t"
9079             "ADC    $dst.hi,$dst.hi" %}
9080   ins_encode %{
9081     __ addl($dst$$Register,$dst$$Register);
9082     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9083     __ addl($dst$$Register,$dst$$Register);
9084     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9085   %}
9086   ins_pipe( ialu_reg_long );
9087 %}
9088 
9089 // Shift Left Long by 3
9090 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9091   predicate(UseNewLongLShift);
9092   match(Set dst (LShiftL dst cnt));
9093   effect(KILL cr);
9094   ins_cost(100);
9095   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9096             "ADC    $dst.hi,$dst.hi\n\t"
9097             "ADD    $dst.lo,$dst.lo\n\t"
9098             "ADC    $dst.hi,$dst.hi\n\t"
9099             "ADD    $dst.lo,$dst.lo\n\t"
9100             "ADC    $dst.hi,$dst.hi" %}
9101   ins_encode %{
9102     __ addl($dst$$Register,$dst$$Register);
9103     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9104     __ addl($dst$$Register,$dst$$Register);
9105     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9106     __ addl($dst$$Register,$dst$$Register);
9107     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9108   %}
9109   ins_pipe( ialu_reg_long );
9110 %}
9111 
9112 // Shift Left Long by 1-31
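// For constant shifts of 1-31 bits, SHLD feeds bits from the low word into the
// high word and SHL finishes the low word.  For 32-63 bits the high word takes
// the shifted low word and the low word is cleared.  The variable form tests
// bit 5 of the count at run time to choose between the two cases.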
9113 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9114   match(Set dst (LShiftL dst cnt));
9115   effect(KILL cr);
9116   ins_cost(200);
9117   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9118             "SHL    $dst.lo,$cnt" %}
9119   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9120   ins_encode( move_long_small_shift(dst,cnt) );
9121   ins_pipe( ialu_reg_long );
9122 %}
9123 
9124 // Shift Left Long by 32-63
9125 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9126   match(Set dst (LShiftL dst cnt));
9127   effect(KILL cr);
9128   ins_cost(300);
9129   format %{ "MOV    $dst.hi,$dst.lo\n"
9130           "\tSHL    $dst.hi,$cnt-32\n"
9131           "\tXOR    $dst.lo,$dst.lo" %}
9132   opcode(0xC1, 0x4);  /* C1 /4 ib */
9133   ins_encode( move_long_big_shift_clr(dst,cnt) );
9134   ins_pipe( ialu_reg_long );
9135 %}
9136 
9137 // Shift Left Long by variable
9138 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9139   match(Set dst (LShiftL dst shift));
9140   effect(KILL cr);
9141   ins_cost(500+200);
9142   size(17);
9143   format %{ "TEST   $shift,32\n\t"
9144             "JEQ,s  small\n\t"
9145             "MOV    $dst.hi,$dst.lo\n\t"
9146             "XOR    $dst.lo,$dst.lo\n"
9147     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9148             "SHL    $dst.lo,$shift" %}
9149   ins_encode( shift_left_long( dst, shift ) );
9150   ins_pipe( pipe_slow );
9151 %}
9152 
9153 // Shift Right Long by 1-31
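// The unsigned and arithmetic right shifts mirror the left-shift cases using
// SHRD with SHR or SAR; the arithmetic 32-63 and >=32 variable cases fill the
// high word with the sign via SAR $dst.hi,31 instead of zeroing it.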
9154 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9155   match(Set dst (URShiftL dst cnt));
9156   effect(KILL cr);
9157   ins_cost(200);
9158   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9159             "SHR    $dst.hi,$cnt" %}
9160   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9161   ins_encode( move_long_small_shift(dst,cnt) );
9162   ins_pipe( ialu_reg_long );
9163 %}
9164 
9165 // Shift Right Long by 32-63
9166 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9167   match(Set dst (URShiftL dst cnt));
9168   effect(KILL cr);
9169   ins_cost(300);
9170   format %{ "MOV    $dst.lo,$dst.hi\n"
9171           "\tSHR    $dst.lo,$cnt-32\n"
9172           "\tXOR    $dst.hi,$dst.hi" %}
9173   opcode(0xC1, 0x5);  /* C1 /5 ib */
9174   ins_encode( move_long_big_shift_clr(dst,cnt) );
9175   ins_pipe( ialu_reg_long );
9176 %}
9177 
9178 // Shift Right Long by variable
9179 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9180   match(Set dst (URShiftL dst shift));
9181   effect(KILL cr);
9182   ins_cost(600);
9183   size(17);
9184   format %{ "TEST   $shift,32\n\t"
9185             "JEQ,s  small\n\t"
9186             "MOV    $dst.lo,$dst.hi\n\t"
9187             "XOR    $dst.hi,$dst.hi\n"
9188     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9189             "SHR    $dst.hi,$shift" %}
9190   ins_encode( shift_right_long( dst, shift ) );
9191   ins_pipe( pipe_slow );
9192 %}
9193 
9194 // Shift Right Long by 1-31
9195 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9196   match(Set dst (RShiftL dst cnt));
9197   effect(KILL cr);
9198   ins_cost(200);
9199   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9200             "SAR    $dst.hi,$cnt" %}
9201   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9202   ins_encode( move_long_small_shift(dst,cnt) );
9203   ins_pipe( ialu_reg_long );
9204 %}
9205 
9206 // Shift Right Long by 32-63
9207 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9208   match(Set dst (RShiftL dst cnt));
9209   effect(KILL cr);
9210   ins_cost(300);
9211   format %{ "MOV    $dst.lo,$dst.hi\n"
9212           "\tSAR    $dst.lo,$cnt-32\n"
9213           "\tSAR    $dst.hi,31" %}
9214   opcode(0xC1, 0x7);  /* C1 /7 ib */
9215   ins_encode( move_long_big_shift_sign(dst,cnt) );
9216   ins_pipe( ialu_reg_long );
9217 %}
9218 
9219 // Shift Right arithmetic Long by variable
9220 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9221   match(Set dst (RShiftL dst shift));
9222   effect(KILL cr);
9223   ins_cost(600);
9224   size(18);
9225   format %{ "TEST   $shift,32\n\t"
9226             "JEQ,s  small\n\t"
9227             "MOV    $dst.lo,$dst.hi\n\t"
9228             "SAR    $dst.hi,31\n"
9229     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9230             "SAR    $dst.hi,$shift" %}
9231   ins_encode( shift_right_arith_long( dst, shift ) );
9232   ins_pipe( pipe_slow );
9233 %}
9234 
9235 
9236 //----------Double Instructions------------------------------------------------
9237 // Double Math
9238 
9239 // Compare & branch
9240 
9241 // P6 version of float compare, sets condition codes in EFLAGS
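// FUCOMIP raises PF for an unordered (NaN) compare; the JNP/MOV/SAHF fixup
// below then forces CF so that the unordered case is treated as "less than"
// by whatever consumes the flags.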
9242 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9243   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9244   match(Set cr (CmpD src1 src2));
9245   effect(KILL rax);
9246   ins_cost(150);
9247   format %{ "FLD    $src1\n\t"
9248             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9249             "JNP    exit\n\t"
9250             "MOV    ah,1       // saw a NaN, set CF\n\t"
9251             "SAHF\n"
9252      "exit:\tNOP               // avoid branch to branch" %}
9253   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9254   ins_encode( Push_Reg_DPR(src1),
9255               OpcP, RegOpc(src2),
9256               cmpF_P6_fixup );
9257   ins_pipe( pipe_slow );
9258 %}
9259 
9260 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9261   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9262   match(Set cr (CmpD src1 src2));
9263   ins_cost(150);
9264   format %{ "FLD    $src1\n\t"
9265             "FUCOMIP ST,$src2  // P6 instruction" %}
9266   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9267   ins_encode( Push_Reg_DPR(src1),
9268               OpcP, RegOpc(src2));
9269   ins_pipe( pipe_slow );
9270 %}
9271 
9272 // Compare & branch
9273 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9274   predicate(UseSSE<=1);
9275   match(Set cr (CmpD src1 src2));
9276   effect(KILL rax);
9277   ins_cost(200);
9278   format %{ "FLD    $src1\n\t"
9279             "FCOMp  $src2\n\t"
9280             "FNSTSW AX\n\t"
9281             "TEST   AX,0x400\n\t"
9282             "JZ,s   flags\n\t"
9283             "MOV    AH,1\t# unordered treat as LT\n"
9284     "flags:\tSAHF" %}
9285   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9286   ins_encode( Push_Reg_DPR(src1),
9287               OpcP, RegOpc(src2),
9288               fpu_flags);
9289   ins_pipe( pipe_slow );
9290 %}
9291 
9292 // Compare vs zero into -1,0,1
9293 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9294   predicate(UseSSE<=1);
9295   match(Set dst (CmpD3 src1 zero));
9296   effect(KILL cr, KILL rax);
9297   ins_cost(280);
9298   format %{ "FTSTD  $dst,$src1" %}
9299   opcode(0xE4, 0xD9);
9300   ins_encode( Push_Reg_DPR(src1),
9301               OpcS, OpcP, PopFPU,
9302               CmpF_Result(dst));
9303   ins_pipe( pipe_slow );
9304 %}
9305 
9306 // Compare into -1,0,1
9307 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9308   predicate(UseSSE<=1);
9309   match(Set dst (CmpD3 src1 src2));
9310   effect(KILL cr, KILL rax);
9311   ins_cost(300);
9312   format %{ "FCMPD  $dst,$src1,$src2" %}
9313   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9314   ins_encode( Push_Reg_DPR(src1),
9315               OpcP, RegOpc(src2),
9316               CmpF_Result(dst));
9317   ins_pipe( pipe_slow );
9318 %}
9319 
9320 // float compare and set condition codes in EFLAGS by XMM regs
9321 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9322   predicate(UseSSE>=2);
9323   match(Set cr (CmpD src1 src2));
9324   ins_cost(145);
9325   format %{ "UCOMISD $src1,$src2\n\t"
9326             "JNP,s   exit\n\t"
9327             "PUSHF\t# saw NaN, set CF\n\t"
9328             "AND     [rsp], #0xffffff2b\n\t"
9329             "POPF\n"
9330     "exit:" %}
9331   ins_encode %{
9332     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9333     emit_cmpfp_fixup(_masm);
9334   %}
9335   ins_pipe( pipe_slow );
9336 %}
9337 
9338 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9339   predicate(UseSSE>=2);
9340   match(Set cr (CmpD src1 src2));
9341   ins_cost(100);
9342   format %{ "UCOMISD $src1,$src2" %}
9343   ins_encode %{
9344     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9345   %}
9346   ins_pipe( pipe_slow );
9347 %}
9348 
9349 // float compare and set condition codes in EFLAGS by XMM regs
9350 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9351   predicate(UseSSE>=2);
9352   match(Set cr (CmpD src1 (LoadD src2)));
9353   ins_cost(145);
9354   format %{ "UCOMISD $src1,$src2\n\t"
9355             "JNP,s   exit\n\t"
9356             "PUSHF\t# saw NaN, set CF\n\t"
9357             "AND     [rsp], #0xffffff2b\n\t"
9358             "POPF\n"
9359     "exit:" %}
9360   ins_encode %{
9361     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9362     emit_cmpfp_fixup(_masm);
9363   %}
9364   ins_pipe( pipe_slow );
9365 %}
9366 
9367 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9368   predicate(UseSSE>=2);
9369   match(Set cr (CmpD src1 (LoadD src2)));
9370   ins_cost(100);
9371   format %{ "UCOMISD $src1,$src2" %}
9372   ins_encode %{
9373     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9374   %}
9375   ins_pipe( pipe_slow );
9376 %}
9377 
9378 // Compare into -1,0,1 in XMM
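// The three-way compare presets $dst to -1, branches out on unordered (JP) or
// below (JB), and otherwise derives 0 for equal and 1 for above from SETNE
// followed by a zero-extension.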
9379 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9380   predicate(UseSSE>=2);
9381   match(Set dst (CmpD3 src1 src2));
9382   effect(KILL cr);
9383   ins_cost(255);
9384   format %{ "UCOMISD $src1, $src2\n\t"
9385             "MOV     $dst, #-1\n\t"
9386             "JP,s    done\n\t"
9387             "JB,s    done\n\t"
9388             "SETNE   $dst\n\t"
9389             "MOVZB   $dst, $dst\n"
9390     "done:" %}
9391   ins_encode %{
9392     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9393     emit_cmpfp3(_masm, $dst$$Register);
9394   %}
9395   ins_pipe( pipe_slow );
9396 %}
9397 
9398 // Compare into -1,0,1 in XMM and memory
9399 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9400   predicate(UseSSE>=2);
9401   match(Set dst (CmpD3 src1 (LoadD src2)));
9402   effect(KILL cr);
9403   ins_cost(275);
9404   format %{ "UCOMISD $src1, $src2\n\t"
9405             "MOV     $dst, #-1\n\t"
9406             "JP,s    done\n\t"
9407             "JB,s    done\n\t"
9408             "SETNE   $dst\n\t"
9409             "MOVZB   $dst, $dst\n"
9410     "done:" %}
9411   ins_encode %{
9412     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9413     emit_cmpfp3(_masm, $dst$$Register);
9414   %}
9415   ins_pipe( pipe_slow );
9416 %}
9417 
9418 
9419 instruct subDPR_reg(regDPR dst, regDPR src) %{
9420   predicate (UseSSE <=1);
9421   match(Set dst (SubD dst src));
9422 
9423   format %{ "FLD    $src\n\t"
9424             "DSUBp  $dst,ST" %}
9425   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9426   ins_cost(150);
9427   ins_encode( Push_Reg_DPR(src),
9428               OpcP, RegOpc(dst) );
9429   ins_pipe( fpu_reg_reg );
9430 %}
9431 
9432 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9433   predicate (UseSSE <=1);
9434   match(Set dst (RoundDouble (SubD src1 src2)));
9435   ins_cost(250);
9436 
9437   format %{ "FLD    $src2\n\t"
9438             "DSUB   ST,$src1\n\t"
9439             "FSTP_D $dst\t# D-round" %}
9440   opcode(0xD8, 0x5);
9441   ins_encode( Push_Reg_DPR(src2),
9442               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9443   ins_pipe( fpu_mem_reg_reg );
9444 %}
9445 
9446 
9447 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9448   predicate (UseSSE <=1);
9449   match(Set dst (SubD dst (LoadD src)));
9450   ins_cost(150);
9451 
9452   format %{ "FLD    $src\n\t"
9453             "DSUBp  $dst,ST" %}
9454   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9455   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9456               OpcP, RegOpc(dst) );
9457   ins_pipe( fpu_reg_mem );
9458 %}
9459 
9460 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9461   predicate (UseSSE<=1);
9462   match(Set dst (AbsD src));
9463   ins_cost(100);
9464   format %{ "FABS" %}
9465   opcode(0xE1, 0xD9);
9466   ins_encode( OpcS, OpcP );
9467   ins_pipe( fpu_reg_reg );
9468 %}
9469 
9470 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9471   predicate(UseSSE<=1);
9472   match(Set dst (NegD src));
9473   ins_cost(100);
9474   format %{ "FCHS" %}
9475   opcode(0xE0, 0xD9);
9476   ins_encode( OpcS, OpcP );
9477   ins_pipe( fpu_reg_reg );
9478 %}
9479 
9480 instruct addDPR_reg(regDPR dst, regDPR src) %{
9481   predicate(UseSSE<=1);
9482   match(Set dst (AddD dst src));
9483   format %{ "FLD    $src\n\t"
9484             "DADD   $dst,ST" %}
9485   size(4);
9486   ins_cost(150);
9487   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9488   ins_encode( Push_Reg_DPR(src),
9489               OpcP, RegOpc(dst) );
9490   ins_pipe( fpu_reg_reg );
9491 %}
9492 
9493 
9494 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9495   predicate(UseSSE<=1);
9496   match(Set dst (RoundDouble (AddD src1 src2)));
9497   ins_cost(250);
9498 
9499   format %{ "FLD    $src2\n\t"
9500             "DADD   ST,$src1\n\t"
9501             "FSTP_D $dst\t# D-round" %}
9502   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9503   ins_encode( Push_Reg_DPR(src2),
9504               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9505   ins_pipe( fpu_mem_reg_reg );
9506 %}
9507 
9508 
9509 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9510   predicate(UseSSE<=1);
9511   match(Set dst (AddD dst (LoadD src)));
9512   ins_cost(150);
9513 
9514   format %{ "FLD    $src\n\t"
9515             "DADDp  $dst,ST" %}
9516   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9517   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9518               OpcP, RegOpc(dst) );
9519   ins_pipe( fpu_reg_mem );
9520 %}
9521 
9522 // add-to-memory
9523 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9524   predicate(UseSSE<=1);
9525   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9526   ins_cost(150);
9527 
9528   format %{ "FLD_D  $dst\n\t"
9529             "DADD   ST,$src\n\t"
9530             "FST_D  $dst" %}
9531   opcode(0xDD, 0x0);
9532   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9533               Opcode(0xD8), RegOpc(src),
9534               set_instruction_start,
9535               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9536   ins_pipe( fpu_reg_mem );
9537 %}
9538 
9539 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9540   predicate(UseSSE<=1);
9541   match(Set dst (AddD dst con));
9542   ins_cost(125);
9543   format %{ "FLD1\n\t"
9544             "DADDp  $dst,ST" %}
9545   ins_encode %{
9546     __ fld1();
9547     __ faddp($dst$$reg);
9548   %}
9549   ins_pipe(fpu_reg);
9550 %}
9551 
9552 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9553   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9554   match(Set dst (AddD dst con));
9555   ins_cost(200);
9556   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9557             "DADDp  $dst,ST" %}
9558   ins_encode %{
9559     __ fld_d($constantaddress($con));
9560     __ faddp($dst$$reg);
9561   %}
9562   ins_pipe(fpu_reg_mem);
9563 %}
9564 
9565 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9566   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9567   match(Set dst (RoundDouble (AddD src con)));
9568   ins_cost(200);
9569   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9570             "DADD   ST,$src\n\t"
9571             "FSTP_D $dst\t# D-round" %}
9572   ins_encode %{
9573     __ fld_d($constantaddress($con));
9574     __ fadd($src$$reg);
9575     __ fstp_d(Address(rsp, $dst$$disp));
9576   %}
9577   ins_pipe(fpu_mem_reg_con);
9578 %}
9579 
9580 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9581   predicate(UseSSE<=1);
9582   match(Set dst (MulD dst src));
9583   format %{ "FLD    $src\n\t"
9584             "DMULp  $dst,ST" %}
9585   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9586   ins_cost(150);
9587   ins_encode( Push_Reg_DPR(src),
9588               OpcP, RegOpc(dst) );
9589   ins_pipe( fpu_reg_reg );
9590 %}
9591 
9592 // Strict FP instruction biases argument before multiply then
9593 // biases result to avoid double rounding of subnormals.
9594 //
9595 // scale arg1 by multiplying arg1 by 2^(-15360)
9596 // load arg2
9597 // multiply scaled arg1 by arg2
9598 // rescale product by 2^(15360)
9599 //
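// The bias exponent 15360 is the difference between the extended format's
// exponent bias (16383) and the double bias (1023): scaling by 2^(-15360)
// moves a would-be subnormal double into the extended format's own subnormal
// range, so the hardware rounds it exactly once at the right precision, and
// the closing 2^(+15360) rescale is an exact power-of-two multiply.
// Illustrative x87 sketch of the sequence encoded below:
//     FLD    bias1 (2^-15360)    FMULP  dst,ST     ; scale dst down
//     FLD    src                 FMULP  dst,ST     ; the one real rounding
//     FLD    bias2 (2^+15360)    FMULP  dst,ST     ; exact rescale
//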
9600 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9601   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9602   match(Set dst (MulD dst src));
9603   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9604 
9605   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9606             "DMULp  $dst,ST\n\t"
9607             "FLD    $src\n\t"
9608             "DMULp  $dst,ST\n\t"
9609             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9610             "DMULp  $dst,ST\n\t" %}
9611   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9612   ins_encode( strictfp_bias1(dst),
9613               Push_Reg_DPR(src),
9614               OpcP, RegOpc(dst),
9615               strictfp_bias2(dst) );
9616   ins_pipe( fpu_reg_reg );
9617 %}
9618 
9619 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9620   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9621   match(Set dst (MulD dst con));
9622   ins_cost(200);
9623   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9624             "DMULp  $dst,ST" %}
9625   ins_encode %{
9626     __ fld_d($constantaddress($con));
9627     __ fmulp($dst$$reg);
9628   %}
9629   ins_pipe(fpu_reg_mem);
9630 %}
9631 
9632 
9633 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9634   predicate( UseSSE<=1 );
9635   match(Set dst (MulD dst (LoadD src)));
9636   ins_cost(200);
9637   format %{ "FLD_D  $src\n\t"
9638             "DMULp  $dst,ST" %}
9639   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9640   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9641               OpcP, RegOpc(dst) );
9642   ins_pipe( fpu_reg_mem );
9643 %}
9644 
9645 //
9646 // Cisc-alternate to reg-reg multiply
9647 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9648   predicate( UseSSE<=1 );
9649   match(Set dst (MulD src (LoadD mem)));
9650   ins_cost(250);
9651   format %{ "FLD_D  $mem\n\t"
9652             "DMUL   ST,$src\n\t"
9653             "FSTP_D $dst" %}
9654   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9655   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9656               OpcReg_FPR(src),
9657               Pop_Reg_DPR(dst) );
9658   ins_pipe( fpu_reg_reg_mem );
9659 %}
9660 
9661 
9662 // MACRO3 -- addDPR a mulDPR
9663 // This instruction is a '2-address' instruction in that the result goes
9664 // back to src2.  This eliminates a move from the macro; possibly the
9665 // register allocator will have to add it back (and maybe not).
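// For example, matching  d = a*b + d  reuses d directly (illustrative x87 form):
//     FLD    a
//     DMUL   ST,b
//     DADDp  d,ST        ; sum lands back in d (the src2 operand), no extra move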
9666 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9667   predicate( UseSSE<=1 );
9668   match(Set src2 (AddD (MulD src0 src1) src2));
9669   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9670             "DMUL   ST,$src1\n\t"
9671             "DADDp  $src2,ST" %}
9672   ins_cost(250);
9673   opcode(0xDD); /* LoadD DD /0 */
9674   ins_encode( Push_Reg_FPR(src0),
9675               FMul_ST_reg(src1),
9676               FAddP_reg_ST(src2) );
9677   ins_pipe( fpu_reg_reg_reg );
9678 %}
9679 
9680 
9681 // MACRO3 -- subDPR a mulDPR
9682 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9683   predicate( UseSSE<=1 );
9684   match(Set src2 (SubD (MulD src0 src1) src2));
9685   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9686             "DMUL   ST,$src1\n\t"
9687             "DSUBRp $src2,ST" %}
9688   ins_cost(250);
9689   ins_encode( Push_Reg_FPR(src0),
9690               FMul_ST_reg(src1),
9691               Opcode(0xDE), Opc_plus(0xE0,src2));
9692   ins_pipe( fpu_reg_reg_reg );
9693 %}
9694 
9695 
9696 instruct divDPR_reg(regDPR dst, regDPR src) %{
9697   predicate( UseSSE<=1 );
9698   match(Set dst (DivD dst src));
9699 
9700   format %{ "FLD    $src\n\t"
9701             "FDIVp  $dst,ST" %}
9702   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9703   ins_cost(150);
9704   ins_encode( Push_Reg_DPR(src),
9705               OpcP, RegOpc(dst) );
9706   ins_pipe( fpu_reg_reg );
9707 %}
9708 
9709 // Strict FP instruction biases argument before division then
9710 // biases result, to avoid double rounding of subnormals.
9711 //
9712 // scale dividend by multiplying dividend by 2^(-15360)
9713 // load divisor
9714 // divide scaled dividend by divisor
9715 // rescale quotient by 2^(15360)
9716 //
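// Illustrative x87 sketch (same biasing trick as strictfp_mulDPR_reg above,
// with the divide in the middle):
//     FLD    bias1 (2^-15360)    FMULP  dst,ST     ; scale dividend down
//     FLD    src                 FDIVP  dst,ST     ; the one real rounding
//     FLD    bias2 (2^+15360)    FMULP  dst,ST     ; exact rescale
//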
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9722 
9723   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9724             "DMULp  $dst,ST\n\t"
9725             "FLD    $src\n\t"
9726             "FDIVp  $dst,ST\n\t"
9727             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9728             "DMULp  $dst,ST\n\t" %}
9729   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9730   ins_encode( strictfp_bias1(dst),
9731               Push_Reg_DPR(src),
9732               OpcP, RegOpc(dst),
9733               strictfp_bias2(dst) );
9734   ins_pipe( fpu_reg_reg );
9735 %}
9736 
9737 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9738   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9739   match(Set dst (RoundDouble (DivD src1 src2)));
9740 
9741   format %{ "FLD    $src1\n\t"
9742             "FDIV   ST,$src2\n\t"
9743             "FSTP_D $dst\t# D-round" %}
9744   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9745   ins_encode( Push_Reg_DPR(src1),
9746               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9747   ins_pipe( fpu_mem_reg_reg );
9748 %}
9749 
9750 
9751 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9752   predicate(UseSSE<=1);
9753   match(Set dst (ModD dst src));
9754   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9755 
9756   format %{ "DMOD   $dst,$src" %}
9757   ins_cost(250);
9758   ins_encode(Push_Reg_Mod_DPR(dst, src),
9759               emitModDPR(),
9760               Push_Result_Mod_DPR(src),
9761               Pop_Reg_DPR(dst));
9762   ins_pipe( pipe_slow );
9763 %}
9764 
9765 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9766   predicate(UseSSE>=2);
9767   match(Set dst (ModD src0 src1));
9768   effect(KILL rax, KILL cr);
9769 
9770   format %{ "SUB    ESP,8\t # DMOD\n"
9771           "\tMOVSD  [ESP+0],$src1\n"
9772           "\tFLD_D  [ESP+0]\n"
9773           "\tMOVSD  [ESP+0],$src0\n"
9774           "\tFLD_D  [ESP+0]\n"
9775      "loop:\tFPREM\n"
9776           "\tFWAIT\n"
9777           "\tFNSTSW AX\n"
9778           "\tSAHF\n"
9779           "\tJP     loop\n"
9780           "\tFSTP_D [ESP+0]\n"
9781           "\tMOVSD  $dst,[ESP+0]\n"
9782           "\tADD    ESP,8\n"
9783           "\tFSTP   ST0\t # Restore FPU Stack"
9784     %}
9785   ins_cost(250);
9786   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9787   ins_pipe( pipe_slow );
9788 %}
9789 
9790 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9791   predicate (UseSSE<=1);
9792   match(Set dst (SinD src));
9793   ins_cost(1800);
9794   format %{ "DSIN   $dst" %}
9795   opcode(0xD9, 0xFE);
9796   ins_encode( OpcP, OpcS );
9797   ins_pipe( pipe_slow );
9798 %}
9799 
9800 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9801   predicate (UseSSE>=2);
9802   match(Set dst (SinD dst));
9803   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9804   ins_cost(1800);
9805   format %{ "DSIN   $dst" %}
9806   opcode(0xD9, 0xFE);
9807   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9808   ins_pipe( pipe_slow );
9809 %}
9810 
9811 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9812   predicate (UseSSE<=1);
9813   match(Set dst (CosD src));
9814   ins_cost(1800);
9815   format %{ "DCOS   $dst" %}
9816   opcode(0xD9, 0xFF);
9817   ins_encode( OpcP, OpcS );
9818   ins_pipe( pipe_slow );
9819 %}
9820 
9821 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9822   predicate (UseSSE>=2);
9823   match(Set dst (CosD dst));
9824   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9825   ins_cost(1800);
9826   format %{ "DCOS   $dst" %}
9827   opcode(0xD9, 0xFF);
9828   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9829   ins_pipe( pipe_slow );
9830 %}
9831 
9832 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9833   predicate (UseSSE<=1);
  match(Set dst (TanD src));
9835   format %{ "DTAN   $dst" %}
9836   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9837               Opcode(0xDD), Opcode(0xD8));   // fstp st
9838   ins_pipe( pipe_slow );
9839 %}
9840 
9841 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9842   predicate (UseSSE>=2);
  match(Set dst (TanD dst));
9844   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9845   format %{ "DTAN   $dst" %}
9846   ins_encode( Push_SrcD(dst),
9847               Opcode(0xD9), Opcode(0xF2),    // fptan
9848               Opcode(0xDD), Opcode(0xD8),   // fstp st
9849               Push_ResultD(dst) );
9850   ins_pipe( pipe_slow );
9851 %}
9852 
9853 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9854   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
9856   format %{ "DATA   $dst,$src" %}
9857   opcode(0xD9, 0xF3);
9858   ins_encode( Push_Reg_DPR(src),
9859               OpcP, OpcS, RegOpc(dst) );
9860   ins_pipe( pipe_slow );
9861 %}
9862 
9863 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9864   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
9866   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9867   format %{ "DATA   $dst,$src" %}
9868   opcode(0xD9, 0xF3);
9869   ins_encode( Push_SrcD(src),
9870               OpcP, OpcS, Push_ResultD(dst) );
9871   ins_pipe( pipe_slow );
9872 %}
9873 
9874 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9875   predicate (UseSSE<=1);
9876   match(Set dst (SqrtD src));
9877   format %{ "DSQRT  $dst,$src" %}
9878   opcode(0xFA, 0xD9);
9879   ins_encode( Push_Reg_DPR(src),
9880               OpcS, OpcP, Pop_Reg_DPR(dst) );
9881   ins_pipe( pipe_slow );
9882 %}
9883 
9884 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9885   predicate (UseSSE<=1);
9886   match(Set Y (PowD X Y));  // Raise X to the Yth power
9887   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9888   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9889   ins_encode %{
9890     __ subptr(rsp, 8);
9891     __ fld_s($X$$reg - 1);
9892     __ fast_pow();
9893     __ addptr(rsp, 8);
9894   %}
9895   ins_pipe( pipe_slow );
9896 %}
9897 
9898 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9899   predicate (UseSSE>=2);
9900   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9901   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9902   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9903   ins_encode %{
9904     __ subptr(rsp, 8);
9905     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9906     __ fld_d(Address(rsp, 0));
9907     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9908     __ fld_d(Address(rsp, 0));
9909     __ fast_pow();
9910     __ fstp_d(Address(rsp, 0));
9911     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9912     __ addptr(rsp, 8);
9913   %}
9914   ins_pipe( pipe_slow );
9915 %}
9916 
9917 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9918   predicate (UseSSE<=1);
9919   // The source Double operand on FPU stack
9920   match(Set dst (Log10D src));
9921   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9922   // fxch         ; swap ST(0) with ST(1)
9923   // fyl2x        ; compute log_10(2) * log_2(x)
9924   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9925             "FXCH   \n\t"
9926             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9927          %}
9928   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9929               Opcode(0xD9), Opcode(0xC9),   // fxch
9930               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9931 
9932   ins_pipe( pipe_slow );
9933 %}
9934 
9935 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9936   predicate (UseSSE>=2);
9937   effect(KILL cr);
9938   match(Set dst (Log10D src));
9939   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9940   // fyl2x        ; compute log_10(2) * log_2(x)
9941   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9942             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9943          %}
9944   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9945               Push_SrcD(src),
9946               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9947               Push_ResultD(dst));
9948 
9949   ins_pipe( pipe_slow );
9950 %}
9951 
9952 //-------------Float Instructions-------------------------------
9953 // Float Math
9954 
9955 // Code for float compare:
9956 //     fcompp();
9957 //     fwait(); fnstsw_ax();
9958 //     sahf();
9959 //     movl(dst, unordered_result);
9960 //     jcc(Assembler::parity, exit);
9961 //     movl(dst, less_result);
9962 //     jcc(Assembler::below, exit);
9963 //     movl(dst, equal_result);
9964 //     jcc(Assembler::equal, exit);
9965 //     movl(dst, greater_result);
9966 //   exit:
9967 
9968 // P6 version of float compare, sets condition codes in EFLAGS
9969 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9970   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9971   match(Set cr (CmpF src1 src2));
9972   effect(KILL rax);
9973   ins_cost(150);
9974   format %{ "FLD    $src1\n\t"
9975             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9976             "JNP    exit\n\t"
9977             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9978             "SAHF\n"
9979      "exit:\tNOP               // avoid branch to branch" %}
9980   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9981   ins_encode( Push_Reg_DPR(src1),
9982               OpcP, RegOpc(src2),
9983               cmpF_P6_fixup );
9984   ins_pipe( pipe_slow );
9985 %}
9986 
9987 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9988   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9989   match(Set cr (CmpF src1 src2));
9990   ins_cost(100);
9991   format %{ "FLD    $src1\n\t"
9992             "FUCOMIP ST,$src2  // P6 instruction" %}
9993   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9994   ins_encode( Push_Reg_DPR(src1),
9995               OpcP, RegOpc(src2));
9996   ins_pipe( pipe_slow );
9997 %}
9998 
9999 
10000 // Compare & branch
10001 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10002   predicate(UseSSE == 0);
10003   match(Set cr (CmpF src1 src2));
10004   effect(KILL rax);
10005   ins_cost(200);
10006   format %{ "FLD    $src1\n\t"
10007             "FCOMp  $src2\n\t"
10008             "FNSTSW AX\n\t"
10009             "TEST   AX,0x400\n\t"
10010             "JZ,s   flags\n\t"
10011             "MOV    AH,1\t# unordered treat as LT\n"
10012     "flags:\tSAHF" %}
10013   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10014   ins_encode( Push_Reg_DPR(src1),
10015               OpcP, RegOpc(src2),
10016               fpu_flags);
10017   ins_pipe( pipe_slow );
10018 %}
10019 
10020 // Compare vs zero into -1,0,1
10021 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10022   predicate(UseSSE == 0);
10023   match(Set dst (CmpF3 src1 zero));
10024   effect(KILL cr, KILL rax);
10025   ins_cost(280);
10026   format %{ "FTSTF  $dst,$src1" %}
10027   opcode(0xE4, 0xD9);
10028   ins_encode( Push_Reg_DPR(src1),
10029               OpcS, OpcP, PopFPU,
10030               CmpF_Result(dst));
10031   ins_pipe( pipe_slow );
10032 %}
10033 
10034 // Compare into -1,0,1
10035 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10036   predicate(UseSSE == 0);
10037   match(Set dst (CmpF3 src1 src2));
10038   effect(KILL cr, KILL rax);
10039   ins_cost(300);
10040   format %{ "FCMPF  $dst,$src1,$src2" %}
10041   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10042   ins_encode( Push_Reg_DPR(src1),
10043               OpcP, RegOpc(src2),
10044               CmpF_Result(dst));
10045   ins_pipe( pipe_slow );
10046 %}
10047 
10048 // float compare and set condition codes in EFLAGS by XMM regs
10049 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10050   predicate(UseSSE>=1);
10051   match(Set cr (CmpF src1 src2));
10052   ins_cost(145);
10053   format %{ "UCOMISS $src1,$src2\n\t"
10054             "JNP,s   exit\n\t"
10055             "PUSHF\t# saw NaN, set CF\n\t"
10056             "AND     [rsp], #0xffffff2b\n\t"
10057             "POPF\n"
10058     "exit:" %}
10059   ins_encode %{
10060     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10061     emit_cmpfp_fixup(_masm);
10062   %}
10063   ins_pipe( pipe_slow );
10064 %}
10065 
10066 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10067   predicate(UseSSE>=1);
10068   match(Set cr (CmpF src1 src2));
10069   ins_cost(100);
10070   format %{ "UCOMISS $src1,$src2" %}
10071   ins_encode %{
10072     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10073   %}
10074   ins_pipe( pipe_slow );
10075 %}
10076 
10077 // float compare and set condition codes in EFLAGS by XMM regs
10078 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10079   predicate(UseSSE>=1);
10080   match(Set cr (CmpF src1 (LoadF src2)));
10081   ins_cost(165);
10082   format %{ "UCOMISS $src1,$src2\n\t"
10083             "JNP,s   exit\n\t"
10084             "PUSHF\t# saw NaN, set CF\n\t"
10085             "AND     [rsp], #0xffffff2b\n\t"
10086             "POPF\n"
10087     "exit:" %}
10088   ins_encode %{
10089     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10090     emit_cmpfp_fixup(_masm);
10091   %}
10092   ins_pipe( pipe_slow );
10093 %}
10094 
10095 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10096   predicate(UseSSE>=1);
10097   match(Set cr (CmpF src1 (LoadF src2)));
10098   ins_cost(100);
10099   format %{ "UCOMISS $src1,$src2" %}
10100   ins_encode %{
10101     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10102   %}
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 // Compare into -1,0,1 in XMM
10107 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10108   predicate(UseSSE>=1);
10109   match(Set dst (CmpF3 src1 src2));
10110   effect(KILL cr);
10111   ins_cost(255);
10112   format %{ "UCOMISS $src1, $src2\n\t"
10113             "MOV     $dst, #-1\n\t"
10114             "JP,s    done\n\t"
10115             "JB,s    done\n\t"
10116             "SETNE   $dst\n\t"
10117             "MOVZB   $dst, $dst\n"
10118     "done:" %}
10119   ins_encode %{
10120     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10121     emit_cmpfp3(_masm, $dst$$Register);
10122   %}
10123   ins_pipe( pipe_slow );
10124 %}
10125 
10126 // Compare into -1,0,1 in XMM and memory
10127 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10128   predicate(UseSSE>=1);
10129   match(Set dst (CmpF3 src1 (LoadF src2)));
10130   effect(KILL cr);
10131   ins_cost(275);
10132   format %{ "UCOMISS $src1, $src2\n\t"
10133             "MOV     $dst, #-1\n\t"
10134             "JP,s    done\n\t"
10135             "JB,s    done\n\t"
10136             "SETNE   $dst\n\t"
10137             "MOVZB   $dst, $dst\n"
10138     "done:" %}
10139   ins_encode %{
10140     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10141     emit_cmpfp3(_masm, $dst$$Register);
10142   %}
10143   ins_pipe( pipe_slow );
10144 %}
10145 
10146 // Spill to obtain 24-bit precision
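// (When the x87 control word is not left in 24-bit precision mode, a strict
// float result is obtained by storing the value through a 32-bit stack slot,
// which performs the rounding to single precision.)  Illustrative sketch for
// the subtract below:
//     FLD    src1
//     FSUB   src2
//     FSTP_S [dst]       ; the 32-bit store does the rounding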
10147 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10148   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10149   match(Set dst (SubF src1 src2));
10150 
10151   format %{ "FSUB   $dst,$src1 - $src2" %}
10152   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10153   ins_encode( Push_Reg_FPR(src1),
10154               OpcReg_FPR(src2),
10155               Pop_Mem_FPR(dst) );
10156   ins_pipe( fpu_mem_reg_reg );
10157 %}
10158 //
10159 // This instruction does not round to 24-bits
10160 instruct subFPR_reg(regFPR dst, regFPR src) %{
10161   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10162   match(Set dst (SubF dst src));
10163 
10164   format %{ "FSUB   $dst,$src" %}
10165   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10166   ins_encode( Push_Reg_FPR(src),
10167               OpcP, RegOpc(dst) );
10168   ins_pipe( fpu_reg_reg );
10169 %}
10170 
10171 // Spill to obtain 24-bit precision
10172 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10173   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10174   match(Set dst (AddF src1 src2));
10175 
10176   format %{ "FADD   $dst,$src1,$src2" %}
10177   opcode(0xD8, 0x0); /* D8 C0+i */
10178   ins_encode( Push_Reg_FPR(src2),
10179               OpcReg_FPR(src1),
10180               Pop_Mem_FPR(dst) );
10181   ins_pipe( fpu_mem_reg_reg );
10182 %}
10183 //
10184 // This instruction does not round to 24-bits
10185 instruct addFPR_reg(regFPR dst, regFPR src) %{
10186   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10187   match(Set dst (AddF dst src));
10188 
10189   format %{ "FLD    $src\n\t"
10190             "FADDp  $dst,ST" %}
10191   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10192   ins_encode( Push_Reg_FPR(src),
10193               OpcP, RegOpc(dst) );
10194   ins_pipe( fpu_reg_reg );
10195 %}
10196 
10197 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10198   predicate(UseSSE==0);
10199   match(Set dst (AbsF src));
10200   ins_cost(100);
10201   format %{ "FABS" %}
10202   opcode(0xE1, 0xD9);
10203   ins_encode( OpcS, OpcP );
10204   ins_pipe( fpu_reg_reg );
10205 %}
10206 
10207 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10208   predicate(UseSSE==0);
10209   match(Set dst (NegF src));
10210   ins_cost(100);
10211   format %{ "FCHS" %}
10212   opcode(0xE0, 0xD9);
10213   ins_encode( OpcS, OpcP );
10214   ins_pipe( fpu_reg_reg );
10215 %}
10216 
10217 // Cisc-alternate to addFPR_reg
10218 // Spill to obtain 24-bit precision
10219 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10220   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10221   match(Set dst (AddF src1 (LoadF src2)));
10222 
10223   format %{ "FLD    $src2\n\t"
10224             "FADD   ST,$src1\n\t"
10225             "FSTP_S $dst" %}
10226   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10227   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10228               OpcReg_FPR(src1),
10229               Pop_Mem_FPR(dst) );
10230   ins_pipe( fpu_mem_reg_mem );
10231 %}
10232 //
10233 // Cisc-alternate to addFPR_reg
10234 // This instruction does not round to 24-bits
10235 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10236   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10237   match(Set dst (AddF dst (LoadF src)));
10238 
10239   format %{ "FADD   $dst,$src" %}
10240   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10241   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10242               OpcP, RegOpc(dst) );
10243   ins_pipe( fpu_reg_mem );
10244 %}
10245 
10246 // // Following two instructions for _222_mpegaudio
10247 // Spill to obtain 24-bit precision
10248 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10249   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10250   match(Set dst (AddF src1 src2));
10251 
10252   format %{ "FADD   $dst,$src1,$src2" %}
10253   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10254   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10255               OpcReg_FPR(src2),
10256               Pop_Mem_FPR(dst) );
10257   ins_pipe( fpu_mem_reg_mem );
10258 %}
10259 
10260 // Cisc-spill variant
10261 // Spill to obtain 24-bit precision
10262 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10263   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10264   match(Set dst (AddF src1 (LoadF src2)));
10265 
10266   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10267   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10268   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10269               set_instruction_start,
10270               OpcP, RMopc_Mem(secondary,src1),
10271               Pop_Mem_FPR(dst) );
10272   ins_pipe( fpu_mem_mem_mem );
10273 %}
10274 
10275 // Spill to obtain 24-bit precision
10276 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10277   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10278   match(Set dst (AddF src1 src2));
10279 
10280   format %{ "FADD   $dst,$src1,$src2" %}
10281   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10282   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10283               set_instruction_start,
10284               OpcP, RMopc_Mem(secondary,src1),
10285               Pop_Mem_FPR(dst) );
10286   ins_pipe( fpu_mem_mem_mem );
10287 %}
10288 
10289 
10290 // Spill to obtain 24-bit precision
10291 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10292   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10293   match(Set dst (AddF src con));
10294   format %{ "FLD    $src\n\t"
10295             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10296             "FSTP_S $dst"  %}
10297   ins_encode %{
10298     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10299     __ fadd_s($constantaddress($con));
10300     __ fstp_s(Address(rsp, $dst$$disp));
10301   %}
10302   ins_pipe(fpu_mem_reg_con);
10303 %}
10304 //
10305 // This instruction does not round to 24-bits
10306 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10307   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10308   match(Set dst (AddF src con));
10309   format %{ "FLD    $src\n\t"
10310             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10311             "FSTP   $dst"  %}
10312   ins_encode %{
10313     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10314     __ fadd_s($constantaddress($con));
10315     __ fstp_d($dst$$reg);
10316   %}
10317   ins_pipe(fpu_reg_reg_con);
10318 %}
10319 
10320 // Spill to obtain 24-bit precision
10321 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10322   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10323   match(Set dst (MulF src1 src2));
10324 
10325   format %{ "FLD    $src1\n\t"
10326             "FMUL   $src2\n\t"
10327             "FSTP_S $dst"  %}
10328   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10329   ins_encode( Push_Reg_FPR(src1),
10330               OpcReg_FPR(src2),
10331               Pop_Mem_FPR(dst) );
10332   ins_pipe( fpu_mem_reg_reg );
10333 %}
10334 //
10335 // This instruction does not round to 24-bits
10336 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10337   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10338   match(Set dst (MulF src1 src2));
10339 
10340   format %{ "FLD    $src1\n\t"
10341             "FMUL   $src2\n\t"
10342             "FSTP_S $dst"  %}
10343   opcode(0xD8, 0x1); /* D8 C8+i */
10344   ins_encode( Push_Reg_FPR(src2),
10345               OpcReg_FPR(src1),
10346               Pop_Reg_FPR(dst) );
10347   ins_pipe( fpu_reg_reg_reg );
10348 %}
10349 
10350 
10351 // Spill to obtain 24-bit precision
10352 // Cisc-alternate to reg-reg multiply
10353 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (MulF src1 (LoadF src2)));
10356 
10357   format %{ "FLD_S  $src2\n\t"
10358             "FMUL   $src1\n\t"
10359             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10361   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10362               OpcReg_FPR(src1),
10363               Pop_Mem_FPR(dst) );
10364   ins_pipe( fpu_mem_reg_mem );
10365 %}
10366 //
10367 // This instruction does not round to 24-bits
10368 // Cisc-alternate to reg-reg multiply
10369 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10370   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10371   match(Set dst (MulF src1 (LoadF src2)));
10372 
10373   format %{ "FMUL   $dst,$src1,$src2" %}
10374   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10375   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10376               OpcReg_FPR(src1),
10377               Pop_Reg_FPR(dst) );
10378   ins_pipe( fpu_reg_reg_mem );
10379 %}
10380 
10381 // Spill to obtain 24-bit precision
10382 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10383   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10384   match(Set dst (MulF src1 src2));
10385 
10386   format %{ "FMUL   $dst,$src1,$src2" %}
10387   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10388   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10389               set_instruction_start,
10390               OpcP, RMopc_Mem(secondary,src1),
10391               Pop_Mem_FPR(dst) );
10392   ins_pipe( fpu_mem_mem_mem );
10393 %}
10394 
10395 // Spill to obtain 24-bit precision
10396 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10397   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10398   match(Set dst (MulF src con));
10399 
10400   format %{ "FLD    $src\n\t"
10401             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10402             "FSTP_S $dst"  %}
10403   ins_encode %{
10404     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10405     __ fmul_s($constantaddress($con));
10406     __ fstp_s(Address(rsp, $dst$$disp));
10407   %}
10408   ins_pipe(fpu_mem_reg_con);
10409 %}
10410 //
10411 // This instruction does not round to 24-bits
10412 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10413   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10414   match(Set dst (MulF src con));
10415 
10416   format %{ "FLD    $src\n\t"
10417             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10418             "FSTP   $dst"  %}
10419   ins_encode %{
10420     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10421     __ fmul_s($constantaddress($con));
10422     __ fstp_d($dst$$reg);
10423   %}
10424   ins_pipe(fpu_reg_reg_con);
10425 %}
10426 
10427 
10428 //
10429 // MACRO1 -- subsume unshared load into mulFPR
10430 // This instruction does not round to 24-bits
10431 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10432   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10433   match(Set dst (MulF (LoadF mem1) src));
10434 
10435   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10436             "FMUL   ST,$src\n\t"
10437             "FSTP   $dst" %}
10438   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10439   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10440               OpcReg_FPR(src),
10441               Pop_Reg_FPR(dst) );
10442   ins_pipe( fpu_reg_reg_mem );
10443 %}
10444 //
10445 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10446 // This instruction does not round to 24-bits
10447 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10448   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10449   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10450   ins_cost(95);
10451 
10452   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10453             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10454             "FADD   ST,$src2\n\t"
10455             "FSTP   $dst" %}
10456   opcode(0xD9); /* LoadF D9 /0 */
10457   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10458               FMul_ST_reg(src1),
10459               FAdd_ST_reg(src2),
10460               Pop_Reg_FPR(dst) );
10461   ins_pipe( fpu_reg_mem_reg_reg );
10462 %}
10463 
10464 // MACRO3 -- addFPR a mulFPR
10465 // This instruction does not round to 24-bits.  It is a '2-address'
10466 // instruction in that the result goes back to src2.  This eliminates
10467 // a move from the macro; possibly the register allocator will have
10468 // to add it back (and maybe not).
10469 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10470   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10471   match(Set src2 (AddF (MulF src0 src1) src2));
10472 
10473   format %{ "FLD    $src0     ===MACRO3===\n\t"
10474             "FMUL   ST,$src1\n\t"
10475             "FADDP  $src2,ST" %}
10476   opcode(0xD9); /* LoadF D9 /0 */
10477   ins_encode( Push_Reg_FPR(src0),
10478               FMul_ST_reg(src1),
10479               FAddP_reg_ST(src2) );
10480   ins_pipe( fpu_reg_reg_reg );
10481 %}
10482 
10483 // MACRO4 -- divFPR subFPR
10484 // This instruction does not round to 24-bits
10485 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10486   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10487   match(Set dst (DivF (SubF src2 src1) src3));
10488 
10489   format %{ "FLD    $src2   ===MACRO4===\n\t"
10490             "FSUB   ST,$src1\n\t"
10491             "FDIV   ST,$src3\n\t"
10492             "FSTP  $dst" %}
10493   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10494   ins_encode( Push_Reg_FPR(src2),
10495               subFPR_divFPR_encode(src1,src3),
10496               Pop_Reg_FPR(dst) );
10497   ins_pipe( fpu_reg_reg_reg_reg );
10498 %}
10499 
10500 // Spill to obtain 24-bit precision
10501 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10502   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10503   match(Set dst (DivF src1 src2));
10504 
10505   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10507   ins_encode( Push_Reg_FPR(src1),
10508               OpcReg_FPR(src2),
10509               Pop_Mem_FPR(dst) );
10510   ins_pipe( fpu_mem_reg_reg );
10511 %}
10512 //
10513 // This instruction does not round to 24-bits
10514 instruct divFPR_reg(regFPR dst, regFPR src) %{
10515   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10516   match(Set dst (DivF dst src));
10517 
10518   format %{ "FDIV   $dst,$src" %}
10519   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10520   ins_encode( Push_Reg_FPR(src),
10521               OpcP, RegOpc(dst) );
10522   ins_pipe( fpu_reg_reg );
10523 %}
10524 
10525 
10526 // Spill to obtain 24-bit precision
10527 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10528   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10529   match(Set dst (ModF src1 src2));
10530   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10531 
10532   format %{ "FMOD   $dst,$src1,$src2" %}
10533   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10534               emitModDPR(),
10535               Push_Result_Mod_DPR(src2),
10536               Pop_Mem_FPR(dst));
10537   ins_pipe( pipe_slow );
10538 %}
10539 //
10540 // This instruction does not round to 24-bits
10541 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10542   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10543   match(Set dst (ModF dst src));
10544   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10545 
10546   format %{ "FMOD   $dst,$src" %}
10547   ins_encode(Push_Reg_Mod_DPR(dst, src),
10548               emitModDPR(),
10549               Push_Result_Mod_DPR(src),
10550               Pop_Reg_FPR(dst));
10551   ins_pipe( pipe_slow );
10552 %}
10553 
10554 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10555   predicate(UseSSE>=1);
10556   match(Set dst (ModF src0 src1));
10557   effect(KILL rax, KILL cr);
10558   format %{ "SUB    ESP,4\t # FMOD\n"
10559           "\tMOVSS  [ESP+0],$src1\n"
10560           "\tFLD_S  [ESP+0]\n"
10561           "\tMOVSS  [ESP+0],$src0\n"
10562           "\tFLD_S  [ESP+0]\n"
10563      "loop:\tFPREM\n"
10564           "\tFWAIT\n"
10565           "\tFNSTSW AX\n"
10566           "\tSAHF\n"
10567           "\tJP     loop\n"
10568           "\tFSTP_S [ESP+0]\n"
10569           "\tMOVSS  $dst,[ESP+0]\n"
10570           "\tADD    ESP,4\n"
10571           "\tFSTP   ST0\t # Restore FPU Stack"
10572     %}
10573   ins_cost(250);
10574   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10575   ins_pipe( pipe_slow );
10576 %}
10577 
10578 
10579 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10581 
10582 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10583   predicate(UseSSE==0);
10584   match(Set dst (RoundFloat src));
10585   ins_cost(125);
10586   format %{ "FST_S  $dst,$src\t# F-round" %}
10587   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10588   ins_pipe( fpu_mem_reg );
10589 %}
10590 
10591 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10592   predicate(UseSSE<=1);
10593   match(Set dst (RoundDouble src));
10594   ins_cost(125);
10595   format %{ "FST_D  $dst,$src\t# D-round" %}
10596   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10597   ins_pipe( fpu_mem_reg );
10598 %}
10599 
// Force rounding to 24-bit precision and 8-bit exponent
10601 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10602   predicate(UseSSE==0);
10603   match(Set dst (ConvD2F src));
10604   format %{ "FST_S  $dst,$src\t# F-round" %}
10605   expand %{
10606     roundFloat_mem_reg(dst,src);
10607   %}
10608 %}
10609 
// Force rounding to 24-bit precision and 8-bit exponent
10611 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10612   predicate(UseSSE==1);
10613   match(Set dst (ConvD2F src));
10614   effect( KILL cr );
10615   format %{ "SUB    ESP,4\n\t"
10616             "FST_S  [ESP],$src\t# F-round\n\t"
10617             "MOVSS  $dst,[ESP]\n\t"
10618             "ADD ESP,4" %}
10619   ins_encode %{
10620     __ subptr(rsp, 4);
10621     if ($src$$reg != FPR1L_enc) {
10622       __ fld_s($src$$reg-1);
10623       __ fstp_s(Address(rsp, 0));
10624     } else {
10625       __ fst_s(Address(rsp, 0));
10626     }
10627     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10628     __ addptr(rsp, 4);
10629   %}
10630   ins_pipe( pipe_slow );
10631 %}
10632 
10633 // Force rounding double precision to single precision
10634 instruct convD2F_reg(regF dst, regD src) %{
10635   predicate(UseSSE>=2);
10636   match(Set dst (ConvD2F src));
10637   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10638   ins_encode %{
10639     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10640   %}
10641   ins_pipe( pipe_slow );
10642 %}
10643 
10644 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10645   predicate(UseSSE==0);
10646   match(Set dst (ConvF2D src));
10647   format %{ "FST_S  $dst,$src\t# D-round" %}
10648   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10649   ins_pipe( fpu_reg_reg );
10650 %}
10651 
10652 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10653   predicate(UseSSE==1);
10654   match(Set dst (ConvF2D src));
10655   format %{ "FST_D  $dst,$src\t# D-round" %}
10656   expand %{
10657     roundDouble_mem_reg(dst,src);
10658   %}
10659 %}
10660 
10661 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10662   predicate(UseSSE==1);
10663   match(Set dst (ConvF2D src));
10664   effect( KILL cr );
10665   format %{ "SUB    ESP,4\n\t"
10666             "MOVSS  [ESP] $src\n\t"
10667             "FLD_S  [ESP]\n\t"
10668             "ADD    ESP,4\n\t"
10669             "FSTP   $dst\t# D-round" %}
10670   ins_encode %{
10671     __ subptr(rsp, 4);
10672     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10673     __ fld_s(Address(rsp, 0));
10674     __ addptr(rsp, 4);
10675     __ fstp_d($dst$$reg);
10676   %}
10677   ins_pipe( pipe_slow );
10678 %}
10679 
10680 instruct convF2D_reg(regD dst, regF src) %{
10681   predicate(UseSSE>=2);
10682   match(Set dst (ConvF2D src));
10683   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10684   ins_encode %{
10685     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10686   %}
10687   ins_pipe( pipe_slow );
10688 %}
10689 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10691 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10692   predicate(UseSSE<=1);
10693   match(Set dst (ConvD2I src));
10694   effect( KILL tmp, KILL cr );
10695   format %{ "FLD    $src\t# Convert double to int \n\t"
10696             "FLDCW  trunc mode\n\t"
10697             "SUB    ESP,4\n\t"
10698             "FISTp  [ESP + #0]\n\t"
10699             "FLDCW  std/24-bit mode\n\t"
10700             "POP    EAX\n\t"
10701             "CMP    EAX,0x80000000\n\t"
10702             "JNE,s  fast\n\t"
10703             "FLD_D  $src\n\t"
10704             "CALL   d2i_wrapper\n"
10705       "fast:" %}
10706   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10707   ins_pipe( pipe_slow );
10708 %}
10709 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10711 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10712   predicate(UseSSE>=2);
10713   match(Set dst (ConvD2I src));
10714   effect( KILL tmp, KILL cr );
10715   format %{ "CVTTSD2SI $dst, $src\n\t"
10716             "CMP    $dst,0x80000000\n\t"
10717             "JNE,s  fast\n\t"
10718             "SUB    ESP, 8\n\t"
10719             "MOVSD  [ESP], $src\n\t"
10720             "FLD_D  [ESP]\n\t"
10721             "ADD    ESP, 8\n\t"
10722             "CALL   d2i_wrapper\n"
10723       "fast:" %}
10724   ins_encode %{
10725     Label fast;
10726     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10727     __ cmpl($dst$$Register, 0x80000000);
10728     __ jccb(Assembler::notEqual, fast);
10729     __ subptr(rsp, 8);
10730     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10731     __ fld_d(Address(rsp, 0));
10732     __ addptr(rsp, 8);
10733     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10734     __ bind(fast);
10735   %}
10736   ins_pipe( pipe_slow );
10737 %}
10738 
10739 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10740   predicate(UseSSE<=1);
10741   match(Set dst (ConvD2L src));
10742   effect( KILL cr );
10743   format %{ "FLD    $src\t# Convert double to long\n\t"
10744             "FLDCW  trunc mode\n\t"
10745             "SUB    ESP,8\n\t"
10746             "FISTp  [ESP + #0]\n\t"
10747             "FLDCW  std/24-bit mode\n\t"
10748             "POP    EAX\n\t"
10749             "POP    EDX\n\t"
10750             "CMP    EDX,0x80000000\n\t"
10751             "JNE,s  fast\n\t"
10752             "TEST   EAX,EAX\n\t"
10753             "JNE,s  fast\n\t"
10754             "FLD    $src\n\t"
10755             "CALL   d2l_wrapper\n"
10756       "fast:" %}
10757   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10758   ins_pipe( pipe_slow );
10759 %}
10760 
// In 32-bit mode XMM has no float/double->long conversion, so use the old FPU stack.
10762 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10763   predicate (UseSSE>=2);
10764   match(Set dst (ConvD2L src));
10765   effect( KILL cr );
10766   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10767             "MOVSD  [ESP],$src\n\t"
10768             "FLD_D  [ESP]\n\t"
10769             "FLDCW  trunc mode\n\t"
10770             "FISTp  [ESP + #0]\n\t"
10771             "FLDCW  std/24-bit mode\n\t"
10772             "POP    EAX\n\t"
10773             "POP    EDX\n\t"
10774             "CMP    EDX,0x80000000\n\t"
10775             "JNE,s  fast\n\t"
10776             "TEST   EAX,EAX\n\t"
10777             "JNE,s  fast\n\t"
10778             "SUB    ESP,8\n\t"
10779             "MOVSD  [ESP],$src\n\t"
10780             "FLD_D  [ESP]\n\t"
10781             "ADD    ESP,8\n\t"
10782             "CALL   d2l_wrapper\n"
10783       "fast:" %}
10784   ins_encode %{
10785     Label fast;
10786     __ subptr(rsp, 8);
10787     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10788     __ fld_d(Address(rsp, 0));
10789     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10790     __ fistp_d(Address(rsp, 0));
10791     // Restore the rounding mode, mask the exception
10792     if (Compile::current()->in_24_bit_fp_mode()) {
10793       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10794     } else {
10795       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10796     }
10797     // Load the converted long, adjust CPU stack
10798     __ pop(rax);
10799     __ pop(rdx);
10800     __ cmpl(rdx, 0x80000000);
10801     __ jccb(Assembler::notEqual, fast);
10802     __ testl(rax, rax);
10803     __ jccb(Assembler::notEqual, fast);
10804     __ subptr(rsp, 8);
10805     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10806     __ fld_d(Address(rsp, 0));
10807     __ addptr(rsp, 8);
10808     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10809     __ bind(fast);
10810   %}
10811   ins_pipe( pipe_slow );
10812 %}
10813 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
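// For reference, the Java narrowing rules the wrapper implements are:
//     (int)NaN        == 0
//     (int)+Infinity  == Integer.MAX_VALUE (0x7fffffff)
//     (int)-Infinity  == Integer.MIN_VALUE (0x80000000)
//     out of range    => clamped to MIN_VALUE/MAX_VALUE
// whereas a raw FIST of a NaN or out-of-range value produces the x87
// "integer indefinite" 0x80000000, hence the compare against 0x80000000
// and the slow call into the wrapper.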
10820 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10821   predicate(UseSSE==0);
10822   match(Set dst (ConvF2I src));
10823   effect( KILL tmp, KILL cr );
10824   format %{ "FLD    $src\t# Convert float to int \n\t"
10825             "FLDCW  trunc mode\n\t"
10826             "SUB    ESP,4\n\t"
10827             "FISTp  [ESP + #0]\n\t"
10828             "FLDCW  std/24-bit mode\n\t"
10829             "POP    EAX\n\t"
10830             "CMP    EAX,0x80000000\n\t"
10831             "JNE,s  fast\n\t"
10832             "FLD    $src\n\t"
10833             "CALL   d2i_wrapper\n"
10834       "fast:" %}
10835   // DPR2I_encoding works for FPR2I
10836   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10837   ins_pipe( pipe_slow );
10838 %}
10839 
10840 // Convert a float in xmm to an int reg.
10841 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10842   predicate(UseSSE>=1);
10843   match(Set dst (ConvF2I src));
10844   effect( KILL tmp, KILL cr );
10845   format %{ "CVTTSS2SI $dst, $src\n\t"
10846             "CMP    $dst,0x80000000\n\t"
10847             "JNE,s  fast\n\t"
10848             "SUB    ESP, 4\n\t"
10849             "MOVSS  [ESP], $src\n\t"
10850             "FLD    [ESP]\n\t"
10851             "ADD    ESP, 4\n\t"
10852             "CALL   d2i_wrapper\n"
10853       "fast:" %}
10854   ins_encode %{
10855     Label fast;
10856     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10857     __ cmpl($dst$$Register, 0x80000000);
10858     __ jccb(Assembler::notEqual, fast);
10859     __ subptr(rsp, 4);
10860     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10861     __ fld_s(Address(rsp, 0));
10862     __ addptr(rsp, 4);
10863     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10864     __ bind(fast);
10865   %}
10866   ins_pipe( pipe_slow );
10867 %}
10868 
10869 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10870   predicate(UseSSE==0);
10871   match(Set dst (ConvF2L src));
10872   effect( KILL cr );
10873   format %{ "FLD    $src\t# Convert float to long\n\t"
10874             "FLDCW  trunc mode\n\t"
10875             "SUB    ESP,8\n\t"
10876             "FISTp  [ESP + #0]\n\t"
10877             "FLDCW  std/24-bit mode\n\t"
10878             "POP    EAX\n\t"
10879             "POP    EDX\n\t"
10880             "CMP    EDX,0x80000000\n\t"
10881             "JNE,s  fast\n\t"
10882             "TEST   EAX,EAX\n\t"
10883             "JNE,s  fast\n\t"
10884             "FLD    $src\n\t"
10885             "CALL   d2l_wrapper\n"
10886       "fast:" %}
10887   // DPR2L_encoding works for FPR2L
10888   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10889   ins_pipe( pipe_slow );
10890 %}
10891 
// In 32-bit mode XMM has no float/double->long conversion, so use the old FPU stack.
10893 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10894   predicate (UseSSE>=1);
10895   match(Set dst (ConvF2L src));
10896   effect( KILL cr );
10897   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10898             "MOVSS  [ESP],$src\n\t"
10899             "FLD_S  [ESP]\n\t"
10900             "FLDCW  trunc mode\n\t"
10901             "FISTp  [ESP + #0]\n\t"
10902             "FLDCW  std/24-bit mode\n\t"
10903             "POP    EAX\n\t"
10904             "POP    EDX\n\t"
10905             "CMP    EDX,0x80000000\n\t"
10906             "JNE,s  fast\n\t"
10907             "TEST   EAX,EAX\n\t"
10908             "JNE,s  fast\n\t"
10909             "SUB    ESP,4\t# Convert float to long\n\t"
10910             "MOVSS  [ESP],$src\n\t"
10911             "FLD_S  [ESP]\n\t"
10912             "ADD    ESP,4\n\t"
10913             "CALL   d2l_wrapper\n"
10914       "fast:" %}
10915   ins_encode %{
10916     Label fast;
10917     __ subptr(rsp, 8);
10918     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10919     __ fld_s(Address(rsp, 0));
10920     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10921     __ fistp_d(Address(rsp, 0));
10922     // Restore the rounding mode, mask the exception
10923     if (Compile::current()->in_24_bit_fp_mode()) {
10924       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10925     } else {
10926       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10927     }
10928     // Load the converted long, adjust CPU stack
10929     __ pop(rax);
10930     __ pop(rdx);
10931     __ cmpl(rdx, 0x80000000);
10932     __ jccb(Assembler::notEqual, fast);
10933     __ testl(rax, rax);
10934     __ jccb(Assembler::notEqual, fast);
10935     __ subptr(rsp, 4);
10936     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10937     __ fld_s(Address(rsp, 0));
10938     __ addptr(rsp, 4);
10939     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10940     __ bind(fast);
10941   %}
10942   ins_pipe( pipe_slow );
10943 %}
10944 
10945 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10946   predicate( UseSSE<=1 );
10947   match(Set dst (ConvI2D src));
10948   format %{ "FILD   $src\n\t"
10949             "FSTP   $dst" %}
10950   opcode(0xDB, 0x0);  /* DB /0 */
10951   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10952   ins_pipe( fpu_reg_mem );
10953 %}
10954 
10955 instruct convI2D_reg(regD dst, rRegI src) %{
10956   predicate( UseSSE>=2 && !UseXmmI2D );
10957   match(Set dst (ConvI2D src));
10958   format %{ "CVTSI2SD $dst,$src" %}
10959   ins_encode %{
10960     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10961   %}
10962   ins_pipe( pipe_slow );
10963 %}
10964 
10965 instruct convI2D_mem(regD dst, memory mem) %{
10966   predicate( UseSSE>=2 );
10967   match(Set dst (ConvI2D (LoadI mem)));
10968   format %{ "CVTSI2SD $dst,$mem" %}
10969   ins_encode %{
10970     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10971   %}
10972   ins_pipe( pipe_slow );
10973 %}
10974 
10975 instruct convXI2D_reg(regD dst, rRegI src)
10976 %{
10977   predicate( UseSSE>=2 && UseXmmI2D );
10978   match(Set dst (ConvI2D src));
10979 
10980   format %{ "MOVD  $dst,$src\n\t"
10981             "CVTDQ2PD $dst,$dst\t# i2d" %}
10982   ins_encode %{
10983     __ movdl($dst$$XMMRegister, $src$$Register);
10984     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10985   %}
10986   ins_pipe(pipe_slow); // XXX
10987 %}
10988 
10989 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10990   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10991   match(Set dst (ConvI2D (LoadI mem)));
10992   format %{ "FILD   $mem\n\t"
10993             "FSTP   $dst" %}
10994   opcode(0xDB);      /* DB /0 */
10995   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10996               Pop_Reg_DPR(dst));
10997   ins_pipe( fpu_reg_mem );
10998 %}
10999 
// Convert a byte to a float; no rounding step needed since every value in
// 0..255 is exactly representable in a float.
11001 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11002   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11003   match(Set dst (ConvI2F src));
11004   format %{ "FILD   $src\n\t"
11005             "FSTP   $dst" %}
11006 
11007   opcode(0xDB, 0x0);  /* DB /0 */
11008   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11009   ins_pipe( fpu_reg_mem );
11010 %}
11011 
11012 // In 24-bit mode, force exponent rounding by storing back out
11013 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11014   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11015   match(Set dst (ConvI2F src));
11016   ins_cost(200);
11017   format %{ "FILD   $src\n\t"
11018             "FSTP_S $dst" %}
11019   opcode(0xDB, 0x0);  /* DB /0 */
11020   ins_encode( Push_Mem_I(src),
11021               Pop_Mem_FPR(dst));
11022   ins_pipe( fpu_mem_mem );
11023 %}
11024 
11025 // In 24-bit mode, force exponent rounding by storing back out
11026 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11027   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11028   match(Set dst (ConvI2F (LoadI mem)));
11029   ins_cost(200);
11030   format %{ "FILD   $mem\n\t"
11031             "FSTP_S $dst" %}
11032   opcode(0xDB);  /* DB /0 */
11033   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11034               Pop_Mem_FPR(dst));
11035   ins_pipe( fpu_mem_mem );
11036 %}
11037 
11038 // This instruction does not round to 24-bits
11039 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11040   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11041   match(Set dst (ConvI2F src));
11042   format %{ "FILD   $src\n\t"
11043             "FSTP   $dst" %}
11044   opcode(0xDB, 0x0);  /* DB /0 */
11045   ins_encode( Push_Mem_I(src),
11046               Pop_Reg_FPR(dst));
11047   ins_pipe( fpu_reg_mem );
11048 %}
11049 
11050 // This instruction does not round to 24-bits
11051 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11052   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11053   match(Set dst (ConvI2F (LoadI mem)));
11054   format %{ "FILD   $mem\n\t"
11055             "FSTP   $dst" %}
11056   opcode(0xDB);      /* DB /0 */
11057   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11058               Pop_Reg_FPR(dst));
11059   ins_pipe( fpu_reg_mem );
11060 %}
11061 
11062 // Convert an int to a float in xmm; no rounding step needed.
11063 instruct convI2F_reg(regF dst, rRegI src) %{
11064   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11065   match(Set dst (ConvI2F src));
11066   format %{ "CVTSI2SS $dst, $src" %}
11067   ins_encode %{
11068     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11069   %}
11070   ins_pipe( pipe_slow );
11071 %}
11072 
instruct convXI2F_reg(regF dst, rRegI src)
11074 %{
11075   predicate( UseSSE>=2 && UseXmmI2F );
11076   match(Set dst (ConvI2F src));
11077 
11078   format %{ "MOVD  $dst,$src\n\t"
11079             "CVTDQ2PS $dst,$dst\t# i2f" %}
11080   ins_encode %{
11081     __ movdl($dst$$XMMRegister, $src$$Register);
11082     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11083   %}
11084   ins_pipe(pipe_slow); // XXX
11085 %}
11086 
11087 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11088   match(Set dst (ConvI2L src));
11089   effect(KILL cr);
11090   ins_cost(375);
11091   format %{ "MOV    $dst.lo,$src\n\t"
11092             "MOV    $dst.hi,$src\n\t"
11093             "SAR    $dst.hi,31" %}
11094   ins_encode(convert_int_long(dst,src));
11095   ins_pipe( ialu_reg_reg_long );
11096 %}
11097 
11098 // Zero-extend convert int to long
11099 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11100   match(Set dst (AndL (ConvI2L src) mask) );
11101   effect( KILL flags );
11102   ins_cost(250);
11103   format %{ "MOV    $dst.lo,$src\n\t"
11104             "XOR    $dst.hi,$dst.hi" %}
11105   opcode(0x33); // XOR
11106   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11107   ins_pipe( ialu_reg_reg_long );
11108 %}
11109 
11110 // Zero-extend long
11111 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11112   match(Set dst (AndL src mask) );
11113   effect( KILL flags );
11114   ins_cost(250);
11115   format %{ "MOV    $dst.lo,$src.lo\n\t"
11116             "XOR    $dst.hi,$dst.hi\n\t" %}
11117   opcode(0x33); // XOR
11118   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11119   ins_pipe( ialu_reg_reg_long );
11120 %}
11121 
11122 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11123   predicate (UseSSE<=1);
11124   match(Set dst (ConvL2D src));
11125   effect( KILL cr );
11126   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11127             "PUSH   $src.lo\n\t"
11128             "FILD   ST,[ESP + #0]\n\t"
11129             "ADD    ESP,8\n\t"
11130             "FSTP_D $dst\t# D-round" %}
11131   opcode(0xDF, 0x5);  /* DF /5 */
11132   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11133   ins_pipe( pipe_slow );
11134 %}
11135 
11136 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11137   predicate (UseSSE>=2);
11138   match(Set dst (ConvL2D src));
11139   effect( KILL cr );
11140   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11141             "PUSH   $src.lo\n\t"
11142             "FILD_D [ESP]\n\t"
11143             "FSTP_D [ESP]\n\t"
11144             "MOVSD  $dst,[ESP]\n\t"
11145             "ADD    ESP,8" %}
11146   opcode(0xDF, 0x5);  /* DF /5 */
11147   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11148   ins_pipe( pipe_slow );
11149 %}
11150 
11151 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11152   predicate (UseSSE>=1);
11153   match(Set dst (ConvL2F src));
11154   effect( KILL cr );
11155   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11156             "PUSH   $src.lo\n\t"
11157             "FILD_D [ESP]\n\t"
11158             "FSTP_S [ESP]\n\t"
11159             "MOVSS  $dst,[ESP]\n\t"
11160             "ADD    ESP,8" %}
11161   opcode(0xDF, 0x5);  /* DF /5 */
11162   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11163   ins_pipe( pipe_slow );
11164 %}
11165 
11166 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  predicate( UseSSE==0 );
  match(Set dst (ConvL2F src));
11168   effect( KILL cr );
11169   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11170             "PUSH   $src.lo\n\t"
11171             "FILD   ST,[ESP + #0]\n\t"
11172             "ADD    ESP,8\n\t"
11173             "FSTP_S $dst\t# F-round" %}
11174   opcode(0xDF, 0x5);  /* DF /5 */
11175   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11176   ins_pipe( pipe_slow );
11177 %}
11178 
11179 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11180   match(Set dst (ConvL2I src));
11181   effect( DEF dst, USE src );
11182   format %{ "MOV    $dst,$src.lo" %}
11183   ins_encode(enc_CopyL_Lo(dst,src));
11184   ins_pipe( ialu_reg_reg );
11185 %}
11186 
11187 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11188   match(Set dst (MoveF2I src));
11189   effect( DEF dst, USE src );
11190   ins_cost(100);
11191   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11192   ins_encode %{
11193     __ movl($dst$$Register, Address(rsp, $src$$disp));
11194   %}
11195   ins_pipe( ialu_reg_mem );
11196 %}
11197 
11198 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11199   predicate(UseSSE==0);
11200   match(Set dst (MoveF2I src));
11201   effect( DEF dst, USE src );
11202 
11203   ins_cost(125);
11204   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11205   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11206   ins_pipe( fpu_mem_reg );
11207 %}
11208 
11209 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11210   predicate(UseSSE>=1);
11211   match(Set dst (MoveF2I src));
11212   effect( DEF dst, USE src );
11213 
11214   ins_cost(95);
11215   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11216   ins_encode %{
11217     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11218   %}
11219   ins_pipe( pipe_slow );
11220 %}
11221 
11222 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11223   predicate(UseSSE>=2);
11224   match(Set dst (MoveF2I src));
11225   effect( DEF dst, USE src );
11226   ins_cost(85);
11227   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11228   ins_encode %{
11229     __ movdl($dst$$Register, $src$$XMMRegister);
11230   %}
11231   ins_pipe( pipe_slow );
11232 %}
11233 
11234 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11235   match(Set dst (MoveI2F src));
11236   effect( DEF dst, USE src );
11237 
11238   ins_cost(100);
11239   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11240   ins_encode %{
11241     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11242   %}
11243   ins_pipe( ialu_mem_reg );
11244 %}
11245 
11246 
11247 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11248   predicate(UseSSE==0);
11249   match(Set dst (MoveI2F src));
11250   effect(DEF dst, USE src);
11251 
11252   ins_cost(125);
11253   format %{ "FLD_S  $src\n\t"
11254             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11255   opcode(0xD9);               /* D9 /0, FLD m32real */
11256   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11257               Pop_Reg_FPR(dst) );
11258   ins_pipe( fpu_reg_mem );
11259 %}
11260 
11261 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11262   predicate(UseSSE>=1);
11263   match(Set dst (MoveI2F src));
11264   effect( DEF dst, USE src );
11265 
11266   ins_cost(95);
11267   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11268   ins_encode %{
11269     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11270   %}
11271   ins_pipe( pipe_slow );
11272 %}
11273 
11274 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11275   predicate(UseSSE>=2);
11276   match(Set dst (MoveI2F src));
11277   effect( DEF dst, USE src );
11278 
11279   ins_cost(85);
11280   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11281   ins_encode %{
11282     __ movdl($dst$$XMMRegister, $src$$Register);
11283   %}
11284   ins_pipe( pipe_slow );
11285 %}
11286 
11287 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11288   match(Set dst (MoveD2L src));
11289   effect(DEF dst, USE src);
11290 
11291   ins_cost(250);
11292   format %{ "MOV    $dst.lo,$src\n\t"
11293             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11294   opcode(0x8B, 0x8B);
11295   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11296   ins_pipe( ialu_mem_long_reg );
11297 %}
11298 
11299 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11300   predicate(UseSSE<=1);
11301   match(Set dst (MoveD2L src));
11302   effect(DEF dst, USE src);
11303 
11304   ins_cost(125);
11305   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11306   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11307   ins_pipe( fpu_mem_reg );
11308 %}
11309 
11310 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11311   predicate(UseSSE>=2);
11312   match(Set dst (MoveD2L src));
11313   effect(DEF dst, USE src);
11314   ins_cost(95);
11315   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11316   ins_encode %{
11317     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11318   %}
11319   ins_pipe( pipe_slow );
11320 %}
11321 
11322 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11323   predicate(UseSSE>=2);
11324   match(Set dst (MoveD2L src));
11325   effect(DEF dst, USE src, TEMP tmp);
11326   ins_cost(85);
11327   format %{ "MOVD   $dst.lo,$src\n\t"
11328             "PSHUFLW $tmp,$src,0x4E\n\t"
11329             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11330   ins_encode %{
11331     __ movdl($dst$$Register, $src$$XMMRegister);
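    // PSHUFLW 0x4E reorders the 16-bit words of the low quadword as [2,3,0,1], so the
    // upper 32 bits of the double land in the low dword of $tmp for the next MOVD.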
11332     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11333     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11334   %}
11335   ins_pipe( pipe_slow );
11336 %}
11337 
11338 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11339   match(Set dst (MoveL2D src));
11340   effect(DEF dst, USE src);
11341 
11342   ins_cost(200);
11343   format %{ "MOV    $dst,$src.lo\n\t"
11344             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11345   opcode(0x89, 0x89);
11346   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11347   ins_pipe( ialu_mem_long_reg );
11348 %}
11349 
11350 
11351 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11352   predicate(UseSSE<=1);
11353   match(Set dst (MoveL2D src));
11354   effect(DEF dst, USE src);
11355   ins_cost(125);
11356 
11357   format %{ "FLD_D  $src\n\t"
11358             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11359   opcode(0xDD);               /* DD /0, FLD m64real */
11360   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11361               Pop_Reg_DPR(dst) );
11362   ins_pipe( fpu_reg_mem );
11363 %}
11364 
11365 
11366 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11367   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11368   match(Set dst (MoveL2D src));
11369   effect(DEF dst, USE src);
11370 
11371   ins_cost(95);
11372   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11373   ins_encode %{
11374     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11375   %}
11376   ins_pipe( pipe_slow );
11377 %}
11378 
11379 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11380   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11381   match(Set dst (MoveL2D src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(95);
11385   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11386   ins_encode %{
11387     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11388   %}
11389   ins_pipe( pipe_slow );
11390 %}
11391 
11392 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11393   predicate(UseSSE>=2);
11394   match(Set dst (MoveL2D src));
11395   effect(TEMP dst, USE src, TEMP tmp);
11396   ins_cost(85);
11397   format %{ "MOVD   $dst,$src.lo\n\t"
11398             "MOVD   $tmp,$src.hi\n\t"
11399             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11400   ins_encode %{
11401     __ movdl($dst$$XMMRegister, $src$$Register);
11402     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11403     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11404   %}
11405   ins_pipe( pipe_slow );
11406 %}
11407 
11408 
11409 // =======================================================================
11410 // fast clearing of an array
11411 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11412   predicate(!UseFastStosb);
11413   match(Set dummy (ClearArray cnt base));
11414   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11415   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11416             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11417             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11418   ins_encode %{
11419     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11420   %}
11421   ins_pipe( pipe_slow );
11422 %}
11423 
11424 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11425   predicate(UseFastStosb);
11426   match(Set dummy (ClearArray cnt base));
11427   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11428   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11429             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11430             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11431   ins_encode %{
11432     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11433   %}
11434   ins_pipe( pipe_slow );
11435 %}
11436 
11437 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11438                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11439   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11440   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11441   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11442 
11443   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11444   ins_encode %{
11445     __ string_compare($str1$$Register, $str2$$Register,
11446                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11447                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11448   %}
11449   ins_pipe( pipe_slow );
11450 %}
11451 
11452 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11453                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11454   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11455   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11456   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11457 
11458   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11459   ins_encode %{
11460     __ string_compare($str1$$Register, $str2$$Register,
11461                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11462                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11463   %}
11464   ins_pipe( pipe_slow );
11465 %}
11466 
11467 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11468                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11469   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11470   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11471   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11472 
11473   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11474   ins_encode %{
11475     __ string_compare($str1$$Register, $str2$$Register,
11476                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11477                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11478   %}
11479   ins_pipe( pipe_slow );
11480 %}
11481 
11482 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11483                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11484   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11485   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11486   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11487 
11488   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11489   ins_encode %{
11490     __ string_compare($str2$$Register, $str1$$Register,
11491                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11492                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11493   %}
11494   ins_pipe( pipe_slow );
11495 %}
11496 
11497 // fast string equals
11498 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11499                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11500   match(Set result (StrEquals (Binary str1 str2) cnt));
11501   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11502 
11503   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11504   ins_encode %{
11505     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11506                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11507                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11508   %}
11509 
11510   ins_pipe( pipe_slow );
11511 %}
11512 
11513 // fast search of substring with known size.
11514 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11515                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11516   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11517   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11518   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11519 
11520   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11521   ins_encode %{
11522     int icnt2 = (int)$int_cnt2$$constant;
11523     if (icnt2 >= 16) {
11524       // IndexOf for constant substrings with size >= 16 elements
11525       // which don't need to be loaded through stack.
11526       __ string_indexofC8($str1$$Register, $str2$$Register,
11527                           $cnt1$$Register, $cnt2$$Register,
11528                           icnt2, $result$$Register,
11529                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11530     } else {
11531       // Small strings are loaded through stack if they cross page boundary.
11532       __ string_indexof($str1$$Register, $str2$$Register,
11533                         $cnt1$$Register, $cnt2$$Register,
11534                         icnt2, $result$$Register,
11535                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11536     }
11537   %}
11538   ins_pipe( pipe_slow );
11539 %}
11540 
11541 // fast search of substring with known size.
11542 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11543                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11544   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11545   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11546   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11547 
11548   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11549   ins_encode %{
11550     int icnt2 = (int)$int_cnt2$$constant;
11551     if (icnt2 >= 8) {
11552       // IndexOf for constant substrings with size >= 8 elements
11553       // which don't need to be loaded through stack.
11554       __ string_indexofC8($str1$$Register, $str2$$Register,
11555                           $cnt1$$Register, $cnt2$$Register,
11556                           icnt2, $result$$Register,
11557                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11558     } else {
11559       // Small strings are loaded through stack if they cross page boundary.
11560       __ string_indexof($str1$$Register, $str2$$Register,
11561                         $cnt1$$Register, $cnt2$$Register,
11562                         icnt2, $result$$Register,
11563                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11564     }
11565   %}
11566   ins_pipe( pipe_slow );
11567 %}
11568 
11569 // fast search of substring with known size.
11570 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11571                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11572   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11573   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11574   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11575 
11576   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11577   ins_encode %{
11578     int icnt2 = (int)$int_cnt2$$constant;
11579     if (icnt2 >= 8) {
11580       // IndexOf for constant substrings with size >= 8 elements
11581       // which don't need to be loaded through stack.
11582       __ string_indexofC8($str1$$Register, $str2$$Register,
11583                           $cnt1$$Register, $cnt2$$Register,
11584                           icnt2, $result$$Register,
11585                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11586     } else {
11587       // Small strings are loaded through stack if they cross page boundary.
11588       __ string_indexof($str1$$Register, $str2$$Register,
11589                         $cnt1$$Register, $cnt2$$Register,
11590                         icnt2, $result$$Register,
11591                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11592     }
11593   %}
11594   ins_pipe( pipe_slow );
11595 %}
11596 
11597 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11598                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11599   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11600   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11601   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11602 
11603   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11604   ins_encode %{
11605     __ string_indexof($str1$$Register, $str2$$Register,
11606                       $cnt1$$Register, $cnt2$$Register,
11607                       (-1), $result$$Register,
11608                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11609   %}
11610   ins_pipe( pipe_slow );
11611 %}
11612 
11613 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11614                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11615   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11616   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11617   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11618 
11619   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11620   ins_encode %{
11621     __ string_indexof($str1$$Register, $str2$$Register,
11622                       $cnt1$$Register, $cnt2$$Register,
11623                       (-1), $result$$Register,
11624                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11625   %}
11626   ins_pipe( pipe_slow );
11627 %}
11628 
11629 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11630                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11631   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11632   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11633   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11634 
11635   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11636   ins_encode %{
11637     __ string_indexof($str1$$Register, $str2$$Register,
11638                       $cnt1$$Register, $cnt2$$Register,
11639                       (-1), $result$$Register,
11640                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11641   %}
11642   ins_pipe( pipe_slow );
11643 %}
11644 
11645 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11646                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11647   predicate(UseSSE42Intrinsics);
11648   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11649   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11650   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11651   ins_encode %{
11652     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11653                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11654   %}
11655   ins_pipe( pipe_slow );
11656 %}
11657 
11658 // fast array equals
11659 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11660                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11661 %{
11662   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11663   match(Set result (AryEq ary1 ary2));
11664   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11665   //ins_cost(300);
11666 
11667   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11668   ins_encode %{
11669     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11670                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11671                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11672   %}
11673   ins_pipe( pipe_slow );
11674 %}
11675 
11676 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11677                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11678 %{
11679   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11680   match(Set result (AryEq ary1 ary2));
11681   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11682   //ins_cost(300);
11683 
11684   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11685   ins_encode %{
11686     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11687                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11688                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11689   %}
11690   ins_pipe( pipe_slow );
11691 %}
11692 
11693 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11694                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11695 %{
11696   match(Set result (HasNegatives ary1 len));
11697   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11698 
11699   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11700   ins_encode %{
11701     __ has_negatives($ary1$$Register, $len$$Register,
11702                      $result$$Register, $tmp3$$Register,
11703                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11704   %}
11705   ins_pipe( pipe_slow );
11706 %}
11707 
11708 // fast char[] to byte[] compression
11709 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11710                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11711   match(Set result (StrCompressedCopy src (Binary dst len)));
11712   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11713 
11714   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11715   ins_encode %{
11716     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11717                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11718                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11719   %}
11720   ins_pipe( pipe_slow );
11721 %}
11722 
11723 // fast byte[] to char[] inflation
11724 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11725                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11726   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11727   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11728 
11729   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11730   ins_encode %{
11731     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11732                           $tmp1$$XMMRegister, $tmp2$$Register);
11733   %}
11734   ins_pipe( pipe_slow );
11735 %}
11736 
11737 // encode char[] to byte[] in ISO_8859_1
11738 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11739                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11740                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11741   match(Set result (EncodeISOArray src (Binary dst len)));
11742   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11743 
11744   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11745   ins_encode %{
11746     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11747                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11748                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11749   %}
11750   ins_pipe( pipe_slow );
11751 %}
11752 
11753 
11754 //----------Control Flow Instructions------------------------------------------
11755 // Signed compare Instructions
11756 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11757   match(Set cr (CmpI op1 op2));
11758   effect( DEF cr, USE op1, USE op2 );
11759   format %{ "CMP    $op1,$op2" %}
11760   opcode(0x3B);  /* Opcode 3B /r */
11761   ins_encode( OpcP, RegReg( op1, op2) );
11762   ins_pipe( ialu_cr_reg_reg );
11763 %}
11764 
11765 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11766   match(Set cr (CmpI op1 op2));
11767   effect( DEF cr, USE op1 );
11768   format %{ "CMP    $op1,$op2" %}
11769   opcode(0x81,0x07);  /* Opcode 81 /7 */
11770   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11771   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11772   ins_pipe( ialu_cr_reg_imm );
11773 %}
11774 
11775 // Cisc-spilled version of cmpI_eReg
11776 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11777   match(Set cr (CmpI op1 (LoadI op2)));
11778 
11779   format %{ "CMP    $op1,$op2" %}
11780   ins_cost(500);
11781   opcode(0x3B);  /* Opcode 3B /r */
11782   ins_encode( OpcP, RegMem( op1, op2) );
11783   ins_pipe( ialu_cr_reg_mem );
11784 %}
11785 
11786 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11787   match(Set cr (CmpI src zero));
11788   effect( DEF cr, USE src );
11789 
11790   format %{ "TEST   $src,$src" %}
11791   opcode(0x85);
11792   ins_encode( OpcP, RegReg( src, src ) );
11793   ins_pipe( ialu_cr_reg_imm );
11794 %}
11795 
11796 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11797   match(Set cr (CmpI (AndI src con) zero));
11798 
11799   format %{ "TEST   $src,$con" %}
11800   opcode(0xF7,0x00);
11801   ins_encode( OpcP, RegOpc(src), Con32(con) );
11802   ins_pipe( ialu_cr_reg_imm );
11803 %}
11804 
11805 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11806   match(Set cr (CmpI (AndI src mem) zero));
11807 
11808   format %{ "TEST   $src,$mem" %}
11809   opcode(0x85);
11810   ins_encode( OpcP, RegMem( src, mem ) );
11811   ins_pipe( ialu_cr_reg_mem );
11812 %}
11813 
11814 // Unsigned compare Instructions; really, same as signed except they
11815 // produce an eFlagsRegU instead of eFlagsReg.
11816 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11817   match(Set cr (CmpU op1 op2));
11818 
11819   format %{ "CMPu   $op1,$op2" %}
11820   opcode(0x3B);  /* Opcode 3B /r */
11821   ins_encode( OpcP, RegReg( op1, op2) );
11822   ins_pipe( ialu_cr_reg_reg );
11823 %}
11824 
11825 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11826   match(Set cr (CmpU op1 op2));
11827 
11828   format %{ "CMPu   $op1,$op2" %}
11829   opcode(0x81,0x07);  /* Opcode 81 /7 */
11830   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11831   ins_pipe( ialu_cr_reg_imm );
11832 %}
11833 
// Cisc-spilled version of cmpU_eReg
11835 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11836   match(Set cr (CmpU op1 (LoadI op2)));
11837 
11838   format %{ "CMPu   $op1,$op2" %}
11839   ins_cost(500);
11840   opcode(0x3B);  /* Opcode 3B /r */
11841   ins_encode( OpcP, RegMem( op1, op2) );
11842   ins_pipe( ialu_cr_reg_mem );
11843 %}
11844 
11845 // // Cisc-spilled version of cmpU_eReg
11846 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11847 //  match(Set cr (CmpU (LoadI op1) op2));
11848 //
11849 //  format %{ "CMPu   $op1,$op2" %}
11850 //  ins_cost(500);
11851 //  opcode(0x39);  /* Opcode 39 /r */
11852 //  ins_encode( OpcP, RegMem( op1, op2) );
11853 //%}
11854 
11855 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11856   match(Set cr (CmpU src zero));
11857 
11858   format %{ "TESTu  $src,$src" %}
11859   opcode(0x85);
11860   ins_encode( OpcP, RegReg( src, src ) );
11861   ins_pipe( ialu_cr_reg_imm );
11862 %}
11863 
11864 // Unsigned pointer compare Instructions
11865 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11866   match(Set cr (CmpP op1 op2));
11867 
11868   format %{ "CMPu   $op1,$op2" %}
11869   opcode(0x3B);  /* Opcode 3B /r */
11870   ins_encode( OpcP, RegReg( op1, op2) );
11871   ins_pipe( ialu_cr_reg_reg );
11872 %}
11873 
11874 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11875   match(Set cr (CmpP op1 op2));
11876 
11877   format %{ "CMPu   $op1,$op2" %}
11878   opcode(0x81,0x07);  /* Opcode 81 /7 */
11879   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11880   ins_pipe( ialu_cr_reg_imm );
11881 %}
11882 
// Cisc-spilled version of cmpP_eReg
11884 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11885   match(Set cr (CmpP op1 (LoadP op2)));
11886 
11887   format %{ "CMPu   $op1,$op2" %}
11888   ins_cost(500);
11889   opcode(0x3B);  /* Opcode 3B /r */
11890   ins_encode( OpcP, RegMem( op1, op2) );
11891   ins_pipe( ialu_cr_reg_mem );
11892 %}
11893 
11894 // // Cisc-spilled version of cmpP_eReg
11895 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11896 //  match(Set cr (CmpP (LoadP op1) op2));
11897 //
11898 //  format %{ "CMPu   $op1,$op2" %}
11899 //  ins_cost(500);
11900 //  opcode(0x39);  /* Opcode 39 /r */
11901 //  ins_encode( OpcP, RegMem( op1, op2) );
11902 //%}
11903 
11904 // Compare raw pointer (used in out-of-heap check).
11905 // Only works because non-oop pointers must be raw pointers
11906 // and raw pointers have no anti-dependencies.
11907 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11908   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11909   match(Set cr (CmpP op1 (LoadP op2)));
11910 
11911   format %{ "CMPu   $op1,$op2" %}
11912   opcode(0x3B);  /* Opcode 3B /r */
11913   ins_encode( OpcP, RegMem( op1, op2) );
11914   ins_pipe( ialu_cr_reg_mem );
11915 %}
11916 
11917 //
11918 // This will generate a signed flags result. This should be ok
11919 // since any compare to a zero should be eq/neq.
11920 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11921   match(Set cr (CmpP src zero));
11922 
11923   format %{ "TEST   $src,$src" %}
11924   opcode(0x85);
11925   ins_encode( OpcP, RegReg( src, src ) );
11926   ins_pipe( ialu_cr_reg_imm );
11927 %}
11928 
11929 // Cisc-spilled version of testP_reg
11930 // This will generate a signed flags result. This should be ok
11931 // since any compare to a zero should be eq/neq.
11932 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11933   match(Set cr (CmpP (LoadP op) zero));
11934 
11935   format %{ "TEST   $op,0xFFFFFFFF" %}
11936   ins_cost(500);
11937   opcode(0xF7);               /* Opcode F7 /0 */
11938   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11939   ins_pipe( ialu_cr_reg_imm );
11940 %}
11941 
11942 // Yanked all unsigned pointer compare operations.
11943 // Pointer compares are done with CmpP which is already unsigned.
11944 
11945 //----------Max and Min--------------------------------------------------------
11946 // Min Instructions
11947 ////
11948 //   *** Min and Max using the conditional move are slower than the
11949 //   *** branch version on a Pentium III.
11950 // // Conditional move for min
11951 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11952 //  effect( USE_DEF op2, USE op1, USE cr );
11953 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11954 //  opcode(0x4C,0x0F);
11955 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11956 //  ins_pipe( pipe_cmov_reg );
11957 //%}
11958 //
11959 //// Min Register with Register (P6 version)
11960 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11961 //  predicate(VM_Version::supports_cmov() );
11962 //  match(Set op2 (MinI op1 op2));
11963 //  ins_cost(200);
11964 //  expand %{
11965 //    eFlagsReg cr;
11966 //    compI_eReg(cr,op1,op2);
11967 //    cmovI_reg_lt(op2,op1,cr);
11968 //  %}
11969 //%}
11970 
11971 // Min Register with Register (generic version)
11972 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11973   match(Set dst (MinI dst src));
11974   effect(KILL flags);
11975   ins_cost(300);
11976 
11977   format %{ "MIN    $dst,$src" %}
11978   opcode(0xCC);
11979   ins_encode( min_enc(dst,src) );
11980   ins_pipe( pipe_slow );
11981 %}
11982 
11983 // Max Register with Register
11984 //   *** Min and Max using the conditional move are slower than the
11985 //   *** branch version on a Pentium III.
11986 // // Conditional move for max
11987 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11988 //  effect( USE_DEF op2, USE op1, USE cr );
11989 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11990 //  opcode(0x4F,0x0F);
11991 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11992 //  ins_pipe( pipe_cmov_reg );
11993 //%}
11994 //
11995 // // Max Register with Register (P6 version)
11996 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11997 //  predicate(VM_Version::supports_cmov() );
11998 //  match(Set op2 (MaxI op1 op2));
11999 //  ins_cost(200);
12000 //  expand %{
12001 //    eFlagsReg cr;
12002 //    compI_eReg(cr,op1,op2);
12003 //    cmovI_reg_gt(op2,op1,cr);
12004 //  %}
12005 //%}
12006 
12007 // Max Register with Register (generic version)
12008 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12009   match(Set dst (MaxI dst src));
12010   effect(KILL flags);
12011   ins_cost(300);
12012 
12013   format %{ "MAX    $dst,$src" %}
12014   opcode(0xCC);
12015   ins_encode( max_enc(dst,src) );
12016   ins_pipe( pipe_slow );
12017 %}
12018 
12019 // ============================================================================
12020 // Counted Loop limit node which represents exact final iterator value.
12021 // Note: the resulting value should fit into integer range since
12022 // counted loops have limit check on overflow.
12023 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12024   match(Set limit (LoopLimit (Binary init limit) stride));
12025   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12026   ins_cost(300);
12027 
12028   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12029   ins_encode %{
12030     int strd = (int)$stride$$constant;
12031     assert(strd != 1 && strd != -1, "sanity");
12032     int m1 = (strd > 0) ? 1 : -1;
12033     // Convert limit to long (EAX:EDX)
12034     __ cdql();
12035     // Convert init to long (init:tmp)
12036     __ movl($tmp$$Register, $init$$Register);
12037     __ sarl($tmp$$Register, 31);
12038     // $limit - $init
12039     __ subl($limit$$Register, $init$$Register);
12040     __ sbbl($limit_hi$$Register, $tmp$$Register);
12041     // + ($stride - 1)
12042     if (strd > 0) {
12043       __ addl($limit$$Register, (strd - 1));
12044       __ adcl($limit_hi$$Register, 0);
12045       __ movl($tmp$$Register, strd);
12046     } else {
12047       __ addl($limit$$Register, (strd + 1));
12048       __ adcl($limit_hi$$Register, -1);
12049       __ lneg($limit_hi$$Register, $limit$$Register);
12050       __ movl($tmp$$Register, -strd);
12051     }
    // signed division: (EAX:EDX) / pos_stride
12053     __ idivl($tmp$$Register);
12054     if (strd < 0) {
12055       // restore sign
12056       __ negl($tmp$$Register);
12057     }
12058     // (EAX) * stride
12059     __ mull($tmp$$Register);
12060     // + init (ignore upper bits)
12061     __ addl($limit$$Register, $init$$Register);
12062   %}
12063   ins_pipe( pipe_slow );
12064 %}
12065 
12066 // ============================================================================
12067 // Branch Instructions
12068 // Jump Table
12069 instruct jumpXtnd(rRegI switch_val) %{
12070   match(Jump switch_val);
12071   ins_cost(350);
12072   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12073   ins_encode %{
12074     // Jump to Address(table_base + switch_reg)
12075     Address index(noreg, $switch_val$$Register, Address::times_1);
12076     __ jump(ArrayAddress($constantaddress, index));
12077   %}
12078   ins_pipe(pipe_jmp);
12079 %}
12080 
12081 // Jump Direct - Label defines a relative address from JMP+1
12082 instruct jmpDir(label labl) %{
12083   match(Goto);
12084   effect(USE labl);
12085 
12086   ins_cost(300);
12087   format %{ "JMP    $labl" %}
12088   size(5);
12089   ins_encode %{
12090     Label* L = $labl$$label;
12091     __ jmp(*L, false); // Always long jump
12092   %}
12093   ins_pipe( pipe_jmp );
12094 %}
12095 
12096 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12097 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12098   match(If cop cr);
12099   effect(USE labl);
12100 
12101   ins_cost(300);
12102   format %{ "J$cop    $labl" %}
12103   size(6);
12104   ins_encode %{
12105     Label* L = $labl$$label;
12106     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12107   %}
12108   ins_pipe( pipe_jcc );
12109 %}
12110 
12111 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12112 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12113   match(CountedLoopEnd cop cr);
12114   effect(USE labl);
12115 
12116   ins_cost(300);
12117   format %{ "J$cop    $labl\t# Loop end" %}
12118   size(6);
12119   ins_encode %{
12120     Label* L = $labl$$label;
12121     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12122   %}
12123   ins_pipe( pipe_jcc );
12124 %}
12125 
12126 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12127 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12128   match(CountedLoopEnd cop cmp);
12129   effect(USE labl);
12130 
12131   ins_cost(300);
12132   format %{ "J$cop,u  $labl\t# Loop end" %}
12133   size(6);
12134   ins_encode %{
12135     Label* L = $labl$$label;
12136     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12137   %}
12138   ins_pipe( pipe_jcc );
12139 %}
12140 
12141 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12142   match(CountedLoopEnd cop cmp);
12143   effect(USE labl);
12144 
12145   ins_cost(200);
12146   format %{ "J$cop,u  $labl\t# Loop end" %}
12147   size(6);
12148   ins_encode %{
12149     Label* L = $labl$$label;
12150     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12151   %}
12152   ins_pipe( pipe_jcc );
12153 %}
12154 
12155 // Jump Direct Conditional - using unsigned comparison
12156 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12157   match(If cop cmp);
12158   effect(USE labl);
12159 
12160   ins_cost(300);
12161   format %{ "J$cop,u  $labl" %}
12162   size(6);
12163   ins_encode %{
12164     Label* L = $labl$$label;
12165     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12166   %}
12167   ins_pipe(pipe_jcc);
12168 %}
12169 
12170 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12171   match(If cop cmp);
12172   effect(USE labl);
12173 
12174   ins_cost(200);
12175   format %{ "J$cop,u  $labl" %}
12176   size(6);
12177   ins_encode %{
12178     Label* L = $labl$$label;
12179     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12180   %}
12181   ins_pipe(pipe_jcc);
12182 %}
12183 
12184 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12185   match(If cop cmp);
12186   effect(USE labl);
12187 
12188   ins_cost(200);
12189   format %{ $$template
12190     if ($cop$$cmpcode == Assembler::notEqual) {
12191       $$emit$$"JP,u   $labl\n\t"
12192       $$emit$$"J$cop,u   $labl"
12193     } else {
12194       $$emit$$"JP,u   done\n\t"
12195       $$emit$$"J$cop,u   $labl\n\t"
12196       $$emit$$"done:"
12197     }
12198   %}
12199   ins_encode %{
12200     Label* l = $labl$$label;
12201     if ($cop$$cmpcode == Assembler::notEqual) {
12202       __ jcc(Assembler::parity, *l, false);
12203       __ jcc(Assembler::notEqual, *l, false);
12204     } else if ($cop$$cmpcode == Assembler::equal) {
12205       Label done;
12206       __ jccb(Assembler::parity, done);
12207       __ jcc(Assembler::equal, *l, false);
12208       __ bind(done);
12209     } else {
12210        ShouldNotReachHere();
12211     }
12212   %}
12213   ins_pipe(pipe_jcc);
12214 %}
12215 
12216 // ============================================================================
12217 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12218 // array for an instance of the superklass.  Set a hidden internal cache on a
12219 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12220 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12221 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12222   match(Set result (PartialSubtypeCheck sub super));
12223   effect( KILL rcx, KILL cr );
12224 
12225   ins_cost(1100);  // slightly larger than the next version
12226   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12227             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12228             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12229             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12230             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12231             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12232             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12233      "miss:\t" %}
12234 
12235   opcode(0x1); // Force a XOR of EDI
12236   ins_encode( enc_PartialSubtypeCheck() );
12237   ins_pipe( pipe_slow );
12238 %}
12239 
12240 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12241   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12242   effect( KILL rcx, KILL result );
12243 
12244   ins_cost(1000);
12245   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12246             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12247             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12248             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12249             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12250             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12251      "miss:\t" %}
12252 
12253   opcode(0x0);  // No need to XOR EDI
12254   ins_encode( enc_PartialSubtypeCheck() );
12255   ins_pipe( pipe_slow );
12256 %}
12257 
12258 // ============================================================================
12259 // Branch Instructions -- short offset versions
12260 //
12261 // These instructions are used to replace jumps of a long offset (the default
12262 // match) with jumps of a shorter offset.  These instructions are all tagged
12263 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12264 // match rules in general matching.  Instead, the ADLC generates a conversion
12265 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether a branch is
// eligible for the short form using the is_short_branch_offset() predicate in the
// machine specific code section of the file.
12269 
12270 // Jump Direct - Label defines a relative address from JMP+1
12271 instruct jmpDir_short(label labl) %{
12272   match(Goto);
12273   effect(USE labl);
12274 
12275   ins_cost(300);
12276   format %{ "JMP,s  $labl" %}
12277   size(2);
12278   ins_encode %{
12279     Label* L = $labl$$label;
12280     __ jmpb(*L);
12281   %}
12282   ins_pipe( pipe_jmp );
12283   ins_short_branch(1);
12284 %}
12285 
12286 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12287 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12288   match(If cop cr);
12289   effect(USE labl);
12290 
12291   ins_cost(300);
12292   format %{ "J$cop,s  $labl" %}
12293   size(2);
12294   ins_encode %{
12295     Label* L = $labl$$label;
12296     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12297   %}
12298   ins_pipe( pipe_jcc );
12299   ins_short_branch(1);
12300 %}
12301 
12302 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12303 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12304   match(CountedLoopEnd cop cr);
12305   effect(USE labl);
12306 
12307   ins_cost(300);
12308   format %{ "J$cop,s  $labl\t# Loop end" %}
12309   size(2);
12310   ins_encode %{
12311     Label* L = $labl$$label;
12312     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12313   %}
12314   ins_pipe( pipe_jcc );
12315   ins_short_branch(1);
12316 %}
12317 
12318 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12319 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12320   match(CountedLoopEnd cop cmp);
12321   effect(USE labl);
12322 
12323   ins_cost(300);
12324   format %{ "J$cop,us $labl\t# Loop end" %}
12325   size(2);
12326   ins_encode %{
12327     Label* L = $labl$$label;
12328     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12329   %}
12330   ins_pipe( pipe_jcc );
12331   ins_short_branch(1);
12332 %}
12333 
12334 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12335   match(CountedLoopEnd cop cmp);
12336   effect(USE labl);
12337 
12338   ins_cost(300);
12339   format %{ "J$cop,us $labl\t# Loop end" %}
12340   size(2);
12341   ins_encode %{
12342     Label* L = $labl$$label;
12343     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12344   %}
12345   ins_pipe( pipe_jcc );
12346   ins_short_branch(1);
12347 %}
12348 
12349 // Jump Direct Conditional - using unsigned comparison
12350 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12351   match(If cop cmp);
12352   effect(USE labl);
12353 
12354   ins_cost(300);
12355   format %{ "J$cop,us $labl" %}
12356   size(2);
12357   ins_encode %{
12358     Label* L = $labl$$label;
12359     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12360   %}
12361   ins_pipe( pipe_jcc );
12362   ins_short_branch(1);
12363 %}
12364 
12365 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12366   match(If cop cmp);
12367   effect(USE labl);
12368 
12369   ins_cost(300);
12370   format %{ "J$cop,us $labl" %}
12371   size(2);
12372   ins_encode %{
12373     Label* L = $labl$$label;
12374     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12375   %}
12376   ins_pipe( pipe_jcc );
12377   ins_short_branch(1);
12378 %}
12379 
12380 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12381   match(If cop cmp);
12382   effect(USE labl);
12383 
12384   ins_cost(300);
12385   format %{ $$template
12386     if ($cop$$cmpcode == Assembler::notEqual) {
12387       $$emit$$"JP,u,s   $labl\n\t"
12388       $$emit$$"J$cop,u,s   $labl"
12389     } else {
12390       $$emit$$"JP,u,s   done\n\t"
12391       $$emit$$"J$cop,u,s  $labl\n\t"
12392       $$emit$$"done:"
12393     }
12394   %}
12395   size(4);
12396   ins_encode %{
12397     Label* l = $labl$$label;
12398     if ($cop$$cmpcode == Assembler::notEqual) {
12399       __ jccb(Assembler::parity, *l);
12400       __ jccb(Assembler::notEqual, *l);
12401     } else if ($cop$$cmpcode == Assembler::equal) {
12402       Label done;
12403       __ jccb(Assembler::parity, done);
12404       __ jccb(Assembler::equal, *l);
12405       __ bind(done);
12406     } else {
12407        ShouldNotReachHere();
12408     }
12409   %}
12410   ins_pipe(pipe_jcc);
12411   ins_short_branch(1);
12412 %}
12413 
12414 // ============================================================================
12415 // Long Compare
12416 //
12417 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12418 // is tricky.  The flavor of compare used depends on whether we are testing
12419 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12420 // The GE test is the negated LT test.  The LE test can be had by commuting
12421 // the operands (yielding a GE test) and then negating; negate again for the
12422 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12423 // NE test is negated from that.
12424 
12425 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12426 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12427 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12428 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12429 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12430 // foo match ends up with the wrong leaf.  One fix is to not match both
12431 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12432 // both forms beat the trinary form of long-compare and both are very useful
12433 // on Intel which has so few registers.
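
// A hedged C-level sketch of the LT/GE trick used below (illustration only): compare the
// low halves, then subtract the high halves with borrow into a scratch register.  The sign
// and overflow flags then match a full 64-bit subtraction, so a signed "less" / "greater or
// equal" branch can follow directly and the operands themselves are left untouched.
//
//   cmp(a_lo, b_lo);   // carry = borrow out of the low 32 bits
//   tmp = a_hi;
//   sbb(tmp, b_hi);    // tmp = a_hi - b_hi - borrow; SF/OF now describe (a - b) as 64-bit
//   // jl / jge here is equivalent to testing (int64)a < (int64)b, as in cmpL_reg_flags_LTGE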
12434 
12435 // Manifest a CmpL result in an integer register.  Very painful.
12436 // This is the test to avoid.
12437 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12438   match(Set dst (CmpL3 src1 src2));
12439   effect( KILL flags );
12440   ins_cost(1000);
12441   format %{ "XOR    $dst,$dst\n\t"
12442             "CMP    $src1.hi,$src2.hi\n\t"
12443             "JLT,s  m_one\n\t"
12444             "JGT,s  p_one\n\t"
12445             "CMP    $src1.lo,$src2.lo\n\t"
12446             "JB,s   m_one\n\t"
12447             "JEQ,s  done\n"
12448     "p_one:\tINC    $dst\n\t"
12449             "JMP,s  done\n"
12450     "m_one:\tDEC    $dst\n"
12451      "done:" %}
12452   ins_encode %{
12453     Label p_one, m_one, done;
12454     __ xorptr($dst$$Register, $dst$$Register);
12455     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12456     __ jccb(Assembler::less,    m_one);
12457     __ jccb(Assembler::greater, p_one);
12458     __ cmpl($src1$$Register, $src2$$Register);
12459     __ jccb(Assembler::below,   m_one);
12460     __ jccb(Assembler::equal,   done);
12461     __ bind(p_one);
12462     __ incrementl($dst$$Register);
12463     __ jmpb(done);
12464     __ bind(m_one);
12465     __ decrementl($dst$$Register);
12466     __ bind(done);
12467   %}
12468   ins_pipe( pipe_slow );
12469 %}
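
// For reference, a comment-only C sketch of the value the rule above
// materializes (the real work is the assembly in the format string; the C
// below is purely illustrative):
//
//   int cmpl3(jlong a, jlong b) {
//     if (a < b) return -1;
//     if (a > b) return  1;
//     return 0;
//   }
//
// Note the asymmetry in the emitted code: the high halves are compared
// signed (JLT/JGT) while the low halves, which only matter when the high
// halves are equal, are compared unsigned (JB).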
12470 
12471 //======
12472 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12473 // compares.  Can be used for LE or GT compares by reversing arguments.
12474 // NOT GOOD FOR EQ/NE tests.
12475 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12476   match( Set flags (CmpL src zero ));
12477   ins_cost(100);
12478   format %{ "TEST   $src.hi,$src.hi" %}
12479   opcode(0x85);
12480   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12481   ins_pipe( ialu_cr_reg_reg );
12482 %}
12483 
12484 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12485 // compares.  Can be used for LE or GT compares by reversing arguments.
12486 // NOT GOOD FOR EQ/NE tests.
12487 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12488   match( Set flags (CmpL src1 src2 ));
12489   effect( TEMP tmp );
12490   ins_cost(300);
12491   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12492             "MOV    $tmp,$src1.hi\n\t"
12493             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12494   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12495   ins_pipe( ialu_cr_reg_reg );
12496 %}
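
// Comment-only note on why the CMP/SBB pair above suffices: the CMP leaves
// CF equal to the borrow out of the low-word subtract, and the SBB then
// computes $src1.hi - $src2.hi - CF, so SF and OF end up describing the
// whole 64-bit signed subtract.  Only the LT/GE readings of those flags are
// meaningful, which is why the result is typed flagsReg_long_LTGE.  Roughly:
//
//   signed_lt  ==  (SF != OF)   after   CMP lo1,lo2 ; MOV tmp,hi1 ; SBB tmp,hi2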
12497 
// Long compares reg < zero/reg OR reg >= zero/reg.
12499 // Just a wrapper for a normal branch, plus the predicate test.
12500 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12501   match(If cmp flags);
12502   effect(USE labl);
12503   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12504   expand %{
12505     jmpCon(cmp,flags,labl);    // JLT or JGE...
12506   %}
12507 %}
12508 
12509 // Compare 2 longs and CMOVE longs.
12510 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12511   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12512   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12513   ins_cost(400);
12514   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12515             "CMOV$cmp $dst.hi,$src.hi" %}
12516   opcode(0x0F,0x40);
12517   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12518   ins_pipe( pipe_cmov_reg_long );
12519 %}
12520 
12521 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12522   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12523   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12524   ins_cost(500);
12525   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12526             "CMOV$cmp $dst.hi,$src.hi" %}
12527   opcode(0x0F,0x40);
12528   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12529   ins_pipe( pipe_cmov_reg_long );
12530 %}
12531 
12532 // Compare 2 longs and CMOVE ints.
12533 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12534   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12535   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12536   ins_cost(200);
12537   format %{ "CMOV$cmp $dst,$src" %}
12538   opcode(0x0F,0x40);
12539   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12540   ins_pipe( pipe_cmov_reg );
12541 %}
12542 
12543 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12544   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12545   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12546   ins_cost(250);
12547   format %{ "CMOV$cmp $dst,$src" %}
12548   opcode(0x0F,0x40);
12549   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12550   ins_pipe( pipe_cmov_mem );
12551 %}
12552 
// Compare 2 longs and CMOVE ptrs.
12554 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12555   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12556   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12557   ins_cost(200);
12558   format %{ "CMOV$cmp $dst,$src" %}
12559   opcode(0x0F,0x40);
12560   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12561   ins_pipe( pipe_cmov_reg );
12562 %}
12563 
12564 // Compare 2 longs and CMOVE doubles
12565 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12567   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12568   ins_cost(200);
12569   expand %{
12570     fcmovDPR_regS(cmp,flags,dst,src);
12571   %}
12572 %}
12573 
12574 // Compare 2 longs and CMOVE doubles
12575 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12577   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12578   ins_cost(200);
12579   expand %{
12580     fcmovD_regS(cmp,flags,dst,src);
12581   %}
12582 %}
12583 
12584 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12586   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12587   ins_cost(200);
12588   expand %{
12589     fcmovFPR_regS(cmp,flags,dst,src);
12590   %}
12591 %}
12592 
12593 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12595   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12596   ins_cost(200);
12597   expand %{
12598     fcmovF_regS(cmp,flags,dst,src);
12599   %}
12600 %}
12601 
12602 //======
12603 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12604 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12605   match( Set flags (CmpL src zero ));
12606   effect(TEMP tmp);
12607   ins_cost(200);
12608   format %{ "MOV    $tmp,$src.lo\n\t"
12609             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12610   ins_encode( long_cmp_flags0( src, tmp ) );
12611   ins_pipe( ialu_reg_reg_long );
12612 %}
12613 
12614 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12615 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12616   match( Set flags (CmpL src1 src2 ));
12617   ins_cost(200+300);
12618   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12619             "JNE,s  skip\n\t"
12620             "CMP    $src1.hi,$src2.hi\n\t"
12621      "skip:\t" %}
12622   ins_encode( long_cmp_flags1( src1, src2 ) );
12623   ins_pipe( ialu_cr_reg_reg );
12624 %}
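
// Comment-only note on the EQ/NE trick above: if the low halves differ, the
// JNE,s skips the high-half compare and ZF is already clear; if they match,
// the high-half compare decides.  Either way ZF ends up set exactly when
//
//   (lo1 == lo2) && (hi1 == hi2)     // illustrative only
//
// holds, and no other condition code is meaningful -- hence flagsReg_long_EQNE.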
12625 
12626 // Long compare reg == zero/reg OR reg != zero/reg
12627 // Just a wrapper for a normal branch, plus the predicate test.
12628 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12629   match(If cmp flags);
12630   effect(USE labl);
12631   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12632   expand %{
12633     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12634   %}
12635 %}
12636 
12637 // Compare 2 longs and CMOVE longs.
12638 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12639   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12640   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12641   ins_cost(400);
12642   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12643             "CMOV$cmp $dst.hi,$src.hi" %}
12644   opcode(0x0F,0x40);
12645   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12646   ins_pipe( pipe_cmov_reg_long );
12647 %}
12648 
12649 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12650   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12651   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12652   ins_cost(500);
12653   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12654             "CMOV$cmp $dst.hi,$src.hi" %}
12655   opcode(0x0F,0x40);
12656   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12657   ins_pipe( pipe_cmov_reg_long );
12658 %}
12659 
12660 // Compare 2 longs and CMOVE ints.
12661 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12662   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12663   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12664   ins_cost(200);
12665   format %{ "CMOV$cmp $dst,$src" %}
12666   opcode(0x0F,0x40);
12667   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12668   ins_pipe( pipe_cmov_reg );
12669 %}
12670 
12671 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12672   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12673   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12674   ins_cost(250);
12675   format %{ "CMOV$cmp $dst,$src" %}
12676   opcode(0x0F,0x40);
12677   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12678   ins_pipe( pipe_cmov_mem );
12679 %}
12680 
// Compare 2 longs and CMOVE ptrs.
12682 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12683   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12684   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12685   ins_cost(200);
12686   format %{ "CMOV$cmp $dst,$src" %}
12687   opcode(0x0F,0x40);
12688   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12689   ins_pipe( pipe_cmov_reg );
12690 %}
12691 
12692 // Compare 2 longs and CMOVE doubles
12693 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12695   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12696   ins_cost(200);
12697   expand %{
12698     fcmovDPR_regS(cmp,flags,dst,src);
12699   %}
12700 %}
12701 
12702 // Compare 2 longs and CMOVE doubles
12703 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12705   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12706   ins_cost(200);
12707   expand %{
12708     fcmovD_regS(cmp,flags,dst,src);
12709   %}
12710 %}
12711 
12712 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12714   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12715   ins_cost(200);
12716   expand %{
12717     fcmovFPR_regS(cmp,flags,dst,src);
12718   %}
12719 %}
12720 
12721 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12723   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12724   ins_cost(200);
12725   expand %{
12726     fcmovF_regS(cmp,flags,dst,src);
12727   %}
12728 %}
12729 
12730 //======
12731 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12732 // Same as cmpL_reg_flags_LEGT except must negate src
12733 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12734   match( Set flags (CmpL src zero ));
12735   effect( TEMP tmp );
12736   ins_cost(300);
12737   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12738             "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi" %}
12740   ins_encode( long_cmp_flags3(src, tmp) );
12741   ins_pipe( ialu_reg_reg_long );
12742 %}
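
// Comment-only note: the XOR/CMP/SBB sequence above computes 0 - $src, so
// read with the commuted condition code it answers $src > 0 / $src <= 0
// without needing a second long operand in registers.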
12743 
12744 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12745 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12746 // requires a commuted test to get the same result.
12747 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12748   match( Set flags (CmpL src1 src2 ));
12749   effect( TEMP tmp );
12750   ins_cost(300);
12751   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12752             "MOV    $tmp,$src2.hi\n\t"
12753             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12754   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12755   ins_pipe( ialu_cr_reg_reg );
12756 %}
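
// Comment-only note on the swapped operands above: the flags describe
// $src2 - $src1, so a "less than" reading of them really answers
// src2 < src1, i.e. src1 > src2.  cmpOp_commute supplies that reversed
// condition code, which is how LE and GT are served by the same CMP/SBB
// pattern:
//
//   (src1 <= src2)  ==  !(src2 < src1)     // illustrative only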
12757 
// Long compares reg <= zero/reg OR reg > zero/reg.
12759 // Just a wrapper for a normal branch, plus the predicate test
12760 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12761   match(If cmp flags);
12762   effect(USE labl);
12763   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12764   ins_cost(300);
12765   expand %{
12766     jmpCon(cmp,flags,labl);    // JGT or JLE...
12767   %}
12768 %}
12769 
12770 // Compare 2 longs and CMOVE longs.
12771 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12772   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12773   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12774   ins_cost(400);
12775   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12776             "CMOV$cmp $dst.hi,$src.hi" %}
12777   opcode(0x0F,0x40);
12778   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12779   ins_pipe( pipe_cmov_reg_long );
12780 %}
12781 
12782 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12783   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12784   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12785   ins_cost(500);
12786   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12787             "CMOV$cmp $dst.hi,$src.hi+4" %}
12788   opcode(0x0F,0x40);
12789   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12790   ins_pipe( pipe_cmov_reg_long );
12791 %}
12792 
12793 // Compare 2 longs and CMOVE ints.
12794 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12795   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12796   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12797   ins_cost(200);
12798   format %{ "CMOV$cmp $dst,$src" %}
12799   opcode(0x0F,0x40);
12800   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12801   ins_pipe( pipe_cmov_reg );
12802 %}
12803 
12804 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12805   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12806   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12807   ins_cost(250);
12808   format %{ "CMOV$cmp $dst,$src" %}
12809   opcode(0x0F,0x40);
12810   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12811   ins_pipe( pipe_cmov_mem );
12812 %}
12813 
12814 // Compare 2 longs and CMOVE ptrs.
12815 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12816   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12817   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12818   ins_cost(200);
12819   format %{ "CMOV$cmp $dst,$src" %}
12820   opcode(0x0F,0x40);
12821   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12822   ins_pipe( pipe_cmov_reg );
12823 %}
12824 
12825 // Compare 2 longs and CMOVE doubles
12826 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12828   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12829   ins_cost(200);
12830   expand %{
12831     fcmovDPR_regS(cmp,flags,dst,src);
12832   %}
12833 %}
12834 
12835 // Compare 2 longs and CMOVE doubles
12836 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12838   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12839   ins_cost(200);
12840   expand %{
12841     fcmovD_regS(cmp,flags,dst,src);
12842   %}
12843 %}
12844 
12845 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12847   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12848   ins_cost(200);
12849   expand %{
12850     fcmovFPR_regS(cmp,flags,dst,src);
12851   %}
12852 %}
12853 
12854 
12855 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12857   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12858   ins_cost(200);
12859   expand %{
12860     fcmovF_regS(cmp,flags,dst,src);
12861   %}
12862 %}
12863 
12864 
12865 // ============================================================================
12866 // Procedure Call/Return Instructions
12867 // Call Java Static Instruction
12868 // Note: If this code changes, the corresponding ret_addr_offset() and
12869 //       compute_padding() functions will have to be adjusted.
12870 instruct CallStaticJavaDirect(method meth) %{
12871   match(CallStaticJava);
12872   effect(USE meth);
12873 
12874   ins_cost(300);
12875   format %{ "CALL,static " %}
12876   opcode(0xE8); /* E8 cd */
12877   ins_encode( pre_call_resets,
12878               Java_Static_Call( meth ),
12879               call_epilog,
12880               post_call_FPU );
12881   ins_pipe( pipe_slow );
12882   ins_alignment(4);
12883 %}
12884 
12885 // Call Java Dynamic Instruction
12886 // Note: If this code changes, the corresponding ret_addr_offset() and
12887 //       compute_padding() functions will have to be adjusted.
12888 instruct CallDynamicJavaDirect(method meth) %{
12889   match(CallDynamicJava);
12890   effect(USE meth);
12891 
12892   ins_cost(300);
12893   format %{ "MOV    EAX,(oop)-1\n\t"
12894             "CALL,dynamic" %}
12895   opcode(0xE8); /* E8 cd */
12896   ins_encode( pre_call_resets,
12897               Java_Dynamic_Call( meth ),
12898               call_epilog,
12899               post_call_FPU );
12900   ins_pipe( pipe_slow );
12901   ins_alignment(4);
12902 %}
12903 
12904 // Call Runtime Instruction
12905 instruct CallRuntimeDirect(method meth) %{
12906   match(CallRuntime );
12907   effect(USE meth);
12908 
12909   ins_cost(300);
12910   format %{ "CALL,runtime " %}
12911   opcode(0xE8); /* E8 cd */
12912   // Use FFREEs to clear entries in float stack
12913   ins_encode( pre_call_resets,
12914               FFree_Float_Stack_All,
12915               Java_To_Runtime( meth ),
12916               post_call_FPU );
12917   ins_pipe( pipe_slow );
12918 %}
12919 
12920 // Call runtime without safepoint
12921 instruct CallLeafDirect(method meth) %{
12922   match(CallLeaf);
12923   effect(USE meth);
12924 
12925   ins_cost(300);
12926   format %{ "CALL_LEAF,runtime " %}
12927   opcode(0xE8); /* E8 cd */
12928   ins_encode( pre_call_resets,
12929               FFree_Float_Stack_All,
12930               Java_To_Runtime( meth ),
12931               Verify_FPU_For_Leaf, post_call_FPU );
12932   ins_pipe( pipe_slow );
12933 %}
12934 
12935 instruct CallLeafNoFPDirect(method meth) %{
12936   match(CallLeafNoFP);
12937   effect(USE meth);
12938 
12939   ins_cost(300);
12940   format %{ "CALL_LEAF_NOFP,runtime " %}
12941   opcode(0xE8); /* E8 cd */
12942   ins_encode(Java_To_Runtime(meth));
12943   ins_pipe( pipe_slow );
12944 %}
12945 
12946 
12947 // Return Instruction
12948 // Remove the return address & jump to it.
12949 instruct Ret() %{
12950   match(Return);
12951   format %{ "RET" %}
12952   opcode(0xC3);
12953   ins_encode(OpcP);
12954   ins_pipe( pipe_jmp );
12955 %}
12956 
12957 // Tail Call; Jump from runtime stub to Java code.
12958 // Also known as an 'interprocedural jump'.
12959 // Target of jump will eventually return to caller.
12960 // TailJump below removes the return address.
12961 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12962   match(TailCall jump_target method_oop );
12963   ins_cost(300);
12964   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12965   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12966   ins_encode( OpcP, RegOpc(jump_target) );
12967   ins_pipe( pipe_jmp );
12968 %}
12969 
12970 
12971 // Tail Jump; remove the return address; jump to target.
12972 // TailCall above leaves the return address around.
12973 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12974   match( TailJump jump_target ex_oop );
12975   ins_cost(300);
12976   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12977             "JMP    $jump_target " %}
12978   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12979   ins_encode( enc_pop_rdx,
12980               OpcP, RegOpc(jump_target) );
12981   ins_pipe( pipe_jmp );
12982 %}
12983 
12984 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler, and is set up
// just prior to jumping to this handler.  No code is emitted.
12987 instruct CreateException( eAXRegP ex_oop )
12988 %{
12989   match(Set ex_oop (CreateEx));
12990 
12991   size(0);
12992   // use the following format syntax
12993   format %{ "# exception oop is in EAX; no code emitted" %}
12994   ins_encode();
12995   ins_pipe( empty );
12996 %}
12997 
12998 
12999 // Rethrow exception:
13000 // The exception oop will come in the first argument position.
13001 // Then JUMP (not call) to the rethrow stub code.
13002 instruct RethrowException()
13003 %{
13004   match(Rethrow);
13005 
13006   // use the following format syntax
13007   format %{ "JMP    rethrow_stub" %}
13008   ins_encode(enc_rethrow);
13009   ins_pipe( pipe_jmp );
13010 %}
13011 
13012 // inlined locking and unlocking
13013 
13014 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13015   predicate(Compile::current()->use_rtm());
13016   match(Set cr (FastLock object box));
13017   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13018   ins_cost(300);
13019   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13020   ins_encode %{
13021     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13022                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13023                  _counters, _rtm_counters, _stack_rtm_counters,
13024                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13025                  true, ra_->C->profile_rtm());
13026   %}
13027   ins_pipe(pipe_slow);
13028 %}
13029 
13030 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13031   predicate(!Compile::current()->use_rtm());
13032   match(Set cr (FastLock object box));
13033   effect(TEMP tmp, TEMP scr, USE_KILL box);
13034   ins_cost(300);
13035   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13036   ins_encode %{
13037     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13038                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13039   %}
13040   ins_pipe(pipe_slow);
13041 %}
13042 
13043 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13044   match(Set cr (FastUnlock object box));
13045   effect(TEMP tmp, USE_KILL box);
13046   ins_cost(300);
13047   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13048   ins_encode %{
13049     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13050   %}
13051   ins_pipe(pipe_slow);
13052 %}
13053 
13054 
13055 
13056 // ============================================================================
13057 // Safepoint Instruction
13058 instruct safePoint_poll(eFlagsReg cr) %{
13059   match(SafePoint);
13060   effect(KILL cr);
13061 
13062   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0, however, we can end
  // up putting additional pressure on cache index 0 in the D$: because of
  // alignment (just like the situation at hand) the lower indices tend to
  // see more traffic.  It would be better to move the polling address to
  // offset 0 of the last cache line in the polling page.
13069 
13070   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13071   ins_cost(125);
  size(6);
13073   ins_encode( Safepoint_Poll() );
13074   ins_pipe( ialu_reg_mem );
13075 %}
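
// Comment-only note on the mechanism (the usual HotSpot polling-page scheme;
// the runtime side lives outside this file): the TEST above is just a read
// of the dedicated polling page.  To bring threads to a safepoint the VM
// makes that page inaccessible, the read faults, and the fault handler parks
// the thread in the safepoint machinery -- so the fast path costs a single
// load and no branches.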
13076 
13077 
13078 // ============================================================================
13079 // This name is KNOWN by the ADLC and cannot be changed.
13080 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13081 // for this guy.
13082 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13083   match(Set dst (ThreadLocal));
13084   effect(DEF dst, KILL cr);
13085 
13086   format %{ "MOV    $dst, Thread::current()" %}
13087   ins_encode %{
13088     Register dstReg = as_Register($dst$$reg);
13089     __ get_thread(dstReg);
13090   %}
13091   ins_pipe( ialu_reg_fat );
13092 %}
13093 
13094 
13095 
13096 //----------PEEPHOLE RULES-----------------------------------------------------
13097 // These must follow all instruction definitions as they use the names
13098 // defined in the instructions definitions.
13099 //
13100 // peepmatch ( root_instr_name [preceding_instruction]* );
13101 //
13102 // peepconstraint %{
13103 // (instruction_number.operand_name relational_op instruction_number.operand_name
13104 //  [, ...] );
13105 // // instruction numbers are zero-based using left to right order in peepmatch
13106 //
13107 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13108 // // provide an instruction_number.operand_name for each operand that appears
13109 // // in the replacement instruction's match rule
13110 //
13111 // ---------VM FLAGS---------------------------------------------------------
13112 //
13113 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13114 //
13115 // Each peephole rule is given an identifying number starting with zero and
13116 // increasing by one in the order seen by the parser.  An individual peephole
13117 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13118 // on the command-line.
13119 //
13120 // ---------CURRENT LIMITATIONS----------------------------------------------
13121 //
13122 // Only match adjacent instructions in same basic block
13123 // Only equality constraints
13124 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13125 // Only one replacement instruction
13126 //
13127 // ---------EXAMPLE----------------------------------------------------------
13128 //
13129 // // pertinent parts of existing instructions in architecture description
13130 // instruct movI(rRegI dst, rRegI src) %{
13131 //   match(Set dst (CopyI src));
13132 // %}
13133 //
13134 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13135 //   match(Set dst (AddI dst src));
13136 //   effect(KILL cr);
13137 // %}
13138 //
13139 // // Change (inc mov) to lea
13140 // peephole %{
//   // increment preceded by register-register move
13142 //   peepmatch ( incI_eReg movI );
13143 //   // require that the destination register of the increment
13144 //   // match the destination register of the move
13145 //   peepconstraint ( 0.dst == 1.dst );
13146 //   // construct a replacement instruction that sets
13147 //   // the destination to ( move's source register + one )
13148 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13149 // %}
13150 //
13151 // Implementation no longer uses movX instructions since
13152 // machine-independent system no longer uses CopyX nodes.
13153 //
13154 // peephole %{
13155 //   peepmatch ( incI_eReg movI );
13156 //   peepconstraint ( 0.dst == 1.dst );
13157 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13158 // %}
13159 //
13160 // peephole %{
13161 //   peepmatch ( decI_eReg movI );
13162 //   peepconstraint ( 0.dst == 1.dst );
13163 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13164 // %}
13165 //
13166 // peephole %{
13167 //   peepmatch ( addI_eReg_imm movI );
13168 //   peepconstraint ( 0.dst == 1.dst );
13169 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13170 // %}
13171 //
13172 // peephole %{
13173 //   peepmatch ( addP_eReg_imm movP );
13174 //   peepconstraint ( 0.dst == 1.dst );
13175 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13176 // %}
13177 
13178 // // Change load of spilled value to only a spill
13179 // instruct storeI(memory mem, rRegI src) %{
13180 //   match(Set mem (StoreI mem src));
13181 // %}
13182 //
13183 // instruct loadI(rRegI dst, memory mem) %{
13184 //   match(Set dst (LoadI mem));
13185 // %}
13186 //
13187 peephole %{
13188   peepmatch ( loadI storeI );
13189   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13190   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13191 %}
13192 
13193 //----------SMARTSPILL RULES---------------------------------------------------
13194 // These must follow all instruction definitions as they use the names
13195 // defined in the instructions definitions.