1 // 2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// Encoding column is the x86 register number (EAX=0 ... EDI=7).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is described as an L (low) / H (high) 32-bit half so a
// double can occupy a register pair with matching encodings.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (register masks lay out the high half two slots above the low half).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Writes the pair (lo, hi) into a 16-byte-aligned slot derived from 'adr'
// and returns the aligned address; callers pass an address with 8 bytes of
// slack so the aligned slot is always inside the pool.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU-mode /
// vector-state resets: 6 for the fldcw in 24-bit-fp mode, 3 for vzeroupper
// when wide vectors are in use. Must match the code actually emitted.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree-Float-Stack-All stub; recorded when the stub is emitted,
// -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte assembled from mod (f1), reg (f2), r/m (f3) fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into an opcode base.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModR/M + SIB + 8- or 32-bit displacement for [ESP+disp].
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M (+ optional SIB and displacement) bytes addressing
// [base + index*scale + displace]; index==0x4 means "no index".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (opcode 0x8B); a self-move emits
// nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch EFLAGS after a comiss/ucomiss so that a NaN operand reads as
// 'less than' (see the bit layout below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less (or
// unordered), 0 for equal, 1 for greater, based on the current flags.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; must mirror the code emitted by
// MachPrologNode::emit / verified_entry.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // popl EBP (0x58 | register encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST EAX, [polling page] with a poll_return relocation.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the epilog; the per-piece sizes here must stay in sync with
// the bytes emitted by MachEpilogNode::emit above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse classification of an allocator register for spill-copy decisions.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), format (cbuf == NULL, !do_size), or size a
// reg<->[ESP+offset] move; returns the accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// XMM <-> stack spill helper.  A register pair (reg_lo+1 == reg_hi) is a
// 64-bit double (movdbl), otherwise a 32-bit float (movflt).  Size math
// accounts for the EVEX prefix and compressed displacements when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX can compress the displacement to one byte if it is a multiple of
    // the operand size; ask the assembler.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM -> XMM register move.  Emits movdbl for a pair (double), movflt for a
// single float; the !PRODUCT branch prints the mnemonic that corresponds to
// the UseXmmRegToRegMoveAll setting (MOVAPS/MOVAPD vs. MOVSS/MOVSD).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM move via MOVD.  src_hi/dst_hi are unused: this is a 32-bit move
// (the caller asserts there is no second word).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR move via MOVD.  src_hi/dst_hi are unused: 32-bit move only.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR -> GPR move: 0x8B /r (MOV r32,r/m32), always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  If the source is not already at the
// top of the FP stack it is FLDed first and stored with a popping store.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // FSTP is /3 and FST is /2 in the ModRM reg field; the GPR numbers for EBX
  // (encoding 3) and EDX (encoding 2) are reused to pass those bits through
  // impl_helper's register slot.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
952 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 953 int src_hi, int dst_hi, uint ireg, outputStream* st); 954 955 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 956 int stack_offset, int reg, uint ireg, outputStream* st); 957 958 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 959 int dst_offset, uint ireg, outputStream* st) { 960 int calc_size = 0; 961 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 962 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 963 switch (ireg) { 964 case Op_VecS: 965 calc_size = 3+src_offset_size + 3+dst_offset_size; 966 break; 967 case Op_VecD: 968 calc_size = 3+src_offset_size + 3+dst_offset_size; 969 src_offset += 4; 970 dst_offset += 4; 971 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 972 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 973 calc_size += 3+src_offset_size + 3+dst_offset_size; 974 break; 975 case Op_VecX: 976 case Op_VecY: 977 case Op_VecZ: 978 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 979 break; 980 default: 981 ShouldNotReachHere(); 982 } 983 if (cbuf) { 984 MacroAssembler _masm(cbuf); 985 int offset = __ offset(); 986 switch (ireg) { 987 case Op_VecS: 988 __ pushl(Address(rsp, src_offset)); 989 __ popl (Address(rsp, dst_offset)); 990 break; 991 case Op_VecD: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 __ pushl(Address(rsp, src_offset+4)); 995 __ popl (Address(rsp, dst_offset+4)); 996 break; 997 case Op_VecX: 998 __ movdqu(Address(rsp, -16), xmm0); 999 __ movdqu(xmm0, Address(rsp, src_offset)); 1000 __ movdqu(Address(rsp, dst_offset), xmm0); 1001 __ movdqu(xmm0, Address(rsp, -16)); 1002 break; 1003 case Op_VecY: 1004 __ vmovdqu(Address(rsp, -32), xmm0); 1005 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1006 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1007 
__ vmovdqu(xmm0, Address(rsp, -32)); 1008 case Op_VecZ: 1009 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1010 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1011 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1012 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1013 break; 1014 default: 1015 ShouldNotReachHere(); 1016 } 1017 int size = __ offset() - offset; 1018 assert(size == calc_size, "incorrect size calculattion"); 1019 return size; 1020 #ifndef PRODUCT 1021 } else if (!do_size) { 1022 switch (ireg) { 1023 case Op_VecS: 1024 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1025 "popl [rsp + #%d]", 1026 src_offset, dst_offset); 1027 break; 1028 case Op_VecD: 1029 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1030 "popq [rsp + #%d]\n\t" 1031 "pushl [rsp + #%d]\n\t" 1032 "popq [rsp + #%d]", 1033 src_offset, dst_offset, src_offset+4, dst_offset+4); 1034 break; 1035 case Op_VecX: 1036 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1037 "movdqu xmm0, [rsp + #%d]\n\t" 1038 "movdqu [rsp + #%d], xmm0\n\t" 1039 "movdqu xmm0, [rsp - #16]", 1040 src_offset, dst_offset); 1041 break; 1042 case Op_VecY: 1043 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1044 "vmovdqu xmm0, [rsp + #%d]\n\t" 1045 "vmovdqu [rsp + #%d], xmm0\n\t" 1046 "vmovdqu xmm0, [rsp - #32]", 1047 src_offset, dst_offset); 1048 case Op_VecZ: 1049 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #64]", 1053 src_offset, dst_offset); 1054 break; 1055 default: 1056 ShouldNotReachHere(); 1057 } 1058 #endif 1059 } 1060 return calc_size; 1061 } 1062 1063 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1064 // Get registers to move 1065 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1066 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1067 OptoReg::Name 
dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies are dispatched wholesale to the vector helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the low-word copy does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes; the FST-only form is 2.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 bytes + disp) followed by the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value to the temp slot just created at [ESP].
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    // Release the temp slot, again via LEA so flags stay intact.
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Pretty-print mode: run implementation() with no CodeBuffer.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit mode: run implementation() into the CodeBuffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size-only mode: no emission, no printing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// LEA reg,[ESP+offset]: disp32 form when offset >= 128, disp8 form otherwise
// (must agree byte-for-byte with BoxLockNode::size()).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg,
0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// 7 bytes for the disp32 LEA form, 4 for the disp8 form emitted above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: compare the receiver's klass (in ECX) against the
// inline-cache klass (in EAX) and jump to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Must match the byte count emitted above (checked by the assert in emit()).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow-oop addressing: not applicable here (ShouldNotCallThis).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Narrow-klass addressing: not applicable here (ShouldNotCallThis).
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand that triggers an implicit null check with its
// "win95-safe" variant.  Walks the node's operands to find the one covering
// input edge 'idx', then swaps in the matching *_win95_safeOper.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Advance until the operand whose edge range contains idx is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) with a 32-bit-mask constant, and for a ConL whose
// upper half is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-register form (mod=3).
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a register-register ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,imm32 with a zero immediate, using the 0xB8+rd short form.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1712 // Check for 8-bit immediate, and set sign extend bit in opcode 1713 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1714 emit_opcode(cbuf, $primary | 0x02); } 1715 else { // If 32-bit immediate 1716 emit_opcode(cbuf, $primary); 1717 } 1718 // Emit r/m byte with secondary opcode, after primary opcode. 1719 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1720 %} 1721 1722 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1723 // Check for 8-bit immediate, and set sign extend bit in opcode 1724 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1725 $$$emit8$imm$$constant; 1726 } 1727 else { // If 32-bit immediate 1728 // Output immediate 1729 $$$emit32$imm$$constant; 1730 } 1731 %} 1732 1733 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1734 // Emit primary opcode and set sign-extend bit 1735 // Check for 8-bit immediate, and set sign extend bit in opcode 1736 int con = (int)$imm$$constant; // Throw away top bits 1737 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1738 // Emit r/m byte with secondary opcode, after primary opcode. 1739 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1740 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1741 else emit_d32(cbuf,con); 1742 %} 1743 1744 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1745 // Emit primary opcode and set sign-extend bit 1746 // Check for 8-bit immediate, and set sign extend bit in opcode 1747 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1748 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1749 // Emit r/m byte with tertiary opcode, after primary opcode. 
1750 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1751 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1752 else emit_d32(cbuf,con); 1753 %} 1754 1755 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1756 emit_cc(cbuf, $secondary, $dst$$reg ); 1757 %} 1758 1759 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1760 int destlo = $dst$$reg; 1761 int desthi = HIGH_FROM_LOW(destlo); 1762 // bswap lo 1763 emit_opcode(cbuf, 0x0F); 1764 emit_cc(cbuf, 0xC8, destlo); 1765 // bswap hi 1766 emit_opcode(cbuf, 0x0F); 1767 emit_cc(cbuf, 0xC8, desthi); 1768 // xchg lo and hi 1769 emit_opcode(cbuf, 0x87); 1770 emit_rm(cbuf, 0x3, destlo, desthi); 1771 %} 1772 1773 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1774 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1775 %} 1776 1777 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1778 $$$emit8$primary; 1779 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1780 %} 1781 1782 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1783 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1784 emit_d8(cbuf, op >> 8 ); 1785 emit_d8(cbuf, op & 255); 1786 %} 1787 1788 // emulate a CMOV with a conditional branch around a MOV 1789 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1790 // Invert sense of branch from sense of CMOV 1791 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1792 emit_d8( cbuf, $brOffs$$constant ); 1793 %} 1794 1795 enc_class enc_PartialSubtypeCheck( ) %{ 1796 Register Redi = as_Register(EDI_enc); // result register 1797 Register Reax = as_Register(EAX_enc); // super class 1798 Register Recx = as_Register(ECX_enc); // killed 1799 Register Resi = as_Register(ESI_enc); // sub class 1800 Label miss; 1801 1802 MacroAssembler _masm(&cbuf); 1803 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1804 NULL, &miss, 1805 /*set_cond_codes:*/ true); 1806 if ($primary) { 1807 __ xorptr(Redi, Redi); 1808 } 1809 __ bind(miss); 1810 %} 1811 1812 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1813 
MacroAssembler masm(&cbuf); 1814 int start = masm.offset(); 1815 if (UseSSE >= 2) { 1816 if (VerifyFPU) { 1817 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1818 } 1819 } else { 1820 // External c_calling_convention expects the FPU stack to be 'clean'. 1821 // Compiled code leaves it dirty. Do cleanup now. 1822 masm.empty_FPU_stack(); 1823 } 1824 if (sizeof_FFree_Float_Stack_All == -1) { 1825 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1826 } else { 1827 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1828 } 1829 %} 1830 1831 enc_class Verify_FPU_For_Leaf %{ 1832 if( VerifyFPU ) { 1833 MacroAssembler masm(&cbuf); 1834 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1835 } 1836 %} 1837 1838 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1839 // This is the instruction starting address for relocation info. 1840 cbuf.set_insts_mark(); 1841 $$$emit8$primary; 1842 // CALL directly to the runtime 1843 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1844 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1845 1846 if (UseSSE >= 2) { 1847 MacroAssembler _masm(&cbuf); 1848 BasicType rt = tf()->return_type(); 1849 1850 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1851 // A C runtime call where the return value is unused. In SSE2+ 1852 // mode the result needs to be removed from the FPU stack. It's 1853 // likely that this function call could be removed by the 1854 // optimizer if the C function is a pure function. 
1855 __ ffree(0); 1856 } else if (rt == T_FLOAT) { 1857 __ lea(rsp, Address(rsp, -4)); 1858 __ fstp_s(Address(rsp, 0)); 1859 __ movflt(xmm0, Address(rsp, 0)); 1860 __ lea(rsp, Address(rsp, 4)); 1861 } else if (rt == T_DOUBLE) { 1862 __ lea(rsp, Address(rsp, -8)); 1863 __ fstp_d(Address(rsp, 0)); 1864 __ movdbl(xmm0, Address(rsp, 0)); 1865 __ lea(rsp, Address(rsp, 8)); 1866 } 1867 } 1868 %} 1869 1870 1871 enc_class pre_call_resets %{ 1872 // If method sets FPU control word restore it here 1873 debug_only(int off0 = cbuf.insts_size()); 1874 if (ra_->C->in_24_bit_fp_mode()) { 1875 MacroAssembler _masm(&cbuf); 1876 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1877 } 1878 if (ra_->C->max_vector_size() > 16) { 1879 // Clear upper bits of YMM registers when current compiled code uses 1880 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1881 MacroAssembler _masm(&cbuf); 1882 __ vzeroupper(); 1883 } 1884 debug_only(int off1 = cbuf.insts_size()); 1885 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1886 %} 1887 1888 enc_class post_call_FPU %{ 1889 // If method sets FPU control word do it here also 1890 if (Compile::current()->in_24_bit_fp_mode()) { 1891 MacroAssembler masm(&cbuf); 1892 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1893 } 1894 %} 1895 1896 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1897 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1898 // who we intended to call. 
1899 cbuf.set_insts_mark(); 1900 $$$emit8$primary; 1901 if (!_method) { 1902 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1903 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1904 } else if (_optimized_virtual) { 1905 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1906 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1907 } else { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 static_call_Relocation::spec(), RELOC_IMM32 ); 1910 } 1911 if (_method) { // Emit stub for static call. 1912 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1913 if (stub == NULL) { 1914 ciEnv::current()->record_failure("CodeCache is full"); 1915 return; 1916 } 1917 } 1918 %} 1919 1920 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1921 MacroAssembler _masm(&cbuf); 1922 __ ic_call((address)$meth$$method); 1923 %} 1924 1925 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1926 int disp = in_bytes(Method::from_compiled_offset()); 1927 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1928 1929 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1930 cbuf.set_insts_mark(); 1931 $$$emit8$primary; 1932 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1933 emit_d8(cbuf, disp); // Displacement 1934 1935 %} 1936 1937 // Following encoding is no longer used, but may be restored if calling 1938 // convention changes significantly. 
1939 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1940 // 1941 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1942 // // int ic_reg = Matcher::inline_cache_reg(); 1943 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1944 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1945 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1946 // 1947 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1948 // // // so we load it immediately before the call 1949 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1950 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1951 // 1952 // // xor rbp,ebp 1953 // emit_opcode(cbuf, 0x33); 1954 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1955 // 1956 // // CALL to interpreter. 1957 // cbuf.set_insts_mark(); 1958 // $$$emit8$primary; 1959 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1960 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1961 // %} 1962 1963 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1964 $$$emit8$primary; 1965 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1966 $$$emit8$shift$$constant; 1967 %} 1968 1969 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1970 // Load immediate does not have a zero or sign extended version 1971 // for 8-bit immediates 1972 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1973 $$$emit32$src$$constant; 1974 %} 1975 1976 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1977 // Load immediate does not have a zero or sign extended version 1978 // for 8-bit immediates 1979 emit_opcode(cbuf, $primary + $dst$$reg); 1980 $$$emit32$src$$constant; 1981 %} 1982 1983 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1984 // Load immediate does not have a zero or sign extended version 1985 // for 8-bit immediates 1986 int dst_enc = $dst$$reg; 1987 int src_con = $src$$constant & 0x0FFFFFFFFL; 1988 if (src_con == 0) { 1989 // xor dst, dst 
1990 emit_opcode(cbuf, 0x33); 1991 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1992 } else { 1993 emit_opcode(cbuf, $primary + dst_enc); 1994 emit_d32(cbuf, src_con); 1995 } 1996 %} 1997 1998 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1999 // Load immediate does not have a zero or sign extended version 2000 // for 8-bit immediates 2001 int dst_enc = $dst$$reg + 2; 2002 int src_con = ((julong)($src$$constant)) >> 32; 2003 if (src_con == 0) { 2004 // xor dst, dst 2005 emit_opcode(cbuf, 0x33); 2006 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2007 } else { 2008 emit_opcode(cbuf, $primary + dst_enc); 2009 emit_d32(cbuf, src_con); 2010 } 2011 %} 2012 2013 2014 // Encode a reg-reg copy. If it is useless, then empty encoding. 2015 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2016 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2017 %} 2018 2019 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2020 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2021 %} 2022 2023 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2024 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2025 %} 2026 2027 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2028 $$$emit8$primary; 2029 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2030 %} 2031 2032 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2033 $$$emit8$secondary; 2034 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2035 %} 2036 2037 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2038 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2039 %} 2040 2041 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2042 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2043 %} 2044 2045 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2046 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2047 %} 2048 2049 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2050 // Output immediate 2051 $$$emit32$src$$constant; 2052 %} 2053 2054 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2055 // Output Float immediate bits 2056 jfloat jf = $src$$constant; 2057 int jf_as_bits = jint_cast( jf ); 2058 emit_d32(cbuf, jf_as_bits); 2059 %} 2060 2061 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2062 // Output Float immediate bits 2063 jfloat jf = $src$$constant; 2064 int jf_as_bits = jint_cast( jf ); 2065 emit_d32(cbuf, jf_as_bits); 2066 %} 2067 2068 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2069 // Output immediate 2070 $$$emit16$src$$constant; 2071 %} 2072 2073 enc_class Con_d32(immI src) %{ 2074 emit_d32(cbuf,$src$$constant); 2075 %} 2076 2077 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2078 // Output immediate memory reference 2079 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2080 emit_d32(cbuf, 0x00); 2081 %} 2082 2083 enc_class lock_prefix( ) %{ 2084 if( os::is_MP() ) 2085 emit_opcode(cbuf,0xF0); // [Lock] 2086 %} 2087 2088 // Cmp-xchg long value. 2089 // Note: we need to swap rbx, and rcx before and after the 2090 // cmpxchg8 instruction because the instruction uses 2091 // rcx as the high order word of the new value to store but 2092 // our register encoding uses rbx,. 
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // CMPXCHG8B takes the new-value high word in ECX and low word in EBX;
    // our register binding supplies them swapped, so exchange EBX/ECX
    // around the instruction (see note above this enc_class).
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-exchange of a 32-bit value in memory: LOCK CMPXCHG [Eptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the "not-equal" condition flag as a boolean register value:
  // res = 0 if ZF is clear (jne taken), 1 if ZF is set.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0   (MOV rather than XOR so the flags being tested survive)
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail  (skip the 5-byte MOV res,1 below)
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Emit the ModRM/SIB/displacement bytes for a register-memory form.
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // As RegMem, but addresses the HIGH half of a long register pair and
  // the high word of the memory operand (displacement + 4).
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift a long left or right by a constant 1..31 bits.
  // $tertiary is SHLD (0xA4) for a left shift or SHRD for a right shift;
  // it moves cnt bits across the register pair, then $primary/$secondary
  // (SHL/SHR family) shift the remaining half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by a constant 32..63 bits:
  // lo = hi >> (cnt-32); hi = hi >> 31 (sign fill).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move lo = hi
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long by a constant 32..63 bits:
  // move one half into the other, shift by cnt-32, clear the vacated half.
  // $secondary (0x5 = SHR) selects which half is which.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33); // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    // disp_for_half selects which 4-byte half of the double is addressed.
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // As RMopc_Mem_no_oop, but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst, [src0 + src1] encoded as a base register plus constant displacement.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst, src): compare, then conditionally skip the move.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL, 2-byte short displacement)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst, src): compare, then conditionally skip the move.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG, 2-byte short displacement)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: NEG dst (F7 /3).
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL dst: set byte register to 1 if "less", else 0 (0F 9C).
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p = (p < q) ? p+y : p, via SBB mask trick:
  // tmp becomes all-ones iff the subtract borrowed.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Shift a long left by a variable count in ECX (0..63).
  // For counts >= 32, pre-move lo into hi and clear lo; SHLD/SHL
  // then use only the low 5 bits of the count.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Logical right shift of a long by a variable count in ECX (0..63);
  // same structure as shift_left_long, mirrored.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Arithmetic right shift of a long by a variable count in ECX (0..63);
  // the >=32 case sign-fills hi with SAR hi,31 instead of clearing it.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  // x87 arithmetic against a stack register: $primary/$secondary come
  // from the instruction's opcode declaration.
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // Store TOS into ST(i) and pop the x87 stack.
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push ST(i-1) onto the x87 stack (register encodings are 1-based here).
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Load the 80-bit strict-fp subnormal bias constant #1 from StubRoutines
  // and multiply it into ST(dst), popping the constant.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Same as strictfp_bias1 but with subnormal bias constant #2
  // (the inverse scaling step).
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
2443 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2444 // Opcode already emitted 2445 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2446 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2447 emit_d32(cbuf, $dst$$disp); // Displacement 2448 %} 2449 2450 // Push the integer in stackSlot 'src' onto FP-stack 2451 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2452 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2453 %} 2454 2455 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2456 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2457 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2458 %} 2459 2460 // Same as Pop_Mem_F except for opcode 2461 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2462 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2463 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2464 %} 2465 2466 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2467 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2468 emit_d8( cbuf, 0xD8+$dst$$reg ); 2469 %} 2470 2471 enc_class Push_Reg_FPR( regFPR dst ) %{ 2472 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2473 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2474 %} 2475 2476 // Push FPU's float to a stack-slot, and pop FPU-stack 2477 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2478 int pop = 0x02; 2479 if ($src$$reg != FPR1L_enc) { 2480 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2481 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2482 pop = 0x03; 2483 } 2484 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2485 %} 2486 2487 // Push FPU's double to a stack-slot, and pop FPU-stack 2488 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2489 int pop = 0x02; 2490 if ($src$$reg != FPR1L_enc) { 2491 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2492 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2493 pop = 0x03; 2494 } 2495 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2496 %} 2497 2498 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2499 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2500 int pop = 0xD0 - 1; // -1 since we skip FLD 2501 if ($src$$reg != FPR1L_enc) { 2502 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2503 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2504 pop = 0xD8; 2505 } 2506 emit_opcode( cbuf, 0xDD ); 2507 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2508 %} 2509 2510 2511 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2512 // load dst in FPR0 2513 emit_opcode( cbuf, 0xD9 ); 2514 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2515 if ($src$$reg != FPR1L_enc) { 2516 // fincstp 2517 emit_opcode (cbuf, 0xD9); 2518 emit_opcode (cbuf, 0xF7); 2519 // swap src with FPR1: 2520 // FXCH FPR1 with src 2521 emit_opcode(cbuf, 0xD9); 2522 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2523 // fdecstp 2524 emit_opcode (cbuf, 0xD9); 2525 emit_opcode (cbuf, 0xF6); 2526 } 2527 %} 2528 2529 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2530 MacroAssembler _masm(&cbuf); 2531 __ subptr(rsp, 8); 2532 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2533 __ fld_d(Address(rsp, 0)); 2534 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2535 __ fld_d(Address(rsp, 0)); 2536 %} 2537 2538 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2539 MacroAssembler _masm(&cbuf); 2540 __ subptr(rsp, 4); 2541 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2542 __ fld_s(Address(rsp, 0)); 2543 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2544 __ fld_s(Address(rsp, 0)); 2545 %} 2546 2547 enc_class Push_ResultD(regD dst) %{ 2548 MacroAssembler _masm(&cbuf); 2549 __ fstp_d(Address(rsp, 0)); 2550 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2551 __ addptr(rsp, 8); 2552 %} 2553 2554 enc_class Push_ResultF(regF dst, immI d8) %{ 2555 MacroAssembler _masm(&cbuf); 2556 __ fstp_s(Address(rsp, 0)); 2557 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2558 __ addptr(rsp, $d8$$constant); 2559 %} 2560 2561 enc_class Push_SrcD(regD src) %{ 2562 MacroAssembler _masm(&cbuf); 2563 __ subptr(rsp, 8); 
// --- tail of an enc_class whose header precedes this chunk; presumably it
// --- spills an XMM double to [SP] and reloads it onto the x87 stack,
// --- like push_xmm_to_fpr1 below -- TODO confirm against preceding lines.
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Reserve an 8-byte stack temporary: SP -= 8.
enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

// Release the 8-byte stack temporary: SP += 8.
enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Store an XMM double into the stack temp at [SP], then load it onto
// the x87 stack (it becomes the new top-of-stack, FPR1/ST0).
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Rotate src to the x87 top-of-stack (FINCSTP / FXCH / FDECSTP) when it
// is not already FPR1; the actual result store is emitted elsewhere
// (see the commented-out FSTP below).
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copy the x87 status word into EFLAGS (FNSTSW AX / SAHF), then skip
// the following 5 bytes of code when parity (unordered result) is clear.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

// Remainder via FPREM.  FPREM only performs a partial reduction per
// iteration, so loop (JP back over the sequence) until the status bit
// that SAHF maps onto parity clears.
enc_class emitModDPR() %{
  // fprem must be iterative
  //  :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop  (rel32 = -12, back to the fprem)
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Convert x87 compare status into integer EFLAGS; an unordered (NaN)
// compare is forced to look like "less than" by setting AH=1 (carry).
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32  ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

// P6 variant: after the compare has already set EFLAGS, patch flags so
// a NaN (parity set) reads as "less than" (carry set) instead of
// unordered.
enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// Pseudo-code for CmpF_Result below:
// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;
// NOTE(review): the constants table above disagrees with the emitted
// code below, which stores -1 for less, 0 for equal, +1 for greater;
// the emitted code is authoritative.

// Materialize a float-compare result as an int in dst:
// -1 for NaN or less, 0 for equal, +1 for greater.
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8    ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extend a 32-bit int into a long register pair:
// dst.lo = src; dst.hi = src; dst.hi >>= 31 (arithmetic).
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Long -> x87 double: push the long onto the CPU stack, FILD the 64-bit
// value at [SP], then pop the 8 temp bytes back off the CPU stack.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// High half of a widening multiply: IMUL leaves the product's high word
// in EDX, which is then arithmetic-shifted by (cnt - 32) when cnt > 32.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
// (the 8 temp bytes are left on the CPU stack for the caller to pop)
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

// Widening signed multiply of two ints into EDX:EAX.
enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

// Widening unsigned multiply of two ints into EDX:EAX.
enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

// Full 64x64->64 schoolbook multiply (dst is EDX:EAX).
enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL   EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,ESI  ($tmp need not be ESI; comment reflects a common allocation)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

// Multiply a long by a small constant (0..127); same schoolbook scheme
// as long_multiply with the constant folded into both partial products.
enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL   EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD    EDX,ESI  ($tmp need not be ESI; comment reflects a common allocation)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// 64-bit divide: push both long operands and call SharedRuntime::ldiv,
// then pop the four argument words.
// NOTE(review): HIGH_FROM_LOW is applied to the whole PUSH opcode byte
// (0x50+reg); this is only equivalent to 0x50+HIGH_FROM_LOW(reg) if the
// macro is a simple additive mapping -- TODO confirm macro definition.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// 64-bit remainder: identical to long_div above but calls
// SharedRuntime::lrem (same HIGH_FROM_LOW caveat applies).
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Set ZF iff the long is zero: tmp = src.lo | src.hi.
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Long equality compare: compare the low words, and only if they are
// equal fall through to compare the high words (flags reflect the
// deciding compare).
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// Signed long compare via CMP lo / SBB hi into a scratch register.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB    $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Compare a long against zero: computes 0 - src via XOR/CMP/SBB.
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(cbuf,0x33);  // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff...
// ...smells like Gnu Superoptimizer
// Branchless two's-complement negate of a register pair:
// NEG hi; NEG lo; SBB hi,0 propagates the borrow from the low word.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7);    // NEG hi
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7);    // NEG lo
  emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
  emit_opcode(cbuf,0x83);    // SBB hi,0
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8    (cbuf,0 );
%}

// POP EDX.
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}

// Jump to the shared rethrow stub (relocated runtime call target).
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);        // jmp    entry
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}


// Convert a double to an int.  Java semantics require we do complex
// manglelations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // thrown an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);            // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack.  0x80000000 is the x87
  // "invalid" sentinel for out-of-range/NaN, so that value (and only
  // that value) takes the slow path through the d2i wrapper.
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Double -> long.  Same trunc-rounding-mode scheme as DPR2I_encoding;
// a result of 0x80000000:00000000 (EDX:EAX) takes the slow path via
// the d2l wrapper.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);            // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

// Fused subtract-then-divide against two x87 stack registers; the
// running value stays in ST.
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C*+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}


enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long: a single 64-bit FILD from memory,
// then store the x87 value into the destination stack slot.
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process.
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        |  spills|  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // (tables are indexed by ideal register type; lo/hi give the register
  // pair for the low and high halves of the value)
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values (Java calls)
  // NOTE: unlike c_return_value above, Java float results already use
  // XMM0 at UseSSE>=1; doubles still require UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);           // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required
// size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit word
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts within one word
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts that cross the word boundary
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// NOTE(review): duplicates immI1 above -- both match the constant 1.
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: any value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (byte-addressable registers EAX..EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// (next operand definition continues past this chunk)
operand
eCXRegI(xRegI reg) %{ 3719 constraint(ALLOC_IN_RC(ecx_reg)); 3720 match(reg); 3721 match(rRegI); 3722 3723 format %{ "ECX" %} 3724 interface(REG_INTER); 3725 %} 3726 3727 operand eDXRegI(xRegI reg) %{ 3728 constraint(ALLOC_IN_RC(edx_reg)); 3729 match(reg); 3730 match(rRegI); 3731 3732 format %{ "EDX" %} 3733 interface(REG_INTER); 3734 %} 3735 3736 operand eDIRegI(xRegI reg) %{ 3737 constraint(ALLOC_IN_RC(edi_reg)); 3738 match(reg); 3739 match(rRegI); 3740 3741 format %{ "EDI" %} 3742 interface(REG_INTER); 3743 %} 3744 3745 operand naxRegI() %{ 3746 constraint(ALLOC_IN_RC(nax_reg)); 3747 match(RegI); 3748 match(eCXRegI); 3749 match(eDXRegI); 3750 match(eSIRegI); 3751 match(eDIRegI); 3752 3753 format %{ %} 3754 interface(REG_INTER); 3755 %} 3756 3757 operand nadxRegI() %{ 3758 constraint(ALLOC_IN_RC(nadx_reg)); 3759 match(RegI); 3760 match(eBXRegI); 3761 match(eCXRegI); 3762 match(eSIRegI); 3763 match(eDIRegI); 3764 3765 format %{ %} 3766 interface(REG_INTER); 3767 %} 3768 3769 operand ncxRegI() %{ 3770 constraint(ALLOC_IN_RC(ncx_reg)); 3771 match(RegI); 3772 match(eAXRegI); 3773 match(eDXRegI); 3774 match(eSIRegI); 3775 match(eDIRegI); 3776 3777 format %{ %} 3778 interface(REG_INTER); 3779 %} 3780 3781 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3782 // // 3783 operand eSIRegI(xRegI reg) %{ 3784 constraint(ALLOC_IN_RC(esi_reg)); 3785 match(reg); 3786 match(rRegI); 3787 3788 format %{ "ESI" %} 3789 interface(REG_INTER); 3790 %} 3791 3792 // Pointer Register 3793 operand anyRegP() %{ 3794 constraint(ALLOC_IN_RC(any_reg)); 3795 match(RegP); 3796 match(eAXRegP); 3797 match(eBXRegP); 3798 match(eCXRegP); 3799 match(eDIRegP); 3800 match(eRegP); 3801 3802 format %{ %} 3803 interface(REG_INTER); 3804 %} 3805 3806 operand eRegP() %{ 3807 constraint(ALLOC_IN_RC(int_reg)); 3808 match(RegP); 3809 match(eAXRegP); 3810 match(eBXRegP); 3811 match(eCXRegP); 3812 match(eDIRegP); 3813 3814 format %{ %} 3815 interface(REG_INTER); 3816 %} 3817 3818 // 
On windows95, EBP is not safe to use for implicit null tests. 3819 operand eRegP_no_EBP() %{ 3820 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3821 match(RegP); 3822 match(eAXRegP); 3823 match(eBXRegP); 3824 match(eCXRegP); 3825 match(eDIRegP); 3826 3827 op_cost(100); 3828 format %{ %} 3829 interface(REG_INTER); 3830 %} 3831 3832 operand naxRegP() %{ 3833 constraint(ALLOC_IN_RC(nax_reg)); 3834 match(RegP); 3835 match(eBXRegP); 3836 match(eDXRegP); 3837 match(eCXRegP); 3838 match(eSIRegP); 3839 match(eDIRegP); 3840 3841 format %{ %} 3842 interface(REG_INTER); 3843 %} 3844 3845 operand nabxRegP() %{ 3846 constraint(ALLOC_IN_RC(nabx_reg)); 3847 match(RegP); 3848 match(eCXRegP); 3849 match(eDXRegP); 3850 match(eSIRegP); 3851 match(eDIRegP); 3852 3853 format %{ %} 3854 interface(REG_INTER); 3855 %} 3856 3857 operand pRegP() %{ 3858 constraint(ALLOC_IN_RC(p_reg)); 3859 match(RegP); 3860 match(eBXRegP); 3861 match(eDXRegP); 3862 match(eSIRegP); 3863 match(eDIRegP); 3864 3865 format %{ %} 3866 interface(REG_INTER); 3867 %} 3868 3869 // Special Registers 3870 // Return a pointer value 3871 operand eAXRegP(eRegP reg) %{ 3872 constraint(ALLOC_IN_RC(eax_reg)); 3873 match(reg); 3874 format %{ "EAX" %} 3875 interface(REG_INTER); 3876 %} 3877 3878 // Used in AtomicAdd 3879 operand eBXRegP(eRegP reg) %{ 3880 constraint(ALLOC_IN_RC(ebx_reg)); 3881 match(reg); 3882 format %{ "EBX" %} 3883 interface(REG_INTER); 3884 %} 3885 3886 // Tail-call (interprocedural jump) to interpreter 3887 operand eCXRegP(eRegP reg) %{ 3888 constraint(ALLOC_IN_RC(ecx_reg)); 3889 match(reg); 3890 format %{ "ECX" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 operand eSIRegP(eRegP reg) %{ 3895 constraint(ALLOC_IN_RC(esi_reg)); 3896 match(reg); 3897 format %{ "ESI" %} 3898 interface(REG_INTER); 3899 %} 3900 3901 // Used in rep stosw 3902 operand eDIRegP(eRegP reg) %{ 3903 constraint(ALLOC_IN_RC(edi_reg)); 3904 match(reg); 3905 format %{ "EDI" %} 3906 interface(REG_INTER); 3907 %} 3908 3909 operand eRegL() %{ 
3910 constraint(ALLOC_IN_RC(long_reg)); 3911 match(RegL); 3912 match(eADXRegL); 3913 3914 format %{ %} 3915 interface(REG_INTER); 3916 %} 3917 3918 operand eADXRegL( eRegL reg ) %{ 3919 constraint(ALLOC_IN_RC(eadx_reg)); 3920 match(reg); 3921 3922 format %{ "EDX:EAX" %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand eBCXRegL( eRegL reg ) %{ 3927 constraint(ALLOC_IN_RC(ebcx_reg)); 3928 match(reg); 3929 3930 format %{ "EBX:ECX" %} 3931 interface(REG_INTER); 3932 %} 3933 3934 // Special case for integer high multiply 3935 operand eADXRegL_low_only() %{ 3936 constraint(ALLOC_IN_RC(eadx_reg)); 3937 match(RegL); 3938 3939 format %{ "EAX" %} 3940 interface(REG_INTER); 3941 %} 3942 3943 // Flags register, used as output of compare instructions 3944 operand eFlagsReg() %{ 3945 constraint(ALLOC_IN_RC(int_flags)); 3946 match(RegFlags); 3947 3948 format %{ "EFLAGS" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 // Flags register, used as output of FLOATING POINT compare instructions 3953 operand eFlagsRegU() %{ 3954 constraint(ALLOC_IN_RC(int_flags)); 3955 match(RegFlags); 3956 3957 format %{ "EFLAGS_U" %} 3958 interface(REG_INTER); 3959 %} 3960 3961 operand eFlagsRegUCF() %{ 3962 constraint(ALLOC_IN_RC(int_flags)); 3963 match(RegFlags); 3964 predicate(false); 3965 3966 format %{ "EFLAGS_U_CF" %} 3967 interface(REG_INTER); 3968 %} 3969 3970 // Condition Code Register used by long compare 3971 operand flagsReg_long_LTGE() %{ 3972 constraint(ALLOC_IN_RC(int_flags)); 3973 match(RegFlags); 3974 format %{ "FLAGS_LTGE" %} 3975 interface(REG_INTER); 3976 %} 3977 operand flagsReg_long_EQNE() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 format %{ "FLAGS_EQNE" %} 3981 interface(REG_INTER); 3982 %} 3983 operand flagsReg_long_LEGT() %{ 3984 constraint(ALLOC_IN_RC(int_flags)); 3985 match(RegFlags); 3986 format %{ "FLAGS_LEGT" %} 3987 interface(REG_INTER); 3988 %} 3989 3990 // Float register operands 3991 operand regDPR() %{ 3992 predicate( UseSSE < 2 ); 
3993 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3994 match(RegD); 3995 match(regDPR1); 3996 match(regDPR2); 3997 format %{ %} 3998 interface(REG_INTER); 3999 %} 4000 4001 operand regDPR1(regDPR reg) %{ 4002 predicate( UseSSE < 2 ); 4003 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4004 match(reg); 4005 format %{ "FPR1" %} 4006 interface(REG_INTER); 4007 %} 4008 4009 operand regDPR2(regDPR reg) %{ 4010 predicate( UseSSE < 2 ); 4011 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4012 match(reg); 4013 format %{ "FPR2" %} 4014 interface(REG_INTER); 4015 %} 4016 4017 operand regnotDPR1(regDPR reg) %{ 4018 predicate( UseSSE < 2 ); 4019 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4020 match(reg); 4021 format %{ %} 4022 interface(REG_INTER); 4023 %} 4024 4025 // Float register operands 4026 operand regFPR() %{ 4027 predicate( UseSSE < 2 ); 4028 constraint(ALLOC_IN_RC(fp_flt_reg)); 4029 match(RegF); 4030 match(regFPR1); 4031 format %{ %} 4032 interface(REG_INTER); 4033 %} 4034 4035 // Float register operands 4036 operand regFPR1(regFPR reg) %{ 4037 predicate( UseSSE < 2 ); 4038 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4039 match(reg); 4040 format %{ "FPR1" %} 4041 interface(REG_INTER); 4042 %} 4043 4044 // XMM Float register operands 4045 operand regF() %{ 4046 predicate( UseSSE>=1 ); 4047 constraint(ALLOC_IN_RC(float_reg_legacy)); 4048 match(RegF); 4049 format %{ %} 4050 interface(REG_INTER); 4051 %} 4052 4053 // XMM Double register operands 4054 operand regD() %{ 4055 predicate( UseSSE>=2 ); 4056 constraint(ALLOC_IN_RC(double_reg_legacy)); 4057 match(RegD); 4058 format %{ %} 4059 interface(REG_INTER); 4060 %} 4061 4062 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4063 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand (no offset; disp is 0)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed; encodings are the x86 condition-code nibbles)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");   // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares; conditions are swapped relative
// to cmpOp (e.g. "less" encodes as "g") for the commuted operand order.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4627 4628 // Integer ALU reg operation 4629 pipe_class ialu_reg(rRegI dst) %{ 4630 single_instruction; 4631 dst : S4(write); 4632 dst : S3(read); 4633 DECODE : S0; // any decoder 4634 ALU : S3; // any alu 4635 %} 4636 4637 // Long ALU reg operation 4638 pipe_class ialu_reg_long(eRegL dst) %{ 4639 instruction_count(2); 4640 dst : S4(write); 4641 dst : S3(read); 4642 DECODE : S0(2); // any 2 decoders 4643 ALU : S3(2); // both alus 4644 %} 4645 4646 // Integer ALU reg operation using big decoder 4647 pipe_class ialu_reg_fat(rRegI dst) %{ 4648 single_instruction; 4649 dst : S4(write); 4650 dst : S3(read); 4651 D0 : S0; // big decoder only 4652 ALU : S3; // any alu 4653 %} 4654 4655 // Long ALU reg operation using big decoder 4656 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4657 instruction_count(2); 4658 dst : S4(write); 4659 dst : S3(read); 4660 D0 : S0(2); // big decoder only; twice 4661 ALU : S3(2); // any 2 alus 4662 %} 4663 4664 // Integer ALU reg-reg operation 4665 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4666 single_instruction; 4667 dst : S4(write); 4668 src : S3(read); 4669 DECODE : S0; // any decoder 4670 ALU : S3; // any alu 4671 %} 4672 4673 // Long ALU reg-reg operation 4674 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4675 instruction_count(2); 4676 dst : S4(write); 4677 src : S3(read); 4678 DECODE : S0(2); // any 2 decoders 4679 ALU : S3(2); // both alus 4680 %} 4681 4682 // Integer ALU reg-reg operation 4683 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4684 single_instruction; 4685 dst : S4(write); 4686 src : S3(read); 4687 D0 : S0; // big decoder only 4688 ALU : S3; // any alu 4689 %} 4690 4691 // Long ALU reg-reg operation 4692 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4693 instruction_count(2); 4694 dst : S4(write); 4695 src : S3(read); 4696 D0 : S0(2); // big decoder only; twice 4697 ALU : S3(2); // both alus 4698 %} 4699 4700 // Integer ALU reg-mem operation 4701 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4702 single_instruction; 4703 dst : S5(write); 4704 mem : S3(read); 4705 D0 : S0; // big decoder only 4706 ALU : S4; // any alu 4707 MEM : S3; // any mem 4708 %} 4709 4710 // Long ALU reg-mem operation 4711 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4712 instruction_count(2); 4713 dst : S5(write); 4714 mem : S3(read); 4715 D0 : S0(2); // big decoder only; twice 4716 ALU : S4(2); // any 2 alus 4717 MEM : S3(2); // both mems 4718 %} 4719 4720 // Integer mem operation (prefetch) 4721 pipe_class ialu_mem(memory mem) 4722 %{ 4723 single_instruction; 4724 mem : S3(read); 4725 D0 : S0; // big decoder only 4726 MEM : S3; // any mem 4727 %} 4728 4729 // Integer Store to Memory 4730 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4731 single_instruction; 4732 mem : S3(read); 4733 src : S5(read); 4734 D0 : S0; // big decoder only 4735 ALU : S4; // any alu 4736 MEM : S3; 4737 %} 4738 4739 // Long Store to Memory 4740 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4741 instruction_count(2); 4742 mem : S3(read); 4743 src : S5(read); 4744 D0 : S0(2); // big decoder only; twice 4745 ALU : S4(2); // any 2 alus 4746 MEM : S3(2); // Both mems 4747 %} 4748 4749 // Integer Store to Memory 4750 pipe_class ialu_mem_imm(memory mem) %{ 4751 single_instruction; 4752 mem : S3(read); 4753 D0 : S0; // big decoder only 4754 ALU : S4; // any alu 4755 MEM : S3; 4756 %} 4757 4758 // Integer ALU0 reg-reg operation 4759 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4760 single_instruction; 4761 dst : S4(write); 4762 src : S3(read); 4763 D0 : S0; // Big decoder only 4764 ALU0 : S3; // only alu0 4765 %} 4766 4767 // Integer ALU0 reg-mem operation 4768 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4769 single_instruction; 4770 dst : S5(write); 4771 mem : S3(read); 4772 D0 : S0; // big decoder only 4773 ALU0 : S4; // ALU0 only 4774 MEM : S3; // any mem 4775 %} 4776 4777 // Integer ALU reg-reg operation 4778 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
// --- continuation of an integer-ALU flags-producing pipe_class whose header
//     line lies above this chunk (reg-reg form; writes cr, reads two regs) ---
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (expands to a 4-instruction idiom)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long (two 32-bit moves, hence two decoders)
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation (one memory source, so one big decoder)
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation (two memory operands)
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (pure memory-to-memory move, no FPU resource)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation with a constant operand
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom (serializing compare-exchange sequence)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}  // end of the pipeline description section opened above this chunk

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the bytes of a 32-bit int in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a 64-bit long held in a 32-bit register pair:
// byte-swap each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two low bytes (unsigned short): swap all four bytes, then
// shift the result down 16 with zero fill.  SHR clobbers flags.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two low bytes (signed short): as above but arithmetic shift
// so the result is sign-extended.  SAR clobbers flags.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros with the LZCNT instruction (guarded by CPU-feature flag).
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback via BSR when LZCNT is unavailable.  BSR yields the index of the
// highest set bit (undefined and ZF=1 when src==0); the -1 fixup plus
// NEG/ADD converts that index into a leading-zero count (32 for src==0).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);      // src was zero: pretend bit index -1
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);  // 31 - index = leading-zero count
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with LZCNT: count in the high word first; LZCNT sets CF when
// its source is all zero, in which case we count the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);   // CF set => high word was zero
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
  __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSR: try the high word; if zero, fall back to the low
// word with the same -1 fixup, then convert the bit index (-1..63 relative
// to the long) into a leading-zero count via NEG/ADD 63.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);      // index is relative to the full 64 bits
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);              // whole long was zero
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1); // 63 - index = leading-zero count
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros with the TZCNT instruction (guarded by CPU-feature flag).
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback via BSF: BSF finds the lowest set bit (ZF=1, dst undefined when
// src==0), so patch in 32 for the all-zero case.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with TZCNT: count in the low word first; TZCNT sets CF when
// its source is all zero, in which case we count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // CF set => low word was zero
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSF: scan the low word, then the high word; result is
// 64 (32+32) when the whole long is zero.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);      // high word also zero: 32 + 32 below
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Count set bits with POPCNT (guarded by CPU-feature flag).
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// POPCNT directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long: sum the POPCNTs of the two 32-bit halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long in memory: POPCNT the two 4-byte halves at
// $mem and $mem+4 and add.  Raw addresses are built by hand because the
// operand only describes the base form.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // zero the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter, since the loaded value is zero-extended.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 narrowing idiom directly as a byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask reduces the load to a single zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit (non-negative) mask guarantees the result fits in the low word,
// so the high word can simply be zeroed.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; raw addresses built by hand for $mem and $mem+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic volatile long load via the x87 FPU (no SSE2 available): a 64-bit
// FILD/FISTP pair is the only atomic 8-byte memory move on these CPUs.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via an XMM temp (SSE2), result in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via an XMM temp (SSE2), result split into a
// 32-bit register pair: low half via MOVD, high half after a 32-bit PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmm$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double onto the x87 stack, then pop into the allocated FPU register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// As loadD, but for CPUs where clearing the upper XMM half is undesirable;
// format shows MOVLPD — presumably movdbl() selects MOVLPD (upper half
// preserved) when UseXmmLoadAndClearUpper is off; confirm in the assembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float via the x87 stack (no SSE at all).
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one instruct per supported addressing form.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter/cheaper than MOV 0, but kills flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero — XOR both halves (kills flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE load of a float constant from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Float 0.0: XORPS the register with itself — cheaper than a table load.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// x87 double constant from the constant table.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Double 0.0 via FLDZ.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Double 1.0 via FLD1.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 load of a double constant from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double 0.0: XORPD is cheaper than a constant-table load.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot: two 32-bit loads (low half, high half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float, x87)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double, x87)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE and PREFETCHW not requested: emit nothing (zero-size encoding).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// xRegI restricts the source to a byte-addressable register.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-instruction store; atomic (volatile) longs are handled
// by the store*_volatile rules below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low word is stored; ConvL2I discards the high word.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp so the 64-bit store
// is a single (atomic) MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is a GPR pair: assemble the 64-bit value in an
// XMM register (two MOVDs + PUNPCKLDQ), then store it atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Gated by UseStoreImmI16 (16-bit immediate stores stall some CPUs).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// regDPR1 pins the source to the x87 top-of-stack.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// The FST m32real store itself narrows the double (ConvD2F folded away).
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire is a no-op on x86; size(0) — this rule only constrains the
// scheduler, it emits no code.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release is likewise a no-op on x86.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; the assembler's membar(StoreLoad) emits a
// locked add to the stack on MP systems (format string mirrors that).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when Matcher::post_store_load_barrier proves a preceding store
// already provides the required ordering.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a reinterpretation, not a data move: constraining src and dst
// to the same register (EAX) makes it a zero-cost, empty encoding.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Pre-CMOV CPUs: emulate with a short branch around a MOV; the branch
// condition is the inverse of the CMOV condition (cmpcode^1).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of the CMOV emulation above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unordered-compare-flags variant: delegates to the unsigned rule.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV moves into ST0 only (regDPR1 dst) and takes unsigned/fcmov
// condition codes (cmpOp_fcmov).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1: single-byte INC (0x40 + reg), gated by the UseIncDec flag.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst need not equal src0, flags untouched.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1: single-byte DEC (0x48 + reg), gated by the UseIncDec flag.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with a memory operand as the second input.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add register into memory ([mem] += reg); load-op-store matched as one
// instruction.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate ([mem] += imm).
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// [mem] += 1, encoded as INC with the /0 ModRM extension.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// [mem] += -1, encoded as DEC with the /1 ModRM extension.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Compiler-only type narrowing; emits no code (size(0), empty encoding).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// Pointer cast marker; no code emitted.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// Integer cast marker; no code emitted, zero cost.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r is CMPXCHG r/m32,r32; lock_prefix makes it atomic.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG writes the memory value into EAX on failure, so oldval (EAX)
  // is clobbered.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // LOCK prefix only needed on multiprocessor machines.
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: requires CMPXCHG8B support (supports_cx8).  oldval lives in
// EDX:EAX and newval in ECX:EBX as the instruction demands; res is set to
// 0/1 from the resulting ZF via enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; oldval constrained to EAX as CMPXCHG requires.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit integer CAS; same shape as compareAndSwapP.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a locked ADD suffices, no XADD and no
// result register needed.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: LOCK XADD leaves the old memory value in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange; XCHG with a memory operand is implicitly locked, so no
// explicit LOCK prefix (and no flags clobbered - note: no KILL cr).
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic pointer exchange, same encoding as xchgI.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded value from a register (reg -= [mem]).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory ([mem] -= reg).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matched as AddP of a negated integer (0 - src).
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: 0 - dst matched as a one-instruction NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  // Two-byte opcode 0F AF (IMUL r32,r/m32); OpcS/OpcP emit the bytes in
  // the required order.
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half of EDX:EAX only; used as the
// constant operand of the mulI_imm_*high rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long multiplicand is a constant that fits in a
  // signed 32-bit value (so the widening multiply is really 32x32->64).
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: operands are ints zero-extended by the
// 0xffffffff mask.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // The CMP/JNE preamble special-cases min_jint / -1, which would
  // otherwise raise #DE (IDIV overflow); that case yields min_jint
  // with EDX zeroed.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Done by runtime call; arguments are pushed and SharedRuntime::ldiv
// is invoked (see long_div encoding).
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// DivModI is a multi-output node (quotient in EAX, remainder in EDX),
// so it is matched without a Set.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// IDIV leaves the remainder in EDX; the quotient register EAX is killed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Runtime call to SharedRuntime::lrem, mirroring divL_eReg.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // 64/32-bit division by a constant: divide by |imm| using two 32-bit
    // unsigned DIVs (hi then lo word), then fix the sign of the result.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    // Same two-step unsigned division as divL_eReg_imm32, but the
    // remainder (left in EDX by DIV) is the result; its sign follows
    // the dividend's.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Shift count must live in CL (eCXRegI) as the hardware requires.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, operating directly on memory.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipeline class is ialu_mem_imm although this is a
  // register form - confirm whether ialu_reg was intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24: the
// (src << 24) >> 24 idiom, which sign-extends the low byte.  Used by the
// compiler for the i2b bytecode; collapsed to a single MOVSX.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16: sign-extends
// the low 16 bits.  This idiom is used by the compiler for the i2s bytecode.
// Convert int to short: matches (x << 16) >> 16 and emits a single MOVSX.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (src1 ^ -1) & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit, matched from (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src - 1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 for this to be a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: the two shift counts must sum to 0 mod 32 for this to be a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC carry trick: after the copy, dst becomes 0 if src == 0, else 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0, built from SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst),
HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL
(AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSHR $dst.lo,$cnt-32\n"
          "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode(
shift_right_long( dst, shift ) ); 9190 ins_pipe( pipe_slow ); 9191 %} 9192 9193 // Shift Right Long by 1-31 9194 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9195 match(Set dst (RShiftL dst cnt)); 9196 effect(KILL cr); 9197 ins_cost(200); 9198 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9199 "SAR $dst.hi,$cnt" %} 9200 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9201 ins_encode( move_long_small_shift(dst,cnt) ); 9202 ins_pipe( ialu_reg_long ); 9203 %} 9204 9205 // Shift Right Long by 32-63 9206 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9207 match(Set dst (RShiftL dst cnt)); 9208 effect(KILL cr); 9209 ins_cost(300); 9210 format %{ "MOV $dst.lo,$dst.hi\n" 9211 "\tSAR $dst.lo,$cnt-32\n" 9212 "\tSAR $dst.hi,31" %} 9213 opcode(0xC1, 0x7); /* C1 /7 ib */ 9214 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9215 ins_pipe( ialu_reg_long ); 9216 %} 9217 9218 // Shift Right arithmetic Long by variable 9219 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9220 match(Set dst (RShiftL dst shift)); 9221 effect(KILL cr); 9222 ins_cost(600); 9223 size(18); 9224 format %{ "TEST $shift,32\n\t" 9225 "JEQ,s small\n\t" 9226 "MOV $dst.lo,$dst.hi\n\t" 9227 "SAR $dst.hi,31\n" 9228 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9229 "SAR $dst.hi,$shift" %} 9230 ins_encode( shift_right_arith_long( dst, shift ) ); 9231 ins_pipe( pipe_slow ); 9232 %} 9233 9234 9235 //----------Double Instructions------------------------------------------------ 9236 // Double Math 9237 9238 // Compare & branch 9239 9240 // P6 version of float compare, sets condition codes in EFLAGS 9241 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9242 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9243 match(Set cr (CmpD src1 src2)); 9244 effect(KILL rax); 9245 ins_cost(150); 9246 format %{ "FLD $src1\n\t" 9247 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9248 "JNP exit\n\t" 9249 "MOV ah,1 // saw a NaN, set CF\n\t" 9250 
"SAHF\n" 9251 "exit:\tNOP // avoid branch to branch" %} 9252 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9253 ins_encode( Push_Reg_DPR(src1), 9254 OpcP, RegOpc(src2), 9255 cmpF_P6_fixup ); 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9260 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9261 match(Set cr (CmpD src1 src2)); 9262 ins_cost(150); 9263 format %{ "FLD $src1\n\t" 9264 "FUCOMIP ST,$src2 // P6 instruction" %} 9265 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9266 ins_encode( Push_Reg_DPR(src1), 9267 OpcP, RegOpc(src2)); 9268 ins_pipe( pipe_slow ); 9269 %} 9270 9271 // Compare & branch 9272 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9273 predicate(UseSSE<=1); 9274 match(Set cr (CmpD src1 src2)); 9275 effect(KILL rax); 9276 ins_cost(200); 9277 format %{ "FLD $src1\n\t" 9278 "FCOMp $src2\n\t" 9279 "FNSTSW AX\n\t" 9280 "TEST AX,0x400\n\t" 9281 "JZ,s flags\n\t" 9282 "MOV AH,1\t# unordered treat as LT\n" 9283 "flags:\tSAHF" %} 9284 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9285 ins_encode( Push_Reg_DPR(src1), 9286 OpcP, RegOpc(src2), 9287 fpu_flags); 9288 ins_pipe( pipe_slow ); 9289 %} 9290 9291 // Compare vs zero into -1,0,1 9292 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9293 predicate(UseSSE<=1); 9294 match(Set dst (CmpD3 src1 zero)); 9295 effect(KILL cr, KILL rax); 9296 ins_cost(280); 9297 format %{ "FTSTD $dst,$src1" %} 9298 opcode(0xE4, 0xD9); 9299 ins_encode( Push_Reg_DPR(src1), 9300 OpcS, OpcP, PopFPU, 9301 CmpF_Result(dst)); 9302 ins_pipe( pipe_slow ); 9303 %} 9304 9305 // Compare into -1,0,1 9306 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9307 predicate(UseSSE<=1); 9308 match(Set dst (CmpD3 src1 src2)); 9309 effect(KILL cr, KILL rax); 9310 ins_cost(300); 9311 format %{ "FCMPD $dst,$src1,$src2" %} 9312 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9313 ins_encode( 
Push_Reg_DPR(src1), 9314 OpcP, RegOpc(src2), 9315 CmpF_Result(dst)); 9316 ins_pipe( pipe_slow ); 9317 %} 9318 9319 // float compare and set condition codes in EFLAGS by XMM regs 9320 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9321 predicate(UseSSE>=2); 9322 match(Set cr (CmpD src1 src2)); 9323 ins_cost(145); 9324 format %{ "UCOMISD $src1,$src2\n\t" 9325 "JNP,s exit\n\t" 9326 "PUSHF\t# saw NaN, set CF\n\t" 9327 "AND [rsp], #0xffffff2b\n\t" 9328 "POPF\n" 9329 "exit:" %} 9330 ins_encode %{ 9331 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9332 emit_cmpfp_fixup(_masm); 9333 %} 9334 ins_pipe( pipe_slow ); 9335 %} 9336 9337 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9338 predicate(UseSSE>=2); 9339 match(Set cr (CmpD src1 src2)); 9340 ins_cost(100); 9341 format %{ "UCOMISD $src1,$src2" %} 9342 ins_encode %{ 9343 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9344 %} 9345 ins_pipe( pipe_slow ); 9346 %} 9347 9348 // float compare and set condition codes in EFLAGS by XMM regs 9349 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9350 predicate(UseSSE>=2); 9351 match(Set cr (CmpD src1 (LoadD src2))); 9352 ins_cost(145); 9353 format %{ "UCOMISD $src1,$src2\n\t" 9354 "JNP,s exit\n\t" 9355 "PUSHF\t# saw NaN, set CF\n\t" 9356 "AND [rsp], #0xffffff2b\n\t" 9357 "POPF\n" 9358 "exit:" %} 9359 ins_encode %{ 9360 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9361 emit_cmpfp_fixup(_masm); 9362 %} 9363 ins_pipe( pipe_slow ); 9364 %} 9365 9366 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9367 predicate(UseSSE>=2); 9368 match(Set cr (CmpD src1 (LoadD src2))); 9369 ins_cost(100); 9370 format %{ "UCOMISD $src1,$src2" %} 9371 ins_encode %{ 9372 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9373 %} 9374 ins_pipe( pipe_slow ); 9375 %} 9376 9377 // Compare into -1,0,1 in XMM 9378 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9379 predicate(UseSSE>=2); 9380 match(Set dst (CmpD3 src1 src2)); 
9381 effect(KILL cr); 9382 ins_cost(255); 9383 format %{ "UCOMISD $src1, $src2\n\t" 9384 "MOV $dst, #-1\n\t" 9385 "JP,s done\n\t" 9386 "JB,s done\n\t" 9387 "SETNE $dst\n\t" 9388 "MOVZB $dst, $dst\n" 9389 "done:" %} 9390 ins_encode %{ 9391 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9392 emit_cmpfp3(_masm, $dst$$Register); 9393 %} 9394 ins_pipe( pipe_slow ); 9395 %} 9396 9397 // Compare into -1,0,1 in XMM and memory 9398 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9399 predicate(UseSSE>=2); 9400 match(Set dst (CmpD3 src1 (LoadD src2))); 9401 effect(KILL cr); 9402 ins_cost(275); 9403 format %{ "UCOMISD $src1, $src2\n\t" 9404 "MOV $dst, #-1\n\t" 9405 "JP,s done\n\t" 9406 "JB,s done\n\t" 9407 "SETNE $dst\n\t" 9408 "MOVZB $dst, $dst\n" 9409 "done:" %} 9410 ins_encode %{ 9411 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9412 emit_cmpfp3(_masm, $dst$$Register); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 9418 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9419 predicate (UseSSE <=1); 9420 match(Set dst (SubD dst src)); 9421 9422 format %{ "FLD $src\n\t" 9423 "DSUBp $dst,ST" %} 9424 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9425 ins_cost(150); 9426 ins_encode( Push_Reg_DPR(src), 9427 OpcP, RegOpc(dst) ); 9428 ins_pipe( fpu_reg_reg ); 9429 %} 9430 9431 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9432 predicate (UseSSE <=1); 9433 match(Set dst (RoundDouble (SubD src1 src2))); 9434 ins_cost(250); 9435 9436 format %{ "FLD $src2\n\t" 9437 "DSUB ST,$src1\n\t" 9438 "FSTP_D $dst\t# D-round" %} 9439 opcode(0xD8, 0x5); 9440 ins_encode( Push_Reg_DPR(src2), 9441 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9442 ins_pipe( fpu_mem_reg_reg ); 9443 %} 9444 9445 9446 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9447 predicate (UseSSE <=1); 9448 match(Set dst (SubD dst (LoadD src))); 9449 ins_cost(150); 9450 9451 format %{ "FLD $src\n\t" 9452 "DSUBp $dst,ST" %} 9453 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9454 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9455 OpcP, RegOpc(dst) ); 9456 ins_pipe( fpu_reg_mem ); 9457 %} 9458 9459 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9460 predicate (UseSSE<=1); 9461 match(Set dst (AbsD src)); 9462 ins_cost(100); 9463 format %{ "FABS" %} 9464 opcode(0xE1, 0xD9); 9465 ins_encode( OpcS, OpcP ); 9466 ins_pipe( fpu_reg_reg ); 9467 %} 9468 9469 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9470 predicate(UseSSE<=1); 9471 match(Set dst (NegD src)); 9472 ins_cost(100); 9473 format %{ "FCHS" %} 9474 opcode(0xE0, 0xD9); 9475 ins_encode( OpcS, OpcP ); 9476 ins_pipe( fpu_reg_reg ); 9477 %} 9478 9479 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9480 predicate(UseSSE<=1); 9481 match(Set dst (AddD dst src)); 9482 format %{ "FLD $src\n\t" 9483 "DADD $dst,ST" %} 9484 size(4); 9485 ins_cost(150); 9486 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9487 ins_encode( Push_Reg_DPR(src), 9488 OpcP, RegOpc(dst) ); 9489 ins_pipe( fpu_reg_reg ); 9490 %} 9491 9492 9493 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9494 predicate(UseSSE<=1); 9495 match(Set dst (RoundDouble (AddD src1 src2))); 9496 ins_cost(250); 9497 9498 format %{ "FLD $src2\n\t" 9499 "DADD ST,$src1\n\t" 9500 "FSTP_D $dst\t# D-round" %} 9501 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9502 ins_encode( Push_Reg_DPR(src2), 9503 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9504 ins_pipe( fpu_mem_reg_reg ); 9505 %} 9506 9507 9508 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9509 predicate(UseSSE<=1); 9510 match(Set dst (AddD dst (LoadD src))); 9511 ins_cost(150); 9512 9513 format %{ "FLD $src\n\t" 9514 "DADDp $dst,ST" %} 9515 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9516 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9517 OpcP, RegOpc(dst) ); 9518 ins_pipe( fpu_reg_mem ); 9519 %} 9520 9521 // add-to-memory 9522 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9523 predicate(UseSSE<=1); 9524 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9525 ins_cost(150); 9526 9527 format %{ "FLD_D $dst\n\t" 9528 "DADD ST,$src\n\t" 9529 "FST_D $dst" %} 9530 opcode(0xDD, 0x0); 9531 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9532 Opcode(0xD8), RegOpc(src), 9533 set_instruction_start, 9534 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9535 ins_pipe( fpu_reg_mem ); 9536 %} 9537 9538 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9539 predicate(UseSSE<=1); 9540 match(Set dst (AddD dst con)); 9541 ins_cost(125); 9542 format %{ "FLD1\n\t" 9543 "DADDp $dst,ST" %} 9544 ins_encode %{ 9545 __ fld1(); 9546 __ faddp($dst$$reg); 9547 %} 9548 ins_pipe(fpu_reg); 9549 %} 9550 9551 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9552 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9553 match(Set dst (AddD dst con)); 9554 ins_cost(200); 9555 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9556 "DADDp $dst,ST" %} 9557 ins_encode %{ 9558 __ fld_d($constantaddress($con)); 9559 __ faddp($dst$$reg); 9560 %} 9561 ins_pipe(fpu_reg_mem); 9562 %} 9563 9564 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9565 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9566 match(Set dst (RoundDouble (AddD src con))); 9567 ins_cost(200); 9568 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9569 "DADD ST,$src\n\t" 9570 "FSTP_D $dst\t# D-round" %} 9571 ins_encode %{ 9572 __ fld_d($constantaddress($con)); 9573 __ fadd($src$$reg); 9574 __ fstp_d(Address(rsp, $dst$$disp)); 9575 %} 9576 ins_pipe(fpu_mem_reg_con); 9577 %} 9578 9579 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9580 predicate(UseSSE<=1); 9581 match(Set dst (MulD dst src)); 9582 format %{ "FLD $src\n\t" 9583 "DMULp $dst,ST" %} 9584 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9585 ins_cost(150); 9586 ins_encode( Push_Reg_DPR(src), 9587 OpcP, RegOpc(dst) ); 9588 ins_pipe( 
fpu_reg_reg ); 9589 %} 9590 9591 // Strict FP instruction biases argument before multiply then 9592 // biases result to avoid double rounding of subnormals. 9593 // 9594 // scale arg1 by multiplying arg1 by 2^(-15360) 9595 // load arg2 9596 // multiply scaled arg1 by arg2 9597 // rescale product by 2^(15360) 9598 // 9599 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9600 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9601 match(Set dst (MulD dst src)); 9602 ins_cost(1); // Select this instruction for all strict FP double multiplies 9603 9604 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9605 "DMULp $dst,ST\n\t" 9606 "FLD $src\n\t" 9607 "DMULp $dst,ST\n\t" 9608 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9609 "DMULp $dst,ST\n\t" %} 9610 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9611 ins_encode( strictfp_bias1(dst), 9612 Push_Reg_DPR(src), 9613 OpcP, RegOpc(dst), 9614 strictfp_bias2(dst) ); 9615 ins_pipe( fpu_reg_reg ); 9616 %} 9617 9618 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9619 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9620 match(Set dst (MulD dst con)); 9621 ins_cost(200); 9622 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9623 "DMULp $dst,ST" %} 9624 ins_encode %{ 9625 __ fld_d($constantaddress($con)); 9626 __ fmulp($dst$$reg); 9627 %} 9628 ins_pipe(fpu_reg_mem); 9629 %} 9630 9631 9632 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9633 predicate( UseSSE<=1 ); 9634 match(Set dst (MulD dst (LoadD src))); 9635 ins_cost(200); 9636 format %{ "FLD_D $src\n\t" 9637 "DMULp $dst,ST" %} 9638 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9639 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9640 OpcP, RegOpc(dst) ); 9641 ins_pipe( fpu_reg_mem ); 9642 %} 9643 9644 // 9645 // Cisc-alternate to reg-reg multiply 9646 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9647 predicate( UseSSE<=1 ); 9648 match(Set dst (MulD src (LoadD mem))); 9649 ins_cost(250); 9650 format %{ "FLD_D $mem\n\t" 9651 "DMUL ST,$src\n\t" 9652 "FSTP_D $dst" %} 9653 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9654 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9655 OpcReg_FPR(src), 9656 Pop_Reg_DPR(dst) ); 9657 ins_pipe( fpu_reg_reg_mem ); 9658 %} 9659 9660 9661 // MACRO3 -- addDPR a mulDPR 9662 // This instruction is a '2-address' instruction in that the result goes 9663 // back to src2. This eliminates a move from the macro; possibly the 9664 // register allocator will have to add it back (and maybe not). 9665 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9666 predicate( UseSSE<=1 ); 9667 match(Set src2 (AddD (MulD src0 src1) src2)); 9668 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9669 "DMUL ST,$src1\n\t" 9670 "DADDp $src2,ST" %} 9671 ins_cost(250); 9672 opcode(0xDD); /* LoadD DD /0 */ 9673 ins_encode( Push_Reg_FPR(src0), 9674 FMul_ST_reg(src1), 9675 FAddP_reg_ST(src2) ); 9676 ins_pipe( fpu_reg_reg_reg ); 9677 %} 9678 9679 9680 // MACRO3 -- subDPR a mulDPR 9681 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9682 predicate( UseSSE<=1 ); 9683 match(Set src2 (SubD (MulD src0 src1) src2)); 9684 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9685 "DMUL ST,$src1\n\t" 9686 "DSUBRp $src2,ST" %} 9687 ins_cost(250); 9688 ins_encode( Push_Reg_FPR(src0), 9689 FMul_ST_reg(src1), 9690 Opcode(0xDE), Opc_plus(0xE0,src2)); 9691 ins_pipe( fpu_reg_reg_reg ); 9692 %} 9693 9694 9695 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9696 predicate( UseSSE<=1 ); 9697 match(Set dst (DivD dst src)); 9698 9699 format %{ "FLD $src\n\t" 9700 "FDIVp $dst,ST" %} 9701 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9702 ins_cost(150); 9703 ins_encode( Push_Reg_DPR(src), 9704 OpcP, RegOpc(dst) ); 9705 ins_pipe( fpu_reg_reg ); 9706 %} 9707 9708 // Strict FP instruction biases argument before division then 9709 // biases 
result, to avoid double rounding of subnormals. 9710 // 9711 // scale dividend by multiplying dividend by 2^(-15360) 9712 // load divisor 9713 // divide scaled dividend by divisor 9714 // rescale quotient by 2^(15360) 9715 // 9716 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9717 predicate (UseSSE<=1); 9718 match(Set dst (DivD dst src)); 9719 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9720 ins_cost(01); 9721 9722 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9723 "DMULp $dst,ST\n\t" 9724 "FLD $src\n\t" 9725 "FDIVp $dst,ST\n\t" 9726 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9727 "DMULp $dst,ST\n\t" %} 9728 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9729 ins_encode( strictfp_bias1(dst), 9730 Push_Reg_DPR(src), 9731 OpcP, RegOpc(dst), 9732 strictfp_bias2(dst) ); 9733 ins_pipe( fpu_reg_reg ); 9734 %} 9735 9736 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9737 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9738 match(Set dst (RoundDouble (DivD src1 src2))); 9739 9740 format %{ "FLD $src1\n\t" 9741 "FDIV ST,$src2\n\t" 9742 "FSTP_D $dst\t# D-round" %} 9743 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9744 ins_encode( Push_Reg_DPR(src1), 9745 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9746 ins_pipe( fpu_mem_reg_reg ); 9747 %} 9748 9749 9750 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9751 predicate(UseSSE<=1); 9752 match(Set dst (ModD dst src)); 9753 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9754 9755 format %{ "DMOD $dst,$src" %} 9756 ins_cost(250); 9757 ins_encode(Push_Reg_Mod_DPR(dst, src), 9758 emitModDPR(), 9759 Push_Result_Mod_DPR(src), 9760 Pop_Reg_DPR(dst)); 9761 ins_pipe( pipe_slow ); 9762 %} 9763 9764 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9765 predicate(UseSSE>=2); 9766 match(Set dst (ModD src0 src1)); 
9767 effect(KILL rax, KILL cr); 9768 9769 format %{ "SUB ESP,8\t # DMOD\n" 9770 "\tMOVSD [ESP+0],$src1\n" 9771 "\tFLD_D [ESP+0]\n" 9772 "\tMOVSD [ESP+0],$src0\n" 9773 "\tFLD_D [ESP+0]\n" 9774 "loop:\tFPREM\n" 9775 "\tFWAIT\n" 9776 "\tFNSTSW AX\n" 9777 "\tSAHF\n" 9778 "\tJP loop\n" 9779 "\tFSTP_D [ESP+0]\n" 9780 "\tMOVSD $dst,[ESP+0]\n" 9781 "\tADD ESP,8\n" 9782 "\tFSTP ST0\t # Restore FPU Stack" 9783 %} 9784 ins_cost(250); 9785 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9786 ins_pipe( pipe_slow ); 9787 %} 9788 9789 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9790 predicate (UseSSE<=1); 9791 match(Set dst(TanD src)); 9792 format %{ "DTAN $dst" %} 9793 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9794 Opcode(0xDD), Opcode(0xD8)); // fstp st 9795 ins_pipe( pipe_slow ); 9796 %} 9797 9798 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9799 predicate (UseSSE>=2); 9800 match(Set dst(TanD dst)); 9801 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9802 format %{ "DTAN $dst" %} 9803 ins_encode( Push_SrcD(dst), 9804 Opcode(0xD9), Opcode(0xF2), // fptan 9805 Opcode(0xDD), Opcode(0xD8), // fstp st 9806 Push_ResultD(dst) ); 9807 ins_pipe( pipe_slow ); 9808 %} 9809 9810 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9811 predicate (UseSSE<=1); 9812 match(Set dst(AtanD dst src)); 9813 format %{ "DATA $dst,$src" %} 9814 opcode(0xD9, 0xF3); 9815 ins_encode( Push_Reg_DPR(src), 9816 OpcP, OpcS, RegOpc(dst) ); 9817 ins_pipe( pipe_slow ); 9818 %} 9819 9820 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9821 predicate (UseSSE>=2); 9822 match(Set dst(AtanD dst src)); 9823 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9824 format %{ "DATA $dst,$src" %} 9825 opcode(0xD9, 0xF3); 9826 ins_encode( Push_SrcD(src), 9827 OpcP, OpcS, Push_ResultD(dst) ); 9828 ins_pipe( pipe_slow ); 9829 %} 9830 9831 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9832 predicate (UseSSE<=1); 9833 match(Set dst (SqrtD 
src)); 9834 format %{ "DSQRT $dst,$src" %} 9835 opcode(0xFA, 0xD9); 9836 ins_encode( Push_Reg_DPR(src), 9837 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9842 predicate (UseSSE<=1); 9843 match(Set Y (PowD X Y)); // Raise X to the Yth power 9844 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9845 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9846 ins_encode %{ 9847 __ subptr(rsp, 8); 9848 __ fld_s($X$$reg - 1); 9849 __ fast_pow(); 9850 __ addptr(rsp, 8); 9851 %} 9852 ins_pipe( pipe_slow ); 9853 %} 9854 9855 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9856 predicate (UseSSE>=2); 9857 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9858 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9859 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9860 ins_encode %{ 9861 __ subptr(rsp, 8); 9862 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9863 __ fld_d(Address(rsp, 0)); 9864 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9865 __ fld_d(Address(rsp, 0)); 9866 __ fast_pow(); 9867 __ fstp_d(Address(rsp, 0)); 9868 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9869 __ addptr(rsp, 8); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9875 predicate (UseSSE<=1); 9876 // The source Double operand on FPU stack 9877 match(Set dst (Log10D src)); 9878 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9879 // fxch ; swap ST(0) with ST(1) 9880 // fyl2x ; compute log_10(2) * log_2(x) 9881 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9882 "FXCH \n\t" 9883 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9884 %} 9885 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9886 Opcode(0xD9), Opcode(0xC9), // fxch 9887 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9888 9889 ins_pipe( pipe_slow ); 9890 %} 9891 9892 instruct 
log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9893 predicate (UseSSE>=2); 9894 effect(KILL cr); 9895 match(Set dst (Log10D src)); 9896 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9897 // fyl2x ; compute log_10(2) * log_2(x) 9898 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9899 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9900 %} 9901 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9902 Push_SrcD(src), 9903 Opcode(0xD9), Opcode(0xF1), // fyl2x 9904 Push_ResultD(dst)); 9905 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 //-------------Float Instructions------------------------------- 9910 // Float Math 9911 9912 // Code for float compare: 9913 // fcompp(); 9914 // fwait(); fnstsw_ax(); 9915 // sahf(); 9916 // movl(dst, unordered_result); 9917 // jcc(Assembler::parity, exit); 9918 // movl(dst, less_result); 9919 // jcc(Assembler::below, exit); 9920 // movl(dst, equal_result); 9921 // jcc(Assembler::equal, exit); 9922 // movl(dst, greater_result); 9923 // exit: 9924 9925 // P6 version of float compare, sets condition codes in EFLAGS 9926 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9927 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9928 match(Set cr (CmpF src1 src2)); 9929 effect(KILL rax); 9930 ins_cost(150); 9931 format %{ "FLD $src1\n\t" 9932 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9933 "JNP exit\n\t" 9934 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9935 "SAHF\n" 9936 "exit:\tNOP // avoid branch to branch" %} 9937 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9938 ins_encode( Push_Reg_DPR(src1), 9939 OpcP, RegOpc(src2), 9940 cmpF_P6_fixup ); 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9945 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9946 match(Set cr (CmpF src1 src2)); 9947 ins_cost(100); 9948 format %{ "FLD $src1\n\t" 9949 "FUCOMIP ST,$src2 // P6 instruction" %} 9950 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9951 ins_encode( 
Push_Reg_DPR(src1), 9952 OpcP, RegOpc(src2)); 9953 ins_pipe( pipe_slow ); 9954 %} 9955 9956 9957 // Compare & branch 9958 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9959 predicate(UseSSE == 0); 9960 match(Set cr (CmpF src1 src2)); 9961 effect(KILL rax); 9962 ins_cost(200); 9963 format %{ "FLD $src1\n\t" 9964 "FCOMp $src2\n\t" 9965 "FNSTSW AX\n\t" 9966 "TEST AX,0x400\n\t" 9967 "JZ,s flags\n\t" 9968 "MOV AH,1\t# unordered treat as LT\n" 9969 "flags:\tSAHF" %} 9970 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9971 ins_encode( Push_Reg_DPR(src1), 9972 OpcP, RegOpc(src2), 9973 fpu_flags); 9974 ins_pipe( pipe_slow ); 9975 %} 9976 9977 // Compare vs zero into -1,0,1 9978 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9979 predicate(UseSSE == 0); 9980 match(Set dst (CmpF3 src1 zero)); 9981 effect(KILL cr, KILL rax); 9982 ins_cost(280); 9983 format %{ "FTSTF $dst,$src1" %} 9984 opcode(0xE4, 0xD9); 9985 ins_encode( Push_Reg_DPR(src1), 9986 OpcS, OpcP, PopFPU, 9987 CmpF_Result(dst)); 9988 ins_pipe( pipe_slow ); 9989 %} 9990 9991 // Compare into -1,0,1 9992 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9993 predicate(UseSSE == 0); 9994 match(Set dst (CmpF3 src1 src2)); 9995 effect(KILL cr, KILL rax); 9996 ins_cost(300); 9997 format %{ "FCMPF $dst,$src1,$src2" %} 9998 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9999 ins_encode( Push_Reg_DPR(src1), 10000 OpcP, RegOpc(src2), 10001 CmpF_Result(dst)); 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 // float compare and set condition codes in EFLAGS by XMM regs 10006 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10007 predicate(UseSSE>=1); 10008 match(Set cr (CmpF src1 src2)); 10009 ins_cost(145); 10010 format %{ "UCOMISS $src1,$src2\n\t" 10011 "JNP,s exit\n\t" 10012 "PUSHF\t# saw NaN, set CF\n\t" 10013 "AND [rsp], #0xffffff2b\n\t" 10014 "POPF\n" 10015 "exit:" %} 10016 ins_encode %{ 10017 __ 
ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10018 emit_cmpfp_fixup(_masm); 10019 %} 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10024 predicate(UseSSE>=1); 10025 match(Set cr (CmpF src1 src2)); 10026 ins_cost(100); 10027 format %{ "UCOMISS $src1,$src2" %} 10028 ins_encode %{ 10029 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10030 %} 10031 ins_pipe( pipe_slow ); 10032 %} 10033 10034 // float compare and set condition codes in EFLAGS by XMM regs 10035 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10036 predicate(UseSSE>=1); 10037 match(Set cr (CmpF src1 (LoadF src2))); 10038 ins_cost(165); 10039 format %{ "UCOMISS $src1,$src2\n\t" 10040 "JNP,s exit\n\t" 10041 "PUSHF\t# saw NaN, set CF\n\t" 10042 "AND [rsp], #0xffffff2b\n\t" 10043 "POPF\n" 10044 "exit:" %} 10045 ins_encode %{ 10046 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10047 emit_cmpfp_fixup(_masm); 10048 %} 10049 ins_pipe( pipe_slow ); 10050 %} 10051 10052 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10053 predicate(UseSSE>=1); 10054 match(Set cr (CmpF src1 (LoadF src2))); 10055 ins_cost(100); 10056 format %{ "UCOMISS $src1,$src2" %} 10057 ins_encode %{ 10058 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10059 %} 10060 ins_pipe( pipe_slow ); 10061 %} 10062 10063 // Compare into -1,0,1 in XMM 10064 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10065 predicate(UseSSE>=1); 10066 match(Set dst (CmpF3 src1 src2)); 10067 effect(KILL cr); 10068 ins_cost(255); 10069 format %{ "UCOMISS $src1, $src2\n\t" 10070 "MOV $dst, #-1\n\t" 10071 "JP,s done\n\t" 10072 "JB,s done\n\t" 10073 "SETNE $dst\n\t" 10074 "MOVZB $dst, $dst\n" 10075 "done:" %} 10076 ins_encode %{ 10077 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10078 emit_cmpfp3(_masm, $dst$$Register); 10079 %} 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 // Compare into -1,0,1 in XMM and memory 10084 instruct 
// NOTE(review): continuation of cmpF_regmem — the "instruct" header precedes this chunk.
cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

//----------x87 (FPU-stack) float arithmetic, matched only when UseSSE==0------
// The *FPR24* variants have a stackSlotF destination: spilling the x87
// extended-precision result through a 32-bit store forces rounding to
// single (24-bit mantissa) precision, as required in 24-bit mode
// (predicate select_24_bit_instr()).  The plain *FPR variants keep the
// result in an FPU register without that rounding step.

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src)
%{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS/FCHS take no explicit operand (they act on the FPU top of stack),
// hence the regFPR1 operand class pinning dst/src to the same TOS register.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ModF with SSE operands: the operands are bounced through the stack onto the
// x87 stack because FPREM has no XMM equivalent.  FPREM computes a *partial*
// remainder; the JP loop re-executes it until the status word (copied to the
// CPU flags via FNSTSW/SAHF) reports that the reduction is complete.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at the top of the FPU stack, load it there first;
    // FST_S works only on ST(0).
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// 0x80000000 is the value the conversion instruction stores on overflow or
// NaN (the x87/SSE2 "integer indefinite"); matching it routes the corner
// cases to the d2i_wrapper stub, which produces the Java-specified result.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Possible overflow/NaN: pass the value to the wrapper on the FPU stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX==0x80000000, EAX==0) is the "long indefinite"
    // stored on overflow/NaN; only then take the wrapper slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Possible overflow/NaN; d2i_wrapper handles the float case too.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Predicate: matches only ConvI2F of (x & 255), i.e. an unsigned byte, which
// is always exactly representable as a float — hence no rounding spill.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend: copy src into both halves, then SAR the high half by 31
// to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// NOTE(review): no predicate here — appears to serve as the generic/x87
// fallback for ConvL2F when the SSE form above does not apply; confirm
// against adlc's cost-based instruction selection.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: just take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

//----------Bit-preserving moves between int/long and float/double------------

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // Shuffle immediate 0x4e swaps the two 32-bit halves of the low quadword
    // so MOVD can then extract the high half of the double.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// NOTE(review): definition continues beyond this chunk.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 &&
UseXmmLoadAndClearUpper); 11325 match(Set dst (MoveL2D src)); 11326 effect(DEF dst, USE src); 11327 11328 ins_cost(95); 11329 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11330 ins_encode %{ 11331 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11332 %} 11333 ins_pipe( pipe_slow ); 11334 %} 11335 11336 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11337 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11338 match(Set dst (MoveL2D src)); 11339 effect(DEF dst, USE src); 11340 11341 ins_cost(95); 11342 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11343 ins_encode %{ 11344 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11345 %} 11346 ins_pipe( pipe_slow ); 11347 %} 11348 11349 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11350 predicate(UseSSE>=2); 11351 match(Set dst (MoveL2D src)); 11352 effect(TEMP dst, USE src, TEMP tmp); 11353 ins_cost(85); 11354 format %{ "MOVD $dst,$src.lo\n\t" 11355 "MOVD $tmp,$src.hi\n\t" 11356 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11357 ins_encode %{ 11358 __ movdl($dst$$XMMRegister, $src$$Register); 11359 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11360 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11361 %} 11362 ins_pipe( pipe_slow ); 11363 %} 11364 11365 11366 // ======================================================================= 11367 // fast clearing of an array 11368 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11369 predicate(!UseFastStosb); 11370 match(Set dummy (ClearArray cnt base)); 11371 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11372 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11373 "SHL ECX,1\t# Convert doublewords to words\n\t" 11374 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11375 ins_encode %{ 11376 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11377 %} 11378 ins_pipe( pipe_slow ); 11379 %} 11380 11381 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11382 predicate(UseFastStosb); 11383 match(Set dummy (ClearArray cnt base)); 11384 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11385 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11386 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11387 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11388 ins_encode %{ 11389 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11390 %} 11391 ins_pipe( pipe_slow ); 11392 %} 11393 11394 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11395 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11396 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11397 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11398 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11399 11400 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11401 ins_encode %{ 11402 __ string_compare($str1$$Register, $str2$$Register, 11403 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11404 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11405 %} 11406 ins_pipe( pipe_slow ); 11407 %} 11408 11409 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11410 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11411 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11412 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11413 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11414 11415 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11416 ins_encode %{ 11417 __ string_compare($str1$$Register, $str2$$Register, 11418 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11419 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 instruct string_compareLU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11425 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11426 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11427 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11428 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11429 11430 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11431 ins_encode %{ 11432 __ string_compare($str1$$Register, $str2$$Register, 11433 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11434 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11435 %} 11436 ins_pipe( pipe_slow ); 11437 %} 11438 11439 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11440 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11441 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11442 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11443 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11444 11445 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11446 ins_encode %{ 11447 __ string_compare($str2$$Register, $str1$$Register, 11448 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11449 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11450 %} 11451 ins_pipe( pipe_slow ); 11452 %} 11453 11454 // fast string equals 11455 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11456 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11457 match(Set result (StrEquals (Binary str1 str2) cnt)); 11458 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11459 11460 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11461 ins_encode %{ 11462 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11463 $cnt$$Register, $result$$Register, $tmp3$$Register, 11464 $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, false /* char */); 11465 %} 11466 11467 ins_pipe( pipe_slow ); 11468 %} 11469 11470 // fast search of substring with known size. 11471 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11472 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11473 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11474 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11475 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11476 11477 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11478 ins_encode %{ 11479 int icnt2 = (int)$int_cnt2$$constant; 11480 if (icnt2 >= 16) { 11481 // IndexOf for constant substrings with size >= 16 elements 11482 // which don't need to be loaded through stack. 11483 __ string_indexofC8($str1$$Register, $str2$$Register, 11484 $cnt1$$Register, $cnt2$$Register, 11485 icnt2, $result$$Register, 11486 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11487 } else { 11488 // Small strings are loaded through stack if they cross page boundary. 11489 __ string_indexof($str1$$Register, $str2$$Register, 11490 $cnt1$$Register, $cnt2$$Register, 11491 icnt2, $result$$Register, 11492 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11493 } 11494 %} 11495 ins_pipe( pipe_slow ); 11496 %} 11497 11498 // fast search of substring with known size. 
// UU variant (char[] in char[]); constant substring length.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UL variant (char[] pattern in byte[]); constant substring length.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General-length (non-constant substring) IndexOf variants: (-1) is
// passed as the constant count to select the generic path.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a char[] (SSE4.2).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any byte with the sign bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (src & con) == 0 into a single TEST with immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (src & mem) == 0 into a single TEST with a memory operand.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
11943 // // Conditional move for max 11944 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11945 // effect( USE_DEF op2, USE op1, USE cr ); 11946 // format %{ "CMOVgt $op2,$op1\t! max" %} 11947 // opcode(0x4F,0x0F); 11948 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11949 // ins_pipe( pipe_cmov_reg ); 11950 //%} 11951 // 11952 // // Max Register with Register (P6 version) 11953 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11954 // predicate(VM_Version::supports_cmov() ); 11955 // match(Set op2 (MaxI op1 op2)); 11956 // ins_cost(200); 11957 // expand %{ 11958 // eFlagsReg cr; 11959 // compI_eReg(cr,op1,op2); 11960 // cmovI_reg_gt(op2,op1,cr); 11961 // %} 11962 //%} 11963 11964 // Max Register with Register (generic version) 11965 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11966 match(Set dst (MaxI dst src)); 11967 effect(KILL flags); 11968 ins_cost(300); 11969 11970 format %{ "MAX $dst,$src" %} 11971 opcode(0xCC); 11972 ins_encode( max_enc(dst,src) ); 11973 ins_pipe( pipe_slow ); 11974 %} 11975 11976 // ============================================================================ 11977 // Counted Loop limit node which represents exact final iterator value. 11978 // Note: the resulting value should fit into integer range since 11979 // counted loops have limit check on overflow. 11980 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11981 match(Set limit (LoopLimit (Binary init limit) stride)); 11982 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11983 ins_cost(300); 11984 11985 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11986 ins_encode %{ 11987 int strd = (int)$stride$$constant; 11988 assert(strd != 1 && strd != -1, "sanity"); 11989 int m1 = (strd > 0) ? 
1 : -1; 11990 // Convert limit to long (EAX:EDX) 11991 __ cdql(); 11992 // Convert init to long (init:tmp) 11993 __ movl($tmp$$Register, $init$$Register); 11994 __ sarl($tmp$$Register, 31); 11995 // $limit - $init 11996 __ subl($limit$$Register, $init$$Register); 11997 __ sbbl($limit_hi$$Register, $tmp$$Register); 11998 // + ($stride - 1) 11999 if (strd > 0) { 12000 __ addl($limit$$Register, (strd - 1)); 12001 __ adcl($limit_hi$$Register, 0); 12002 __ movl($tmp$$Register, strd); 12003 } else { 12004 __ addl($limit$$Register, (strd + 1)); 12005 __ adcl($limit_hi$$Register, -1); 12006 __ lneg($limit_hi$$Register, $limit$$Register); 12007 __ movl($tmp$$Register, -strd); 12008 } 12009 // signed devision: (EAX:EDX) / pos_stride 12010 __ idivl($tmp$$Register); 12011 if (strd < 0) { 12012 // restore sign 12013 __ negl($tmp$$Register); 12014 } 12015 // (EAX) * stride 12016 __ mull($tmp$$Register); 12017 // + init (ignore upper bits) 12018 __ addl($limit$$Register, $init$$Register); 12019 %} 12020 ins_pipe( pipe_slow ); 12021 %} 12022 12023 // ============================================================================ 12024 // Branch Instructions 12025 // Jump Table 12026 instruct jumpXtnd(rRegI switch_val) %{ 12027 match(Jump switch_val); 12028 ins_cost(350); 12029 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12030 ins_encode %{ 12031 // Jump to Address(table_base + switch_reg) 12032 Address index(noreg, $switch_val$$Register, Address::times_1); 12033 __ jump(ArrayAddress($constantaddress, index)); 12034 %} 12035 ins_pipe(pipe_jmp); 12036 %} 12037 12038 // Jump Direct - Label defines a relative address from JMP+1 12039 instruct jmpDir(label labl) %{ 12040 match(Goto); 12041 effect(USE labl); 12042 12043 ins_cost(300); 12044 format %{ "JMP $labl" %} 12045 size(5); 12046 ins_encode %{ 12047 Label* L = $labl$$label; 12048 __ jmp(*L, false); // Always long jump 12049 %} 12050 ins_pipe( pipe_jmp ); 12051 %} 12052 12053 // Jump Direct Conditional - Label defines a 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Taken when the condition encoded by the cmpOp operand ($cop$$cmpcode)
// holds for the integer condition codes in $cr.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);                      // Jcc rel32: 0F 8x + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch variant (signed condition codes).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch, unsigned comparison.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Counted-loop back-branch on the unordered-compare flags register.
// Lower cost than jmpLoopEndU so it is preferred when it applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on unordered-compare flags; cheaper than jmpConU.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-branch form for conditions that must also account for the parity
// flag (unordered result): notEqual branches on parity OR notEqual;
// equal first skips over the branch when parity is set.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: unordered (PF set) also counts as not-equal.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT take the branch, so hop over it.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Slow-path secondary-supers scan.  Fixed register usage (EDI/ESI/EAX/ECX)
// is dictated by the REPNE SCASD encoding in enc_PartialSubtypeCheck.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but the consumer only needs the flags (compare against NULL),
// so the XOR of the result register is skipped (opcode 0x0) and EDI is
// merely killed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);                      // EB cb: opcode + 1-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short version of jmpConUCF2: parity handling folded in, two short jumps.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);                      // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst from a full 64-bit signed compare of two
// register pairs: compare high words first (signed), then low words
// (unsigned), branching to set the result.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);        // result = 0
    // High halves decide unless equal (signed compare).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);                    // result = +1
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);                    // result = -1
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so a TEST of the
// high half suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Reg-reg long compare for LT/GE: CMP of the low halves followed by SBB of
// the high halves leaves the sign/overflow flags set as if the full 64-bit
// subtraction had been done.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'ge' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// A long is zero iff the OR of its two halves is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare run.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long pair on the EQ/NE long-compare flags.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'ne' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Uses the commuted cmpOp because the flags were set with swapped operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'gt' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop (-1); presumably patched by the
  // inline-cache machinery in Java_Dynamic_Call -- verify against the
  // encoding definition.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that is known not to touch the FPU: no float-stack bookkeeping.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);   // near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */   // indirect near jump through register
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pop discards the return address into EDX before the indirect jump;
  // the exception oop travels in EAX (ex_oop operand class).
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);    // purely a register-allocation artifact; emits no bytes
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with Restricted Transactional Memory support; selected only when
// the compilation uses RTM (see predicate).
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // fast_lock is a macro-assembler routine; the trailing 'true' enables the
    // RTM path and profile_rtm drives RTM profiling data collection.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock variant.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;   // fixed size so the safepoint-poll site can be located/patched
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that immediately follows a store of the same value to
// the same memory slot is redundant; replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.