1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
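//
// As a worked reading of the syntax above (illustrative only, not an extra
// definition): the entry
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// below gives EBX a register save type of SOC, a C-convention save type of
// SOE (EBX is callee-saved in the IA-32 C ABI), an ideal spill type of
// Op_RegI, and hardware encoding 3 for use in emitted opcodes.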
  61 
  62 // General Registers
  63 // Previously EBX, ESI, and EDI were set as save-on-entry for Java code,
  64 // but SOE was turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emission of assembly for a machnode. During the emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering forces the
  86 // instruction encoding to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used but supply alignment to the xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that align with the calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description.
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
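
// Illustrative check (a sketch): using the hardware encodings from the
// reg_def entries above (EAX=0, ECX=1, EBP=5), the Long pairs named in the
// register-class comments line up as
//   HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/
//   HIGH_FROM_LOW(1 /*ECX*/) == 3 /*EBX*/
//   HIGH_FROM_LOW(5 /*EBP*/) == 7 /*EDI*/
// i.e. exactly the EDX:EAX, EBX:ECX and EDI:EBP pairings.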
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // of the 128-bit operands for SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
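
// Illustrative sketch (not part of the emitted code) of why the single extra
// 128-bit slot of slack in fp_signmask_pool is enough: each mask above is
// handed an address 16 bytes past the previous one, and double_quadword()
// only ever rounds that address *down* with &~0xF (by less than 16 bytes),
// so every rounded 16-byte operand still lands inside the pool; the leading
// slot fp_signmask_pool[0..1] absorbs the shift.  For any of the pointers
// above one could check:
//   assert(((uintptr_t)float_signmask_pool & 0xF) == 0, "128-bit aligned");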
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
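
// Illustrative byte accounting for the offsets above (a sketch; standard
// IA-32 opcode sizes):
//   static call:   E8 <rel32>                              ->  5 bytes
//   dynamic call:  B8 <imm32> (MOV EAX, cached IC value)   ->  5 bytes
//                  E8 <rel32>                              -> +5 = 10 bytes
// plus whatever pre_call_resets_size() adds for fldcw/vzeroupper.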
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
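
// Worked example (a sketch): for a static call whose node starts at
// current_offset == 10, with no FPU/vector resets pending and
// alignment_required() == 4, the 4-byte displacement would begin at byte
// 10 + 0 + 1 == 11, so compute_padding() returns round_to(11, 4) - 11 == 1
// byte of padding inserted before the call.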
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
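
// For reference, the byte assembled here is the IA-32 ModRM layout:
//   bits [7:6] = mod,  [5:3] = reg/opcode extension,  [2:0] = r/m
// e.g. (a hypothetical call) emit_rm(cbuf, 0x3, 0x2, 0x0) emits 0xD0,
// i.e. register-direct mode with reg = EDX and r/m = EAX.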
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement
 412   }
 413 }
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
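
// Summary of the addressing forms produced above (illustrative operands):
//   [EBX]                  mod=00, r/m=EBX                                  (no disp)
//   [EBX + 8]              mod=01, r/m=EBX, disp8
//   [EBX + 0x1000]         mod=10, r/m=EBX, disp32
//   [ESP + 8]              mod=01, r/m=100 + SIB(scale=0, index=none, base=ESP), disp8
//   absolute (base == -1)  mod=00, r/m=101, disp32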
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
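
// Resulting value of dst for the sequence above (a sketch of the three-way
// compare convention):
//   unordered (NaN)   -> -1  (PF set, the preloaded -1 is kept)
//   less than         -> -1  (CF set)
//   equal             ->  0  (setb(notEqual) stores 0)
//   greater than      ->  1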
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users of the constant
 628     // table might be emitted before the MachConstantBaseNode itself.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
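
// Rough shape of the frame laid out by verified_entry() above (a sketch;
// the stack grows toward lower addresses):
//   [ return address ]                      pushed by the caller's CALL
//   [ saved EBP      ]
//   [ spills / locals / outgoing args ]     frame_size_in_bytes - 2*wordSize
//   <-- ESP after the prolog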
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 725   // Remove two words for return addr and rbp.
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
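
// Illustrative accounting (a sketch) for a method with
// frame_size_in_bytes == 32, no 24-bit FP mode, no wide vectors, and a
// return poll:
//   poll test (6) + popl EBP (1) + add ESP,imm8 (3) + reserved-stack slack (64)
//   == 74 bytes returned by size().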
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
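
// Illustrative classification (a sketch): an OptoReg naming EAX maps to
// rc_int, FPR2L maps to rc_float (only legal when UseSSE < 2), any XMM
// register maps to rc_xmm, a stack slot maps to rc_stack, and an invalid
// OptoReg is rc_bad.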
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     if (reg_lo+1 == reg_hi) { // double move?
 799       if (is_load) {
 800         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     } else {
 805       if (is_load) {
 806         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 807       } else {
 808         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 809       }
 810     }
 811 #ifndef PRODUCT
 812   } else if (!do_size) {
 813     if (size != 0) st->print("\n\t");
 814     if (reg_lo+1 == reg_hi) { // double move?
 815       if (is_load) st->print("%s %s,[ESP + #%d]",
 816                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 817                               Matcher::regName[reg_lo], offset);
 818       else         st->print("MOVSD  [ESP + #%d],%s",
 819                               offset, Matcher::regName[reg_lo]);
 820     } else {
 821       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 822                               Matcher::regName[reg_lo], offset);
 823       else         st->print("MOVSS  [ESP + #%d],%s",
 824                               offset, Matcher::regName[reg_lo]);
 825     }
 826 #endif
 827   }
 828   bool is_single_byte = false;
 829   if ((UseAVX > 2) && (offset != 0)) {
 830     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 831   }
 832   int offset_size = 0;
 833   if (UseAVX > 2 ) {
 834     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 835   } else {
 836     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 837   }
 838   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 839   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 840   return size+5+offset_size;
 841 }
 842 
 843 
 844 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 845                             int src_hi, int dst_hi, int size, outputStream* st ) {
 846   if (cbuf) {
 847     MacroAssembler _masm(cbuf);
 848     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 849       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 851     } else {
 852       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 854     }
 855 #ifndef PRODUCT
 856   } else if (!do_size) {
 857     if (size != 0) st->print("\n\t");
 858     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 859       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 860         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     } else {
 865       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 866         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 867       } else {
 868         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 869       }
 870     }
 871 #endif
 872   }
 873   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 874   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 875   int sz = (UseAVX > 2) ? 6 : 4;
 876   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 877       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 878   return size + sz;
 879 }
 880 
 881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 882                             int src_hi, int dst_hi, int size, outputStream* st ) {
 883   // 32-bit
 884   if (cbuf) {
 885     MacroAssembler _masm(cbuf);
 886     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 887              as_Register(Matcher::_regEncode[src_lo]));
 888 #ifndef PRODUCT
 889   } else if (!do_size) {
 890     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 891 #endif
 892   }
 893   return (UseAVX> 2) ? 6 : 4;
 894 }
 895 
 896 
 897 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 898                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 899   // 32-bit
 900   if (cbuf) {
 901     MacroAssembler _masm(cbuf);
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }   
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1016       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popq    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popq    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
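
// Worked size check (a sketch) for Op_VecS with src_offset == 8 and
// dst_offset == 0x80:
//   pushl [rsp + 8]     FF /6 + SIB + disp8   -> 3 + 1 bytes
//   popl  [rsp + 0x80]  8F /0 + SIB + disp32  -> 3 + 4 bytes
//   calc_size == 4 + 7 == 11, matching the assert against __ offset() above.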
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
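// The single implementation() routine above does triple duty: format() passes an
// outputStream to pretty-print the copy, emit() passes a CodeBuffer to generate it,
// and size() passes do_size=true so that only the encoding length is computed.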
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
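  // Mirrors the LEA emitted above: opcode + ModRM + SIB is 3 bytes, plus a
  // 4-byte disp32 when offset >= 128 or a 1-byte disp8 otherwise.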
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
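  // 3-byte CMP EAX,[ECX+4], 6-byte JNE rel32, plus 2 or 3 NOPs (see emit above).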
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
1393 // This hook originated on UltraSparc; returning true here just means we have fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to the address of the branch.
1404   // On x86 a branch displacement is calculated relative to the address
1405   // of the next instruction.
1406   offset -= br_size;
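  // Example: a 2-byte short branch whose target lies 100 bytes past the branch
  // address arrives here with offset == 100; after the adjustment above the
  // encodable rel8 displacement is 98.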
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
1416   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Threshold size for cleararray.
1424 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1425 
1426 // Needs a pair of CMOVs for longs.
1427 const int Matcher::long_cmove_cost() { return 1; }
1428 
1429 // No CMOVF/CMOVD with SSE/SSE2
1430 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1431 
1432 // Does the CPU require late expand (see block.cpp for description of late expand)?
1433 const bool Matcher::require_postalloc_expand = false;
1434 
1435 // Should the Matcher clone shifts on addressing modes, expecting them to
1436 // be subsumed into complex addressing expressions or compute them into
1437 // registers?  True for Intel but false for most RISCs
1438 const bool Matcher::clone_shift_expressions = true;
1439 
1440 // Do we need to mask the count passed to shift instructions or does
1441 // the CPU only looks at the lower 5/6 bits anyway?
1442 const bool Matcher::need_masked_shift_count = false;
1443 
1444 bool Matcher::narrow_oop_use_complex_address() {
1445   ShouldNotCallThis();
1446   return true;
1447 }
1448 
1449 bool Matcher::narrow_klass_use_complex_address() {
1450   ShouldNotCallThis();
1451   return true;
1452 }
1453 
1454 
1455 // Is it better to copy float constants, or load them directly from memory?
1456 // Intel can load a float constant from a direct address, requiring no
1457 // extra registers.  Most RISCs will have to materialize an address into a
1458 // register first, so they would do better to copy the constant from stack.
1459 const bool Matcher::rematerialize_float_constants = true;
1460 
1461 // If CPU can load and store mis-aligned doubles directly then no fixup is
1462 // needed.  Else we split the double into 2 integer pieces and move it
1463 // piece-by-piece.  Only happens when passing doubles into C code as the
1464 // Java calling convention forces doubles to be aligned.
1465 const bool Matcher::misaligned_doubles_ok = true;
1466 
1467 
1468 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1469   // Get the memory operand from the node
1470   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1471   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1472   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1473   uint opcnt     = 1;                 // First operand
1474   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1475   while( idx >= skipped+num_edges ) {
1476     skipped += num_edges;
1477     opcnt++;                          // Bump operand count
1478     assert( opcnt < numopnds, "Accessing non-existent operand" );
1479     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1480   }
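  // opcnt now indexes the operand that owns input 'idx', i.e. the memory operand
  // of the implicit null check.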
1481 
1482   MachOper *memory = node->_opnds[opcnt];
1483   MachOper *new_memory = NULL;
1484   switch (memory->opcode()) {
1485   case DIRECT:
1486   case INDOFFSET32X:
1487     // No transformation necessary.
1488     return;
1489   case INDIRECT:
1490     new_memory = new indirect_win95_safeOper( );
1491     break;
1492   case INDOFFSET8:
1493     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1494     break;
1495   case INDOFFSET32:
1496     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1497     break;
1498   case INDINDEXOFFSET:
1499     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1500     break;
1501   case INDINDEXSCALE:
1502     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1503     break;
1504   case INDINDEXSCALEOFFSET:
1505     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1506     break;
1507   case LOAD_LONG_INDIRECT:
1508   case LOAD_LONG_INDOFFSET32:
1509     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1510     return;
1511   default:
1512     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1513     return;
1514   }
1515   node->_opnds[opcnt] = new_memory;
1516 }
1517 
1518 // Advertise here if the CPU requires explicit rounding operations
1519 // to implement the UseStrictFP mode.
1520 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1521 
1522 // Are floats converted to doubles when stored to the stack during deoptimization?
1523 // On 32-bit x86 a float is stored with conversion only when the FPU is used for floats.
1524 bool Matcher::float_in_double() { return (UseSSE == 0); }
1525 
1526 // Do ints take an entire long register or just half?
1527 const bool Matcher::int_in_long = false;
1528 
1529 // Return whether or not this register is ever used as an argument.  This
1530 // function is used on startup to build the trampoline stubs in generateOptoStub.
1531 // Registers not mentioned will be killed by the VM call in the trampoline, and
1532 // arguments in those registers will not be available to the callee.
1533 bool Matcher::can_be_java_arg( int reg ) {
1534   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1535   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1536   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1537   return false;
1538 }
1539 
1540 bool Matcher::is_spillable_arg( int reg ) {
1541   return can_be_java_arg(reg);
1542 }
1543 
1544 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1545   // Use the hardware integer DIV instruction when
1546   // it is faster than code which uses a multiply.
1547   // Only when the constant divisor fits into 32 bits
1548   // (min_jint is excluded because it cannot be negated
1549   // into a positive 32-bit value).
1550   return VM_Version::has_fast_idiv() &&
1551          (divisor == (int)divisor && divisor != min_jint);
1552 }
1553 
1554 // Register for DIVI projection of divmodI
1555 RegMask Matcher::divI_proj_mask() {
1556   return EAX_REG_mask();
1557 }
1558 
1559 // Register for MODI projection of divmodI
1560 RegMask Matcher::modI_proj_mask() {
1561   return EDX_REG_mask();
1562 }
1563 
1564 // Register for DIVL projection of divmodL
1565 RegMask Matcher::divL_proj_mask() {
1566   ShouldNotReachHere();
1567   return RegMask();
1568 }
1569 
1570 // Register for MODL projection of divmodL
1571 RegMask Matcher::modL_proj_mask() {
1572   ShouldNotReachHere();
1573   return RegMask();
1574 }
1575 
1576 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1577   return NO_REG_mask();
1578 }
1579 
1580 // Returns true if the high 32 bits of the value are known to be zero.
1581 bool is_operand_hi32_zero(Node* n) {
1582   int opc = n->Opcode();
1583   if (opc == Op_AndL) {
1584     Node* o2 = n->in(2);
1585     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1586       return true;
1587     }
1588   }
1589   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1590     return true;
1591   }
1592   return false;
1593 }
1594 
1595 %}
1596 
1597 //----------ENCODING BLOCK-----------------------------------------------------
1598 // This block specifies the encoding classes used by the compiler to output
1599 // byte streams.  Encoding classes generate functions which are called by
1600 // Machine Instruction Nodes in order to generate the bit encoding of the
1601 // instruction.  Operands specify their base encoding interface with the
1602 // interface keyword.  Four interfaces are currently supported:
1603 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1604 // operand to generate a function which returns its register number when
1605 // queried.   CONST_INTER causes an operand to generate a function which
1606 // returns the value of the constant when queried.  MEMORY_INTER causes an
1607 // operand to generate four functions which return the Base Register, the
1608 // Index Register, the Scale Value, and the Offset Value of the operand when
1609 // queried.  COND_INTER causes an operand to generate six functions which
1610 // return the encoding code (ie - encoding bits for the instruction)
1611 // associated with each basic boolean condition for a conditional instruction.
1612 // Instructions specify two basic values for encoding.  They use the
1613 // ins_encode keyword to specify their encoding class (which must be one of
1614 // the class names specified in the encoding block), and they use the
1615 // opcode keyword to specify, in order, their primary, secondary, and
1616 // tertiary opcode.  Only the opcode sections which a particular instruction
1617 // needs for encoding need to be specified.
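// For example, the integer register-register ADD rule later in this file uses
//   opcode(0x03);  ins_encode( OpcP, RegReg( dst, src ) );
// so the OpcP enc_class below emits the primary opcode byte (0x03) and RegReg
// emits the ModRM byte selecting the two registers.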
1618 encode %{
1619   // Build emit functions for each basic byte or larger field in the intel
1620   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1621   // code in the enc_class source block.  Emit functions will live in the
1622   // main source block for now.  In future, we can generalize this by
1623   // adding a syntax that specifies the sizes of fields in an order,
1624   // so that the adlc can build the emit functions automagically
1625 
1626   // Emit primary opcode
1627   enc_class OpcP %{
1628     emit_opcode(cbuf, $primary);
1629   %}
1630 
1631   // Emit secondary opcode
1632   enc_class OpcS %{
1633     emit_opcode(cbuf, $secondary);
1634   %}
1635 
1636   // Emit opcode directly
1637   enc_class Opcode(immI d8) %{
1638     emit_opcode(cbuf, $d8$$constant);
1639   %}
1640 
1641   enc_class SizePrefix %{
1642     emit_opcode(cbuf,0x66);
1643   %}
1644 
1645   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1646     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1647   %}
1648 
1649   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1650     emit_opcode(cbuf,$opcode$$constant);
1651     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1652   %}
1653 
1654   enc_class mov_r32_imm0( rRegI dst ) %{
1655     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1656     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1657   %}
1658 
1659   enc_class cdq_enc %{
1660     // Full implementation of Java idiv and irem; checks for
1661     // special case as described in JVM spec., p.243 & p.271.
1662     //
1663     //         normal case                           special case
1664     //
1665     // input : eax: dividend                          min_int
1666     //         reg: divisor                          -1
1667     //
1668     // output: eax: quotient  (= eax idiv reg)        min_int
1669     //         edx: remainder (= eax irem reg)        0
1670     //
1671     //  Code sequence:
1672     //
1673     //  81 F8 00 00 00 80    cmp         eax,80000000h
1674     //  0F 85 0B 00 00 00    jne         normal_case
1675     //  33 D2                xor         edx,edx
1676     //  83 F9 FF             cmp         ecx,0FFh
1677     //  0F 84 03 00 00 00    je          done
1678     //                  normal_case:
1679     //  99                   cdq
1680     //  F7 F9                idiv        ecx
1681     //                  done:
1682     //
1683     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1686     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1687     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1688     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1689     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1690     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1691     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1692     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1693     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1694     // normal_case:
1695     emit_opcode(cbuf,0x99);                                         // cdq
1696     // idiv (note: must be emitted by the user of this rule)
1697     // normal:
1698   %}
1699 
1700   // Dense encoding for older common ops
1701   enc_class Opc_plus(immI opcode, rRegI reg) %{
1702     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1703   %}
1704 
1705 
1706   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1707   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1708     // Check for 8-bit immediate, and set sign extend bit in opcode
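    // (e.g. a $primary of 0x81, ALU r/m32,imm32, becomes 0x83, ALU r/m32,imm8 sign-extended)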
1709     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1710       emit_opcode(cbuf, $primary | 0x02);
1711     }
1712     else {                          // If 32-bit immediate
1713       emit_opcode(cbuf, $primary);
1714     }
1715   %}
1716 
1717   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1718     // Emit primary opcode and set sign-extend bit
1719     // Check for 8-bit immediate, and set sign extend bit in opcode
1720     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1721       emit_opcode(cbuf, $primary | 0x02);
    }
1722     else {                          // If 32-bit immediate
1723       emit_opcode(cbuf, $primary);
1724     }
1725     // Emit r/m byte with secondary opcode, after primary opcode.
1726     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1727   %}
1728 
1729   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1730     // Check for 8-bit immediate, and set sign extend bit in opcode
1731     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1732       $$$emit8$imm$$constant;
1733     }
1734     else {                          // If 32-bit immediate
1735       // Output immediate
1736       $$$emit32$imm$$constant;
1737     }
1738   %}
1739 
1740   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1741     // Emit primary opcode and set sign-extend bit
1742     // Check for 8-bit immediate, and set sign extend bit in opcode
1743     int con = (int)$imm$$constant; // Throw away top bits
1744     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1745     // Emit r/m byte with secondary opcode, after primary opcode.
1746     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1747     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1748     else                               emit_d32(cbuf,con);
1749   %}
1750 
1751   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1752     // Emit primary opcode and set sign-extend bit
1753     // Check for 8-bit immediate, and set sign extend bit in opcode
1754     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1755     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1756     // Emit r/m byte with tertiary opcode, after primary opcode.
1757     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1758     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1759     else                               emit_d32(cbuf,con);
1760   %}
1761 
1762   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1763     emit_cc(cbuf, $secondary, $dst$$reg );
1764   %}
1765 
1766   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1767     int destlo = $dst$$reg;
1768     int desthi = HIGH_FROM_LOW(destlo);
1769     // bswap lo
1770     emit_opcode(cbuf, 0x0F);
1771     emit_cc(cbuf, 0xC8, destlo);
1772     // bswap hi
1773     emit_opcode(cbuf, 0x0F);
1774     emit_cc(cbuf, 0xC8, desthi);
1775     // xchg lo and hi
1776     emit_opcode(cbuf, 0x87);
1777     emit_rm(cbuf, 0x3, destlo, desthi);
1778   %}
1779 
1780   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1781     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1782   %}
1783 
1784   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1785     $$$emit8$primary;
1786     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1787   %}
1788 
1789   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1790     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1791     emit_d8(cbuf, op >> 8 );
1792     emit_d8(cbuf, op & 255);
1793   %}
1794 
1795   // emulate a CMOV with a conditional branch around a MOV
1796   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1797     // Invert sense of branch from sense of CMOV
1798     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1799     emit_d8( cbuf, $brOffs$$constant );
1800   %}
1801 
1802   enc_class enc_PartialSubtypeCheck( ) %{
1803     Register Redi = as_Register(EDI_enc); // result register
1804     Register Reax = as_Register(EAX_enc); // super class
1805     Register Recx = as_Register(ECX_enc); // killed
1806     Register Resi = as_Register(ESI_enc); // sub class
1807     Label miss;
1808 
1809     MacroAssembler _masm(&cbuf);
1810     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1811                                      NULL, &miss,
1812                                      /*set_cond_codes:*/ true);
1813     if ($primary) {
1814       __ xorptr(Redi, Redi);
1815     }
1816     __ bind(miss);
1817   %}
1818 
1819   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1820     MacroAssembler masm(&cbuf);
1821     int start = masm.offset();
1822     if (UseSSE >= 2) {
1823       if (VerifyFPU) {
1824         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1825       }
1826     } else {
1827       // External c_calling_convention expects the FPU stack to be 'clean'.
1828       // Compiled code leaves it dirty.  Do cleanup now.
1829       masm.empty_FPU_stack();
1830     }
1831     if (sizeof_FFree_Float_Stack_All == -1) {
1832       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1833     } else {
1834       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1835     }
1836   %}
1837 
1838   enc_class Verify_FPU_For_Leaf %{
1839     if( VerifyFPU ) {
1840       MacroAssembler masm(&cbuf);
1841       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1842     }
1843   %}
1844 
1845   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1846     // This is the instruction starting address for relocation info.
1847     cbuf.set_insts_mark();
1848     $$$emit8$primary;
1849     // CALL directly to the runtime
1850     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1851                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1852 
1853     if (UseSSE >= 2) {
1854       MacroAssembler _masm(&cbuf);
1855       BasicType rt = tf()->return_type();
1856 
1857       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1858         // A C runtime call where the return value is unused.  In SSE2+
1859         // mode the result needs to be removed from the FPU stack.  It's
1860         // likely that this function call could be removed by the
1861         // optimizer if the C function is a pure function.
1862         __ ffree(0);
1863       } else if (rt == T_FLOAT) {
1864         __ lea(rsp, Address(rsp, -4));
1865         __ fstp_s(Address(rsp, 0));
1866         __ movflt(xmm0, Address(rsp, 0));
1867         __ lea(rsp, Address(rsp,  4));
1868       } else if (rt == T_DOUBLE) {
1869         __ lea(rsp, Address(rsp, -8));
1870         __ fstp_d(Address(rsp, 0));
1871         __ movdbl(xmm0, Address(rsp, 0));
1872         __ lea(rsp, Address(rsp,  8));
1873       }
1874     }
1875   %}
1876 
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     if (ra_->C->max_vector_size() > 16) {
1886       // Clear upper bits of YMM registers when current compiled code uses
1887       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1888       MacroAssembler _masm(&cbuf);
1889       __ vzeroupper();
1890     }
1891     debug_only(int off1 = cbuf.insts_size());
1892     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1893   %}
1894 
1895   enc_class post_call_FPU %{
1896     // If method sets FPU control word do it here also
1897     if (Compile::current()->in_24_bit_fp_mode()) {
1898       MacroAssembler masm(&cbuf);
1899       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1900     }
1901   %}
1902 
1903   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1904     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1905     // who we intended to call.
1906     cbuf.set_insts_mark();
1907     $$$emit8$primary;
1908 
1909     if (!_method) {
1910       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1911                      runtime_call_Relocation::spec(),
1912                      RELOC_IMM32);
1913     } else {
1914       int method_index = resolved_method_index(cbuf);
1915       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1916                                                   : static_call_Relocation::spec(method_index);
1917       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1918                      rspec, RELOC_DISP32);
1919       // Emit stubs for static call.
1920       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1921       if (stub == NULL) {
1922         ciEnv::current()->record_failure("CodeCache is full");
1923         return;
1924       }
1925     }
1926   %}
1927 
1928   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1929     MacroAssembler _masm(&cbuf);
1930     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1931   %}
1932 
1933   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1934     int disp = in_bytes(Method::from_compiled_offset());
1935     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1936 
1937     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1938     cbuf.set_insts_mark();
1939     $$$emit8$primary;
1940     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1941     emit_d8(cbuf, disp);             // Displacement
1942 
1943   %}
1944 
1945 //   Following encoding is no longer used, but may be restored if calling
1946 //   convention changes significantly.
1947 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1948 //
1949 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1950 //     // int ic_reg     = Matcher::inline_cache_reg();
1951 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1952 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1953 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1954 //
1955 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1956 //     // // so we load it immediately before the call
1957 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1958 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1959 //
1960 //     // xor rbp,ebp
1961 //     emit_opcode(cbuf, 0x33);
1962 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1963 //
1964 //     // CALL to interpreter.
1965 //     cbuf.set_insts_mark();
1966 //     $$$emit8$primary;
1967 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1968 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1969 //   %}
1970 
1971   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1972     $$$emit8$primary;
1973     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1974     $$$emit8$shift$$constant;
1975   %}
1976 
1977   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1978     // Load immediate does not have a zero or sign extended version
1979     // for 8-bit immediates
1980     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1981     $$$emit32$src$$constant;
1982   %}
1983 
1984   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1985     // Load immediate does not have a zero or sign extended version
1986     // for 8-bit immediates
1987     emit_opcode(cbuf, $primary + $dst$$reg);
1988     $$$emit32$src$$constant;
1989   %}
1990 
1991   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1992     // Load immediate does not have a zero or sign extended version
1993     // for 8-bit immediates
1994     int dst_enc = $dst$$reg;
1995     int src_con = $src$$constant & 0x0FFFFFFFFL;
1996     if (src_con == 0) {
1997       // xor dst, dst
1998       emit_opcode(cbuf, 0x33);
1999       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2000     } else {
2001       emit_opcode(cbuf, $primary + dst_enc);
2002       emit_d32(cbuf, src_con);
2003     }
2004   %}
2005 
2006   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2007     // Load immediate does not have a zero or sign extended version
2008     // for 8-bit immediates
2009     int dst_enc = $dst$$reg + 2;
2010     int src_con = ((julong)($src$$constant)) >> 32;
2011     if (src_con == 0) {
2012       // xor dst, dst
2013       emit_opcode(cbuf, 0x33);
2014       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2015     } else {
2016       emit_opcode(cbuf, $primary + dst_enc);
2017       emit_d32(cbuf, src_con);
2018     }
2019   %}
2020 
2021 
2022   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2023   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2024     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2025   %}
2026 
2027   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2028     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2029   %}
2030 
2031   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2033   %}
2034 
2035   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     $$$emit8$primary;
2037     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2038   %}
2039 
2040   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2041     $$$emit8$secondary;
2042     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2043   %}
2044 
2045   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2046     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2047   %}
2048 
2049   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2050     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2051   %}
2052 
2053   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2054     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2055   %}
2056 
2057   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2058     // Output immediate
2059     $$$emit32$src$$constant;
2060   %}
2061 
2062   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2063     // Output Float immediate bits
2064     jfloat jf = $src$$constant;
2065     int    jf_as_bits = jint_cast( jf );
2066     emit_d32(cbuf, jf_as_bits);
2067   %}
2068 
2069   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2070     // Output Float immediate bits
2071     jfloat jf = $src$$constant;
2072     int    jf_as_bits = jint_cast( jf );
2073     emit_d32(cbuf, jf_as_bits);
2074   %}
2075 
2076   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2077     // Output immediate
2078     $$$emit16$src$$constant;
2079   %}
2080 
2081   enc_class Con_d32(immI src) %{
2082     emit_d32(cbuf,$src$$constant);
2083   %}
2084 
2085   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2086     // Output immediate memory reference
2087     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2088     emit_d32(cbuf, 0x00);
2089   %}
2090 
2091   enc_class lock_prefix( ) %{
2092     if( os::is_MP() )
2093       emit_opcode(cbuf,0xF0);         // [Lock]
2094   %}
2095 
2096   // Cmp-xchg long value.
2097   // Note: we need to swap ebx and ecx before and after the
2098   //       cmpxchg8 instruction because the instruction uses
2099   //       ecx as the high order word of the new value to store but
2100   //       our register encoding uses ebx.
2101   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2102 
2103     // XCHG  ebx,ecx
2104     emit_opcode(cbuf,0x87);
2105     emit_opcode(cbuf,0xD9);
2106     // [Lock]
2107     if( os::is_MP() )
2108       emit_opcode(cbuf,0xF0);
2109     // CMPXCHG8 [Eptr]
2110     emit_opcode(cbuf,0x0F);
2111     emit_opcode(cbuf,0xC7);
2112     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2113     // XCHG  ebx,ecx
2114     emit_opcode(cbuf,0x87);
2115     emit_opcode(cbuf,0xD9);
2116   %}
2117 
2118   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2119     // [Lock]
2120     if( os::is_MP() )
2121       emit_opcode(cbuf,0xF0);
2122 
2123     // CMPXCHG [Eptr]
2124     emit_opcode(cbuf,0x0F);
2125     emit_opcode(cbuf,0xB1);
2126     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2127   %}
2128 
2129   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2130     int res_encoding = $res$$reg;
2131 
2132     // MOV  res,0
2133     emit_opcode( cbuf, 0xB8 + res_encoding);
2134     emit_d32( cbuf, 0 );
2135     // JNE,s  fail
2136     emit_opcode(cbuf,0x75);
2137     emit_d8(cbuf, 5 );
2138     // MOV  res,1
2139     emit_opcode( cbuf, 0xB8 + res_encoding);
2140     emit_d32( cbuf, 1 );
2141     // fail:
2142   %}
2143 
2144   enc_class set_instruction_start( ) %{
2145     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2146   %}
2147 
2148   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2149     int reg_encoding = $ereg$$reg;
2150     int base  = $mem$$base;
2151     int index = $mem$$index;
2152     int scale = $mem$$scale;
2153     int displace = $mem$$disp;
2154     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2155     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2156   %}
2157 
2158   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2159     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2160     int base  = $mem$$base;
2161     int index = $mem$$index;
2162     int scale = $mem$$scale;
2163     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2164     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2165     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2166   %}
2167 
2168   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
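    // $tertiary selects the double shift: 0xA4 is SHLD (left), otherwise SHRD (right).
    // The double shift moves the carried-over bits between the two halves, then the
    // $primary/$secondary shift below shifts the remaining half by the same count.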
2169     int r1, r2;
2170     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2171     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2172     emit_opcode(cbuf,0x0F);
2173     emit_opcode(cbuf,$tertiary);
2174     emit_rm(cbuf, 0x3, r1, r2);
2175     emit_d8(cbuf,$cnt$$constant);
2176     emit_d8(cbuf,$primary);
2177     emit_rm(cbuf, 0x3, $secondary, r1);
2178     emit_d8(cbuf,$cnt$$constant);
2179   %}
2180 
2181   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2182     emit_opcode( cbuf, 0x8B ); // Move
2183     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2184     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2185       emit_d8(cbuf,$primary);
2186       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2187       emit_d8(cbuf,$cnt$$constant-32);
2188     }
2189     emit_d8(cbuf,$primary);
2190     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2191     emit_d8(cbuf,31);
2192   %}
2193 
2194   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2195     int r1, r2;
2196     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2197     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2198 
2199     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2200     emit_rm(cbuf, 0x3, r1, r2);
2201     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2202       emit_opcode(cbuf,$primary);
2203       emit_rm(cbuf, 0x3, $secondary, r1);
2204       emit_d8(cbuf,$cnt$$constant-32);
2205     }
2206     emit_opcode(cbuf,0x33);  // XOR r2,r2
2207     emit_rm(cbuf, 0x3, r2, r2);
2208   %}
2209 
2210   // Clone of RegMem but accepts an extra parameter to access each
2211   // half of a double in memory; it never needs relocation info.
2212   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2213     emit_opcode(cbuf,$opcode$$constant);
2214     int reg_encoding = $rm_reg$$reg;
2215     int base     = $mem$$base;
2216     int index    = $mem$$index;
2217     int scale    = $mem$$scale;
2218     int displace = $mem$$disp + $disp_for_half$$constant;
2219     relocInfo::relocType disp_reloc = relocInfo::none;
2220     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2221   %}
2222 
2223   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2224   //
2225   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2226   // and it never needs relocation information.
2227   // Frequently used to move data between FPU's Stack Top and memory.
2228   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2229     int rm_byte_opcode = $rm_opcode$$constant;
2230     int base     = $mem$$base;
2231     int index    = $mem$$index;
2232     int scale    = $mem$$scale;
2233     int displace = $mem$$disp;
2234     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2235     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2236   %}
2237 
2238   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2239     int rm_byte_opcode = $rm_opcode$$constant;
2240     int base     = $mem$$base;
2241     int index    = $mem$$index;
2242     int scale    = $mem$$scale;
2243     int displace = $mem$$disp;
2244     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2245     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2246   %}
2247 
2248   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2249     int reg_encoding = $dst$$reg;
2250     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2251     int index        = 0x04;            // 0x04 indicates no index
2252     int scale        = 0x00;            // 0x00 indicates no scale
2253     int displace     = $src1$$constant; // 0x00 indicates no displacement
2254     relocInfo::relocType disp_reloc = relocInfo::none;
2255     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2256   %}
2257 
2258   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2259     // Compare dst,src
2260     emit_opcode(cbuf,0x3B);
2261     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2262     // jmp dst < src around move
2263     emit_opcode(cbuf,0x7C);
2264     emit_d8(cbuf,2);
2265     // move dst,src
2266     emit_opcode(cbuf,0x8B);
2267     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2268   %}
2269 
2270   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2271     // Compare dst,src
2272     emit_opcode(cbuf,0x3B);
2273     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2274     // jmp dst > src around move
2275     emit_opcode(cbuf,0x7F);
2276     emit_d8(cbuf,2);
2277     // move dst,src
2278     emit_opcode(cbuf,0x8B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280   %}
2281 
2282   enc_class enc_FPR_store(memory mem, regDPR src) %{
2283     // If src is FPR1, we can just FST to store it.
2284     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2285     int reg_encoding = 0x2; // Just store
2286     int base  = $mem$$base;
2287     int index = $mem$$index;
2288     int scale = $mem$$scale;
2289     int displace = $mem$$disp;
2290     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2291     if( $src$$reg != FPR1L_enc ) {
2292       reg_encoding = 0x3;  // Store & pop
2293       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2294       emit_d8( cbuf, 0xC0-1+$src$$reg );
2295     }
2296     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2297     emit_opcode(cbuf,$primary);
2298     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2299   %}
2300 
2301   enc_class neg_reg(rRegI dst) %{
2302     // NEG $dst
2303     emit_opcode(cbuf,0xF7);
2304     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2305   %}
2306 
2307   enc_class setLT_reg(eCXRegI dst) %{
2308     // SETLT $dst
2309     emit_opcode(cbuf,0x0F);
2310     emit_opcode(cbuf,0x9C);
2311     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2312   %}
2313 
2314   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2315     int tmpReg = $tmp$$reg;
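    // Branchless conditional add: SUB sets the borrow flag when it underflows,
    // SBB tmp,tmp turns that borrow into an all-ones (or all-zero) mask,
    // AND keeps $y only when the mask is set, and the final ADD applies it to $p.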
2316 
2317     // SUB $p,$q
2318     emit_opcode(cbuf,0x2B);
2319     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2320     // SBB $tmp,$tmp
2321     emit_opcode(cbuf,0x1B);
2322     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2323     // AND $tmp,$y
2324     emit_opcode(cbuf,0x23);
2325     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2326     // ADD $p,$tmp
2327     emit_opcode(cbuf,0x03);
2328     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2329   %}
2330 
2331   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2332     // TEST shift,32
2333     emit_opcode(cbuf,0xF7);
2334     emit_rm(cbuf, 0x3, 0, ECX_enc);
2335     emit_d32(cbuf,0x20);
2336     // JEQ,s small
2337     emit_opcode(cbuf, 0x74);
2338     emit_d8(cbuf, 0x04);
2339     // MOV    $dst.hi,$dst.lo
2340     emit_opcode( cbuf, 0x8B );
2341     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2342     // CLR    $dst.lo
2343     emit_opcode(cbuf, 0x33);
2344     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2345 // small:
2346     // SHLD   $dst.hi,$dst.lo,$shift
2347     emit_opcode(cbuf,0x0F);
2348     emit_opcode(cbuf,0xA5);
2349     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2350     // SHL    $dst.lo,$shift
2351     emit_opcode(cbuf,0xD3);
2352     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2353   %}
2354 
2355   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2356     // TEST shift,32
2357     emit_opcode(cbuf,0xF7);
2358     emit_rm(cbuf, 0x3, 0, ECX_enc);
2359     emit_d32(cbuf,0x20);
2360     // JEQ,s small
2361     emit_opcode(cbuf, 0x74);
2362     emit_d8(cbuf, 0x04);
2363     // MOV    $dst.lo,$dst.hi
2364     emit_opcode( cbuf, 0x8B );
2365     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2366     // CLR    $dst.hi
2367     emit_opcode(cbuf, 0x33);
2368     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2369 // small:
2370     // SHRD   $dst.lo,$dst.hi,$shift
2371     emit_opcode(cbuf,0x0F);
2372     emit_opcode(cbuf,0xAD);
2373     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2374     // SHR    $dst.hi,$shift
2375     emit_opcode(cbuf,0xD3);
2376     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2377   %}
2378 
2379   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2380     // TEST shift,32
2381     emit_opcode(cbuf,0xF7);
2382     emit_rm(cbuf, 0x3, 0, ECX_enc);
2383     emit_d32(cbuf,0x20);
2384     // JEQ,s small
2385     emit_opcode(cbuf, 0x74);
2386     emit_d8(cbuf, 0x05);
2387     // MOV    $dst.lo,$dst.hi
2388     emit_opcode( cbuf, 0x8B );
2389     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2390     // SAR    $dst.hi,31
2391     emit_opcode(cbuf, 0xC1);
2392     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2393     emit_d8(cbuf, 0x1F );
2394 // small:
2395     // SHRD   $dst.lo,$dst.hi,$shift
2396     emit_opcode(cbuf,0x0F);
2397     emit_opcode(cbuf,0xAD);
2398     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2399     // SAR    $dst.hi,$shift
2400     emit_opcode(cbuf,0xD3);
2401     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2402   %}
2403 
2404 
2405   // ----------------- Encodings for floating point unit -----------------
2406   // May leave result in FPU-TOS or FPU reg depending on opcodes
2407   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2408     $$$emit8$primary;
2409     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2410   %}
2411 
2412   // Pop argument in FPR0 with FSTP ST(0)
2413   enc_class PopFPU() %{
2414     emit_opcode( cbuf, 0xDD );
2415     emit_d8( cbuf, 0xD8 );
2416   %}
2417 
2418   // !!!!! equivalent to Pop_Reg_F
2419   enc_class Pop_Reg_DPR( regDPR dst ) %{
2420     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2421     emit_d8( cbuf, 0xD8+$dst$$reg );
2422   %}
2423 
2424   enc_class Push_Reg_DPR( regDPR dst ) %{
2425     emit_opcode( cbuf, 0xD9 );
2426     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2427   %}
2428 
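  // The two strictfp bias encodings below multiply ST(dst) by 80-bit scaling
  // constants supplied by StubRoutines; the strictfp double multiply/divide rules
  // use them as a pre-scale/post-scale pair so results overflow and round as if
  // computed in plain double precision rather than the FPU's extended range.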
2429   enc_class strictfp_bias1( regDPR dst ) %{
2430     emit_opcode( cbuf, 0xDB );           // FLD m80real
2431     emit_opcode( cbuf, 0x2D );
2432     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2433     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2434     emit_opcode( cbuf, 0xC8+$dst$$reg );
2435   %}
2436 
2437   enc_class strictfp_bias2( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDB );           // FLD m80real
2439     emit_opcode( cbuf, 0x2D );
2440     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2441     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2442     emit_opcode( cbuf, 0xC8+$dst$$reg );
2443   %}
2444 
2445   // Special case for moving an integer register to a stack slot.
2446   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2447     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2448   %}
2449 
2450   // Special case for moving a register to a stack slot.
2451   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2452     // Opcode already emitted
2453     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2454     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2455     emit_d32(cbuf, $dst$$disp);   // Displacement
2456   %}
2457 
2458   // Push the integer in stackSlot 'src' onto FP-stack
2459   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2460     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2461   %}
2462 
2463   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2464   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2465     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2466   %}
2467 
2468   // Same as Pop_Mem_F except for opcode
2469   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2470   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2471     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2472   %}
2473 
2474   enc_class Pop_Reg_FPR( regFPR dst ) %{
2475     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2476     emit_d8( cbuf, 0xD8+$dst$$reg );
2477   %}
2478 
2479   enc_class Push_Reg_FPR( regFPR dst ) %{
2480     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2481     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2482   %}
2483 
2484   // Push FPU's float to a stack-slot, and pop FPU-stack
2485   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2486     int pop = 0x02;
2487     if ($src$$reg != FPR1L_enc) {
2488       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2489       emit_d8( cbuf, 0xC0-1+$src$$reg );
2490       pop = 0x03;
2491     }
2492     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2493   %}
2494 
2495   // Push FPU's double to a stack-slot, and pop FPU-stack
2496   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2497     int pop = 0x02;
2498     if ($src$$reg != FPR1L_enc) {
2499       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2500       emit_d8( cbuf, 0xC0-1+$src$$reg );
2501       pop = 0x03;
2502     }
2503     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2504   %}
2505 
2506   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2507   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2508     int pop = 0xD0 - 1; // -1 since we skip FLD
2509     if ($src$$reg != FPR1L_enc) {
2510       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2511       emit_d8( cbuf, 0xC0-1+$src$$reg );
2512       pop = 0xD8;
2513     }
2514     emit_opcode( cbuf, 0xDD );
2515     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2516   %}
2517 
2518 
2519   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2520     // load dst in FPR0
2521     emit_opcode( cbuf, 0xD9 );
2522     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2523     if ($src$$reg != FPR1L_enc) {
2524       // fincstp
2525       emit_opcode (cbuf, 0xD9);
2526       emit_opcode (cbuf, 0xF7);
2527       // swap src with FPR1:
2528       // FXCH FPR1 with src
2529       emit_opcode(cbuf, 0xD9);
2530       emit_d8(cbuf, 0xC8-1+$src$$reg );
2531       // fdecstp
2532       emit_opcode (cbuf, 0xD9);
2533       emit_opcode (cbuf, 0xF6);
2534     }
2535   %}
2536 
2537   enc_class Push_ModD_encoding(regD src0, regD src1) %{
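    // Spill src1 and then src0 through an 8-byte stack temp onto the x87 stack,
    // leaving src0 in ST(0) and src1 in ST(1); the temp is typically released
    // afterwards by Push_ResultD or pop_stack_temp_qword.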
2538     MacroAssembler _masm(&cbuf);
2539     __ subptr(rsp, 8);
2540     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2541     __ fld_d(Address(rsp, 0));
2542     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2543     __ fld_d(Address(rsp, 0));
2544   %}
2545 
2546   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2547     MacroAssembler _masm(&cbuf);
2548     __ subptr(rsp, 4);
2549     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2550     __ fld_s(Address(rsp, 0));
2551     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2552     __ fld_s(Address(rsp, 0));
2553   %}
2554 
2555   enc_class Push_ResultD(regD dst) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ fstp_d(Address(rsp, 0));
2558     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2559     __ addptr(rsp, 8);
2560   %}
2561 
2562   enc_class Push_ResultF(regF dst, immI d8) %{
2563     MacroAssembler _masm(&cbuf);
2564     __ fstp_s(Address(rsp, 0));
2565     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2566     __ addptr(rsp, $d8$$constant);
2567   %}
2568 
2569   enc_class Push_SrcD(regD src) %{
2570     MacroAssembler _masm(&cbuf);
2571     __ subptr(rsp, 8);
2572     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2573     __ fld_d(Address(rsp, 0));
2574   %}
2575 
2576   enc_class push_stack_temp_qword() %{
2577     MacroAssembler _masm(&cbuf);
2578     __ subptr(rsp, 8);
2579   %}
2580 
2581   enc_class pop_stack_temp_qword() %{
2582     MacroAssembler _masm(&cbuf);
2583     __ addptr(rsp, 8);
2584   %}
2585 
2586   enc_class push_xmm_to_fpr1(regD src) %{
2587     MacroAssembler _masm(&cbuf);
2588     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2589     __ fld_d(Address(rsp, 0));
2590   %}
2591 
2592   enc_class Push_Result_Mod_DPR( regDPR src) %{
2593     if ($src$$reg != FPR1L_enc) {
2594       // fincstp
2595       emit_opcode (cbuf, 0xD9);
2596       emit_opcode (cbuf, 0xF7);
2597       // FXCH FPR1 with src
2598       emit_opcode(cbuf, 0xD9);
2599       emit_d8(cbuf, 0xC8-1+$src$$reg );
2600       // fdecstp
2601       emit_opcode (cbuf, 0xD9);
2602       emit_opcode (cbuf, 0xF6);
2603     }
2604     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2605     // // FSTP   FPR$dst$$reg
2606     // emit_opcode( cbuf, 0xDD );
2607     // emit_d8( cbuf, 0xD8+$dst$$reg );
2608   %}
2609 
2610   enc_class fnstsw_sahf_skip_parity() %{
2611     // fnstsw ax
2612     emit_opcode( cbuf, 0xDF );
2613     emit_opcode( cbuf, 0xE0 );
2614     // sahf
2615     emit_opcode( cbuf, 0x9E );
2616     // jnp  ::skip
2617     emit_opcode( cbuf, 0x7B );
2618     emit_opcode( cbuf, 0x05 );
2619   %}
2620 
2621   enc_class emitModDPR() %{
2622     // fprem must be iterative
2623     // :: loop
2624     // fprem
2625     emit_opcode( cbuf, 0xD9 );
2626     emit_opcode( cbuf, 0xF8 );
2627     // wait
2628     emit_opcode( cbuf, 0x9b );
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jp  ::loop
2635     emit_opcode( cbuf, 0x0F );
2636     emit_opcode( cbuf, 0x8A );
2637     emit_opcode( cbuf, 0xF4 );
2638     emit_opcode( cbuf, 0xFF );
2639     emit_opcode( cbuf, 0xFF );
2640     emit_opcode( cbuf, 0xFF );
2641   %}
2642 
2643   enc_class fpu_flags() %{
2644     // fnstsw_ax
2645     emit_opcode( cbuf, 0xDF);
2646     emit_opcode( cbuf, 0xE0);
2647     // test ax,0x0400
2648     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2649     emit_opcode( cbuf, 0xA9 );
2650     emit_d16   ( cbuf, 0x0400 );
2651     // // // This sequence works, but stalls for 12-16 cycles on PPro
2652     // // test rax,0x0400
2653     // emit_opcode( cbuf, 0xA9 );
2654     // emit_d32   ( cbuf, 0x00000400 );
2655     //
2656     // jz exit (no unordered comparison)
2657     emit_opcode( cbuf, 0x74 );
2658     emit_d8    ( cbuf, 0x02 );
2659     // mov ah,1 - treat as LT case (set carry flag)
2660     emit_opcode( cbuf, 0xB4 );
2661     emit_d8    ( cbuf, 0x01 );
2662     // sahf
2663     emit_opcode( cbuf, 0x9E);
2664   %}
2665 
2666   enc_class cmpF_P6_fixup() %{
2667     // Fixup the integer flags in case comparison involved a NaN
2668     //
2669     // JNP exit (no unordered comparison, P-flag is set by NaN)
2670     emit_opcode( cbuf, 0x7B );
2671     emit_d8    ( cbuf, 0x03 );
2672     // MOV AH,1 - treat as LT case (set carry flag)
2673     emit_opcode( cbuf, 0xB4 );
2674     emit_d8    ( cbuf, 0x01 );
2675     // SAHF
2676     emit_opcode( cbuf, 0x9E);
2677     // NOP     // target for branch to avoid branch to branch
2678     emit_opcode( cbuf, 0x90);
2679   %}
2680 
2681 //     fnstsw_ax();
2682 //     sahf();
2683 //     movl(dst, nan_result);
2684 //     jcc(Assembler::parity, exit);
2685 //     movl(dst, less_result);
2686 //     jcc(Assembler::below, exit);
2687 //     movl(dst, equal_result);
2688 //     jcc(Assembler::equal, exit);
2689 //     movl(dst, greater_result);
2690 
2691 // less_result     =  1;
2692 // greater_result  = -1;
2693 // equal_result    = 0;
2694 // nan_result      = -1;
2695 
2696   enc_class CmpF_Result(rRegI dst) %{
2697     // fnstsw_ax();
2698     emit_opcode( cbuf, 0xDF);
2699     emit_opcode( cbuf, 0xE0);
2700     // sahf
2701     emit_opcode( cbuf, 0x9E);
2702     // movl(dst, nan_result);
2703     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2704     emit_d32( cbuf, -1 );
2705     // jcc(Assembler::parity, exit);
2706     emit_opcode( cbuf, 0x7A );
2707     emit_d8    ( cbuf, 0x13 );
2708     // movl(dst, less_result);
2709     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2710     emit_d32( cbuf, -1 );
2711     // jcc(Assembler::below, exit);
2712     emit_opcode( cbuf, 0x72 );
2713     emit_d8    ( cbuf, 0x0C );
2714     // movl(dst, equal_result);
2715     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2716     emit_d32( cbuf, 0 );
2717     // jcc(Assembler::equal, exit);
2718     emit_opcode( cbuf, 0x74 );
2719     emit_d8    ( cbuf, 0x05 );
2720     // movl(dst, greater_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, 1 );
2723   %}
2724 
2725 
2726   // Compare the longs and set flags
2727   // BROKEN!  Do Not use as-is
2728   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2729     // CMP    $src1.hi,$src2.hi
2730     emit_opcode( cbuf, 0x3B );
2731     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2732     // JNE,s  done
2733     emit_opcode(cbuf,0x75);
2734     emit_d8(cbuf, 2 );
2735     // CMP    $src1.lo,$src2.lo
2736     emit_opcode( cbuf, 0x3B );
2737     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2738 // done:
2739   %}
2740 
2741   enc_class convert_int_long( regL dst, rRegI src ) %{
2742     // mov $dst.lo,$src
2743     int dst_encoding = $dst$$reg;
2744     int src_encoding = $src$$reg;
2745     encode_Copy( cbuf, dst_encoding  , src_encoding );
2746     // mov $dst.hi,$src
2747     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2748     // sar $dst.hi,31
2749     emit_opcode( cbuf, 0xC1 );
2750     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2751     emit_d8(cbuf, 0x1F );
2752   %}
2753 
2754   enc_class convert_long_double( eRegL src ) %{
2755     // push $src.hi
2756     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2757     // push $src.lo
2758     emit_opcode(cbuf, 0x50+$src$$reg  );
2759     // fild 64-bits at [SP]
2760     emit_opcode(cbuf,0xdf);
2761     emit_d8(cbuf, 0x6C);
2762     emit_d8(cbuf, 0x24);
2763     emit_d8(cbuf, 0x00);
2764     // pop stack
2765     emit_opcode(cbuf, 0x83); // add  SP, #8
2766     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2767     emit_d8(cbuf, 0x8);
2768   %}
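       // Note (illustrative): pushing $src.hi first and $src.lo second leaves the
       // low word at [ESP] and the high word at [ESP+4], the little-endian 64-bit
       // layout that FILD m64 (DF /5) expects; the trailing ADD ESP,8 then discards
       // the temporary.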
2769 
2770   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2771     // IMUL   EDX:EAX,$src1
2772     emit_opcode( cbuf, 0xF7 );
2773     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2774     // SAR    EDX,$cnt-32
2775     int shift_count = ((int)$cnt$$constant) - 32;
2776     if (shift_count > 0) {
2777       emit_opcode(cbuf, 0xC1);
2778       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2779       emit_d8(cbuf, shift_count);
2780     }
2781   %}
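       // Note (illustrative): after the widening IMUL the high 32 bits of the
       // product are already in EDX, which accounts for the first 32 bits of the
       // shift; only the remaining $cnt-32 bits need the explicit SAR, and a count
       // of exactly 32 needs no shift at all.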
2782 
2783   // This version is the same as convert_long_double but omits the trailing ADD SP,8
2784   enc_class convert_long_double2( eRegL src ) %{
2785     // push $src.hi
2786     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2787     // push $src.lo
2788     emit_opcode(cbuf, 0x50+$src$$reg  );
2789     // fild 64-bits at [SP]
2790     emit_opcode(cbuf,0xdf);
2791     emit_d8(cbuf, 0x6C);
2792     emit_d8(cbuf, 0x24);
2793     emit_d8(cbuf, 0x00);
2794   %}
2795 
2796   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2797     // Basic idea: long = (long)int * (long)int
2798     // IMUL EDX:EAX, src
2799     emit_opcode( cbuf, 0xF7 );
2800     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2801   %}
2802 
2803   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2804     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2805     // MUL EDX:EAX, src
2806     emit_opcode( cbuf, 0xF7 );
2807     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2808   %}
2809 
2810   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2811     // Basic idea: lo(result) = lo(x_lo * y_lo)
2812     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2813     // MOV    $tmp,$src.lo
2814     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2815     // IMUL   $tmp,EDX
2816     emit_opcode( cbuf, 0x0F );
2817     emit_opcode( cbuf, 0xAF );
2818     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2819     // MOV    EDX,$src.hi
2820     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2821     // IMUL   EDX,EAX
2822     emit_opcode( cbuf, 0x0F );
2823     emit_opcode( cbuf, 0xAF );
2824     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2825     // ADD    $tmp,EDX
2826     emit_opcode( cbuf, 0x03 );
2827     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2828     // MUL   EDX:EAX,$src.lo
2829     emit_opcode( cbuf, 0xF7 );
2830     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2831     // ADD    EDX,ESI
2832     emit_opcode( cbuf, 0x03 );
2833     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2834   %}
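       // Note (illustrative derivation, not emitted code): writing
       //   x = x_hi*2^32 + x_lo  and  y = y_hi*2^32 + y_lo  gives
       //   x*y = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi)
       // and the last term lies entirely above bit 63, so only the three products
       // named in the comment above survive.  The sequence computes the two cross
       // products with IMUL, the full x_lo*y_lo with MUL, and the two ADDs fold the
       // cross products into the high half in EDX.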
2835 
2836   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2837     // Basic idea: lo(result) = lo(src * y_lo)
2838     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2839     // IMUL   $tmp,EDX,$src
2840     emit_opcode( cbuf, 0x6B );
2841     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2842     emit_d8( cbuf, (int)$src$$constant );
2843     // MOV    EDX,$src
2844     emit_opcode(cbuf, 0xB8 + EDX_enc);
2845     emit_d32( cbuf, (int)$src$$constant );
2846     // MUL   EDX:EAX,EDX
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2849     // ADD    EDX,ESI
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2852   %}
2853 
2854   enc_class long_div( eRegL src1, eRegL src2 ) %{
2855     // PUSH src1.hi
2856     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2857     // PUSH src1.lo
2858     emit_opcode(cbuf,               0x50+$src1$$reg  );
2859     // PUSH src2.hi
2860     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2861     // PUSH src2.lo
2862     emit_opcode(cbuf,               0x50+$src2$$reg  );
2863     // CALL directly to the runtime
2864     cbuf.set_insts_mark();
2865     emit_opcode(cbuf,0xE8);       // Call into runtime
2866     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2867     // Restore stack
2868     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2869     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2870     emit_d8(cbuf, 4*4);
2871   %}
2872 
2873   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2874     // PUSH src1.hi
2875     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2876     // PUSH src1.lo
2877     emit_opcode(cbuf,               0x50+$src1$$reg  );
2878     // PUSH src2.hi
2879     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2880     // PUSH src2.lo
2881     emit_opcode(cbuf,               0x50+$src2$$reg  );
2882     // CALL directly to the runtime
2883     cbuf.set_insts_mark();
2884     emit_opcode(cbuf,0xE8);       // Call into runtime
2885     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2886     // Restore stack
2887     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2888     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2889     emit_d8(cbuf, 4*4);
2890   %}
2891 
2892   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2893     // MOV   $tmp,$src.lo
2894     emit_opcode(cbuf, 0x8B);
2895     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2896     // OR    $tmp,$src.hi
2897     emit_opcode(cbuf, 0x0B);
2898     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2899   %}
2900 
2901   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2902     // CMP    $src1.lo,$src2.lo
2903     emit_opcode( cbuf, 0x3B );
2904     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2905     // JNE,s  skip
2906     emit_cc(cbuf, 0x70, 0x5);
2907     emit_d8(cbuf,2);
2908     // CMP    $src1.hi,$src2.hi
2909     emit_opcode( cbuf, 0x3B );
2910     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2911   %}
2912 
2913   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2914     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2915     emit_opcode( cbuf, 0x3B );
2916     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2917     // MOV    $tmp,$src1.hi
2918     emit_opcode( cbuf, 0x8B );
2919     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2920     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2921     emit_opcode( cbuf, 0x1B );
2922     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2923   %}
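       // Note (illustrative): the CMP on the low words followed by SBB on the high
       // words performs a full 64-bit subtraction, borrow included, so the resulting
       // sign/overflow flags answer signed "<" and ">=" for the whole long; the
       // other long compare encodings and the commuted-operand forms cover the
       // remaining comparisons.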
2924 
2925   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2926     // XOR    $tmp,$tmp
2927     emit_opcode(cbuf,0x33);  // XOR
2928     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2929     // CMP    $tmp,$src.lo
2930     emit_opcode( cbuf, 0x3B );
2931     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2932     // SBB    $tmp,$src.hi
2933     emit_opcode( cbuf, 0x1B );
2934     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2935   %}
2936 
2937   // Sniff, sniff... smells like Gnu Superoptimizer
2938   enc_class neg_long( eRegL dst ) %{
2939     emit_opcode(cbuf,0xF7);    // NEG hi
2940     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2941     emit_opcode(cbuf,0xF7);    // NEG lo
2942     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2943     emit_opcode(cbuf,0x83);    // SBB hi,0
2944     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2945     emit_d8    (cbuf,0 );
2946   %}
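       // Note (illustrative): NEG lo leaves CF set exactly when lo was non-zero,
       // which is the borrow the high word needs, so NEG hi / NEG lo / SBB hi,0 is
       // a full two's-complement negation.  E.g. negating 0x00000000_00000001:
       // hi -> 0, lo -> 0xFFFFFFFF with CF=1, then SBB gives hi = 0xFFFFFFFF, i.e. -1.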
2947 
2948   enc_class enc_pop_rdx() %{
2949     emit_opcode(cbuf,0x5A);
2950   %}
2951 
2952   enc_class enc_rethrow() %{
2953     cbuf.set_insts_mark();
2954     emit_opcode(cbuf, 0xE9);        // jmp    entry
2955     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2956                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2957   %}
2958 
2959 
2960   // Convert a double to an int.  Java semantics require we do complex
2961   // manipulations in the corner cases.  So we set the rounding mode to
2962   // 'zero', store the darned double down as an int, and reset the
2963   // rounding mode to 'nearest'.  The hardware stores a sentinel for the
2964   // corner cases, and a slow-path runtime call then produces the correct Java result.
2965   enc_class DPR2I_encoding( regDPR src ) %{
2966     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2967     // exceptions here, so that a NaN or other corner-case value would
2968     // throw an exception (but normal values get converted at full speed).
2969     // However, I2C adapters and other float-stack manglers leave pending
2970     // invalid-op exceptions hanging.  We would have to clear them before
2971     // enabling them, and that is more expensive than just testing for the
2972     // invalid value Intel stores down in the corner cases.
2973     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2974     emit_opcode(cbuf,0x2D);
2975     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2976     // Allocate a word
2977     emit_opcode(cbuf,0x83);            // SUB ESP,4
2978     emit_opcode(cbuf,0xEC);
2979     emit_d8(cbuf,0x04);
2980     // Encoding assumes a double has been pushed into FPR0.
2981     // Store down the double as an int, popping the FPU stack
2982     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2983     emit_opcode(cbuf,0x1C);
2984     emit_d8(cbuf,0x24);
2985     // Restore the rounding mode; mask the exception
2986     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2987     emit_opcode(cbuf,0x2D);
2988     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2989         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2990         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2991 
2992     // Load the converted int; adjust CPU stack
2993     emit_opcode(cbuf,0x58);       // POP EAX
2994     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2995     emit_d32   (cbuf,0x80000000); //         0x80000000
2996     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2997     emit_d8    (cbuf,0x07);       // Size of slow_call
2998     // Push src onto stack slow-path
2999     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3000     emit_d8    (cbuf,0xC0-1+$src$$reg );
3001     // CALL directly to the runtime
3002     cbuf.set_insts_mark();
3003     emit_opcode(cbuf,0xE8);       // Call into runtime
3004     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3005     // Carry on here...
3006   %}
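       // Note (illustrative): with the invalid-op exception masked, FISTP stores
       // the x87 "integer indefinite" value 0x80000000 for NaN and out-of-range
       // inputs, so the CMP/JNE above uses that value as the slow-path trigger.
       // The 7 bytes skipped are exactly the 2-byte FLD ST(i) plus the 5-byte CALL
       // that hand the original double to the d2i runtime wrapper, which applies
       // the Java corner-case rules (NaN -> 0, saturate to min/max int).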
3007 
3008   enc_class DPR2L_encoding( regDPR src ) %{
3009     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3010     emit_opcode(cbuf,0x2D);
3011     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3012     // Allocate two words (8 bytes)
3013     emit_opcode(cbuf,0x83);            // SUB ESP,8
3014     emit_opcode(cbuf,0xEC);
3015     emit_d8(cbuf,0x08);
3016     // Encoding assumes a double has been pushed into FPR0.
3017     // Store down the double as a long, popping the FPU stack
3018     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3019     emit_opcode(cbuf,0x3C);
3020     emit_d8(cbuf,0x24);
3021     // Restore the rounding mode; mask the exception
3022     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3023     emit_opcode(cbuf,0x2D);
3024     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3025         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3026         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3027 
3028     // Load the converted long; adjust CPU stack
3029     emit_opcode(cbuf,0x58);       // POP EAX
3030     emit_opcode(cbuf,0x5A);       // POP EDX
3031     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3032     emit_d8    (cbuf,0xFA);       // rdx
3033     emit_d32   (cbuf,0x80000000); //         0x80000000
3034     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3035     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3036     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3037     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3038     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3039     emit_d8    (cbuf,0x07);       // Size of slow_call
3040     // Push src onto stack slow-path
3041     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3042     emit_d8    (cbuf,0xC0-1+$src$$reg );
3043     // CALL directly to the runtime
3044     cbuf.set_insts_mark();
3045     emit_opcode(cbuf,0xE8);       // Call into runtime
3046     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3047     // Carry on here...
3048   %}
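       // Note (illustrative): the 64-bit x87 "integer indefinite" is
       // 0x8000000000000000, i.e. EDX == 0x80000000 and EAX == 0 after the two
       // POPs, which is why the slow path runs only when both the CMP EDX and the
       // TEST EAX fall through.  The first JNE displacement (0x07+4) covers the
       // 4-byte TEST/JNE pair as well as the 7-byte FLD + CALL slow-path sequence.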
3049 
3050   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3051     // Operand was loaded from memory into fp ST (stack top)
3052     // FMUL   ST,$src  /* D8 C8+i */
3053     emit_opcode(cbuf, 0xD8);
3054     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3055   %}
3056 
3057   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3058     // FADD   ST,src2  /* D8 C0+i */
3059     emit_opcode(cbuf, 0xD8);
3060     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3061     // could use FADDP  src2,fpST  /* DE C0+i */
3062   %}
3063 
3064   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3065     // FADDP  src2,ST  /* DE C0+i */
3066     emit_opcode(cbuf, 0xDE);
3067     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3068   %}
3069 
3070   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3071     // Operand has been loaded into fp ST (stack top)
3072       // FSUB   ST,$src1
3073       emit_opcode(cbuf, 0xD8);
3074       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3075 
3076       // FDIV
3077       emit_opcode(cbuf, 0xD8);
3078       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3079   %}
3080 
3081   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3082     // Operand was loaded from memory into fp ST (stack top)
3083     // FADD   ST,$src  /* D8 C0+i */
3084     emit_opcode(cbuf, 0xD8);
3085     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3086 
3087     // FMUL   ST,src2  /* D8 C8+i */
3088     emit_opcode(cbuf, 0xD8);
3089     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3090   %}
3091 
3092 
3093   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3094     // Operand was loaded from memory into fp ST (stack top)
3095     // FADD   ST,$src  /* D8 C0+i */
3096     emit_opcode(cbuf, 0xD8);
3097     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3098 
3099     // FMULP  src2,ST  /* DE C8+i */
3100     emit_opcode(cbuf, 0xDE);
3101     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3102   %}
3103 
3104   // Atomically load the volatile long
3105   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3106     emit_opcode(cbuf,0xDF);
3107     int rm_byte_opcode = 0x05;
3108     int base     = $mem$$base;
3109     int index    = $mem$$index;
3110     int scale    = $mem$$scale;
3111     int displace = $mem$$disp;
3112     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3113     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3114     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3115   %}
3116 
3117   // Volatile Store Long.  Must be atomic, so move it into
3118   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3119   // target address before the store (for null-ptr checks)
3120   // so the memory operand is used twice in the encoding.
3121   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3122     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3123     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x07;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132   %}
3133 
3134   // Safepoint Poll.  This polls the safepoint page, and causes an
3135   // exception if it is not readable.  Unfortunately, it kills the condition
3136   // code in the process.
3137   // We currently use TESTL [spp],EDI.
3138   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3139 
3140   enc_class Safepoint_Poll() %{
3141     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3142     emit_opcode(cbuf,0x85);
3143     emit_rm (cbuf, 0x0, 0x7, 0x5);
3144     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3145   %}
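       // Note (illustrative): the three emits above decode as 85 3D <page32>, i.e.
       // TEST DWORD PTR [polling_page], EDI -- the TESTL [spp],EDI form mentioned
       // in the comment above.  When the VM protects the polling page the load
       // faults, and the signal handler turns the fault into a safepoint stop.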
3146 %}
3147 
3148 
3149 //----------FRAME--------------------------------------------------------------
3150 // Definition of frame structure and management information.
3151 //
3152 //  S T A C K   L A Y O U T    Allocators stack-slot number
3153 //                             |   (to get allocators register number
3154 //  G  Owned by    |        |  v    add OptoReg::stack0())
3155 //  r   CALLER     |        |
3156 //  o     |        +--------+      pad to even-align allocators stack-slot
3157 //  w     V        |  pad0  |        numbers; owned by CALLER
3158 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3159 //  h     ^        |   in   |  5
3160 //        |        |  args  |  4   Holes in incoming args owned by SELF
3161 //  |     |        |        |  3
3162 //  |     |        +--------+
3163 //  V     |        | old out|      Empty on Intel, window on Sparc
3164 //        |    old |preserve|      Must be even aligned.
3165 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3166 //        |        |   in   |  3   area for Intel ret address
3167 //     Owned by    |preserve|      Empty on Sparc.
3168 //       SELF      +--------+
3169 //        |        |  pad2  |  2   pad to align old SP
3170 //        |        +--------+  1
3171 //        |        | locks  |  0
3172 //        |        +--------+----> OptoReg::stack0(), even aligned
3173 //        |        |  pad1  | 11   pad to align new SP
3174 //        |        +--------+
3175 //        |        |        | 10
3176 //        |        | spills |  9   spills
3177 //        V        |        |  8   (pad0 slot for callee)
3178 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3179 //        ^        |  out   |  7
3180 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3181 //     Owned by    +--------+
3182 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3183 //        |    new |preserve|      Must be even-aligned.
3184 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3185 //        |        |        |
3186 //
3187 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3188 //         known from SELF's arguments and the Java calling convention.
3189 //         Region 6-7 is determined per call site.
3190 // Note 2: If the calling convention leaves holes in the incoming argument
3191 //         area, those holes are owned by SELF.  Holes in the outgoing area
3192 //         are owned by the CALLEE.  Holes should not be necessary in the
3193 //         incoming area, as the Java calling convention is completely under
3194 //         the control of the AD file.  Doubles can be sorted and packed to
3195 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3196 //         varargs C calling conventions.
3197 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3198 //         even aligned with pad0 as needed.
3199 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3200 //         region 6-11 is even aligned; it may be padded out more so that
3201 //         the region from SP to FP meets the minimum stack alignment.
3202 
3203 frame %{
3204   // What direction does stack grow in (assumed to be same for C & Java)
3205   stack_direction(TOWARDS_LOW);
3206 
3207   // These three registers define part of the calling convention
3208   // between compiled code and the interpreter.
3209   inline_cache_reg(EAX);                // Inline Cache Register
3210   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3211 
3212   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3213   cisc_spilling_operand_name(indOffset32);
3214 
3215   // Number of stack slots consumed by locking an object
3216   sync_stack_slots(1);
3217 
3218   // Compiled code's Frame Pointer
3219   frame_pointer(ESP);
3220   // The interpreter stores its frame pointer in a register which is
3221   // saved to the stack by I2CAdaptors.
3222   // I2CAdaptors convert from interpreted Java to compiled Java.
3223   interpreter_frame_pointer(EBP);
3224 
3225   // Stack alignment requirement
3226   // Alignment size in bytes (128-bit -> 16 bytes)
3227   stack_alignment(StackAlignmentInBytes);
3228 
3229   // Number of stack slots between incoming argument block and the start of
3230   // a new frame.  The PROLOG must add this many slots to the stack.  The
3231   // EPILOG must remove this many slots.  Intel needs one slot for
3232   // return address and one for rbp, (must save rbp)
3233   in_preserve_stack_slots(2+VerifyStackAtCalls);
3234 
3235   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3236   // for calls to C.  Supports the var-args backing area for register parms.
3237   varargs_C_out_slots_killed(0);
3238 
3239   // The after-PROLOG location of the return address.  Location of
3240   // return address specifies a type (REG or STACK) and a number
3241   // representing the register number (i.e. - use a register name) or
3242   // stack slot.
3243   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3244   // Otherwise, it is above the locks and verification slot and alignment word
3245   return_addr(STACK - 1 +
3246               round_to((Compile::current()->in_preserve_stack_slots() +
3247                         Compile::current()->fixed_slots()),
3248                        stack_alignment_in_slots()));
3249 
3250   // Body of function which returns an integer array locating
3251   // arguments either in registers or in stack slots.  Passed an array
3252   // of ideal registers called "sig" and a "length" count.  Stack-slot
3253   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3254   // arguments for a CALLEE.  Incoming stack arguments are
3255   // automatically biased by the preserve_stack_slots field above.
3256   calling_convention %{
3257     // No difference between ingoing/outgoing, so just pass false
3258     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3259   %}
3260 
3261 
3262   // Body of function which returns an integer array locating
3263   // arguments either in registers or in stack slots.  Passed an array
3264   // of ideal registers called "sig" and a "length" count.  Stack-slot
3265   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3266   // arguments for a CALLEE.  Incoming stack arguments are
3267   // automatically biased by the preserve_stack_slots field above.
3268   c_calling_convention %{
3269     // This is obviously always outgoing
3270     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3271   %}
3272 
3273   // Location of C & interpreter return values
3274   c_return_value %{
3275     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3276     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3277     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3278 
3279     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3280     // that C functions return float and double results in XMM0.
3281     if( ideal_reg == Op_RegD && UseSSE>=2 )
3282       return OptoRegPair(XMM0b_num,XMM0_num);
3283     if( ideal_reg == Op_RegF && UseSSE>=2 )
3284       return OptoRegPair(OptoReg::Bad,XMM0_num);
3285 
3286     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3287   %}
3288 
3289   // Location of return values
3290   return_value %{
3291     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3292     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3293     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3294     if( ideal_reg == Op_RegD && UseSSE>=2 )
3295       return OptoRegPair(XMM0b_num,XMM0_num);
3296     if( ideal_reg == Op_RegF && UseSSE>=1 )
3297       return OptoRegPair(OptoReg::Bad,XMM0_num);
3298     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3299   %}
3300 
3301 %}
3302 
3303 //----------ATTRIBUTES---------------------------------------------------------
3304 //----------Operand Attributes-------------------------------------------------
3305 op_attrib op_cost(0);        // Required cost attribute
3306 
3307 //----------Instruction Attributes---------------------------------------------
3308 ins_attrib ins_cost(100);       // Required cost attribute
3309 ins_attrib ins_size(8);         // Required size attribute (in bits)
3310 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3311                                 // non-matching short branch variant of some
3312                                 // long branch?
3313 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3314                                 // specifies the alignment that some part of the instruction (not
3315                                 // necessarily the start) requires.  If > 1, a compute_padding()
3316                                 // function must be provided for the instruction
3317 
3318 //----------OPERANDS-----------------------------------------------------------
3319 // Operand definitions must precede instruction definitions for correct parsing
3320 // in the ADLC because operands constitute user defined types which are used in
3321 // instruction definitions.
3322 
3323 //----------Simple Operands----------------------------------------------------
3324 // Immediate Operands
3325 // Integer Immediate
3326 operand immI() %{
3327   match(ConI);
3328 
3329   op_cost(10);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for test vs zero
3335 operand immI0() %{
3336   predicate(n->get_int() == 0);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for increment
3345 operand immI1() %{
3346   predicate(n->get_int() == 1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Constant for decrement
3355 operand immI_M1() %{
3356   predicate(n->get_int() == -1);
3357   match(ConI);
3358 
3359   op_cost(0);
3360   format %{ %}
3361   interface(CONST_INTER);
3362 %}
3363 
3364 // Valid scale values for addressing modes
3365 operand immI2() %{
3366   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3367   match(ConI);
3368 
3369   format %{ %}
3370   interface(CONST_INTER);
3371 %}
3372 
3373 operand immI8() %{
3374   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3375   match(ConI);
3376 
3377   op_cost(5);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 operand immI16() %{
3383   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3384   match(ConI);
3385 
3386   op_cost(10);
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 // Int Immediate non-negative
3392 operand immU31()
3393 %{
3394   predicate(n->get_int() >= 0);
3395   match(ConI);
3396 
3397   op_cost(0);
3398   format %{ %}
3399   interface(CONST_INTER);
3400 %}
3401 
3402 // Constant for long shifts
3403 operand immI_32() %{
3404   predicate( n->get_int() == 32 );
3405   match(ConI);
3406 
3407   op_cost(0);
3408   format %{ %}
3409   interface(CONST_INTER);
3410 %}
3411 
3412 operand immI_1_31() %{
3413   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3414   match(ConI);
3415 
3416   op_cost(0);
3417   format %{ %}
3418   interface(CONST_INTER);
3419 %}
3420 
3421 operand immI_32_63() %{
3422   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3423   match(ConI);
3424   op_cost(0);
3425 
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1() %{
3431   predicate( n->get_int() == 1 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_2() %{
3440   predicate( n->get_int() == 2 );
3441   match(ConI);
3442 
3443   op_cost(0);
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_3() %{
3449   predicate( n->get_int() == 3 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 // Pointer Immediate
3458 operand immP() %{
3459   match(ConP);
3460 
3461   op_cost(10);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 // NULL Pointer Immediate
3467 operand immP0() %{
3468   predicate( n->get_ptr() == 0 );
3469   match(ConP);
3470   op_cost(0);
3471 
3472   format %{ %}
3473   interface(CONST_INTER);
3474 %}
3475 
3476 // Long Immediate
3477 operand immL() %{
3478   match(ConL);
3479 
3480   op_cost(20);
3481   format %{ %}
3482   interface(CONST_INTER);
3483 %}
3484 
3485 // Long Immediate zero
3486 operand immL0() %{
3487   predicate( n->get_long() == 0L );
3488   match(ConL);
3489   op_cost(0);
3490 
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
3495 // Long Immediate minus one
3496 operand immL_M1() %{
3497   predicate( n->get_long() == -1L );
3498   match(ConL);
3499   op_cost(0);
3500 
3501   format %{ %}
3502   interface(CONST_INTER);
3503 %}
3504 
3505 // Long immediate from 0 to 127.
3506 // Used for a shorter form of long mul by 10.
3507 operand immL_127() %{
3508   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3509   match(ConL);
3510   op_cost(0);
3511 
3512   format %{ %}
3513   interface(CONST_INTER);
3514 %}
3515 
3516 // Long Immediate: low 32-bit mask
3517 operand immL_32bits() %{
3518   predicate(n->get_long() == 0xFFFFFFFFL);
3519   match(ConL);
3520   op_cost(0);
3521 
3522   format %{ %}
3523   interface(CONST_INTER);
3524 %}
3525 
3526 // Long Immediate: 32-bit signed value
3527 operand immL32() %{
3528   predicate(n->get_long() == (int)(n->get_long()));
3529   match(ConL);
3530   op_cost(20);
3531 
3532   format %{ %}
3533   interface(CONST_INTER);
3534 %}
3535 
3536 // Double Immediate zero
3537 operand immDPR0() %{
3538   // Do additional (and counter-intuitive) test against NaN to work around a VC++
3539   // bug that generates code such that NaNs compare equal to 0.0
3540   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3541   match(ConD);
3542 
3543   op_cost(5);
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Double Immediate one
3549 operand immDPR1() %{
3550   predicate( UseSSE<=1 && n->getd() == 1.0 );
3551   match(ConD);
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 // Double Immediate
3559 operand immDPR() %{
3560   predicate(UseSSE<=1);
3561   match(ConD);
3562 
3563   op_cost(5);
3564   format %{ %}
3565   interface(CONST_INTER);
3566 %}
3567 
3568 operand immD() %{
3569   predicate(UseSSE>=2);
3570   match(ConD);
3571 
3572   op_cost(5);
3573   format %{ %}
3574   interface(CONST_INTER);
3575 %}
3576 
3577 // Double Immediate zero
3578 operand immD0() %{
3579   // Do additional (and counter-intuitive) test against NaN to work around a VC++
3580   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3581   // compare equal to -0.0.
3582   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3583   match(ConD);
3584 
3585   format %{ %}
3586   interface(CONST_INTER);
3587 %}
3588 
3589 // Float Immediate zero
3590 operand immFPR0() %{
3591   predicate(UseSSE == 0 && n->getf() == 0.0F);
3592   match(ConF);
3593 
3594   op_cost(5);
3595   format %{ %}
3596   interface(CONST_INTER);
3597 %}
3598 
3599 // Float Immediate one
3600 operand immFPR1() %{
3601   predicate(UseSSE == 0 && n->getf() == 1.0F);
3602   match(ConF);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Float Immediate
3610 operand immFPR() %{
3611   predicate( UseSSE == 0 );
3612   match(ConF);
3613 
3614   op_cost(5);
3615   format %{ %}
3616   interface(CONST_INTER);
3617 %}
3618 
3619 // Float Immediate
3620 operand immF() %{
3621   predicate(UseSSE >= 1);
3622   match(ConF);
3623 
3624   op_cost(5);
3625   format %{ %}
3626   interface(CONST_INTER);
3627 %}
3628 
3629 // Float Immediate zero.  Zero and not -0.0
3630 operand immF0() %{
3631   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3632   match(ConF);
3633 
3634   op_cost(5);
3635   format %{ %}
3636   interface(CONST_INTER);
3637 %}
3638 
3639 // Immediates for special shifts (sign extend)
3640 
3641 // Constants for increment
3642 operand immI_16() %{
3643   predicate( n->get_int() == 16 );
3644   match(ConI);
3645 
3646   format %{ %}
3647   interface(CONST_INTER);
3648 %}
3649 
3650 operand immI_24() %{
3651   predicate( n->get_int() == 24 );
3652   match(ConI);
3653 
3654   format %{ %}
3655   interface(CONST_INTER);
3656 %}
3657 
3658 // Constant for byte-wide masking
3659 operand immI_255() %{
3660   predicate( n->get_int() == 255 );
3661   match(ConI);
3662 
3663   format %{ %}
3664   interface(CONST_INTER);
3665 %}
3666 
3667 // Constant for short-wide masking
3668 operand immI_65535() %{
3669   predicate(n->get_int() == 65535);
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Register Operands
3677 // Integer Register
3678 operand rRegI() %{
3679   constraint(ALLOC_IN_RC(int_reg));
3680   match(RegI);
3681   match(xRegI);
3682   match(eAXRegI);
3683   match(eBXRegI);
3684   match(eCXRegI);
3685   match(eDXRegI);
3686   match(eDIRegI);
3687   match(eSIRegI);
3688 
3689   format %{ %}
3690   interface(REG_INTER);
3691 %}
3692 
3693 // Subset of Integer Register
3694 operand xRegI(rRegI reg) %{
3695   constraint(ALLOC_IN_RC(int_x_reg));
3696   match(reg);
3697   match(eAXRegI);
3698   match(eBXRegI);
3699   match(eCXRegI);
3700   match(eDXRegI);
3701 
3702   format %{ %}
3703   interface(REG_INTER);
3704 %}
3705 
3706 // Special Registers
3707 operand eAXRegI(xRegI reg) %{
3708   constraint(ALLOC_IN_RC(eax_reg));
3709   match(reg);
3710   match(rRegI);
3711 
3712   format %{ "EAX" %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 // Special Registers
3717 operand eBXRegI(xRegI reg) %{
3718   constraint(ALLOC_IN_RC(ebx_reg));
3719   match(reg);
3720   match(rRegI);
3721 
3722   format %{ "EBX" %}
3723   interface(REG_INTER);
3724 %}
3725 
3726 operand eCXRegI(xRegI reg) %{
3727   constraint(ALLOC_IN_RC(ecx_reg));
3728   match(reg);
3729   match(rRegI);
3730 
3731   format %{ "ECX" %}
3732   interface(REG_INTER);
3733 %}
3734 
3735 operand eDXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(edx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EDX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eDIRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(edi_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "EDI" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand naxRegI() %{
3754   constraint(ALLOC_IN_RC(nax_reg));
3755   match(RegI);
3756   match(eCXRegI);
3757   match(eDXRegI);
3758   match(eSIRegI);
3759   match(eDIRegI);
3760 
3761   format %{ %}
3762   interface(REG_INTER);
3763 %}
3764 
3765 operand nadxRegI() %{
3766   constraint(ALLOC_IN_RC(nadx_reg));
3767   match(RegI);
3768   match(eBXRegI);
3769   match(eCXRegI);
3770   match(eSIRegI);
3771   match(eDIRegI);
3772 
3773   format %{ %}
3774   interface(REG_INTER);
3775 %}
3776 
3777 operand ncxRegI() %{
3778   constraint(ALLOC_IN_RC(ncx_reg));
3779   match(RegI);
3780   match(eAXRegI);
3781   match(eDXRegI);
3782   match(eSIRegI);
3783   match(eDIRegI);
3784 
3785   format %{ %}
3786   interface(REG_INTER);
3787 %}
3788 
3789 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3790 // //
3791 operand eSIRegI(xRegI reg) %{
3792    constraint(ALLOC_IN_RC(esi_reg));
3793    match(reg);
3794    match(rRegI);
3795 
3796    format %{ "ESI" %}
3797    interface(REG_INTER);
3798 %}
3799 
3800 // Pointer Register
3801 operand anyRegP() %{
3802   constraint(ALLOC_IN_RC(any_reg));
3803   match(RegP);
3804   match(eAXRegP);
3805   match(eBXRegP);
3806   match(eCXRegP);
3807   match(eDIRegP);
3808   match(eRegP);
3809 
3810   format %{ %}
3811   interface(REG_INTER);
3812 %}
3813 
3814 operand eRegP() %{
3815   constraint(ALLOC_IN_RC(int_reg));
3816   match(RegP);
3817   match(eAXRegP);
3818   match(eBXRegP);
3819   match(eCXRegP);
3820   match(eDIRegP);
3821 
3822   format %{ %}
3823   interface(REG_INTER);
3824 %}
3825 
3826 // On Windows 95, EBP is not safe to use for implicit null tests.
3827 operand eRegP_no_EBP() %{
3828   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3829   match(RegP);
3830   match(eAXRegP);
3831   match(eBXRegP);
3832   match(eCXRegP);
3833   match(eDIRegP);
3834 
3835   op_cost(100);
3836   format %{ %}
3837   interface(REG_INTER);
3838 %}
3839 
3840 operand naxRegP() %{
3841   constraint(ALLOC_IN_RC(nax_reg));
3842   match(RegP);
3843   match(eBXRegP);
3844   match(eDXRegP);
3845   match(eCXRegP);
3846   match(eSIRegP);
3847   match(eDIRegP);
3848 
3849   format %{ %}
3850   interface(REG_INTER);
3851 %}
3852 
3853 operand nabxRegP() %{
3854   constraint(ALLOC_IN_RC(nabx_reg));
3855   match(RegP);
3856   match(eCXRegP);
3857   match(eDXRegP);
3858   match(eSIRegP);
3859   match(eDIRegP);
3860 
3861   format %{ %}
3862   interface(REG_INTER);
3863 %}
3864 
3865 operand pRegP() %{
3866   constraint(ALLOC_IN_RC(p_reg));
3867   match(RegP);
3868   match(eBXRegP);
3869   match(eDXRegP);
3870   match(eSIRegP);
3871   match(eDIRegP);
3872 
3873   format %{ %}
3874   interface(REG_INTER);
3875 %}
3876 
3877 // Special Registers
3878 // Return a pointer value
3879 operand eAXRegP(eRegP reg) %{
3880   constraint(ALLOC_IN_RC(eax_reg));
3881   match(reg);
3882   format %{ "EAX" %}
3883   interface(REG_INTER);
3884 %}
3885 
3886 // Used in AtomicAdd
3887 operand eBXRegP(eRegP reg) %{
3888   constraint(ALLOC_IN_RC(ebx_reg));
3889   match(reg);
3890   format %{ "EBX" %}
3891   interface(REG_INTER);
3892 %}
3893 
3894 // Tail-call (interprocedural jump) to interpreter
3895 operand eCXRegP(eRegP reg) %{
3896   constraint(ALLOC_IN_RC(ecx_reg));
3897   match(reg);
3898   format %{ "ECX" %}
3899   interface(REG_INTER);
3900 %}
3901 
3902 operand eSIRegP(eRegP reg) %{
3903   constraint(ALLOC_IN_RC(esi_reg));
3904   match(reg);
3905   format %{ "ESI" %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Used in rep stosw
3910 operand eDIRegP(eRegP reg) %{
3911   constraint(ALLOC_IN_RC(edi_reg));
3912   match(reg);
3913   format %{ "EDI" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 operand eRegL() %{
3918   constraint(ALLOC_IN_RC(long_reg));
3919   match(RegL);
3920   match(eADXRegL);
3921 
3922   format %{ %}
3923   interface(REG_INTER);
3924 %}
3925 
3926 operand eADXRegL( eRegL reg ) %{
3927   constraint(ALLOC_IN_RC(eadx_reg));
3928   match(reg);
3929 
3930   format %{ "EDX:EAX" %}
3931   interface(REG_INTER);
3932 %}
3933 
3934 operand eBCXRegL( eRegL reg ) %{
3935   constraint(ALLOC_IN_RC(ebcx_reg));
3936   match(reg);
3937 
3938   format %{ "EBX:ECX" %}
3939   interface(REG_INTER);
3940 %}
3941 
3942 // Special case for integer high multiply
3943 operand eADXRegL_low_only() %{
3944   constraint(ALLOC_IN_RC(eadx_reg));
3945   match(RegL);
3946 
3947   format %{ "EAX" %}
3948   interface(REG_INTER);
3949 %}
3950 
3951 // Flags register, used as output of compare instructions
3952 operand eFlagsReg() %{
3953   constraint(ALLOC_IN_RC(int_flags));
3954   match(RegFlags);
3955 
3956   format %{ "EFLAGS" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Flags register, used as output of FLOATING POINT compare instructions
3961 operand eFlagsRegU() %{
3962   constraint(ALLOC_IN_RC(int_flags));
3963   match(RegFlags);
3964 
3965   format %{ "EFLAGS_U" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 operand eFlagsRegUCF() %{
3970   constraint(ALLOC_IN_RC(int_flags));
3971   match(RegFlags);
3972   predicate(false);
3973 
3974   format %{ "EFLAGS_U_CF" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Condition Code Register used by long compare
3979 operand flagsReg_long_LTGE() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982   format %{ "FLAGS_LTGE" %}
3983   interface(REG_INTER);
3984 %}
3985 operand flagsReg_long_EQNE() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_EQNE" %}
3989   interface(REG_INTER);
3990 %}
3991 operand flagsReg_long_LEGT() %{
3992   constraint(ALLOC_IN_RC(int_flags));
3993   match(RegFlags);
3994   format %{ "FLAGS_LEGT" %}
3995   interface(REG_INTER);
3996 %}
3997 
3998 // Float register operands
3999 operand regDPR() %{
4000   predicate( UseSSE < 2 );
4001   constraint(ALLOC_IN_RC(fp_dbl_reg));
4002   match(RegD);
4003   match(regDPR1);
4004   match(regDPR2);
4005   format %{ %}
4006   interface(REG_INTER);
4007 %}
4008 
4009 operand regDPR1(regDPR reg) %{
4010   predicate( UseSSE < 2 );
4011   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4012   match(reg);
4013   format %{ "FPR1" %}
4014   interface(REG_INTER);
4015 %}
4016 
4017 operand regDPR2(regDPR reg) %{
4018   predicate( UseSSE < 2 );
4019   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4020   match(reg);
4021   format %{ "FPR2" %}
4022   interface(REG_INTER);
4023 %}
4024 
4025 operand regnotDPR1(regDPR reg) %{
4026   predicate( UseSSE < 2 );
4027   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4028   match(reg);
4029   format %{ %}
4030   interface(REG_INTER);
4031 %}
4032 
4033 // Float register operands
4034 operand regFPR() %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_flt_reg));
4037   match(RegF);
4038   match(regFPR1);
4039   format %{ %}
4040   interface(REG_INTER);
4041 %}
4042 
4043 // Float register operands
4044 operand regFPR1(regFPR reg) %{
4045   predicate( UseSSE < 2 );
4046   constraint(ALLOC_IN_RC(fp_flt_reg0));
4047   match(reg);
4048   format %{ "FPR1" %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 // XMM Float register operands
4053 operand regF() %{
4054   predicate( UseSSE>=1 );
4055   constraint(ALLOC_IN_RC(float_reg_legacy));
4056   match(RegF);
4057   format %{ %}
4058   interface(REG_INTER);
4059 %}
4060 
4061 // XMM Double register operands
4062 operand regD() %{
4063   predicate( UseSSE>=2 );
4064   constraint(ALLOC_IN_RC(double_reg_legacy));
4065   match(RegD);
4066   format %{ %}
4067   interface(REG_INTER);
4068 %}
4069 
4070 // Vectors: note that we use legacy register classes to avoid the extra runtime
4071 // code generation via reg_class_dynamic, which is not needed in the 32-bit VM.
4072 operand vecS() %{
4073   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4074   match(VecS);
4075 
4076   format %{ %}
4077   interface(REG_INTER);
4078 %}
4079 
4080 operand vecD() %{
4081   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4082   match(VecD);
4083 
4084   format %{ %}
4085   interface(REG_INTER);
4086 %}
4087 
4088 operand vecX() %{
4089   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4090   match(VecX);
4091 
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 operand vecY() %{
4097   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4098   match(VecY);
4099 
4100   format %{ %}
4101   interface(REG_INTER);
4102 %}
4103 
4104 //----------Memory Operands----------------------------------------------------
4105 // Direct Memory Operand
4106 operand direct(immP addr) %{
4107   match(addr);
4108 
4109   format %{ "[$addr]" %}
4110   interface(MEMORY_INTER) %{
4111     base(0xFFFFFFFF);
4112     index(0x4);
4113     scale(0x0);
4114     disp($addr);
4115   %}
4116 %}
4117 
4118 // Indirect Memory Operand
4119 operand indirect(eRegP reg) %{
4120   constraint(ALLOC_IN_RC(int_reg));
4121   match(reg);
4122 
4123   format %{ "[$reg]" %}
4124   interface(MEMORY_INTER) %{
4125     base($reg);
4126     index(0x4);
4127     scale(0x0);
4128     disp(0x0);
4129   %}
4130 %}
4131 
4132 // Indirect Memory Plus Short Offset Operand
4133 operand indOffset8(eRegP reg, immI8 off) %{
4134   match(AddP reg off);
4135 
4136   format %{ "[$reg + $off]" %}
4137   interface(MEMORY_INTER) %{
4138     base($reg);
4139     index(0x4);
4140     scale(0x0);
4141     disp($off);
4142   %}
4143 %}
4144 
4145 // Indirect Memory Plus Long Offset Operand
4146 operand indOffset32(eRegP reg, immI off) %{
4147   match(AddP reg off);
4148 
4149   format %{ "[$reg + $off]" %}
4150   interface(MEMORY_INTER) %{
4151     base($reg);
4152     index(0x4);
4153     scale(0x0);
4154     disp($off);
4155   %}
4156 %}
4157 
4158 // Indirect Memory Plus Long Offset Operand
4159 operand indOffset32X(rRegI reg, immP off) %{
4160   match(AddP off reg);
4161 
4162   format %{ "[$reg + $off]" %}
4163   interface(MEMORY_INTER) %{
4164     base($reg);
4165     index(0x4);
4166     scale(0x0);
4167     disp($off);
4168   %}
4169 %}
4170 
4171 // Indirect Memory Plus Index Register Plus Offset Operand
4172 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4173   match(AddP (AddP reg ireg) off);
4174 
4175   op_cost(10);
4176   format %{"[$reg + $off + $ireg]" %}
4177   interface(MEMORY_INTER) %{
4178     base($reg);
4179     index($ireg);
4180     scale(0x0);
4181     disp($off);
4182   %}
4183 %}
4184 
4185 // Indirect Memory Plus Index Register Plus Offset Operand
4186 operand indIndex(eRegP reg, rRegI ireg) %{
4187   match(AddP reg ireg);
4188 
4189   op_cost(10);
4190   format %{"[$reg + $ireg]" %}
4191   interface(MEMORY_INTER) %{
4192     base($reg);
4193     index($ireg);
4194     scale(0x0);
4195     disp(0x0);
4196   %}
4197 %}
4198 
4199 // // -------------------------------------------------------------------------
4200 // // 486 architecture doesn't support "scale * index + offset" without a base
4201 // // -------------------------------------------------------------------------
4202 // // Scaled Memory Operands
4203 // // Indirect Memory Times Scale Plus Offset Operand
4204 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4205 //   match(AddP off (LShiftI ireg scale));
4206 //
4207 //   op_cost(10);
4208 //   format %{"[$off + $ireg << $scale]" %}
4209 //   interface(MEMORY_INTER) %{
4210 //     base(0x4);
4211 //     index($ireg);
4212 //     scale($scale);
4213 //     disp($off);
4214 //   %}
4215 // %}
4216 
4217 // Indirect Memory Times Scale Plus Index Register
4218 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4219   match(AddP reg (LShiftI ireg scale));
4220 
4221   op_cost(10);
4222   format %{"[$reg + $ireg << $scale]" %}
4223   interface(MEMORY_INTER) %{
4224     base($reg);
4225     index($ireg);
4226     scale($scale);
4227     disp(0x0);
4228   %}
4229 %}
4230 
4231 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4232 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4233   match(AddP (AddP reg (LShiftI ireg scale)) off);
4234 
4235   op_cost(10);
4236   format %{"[$reg + $off + $ireg << $scale]" %}
4237   interface(MEMORY_INTER) %{
4238     base($reg);
4239     index($ireg);
4240     scale($scale);
4241     disp($off);
4242   %}
4243 %}
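     // For illustration only: a typical match for this operand is a Java int-array
     // element address of the form  base_oop + arrayBaseOffset + (index << 2),
     // where the exact base offset depends on the VM's object layout.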
4244 
4245 //----------Load Long Memory Operands------------------------------------------
4246 // The load-long idiom will use its address expression again after loading
4247 // the first word of the long.  If the load-long destination overlaps with
4248 // registers used in the addressing expression, the 2nd half will be loaded
4249 // from a clobbered address.  Fix this by requiring that load-long use
4250 // address registers that do not overlap with the load-long target.
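     // For example, if the first 32-bit load wrote the same register that the
     // address uses as its base, the second load (at base+4) would read from a
     // clobbered address; constraining the base to a register class disjoint from
     // the load-long destination avoids that.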
4251 
4252 // load-long support
4253 operand load_long_RegP() %{
4254   constraint(ALLOC_IN_RC(esi_reg));
4255   match(RegP);
4256   match(eSIRegP);
4257   op_cost(100);
4258   format %{  %}
4259   interface(REG_INTER);
4260 %}
4261 
4262 // Indirect Memory Operand Long
4263 operand load_long_indirect(load_long_RegP reg) %{
4264   constraint(ALLOC_IN_RC(esi_reg));
4265   match(reg);
4266 
4267   format %{ "[$reg]" %}
4268   interface(MEMORY_INTER) %{
4269     base($reg);
4270     index(0x4);
4271     scale(0x0);
4272     disp(0x0);
4273   %}
4274 %}
4275 
4276 // Indirect Memory Plus Long Offset Operand
4277 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4278   match(AddP reg off);
4279 
4280   format %{ "[$reg + $off]" %}
4281   interface(MEMORY_INTER) %{
4282     base($reg);
4283     index(0x4);
4284     scale(0x0);
4285     disp($off);
4286   %}
4287 %}
4288 
4289 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4290 
4291 
4292 //----------Special Memory Operands--------------------------------------------
4293 // Stack Slot Operand - This operand is used for loading and storing temporary
4294 //                      values on the stack where a match requires a value to
4295 //                      flow through memory.
4296 operand stackSlotP(sRegP reg) %{
4297   constraint(ALLOC_IN_RC(stack_slots));
4298   // No match rule because this operand is only generated in matching
4299   format %{ "[$reg]" %}
4300   interface(MEMORY_INTER) %{
4301     base(0x4);   // ESP
4302     index(0x4);  // No Index
4303     scale(0x0);  // No Scale
4304     disp($reg);  // Stack Offset
4305   %}
4306 %}
4307 
4308 operand stackSlotI(sRegI reg) %{
4309   constraint(ALLOC_IN_RC(stack_slots));
4310   // No match rule because this operand is only generated in matching
4311   format %{ "[$reg]" %}
4312   interface(MEMORY_INTER) %{
4313     base(0x4);   // ESP
4314     index(0x4);  // No Index
4315     scale(0x0);  // No Scale
4316     disp($reg);  // Stack Offset
4317   %}
4318 %}
4319 
4320 operand stackSlotF(sRegF reg) %{
4321   constraint(ALLOC_IN_RC(stack_slots));
4322   // No match rule because this operand is only generated in matching
4323   format %{ "[$reg]" %}
4324   interface(MEMORY_INTER) %{
4325     base(0x4);   // ESP
4326     index(0x4);  // No Index
4327     scale(0x0);  // No Scale
4328     disp($reg);  // Stack Offset
4329   %}
4330 %}
4331 
4332 operand stackSlotD(sRegD reg) %{
4333   constraint(ALLOC_IN_RC(stack_slots));
4334   // No match rule because this operand is only generated in matching
4335   format %{ "[$reg]" %}
4336   interface(MEMORY_INTER) %{
4337     base(0x4);   // ESP
4338     index(0x4);  // No Index
4339     scale(0x0);  // No Scale
4340     disp($reg);  // Stack Offset
4341   %}
4342 %}
4343 
4344 operand stackSlotL(sRegL reg) %{
4345   constraint(ALLOC_IN_RC(stack_slots));
4346   // No match rule because this operand is only generated in matching
4347   format %{ "[$reg]" %}
4348   interface(MEMORY_INTER) %{
4349     base(0x4);   // ESP
4350     index(0x4);  // No Index
4351     scale(0x0);  // No Scale
4352     disp($reg);  // Stack Offset
4353   %}
4354 %}
4355 
4356 //----------Memory Operands - Win95 Implicit Null Variants----------------
4357 // Indirect Memory Operand
4358 operand indirect_win95_safe(eRegP_no_EBP reg)
4359 %{
4360   constraint(ALLOC_IN_RC(int_reg));
4361   match(reg);
4362 
4363   op_cost(100);
4364   format %{ "[$reg]" %}
4365   interface(MEMORY_INTER) %{
4366     base($reg);
4367     index(0x4);
4368     scale(0x0);
4369     disp(0x0);
4370   %}
4371 %}
4372 
4373 // Indirect Memory Plus Short Offset Operand
4374 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4375 %{
4376   match(AddP reg off);
4377 
4378   op_cost(100);
4379   format %{ "[$reg + $off]" %}
4380   interface(MEMORY_INTER) %{
4381     base($reg);
4382     index(0x4);
4383     scale(0x0);
4384     disp($off);
4385   %}
4386 %}
4387 
4388 // Indirect Memory Plus Long Offset Operand
4389 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4390 %{
4391   match(AddP reg off);
4392 
4393   op_cost(100);
4394   format %{ "[$reg + $off]" %}
4395   interface(MEMORY_INTER) %{
4396     base($reg);
4397     index(0x4);
4398     scale(0x0);
4399     disp($off);
4400   %}
4401 %}
4402 
4403 // Indirect Memory Plus Index Register Plus Offset Operand
4404 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4405 %{
4406   match(AddP (AddP reg ireg) off);
4407 
4408   op_cost(100);
4409   format %{"[$reg + $off + $ireg]" %}
4410   interface(MEMORY_INTER) %{
4411     base($reg);
4412     index($ireg);
4413     scale(0x0);
4414     disp($off);
4415   %}
4416 %}
4417 
4418 // Indirect Memory Times Scale Plus Index Register
4419 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4420 %{
4421   match(AddP reg (LShiftI ireg scale));
4422 
4423   op_cost(100);
4424   format %{"[$reg + $ireg << $scale]" %}
4425   interface(MEMORY_INTER) %{
4426     base($reg);
4427     index($ireg);
4428     scale($scale);
4429     disp(0x0);
4430   %}
4431 %}
4432 
4433 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4434 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4435 %{
4436   match(AddP (AddP reg (LShiftI ireg scale)) off);
4437 
4438   op_cost(100);
4439   format %{"[$reg + $off + $ireg << $scale]" %}
4440   interface(MEMORY_INTER) %{
4441     base($reg);
4442     index($ireg);
4443     scale($scale);
4444     disp($off);
4445   %}
4446 %}
4447 
4448 //----------Conditional Branch Operands----------------------------------------
4449 // Comparison Op  - This is the operation of the comparison, and is limited to
4450 //                  the following set of codes:
4451 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4452 //
4453 // Other attributes of the comparison, such as unsignedness, are specified
4454 // by the comparison instruction that sets a condition code flags register.
4455 // That result is represented by a flags operand whose subtype is appropriate
4456 // to the unsignedness (etc.) of the comparison.
4457 //
4458 // Later, the instruction which matches both the Comparison Op (a Bool) and
4459 // the flags (produced by the Cmp) specifies the coding of the comparison op
4460 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4461 
4462 // Comparison Code
4463 operand cmpOp() %{
4464   match(Bool);
4465 
4466   format %{ "" %}
4467   interface(COND_INTER) %{
4468     equal(0x4, "e");
4469     not_equal(0x5, "ne");
4470     less(0xC, "l");
4471     greater_equal(0xD, "ge");
4472     less_equal(0xE, "le");
4473     greater(0xF, "g");
4474     overflow(0x0, "o");
4475     no_overflow(0x1, "no");
4476   %}
4477 %}
4478 
4479 // Comparison Code, unsigned compare.  Used by FP also, with
4480 // C2 (unordered) turned into GT or LT already.  The other bits
4481 // C0 and C3 are turned into Carry & Zero flags.
4482 operand cmpOpU() %{
4483   match(Bool);
4484 
4485   format %{ "" %}
4486   interface(COND_INTER) %{
4487     equal(0x4, "e");
4488     not_equal(0x5, "ne");
4489     less(0x2, "b");
4490     greater_equal(0x3, "nb");
4491     less_equal(0x6, "be");
4492     greater(0x7, "nbe");
4493     overflow(0x0, "o");
4494     no_overflow(0x1, "no");
4495   %}
4496 %}
4497 
4498 // Floating comparisons that don't require any fixup for the unordered case
4499 operand cmpOpUCF() %{
4500   match(Bool);
4501   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4502             n->as_Bool()->_test._test == BoolTest::ge ||
4503             n->as_Bool()->_test._test == BoolTest::le ||
4504             n->as_Bool()->_test._test == BoolTest::gt);
4505   format %{ "" %}
4506   interface(COND_INTER) %{
4507     equal(0x4, "e");
4508     not_equal(0x5, "ne");
4509     less(0x2, "b");
4510     greater_equal(0x3, "nb");
4511     less_equal(0x6, "be");
4512     greater(0x7, "nbe");
4513     overflow(0x0, "o");
4514     no_overflow(0x1, "no");
4515   %}
4516 %}
4517 
4518 
4519 // Floating comparisons that can be fixed up with extra conditional jumps
4520 operand cmpOpUCF2() %{
4521   match(Bool);
4522   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4523             n->as_Bool()->_test._test == BoolTest::eq);
4524   format %{ "" %}
4525   interface(COND_INTER) %{
4526     equal(0x4, "e");
4527     not_equal(0x5, "ne");
4528     less(0x2, "b");
4529     greater_equal(0x3, "nb");
4530     less_equal(0x6, "be");
4531     greater(0x7, "nbe");
4532     overflow(0x0, "o");
4533     no_overflow(0x1, "no");
4534   %}
4535 %}
4536 
4537 // Comparison Code for FP conditional move
4538 operand cmpOp_fcmov() %{
4539   match(Bool);
4540 
4541   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4542             n->as_Bool()->_test._test != BoolTest::no_overflow);
4543   format %{ "" %}
4544   interface(COND_INTER) %{
4545     equal        (0x0C8);
4546     not_equal    (0x1C8);
4547     less         (0x0C0);
4548     greater_equal(0x1C0);
4549     less_equal   (0x0D0);
4550     greater      (0x1D0);
4551     overflow(0x0, "o"); // not really supported by the instruction
4552     no_overflow(0x1, "no"); // not really supported by the instruction
4553   %}
4554 %}
4555 
// Comparison Code used in long compares
4557 operand cmpOp_commute() %{
4558   match(Bool);
4559 
4560   format %{ "" %}
4561   interface(COND_INTER) %{
4562     equal(0x4, "e");
4563     not_equal(0x5, "ne");
4564     less(0xF, "g");
4565     greater_equal(0xE, "le");
4566     less_equal(0xD, "ge");
4567     greater(0xC, "l");
4568     overflow(0x0, "o");
4569     no_overflow(0x1, "no");
4570   %}
4571 %}
4572 
4573 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4575 // instruction definitions by not requiring the AD writer to specify separate
4576 // instructions for every form of operand when the instruction accepts
4577 // multiple operand types with the same basic encoding and format.  The classic
4578 // case of this is memory operands.
4579 
4580 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4581                indIndex, indIndexScale, indIndexScaleOffset);
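
// A single instruct written against the "memory" class above covers every
// listed addressing form.  For example (an illustrative sketch; the real
// definitions appear in the instruction section below):
//
//   instruct loadI_sketch(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "MOV    $dst,$mem" %}
//     ...
//   %}
//
// matches direct, register-indirect, offset, and scaled-index addresses
// without a separate instruct per operand type.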
4582 
// Long memory operations are encoded as two instructions, the second of which
// uses a +4 offset.  This means some kind of offset is always required and you
// cannot use an oop as the offset (as is done when working on static globals).
4586 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4587                     indIndex, indIndexScale, indIndexScaleOffset);
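
// In the encodings below, the two halves of a long access are addressed at
// $mem$$disp and $mem$$disp + 4.  See, for example, the loadL instruct, which
// builds
//   Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none)
//   Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)
// for the low and high words respectively.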
4588 
4589 
4590 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4592 pipeline %{
4593 
4594 //----------ATTRIBUTES---------------------------------------------------------
4595 attributes %{
  variable_size_instructions;        // Variable-size instructions
4597   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
4599   instruction_fetch_unit_size = 16;  // The processor fetches one line
4600   instruction_fetch_units = 1;       // of 16 bytes
4601 
4602   // List of nop instructions
4603   nops( MachNop );
4604 %}
4605 
4606 //----------RESOURCES----------------------------------------------------------
4607 // Resources are the functional units available to the machine
4608 
4609 // Generic P2/P3 pipeline
4610 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4611 // 3 instructions decoded per cycle.
4612 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
4614 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4615            MS0, MS1, MEM = MS0 | MS1,
4616            BR, FPU,
4617            ALU0, ALU1, ALU = ALU0 | ALU1 );
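
// The pipeline classes below claim these resources per stage.  For example,
// in the declarations that follow:
//   DECODE : S0;        // any one of D0 | D1 | D2 in stage S0
//   D0     : S0;        // specifically the big decoder in stage S0
//   ALU    : S3(2);     // both ALU0 and ALU1 in stage S3
//   MEM    : S3;        // either load/store unit in stage S3
// (illustrative readings only; the pipe_class definitions themselves are
// authoritative).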
4618 
4619 //----------PIPELINE DESCRIPTION-----------------------------------------------
4620 // Pipeline Description specifies the stages in the machine's pipeline
4621 
4622 // Generic P2/P3 pipeline
4623 pipe_desc(S0, S1, S2, S3, S4, S5);
4624 
4625 //----------PIPELINE CLASSES---------------------------------------------------
4626 // Pipeline Classes describe the stages in which input and output are
4627 // referenced by the hardware pipeline.
4628 
// Naming convention: ialu or fpu
// Then: _reg for the first register operand
// Then: _reg again if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
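//
// For example, under this convention:
//   ialu_reg_mem      -- integer ALU op, register destination, memory source
//                        (big decoder plus a memory unit)
//   ialu_reg_reg_long -- register-register integer op expanded into a pair of
//                        instructions to implement a long
//   fpu_reg_mem       -- FPU op with a register destination and a memory source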
4635 
4636 // Integer ALU reg operation
4637 pipe_class ialu_reg(rRegI dst) %{
4638     single_instruction;
4639     dst    : S4(write);
4640     dst    : S3(read);
4641     DECODE : S0;        // any decoder
4642     ALU    : S3;        // any alu
4643 %}
4644 
4645 // Long ALU reg operation
4646 pipe_class ialu_reg_long(eRegL dst) %{
4647     instruction_count(2);
4648     dst    : S4(write);
4649     dst    : S3(read);
4650     DECODE : S0(2);     // any 2 decoders
4651     ALU    : S3(2);     // both alus
4652 %}
4653 
4654 // Integer ALU reg operation using big decoder
4655 pipe_class ialu_reg_fat(rRegI dst) %{
4656     single_instruction;
4657     dst    : S4(write);
4658     dst    : S3(read);
4659     D0     : S0;        // big decoder only
4660     ALU    : S3;        // any alu
4661 %}
4662 
4663 // Long ALU reg operation using big decoder
4664 pipe_class ialu_reg_long_fat(eRegL dst) %{
4665     instruction_count(2);
4666     dst    : S4(write);
4667     dst    : S3(read);
4668     D0     : S0(2);     // big decoder only; twice
4669     ALU    : S3(2);     // any 2 alus
4670 %}
4671 
4672 // Integer ALU reg-reg operation
4673 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4674     single_instruction;
4675     dst    : S4(write);
4676     src    : S3(read);
4677     DECODE : S0;        // any decoder
4678     ALU    : S3;        // any alu
4679 %}
4680 
4681 // Long ALU reg-reg operation
4682 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4683     instruction_count(2);
4684     dst    : S4(write);
4685     src    : S3(read);
4686     DECODE : S0(2);     // any 2 decoders
4687     ALU    : S3(2);     // both alus
4688 %}
4689 
// Integer ALU reg-reg operation using big decoder
4691 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4692     single_instruction;
4693     dst    : S4(write);
4694     src    : S3(read);
4695     D0     : S0;        // big decoder only
4696     ALU    : S3;        // any alu
4697 %}
4698 
// Long ALU reg-reg operation using big decoder
4700 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4701     instruction_count(2);
4702     dst    : S4(write);
4703     src    : S3(read);
4704     D0     : S0(2);     // big decoder only; twice
4705     ALU    : S3(2);     // both alus
4706 %}
4707 
4708 // Integer ALU reg-mem operation
4709 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4710     single_instruction;
4711     dst    : S5(write);
4712     mem    : S3(read);
4713     D0     : S0;        // big decoder only
4714     ALU    : S4;        // any alu
4715     MEM    : S3;        // any mem
4716 %}
4717 
4718 // Long ALU reg-mem operation
4719 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4720     instruction_count(2);
4721     dst    : S5(write);
4722     mem    : S3(read);
4723     D0     : S0(2);     // big decoder only; twice
4724     ALU    : S4(2);     // any 2 alus
4725     MEM    : S3(2);     // both mems
4726 %}
4727 
4728 // Integer mem operation (prefetch)
4729 pipe_class ialu_mem(memory mem)
4730 %{
4731     single_instruction;
4732     mem    : S3(read);
4733     D0     : S0;        // big decoder only
4734     MEM    : S3;        // any mem
4735 %}
4736 
4737 // Integer Store to Memory
4738 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4739     single_instruction;
4740     mem    : S3(read);
4741     src    : S5(read);
4742     D0     : S0;        // big decoder only
4743     ALU    : S4;        // any alu
4744     MEM    : S3;
4745 %}
4746 
4747 // Long Store to Memory
4748 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4749     instruction_count(2);
4750     mem    : S3(read);
4751     src    : S5(read);
4752     D0     : S0(2);     // big decoder only; twice
4753     ALU    : S4(2);     // any 2 alus
4754     MEM    : S3(2);     // Both mems
4755 %}
4756 
4757 // Integer Store to Memory
4758 pipe_class ialu_mem_imm(memory mem) %{
4759     single_instruction;
4760     mem    : S3(read);
4761     D0     : S0;        // big decoder only
4762     ALU    : S4;        // any alu
4763     MEM    : S3;
4764 %}
4765 
4766 // Integer ALU0 reg-reg operation
4767 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4768     single_instruction;
4769     dst    : S4(write);
4770     src    : S3(read);
4771     D0     : S0;        // Big decoder only
4772     ALU0   : S3;        // only alu0
4773 %}
4774 
4775 // Integer ALU0 reg-mem operation
4776 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4777     single_instruction;
4778     dst    : S5(write);
4779     mem    : S3(read);
4780     D0     : S0;        // big decoder only
4781     ALU0   : S4;        // ALU0 only
4782     MEM    : S3;        // any mem
4783 %}
4784 
4785 // Integer ALU reg-reg operation
4786 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4787     single_instruction;
4788     cr     : S4(write);
4789     src1   : S3(read);
4790     src2   : S3(read);
4791     DECODE : S0;        // any decoder
4792     ALU    : S3;        // any alu
4793 %}
4794 
4795 // Integer ALU reg-imm operation
4796 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4797     single_instruction;
4798     cr     : S4(write);
4799     src1   : S3(read);
4800     DECODE : S0;        // any decoder
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Integer ALU reg-mem operation
4805 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4806     single_instruction;
4807     cr     : S4(write);
4808     src1   : S3(read);
4809     src2   : S3(read);
4810     D0     : S0;        // big decoder only
4811     ALU    : S4;        // any alu
4812     MEM    : S3;
4813 %}
4814 
4815 // Conditional move reg-reg
4816 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4817     instruction_count(4);
4818     y      : S4(read);
4819     q      : S3(read);
4820     p      : S3(read);
4821     DECODE : S0(4);     // any decoder
4822 %}
4823 
4824 // Conditional move reg-reg
4825 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4826     single_instruction;
4827     dst    : S4(write);
4828     src    : S3(read);
4829     cr     : S3(read);
4830     DECODE : S0;        // any decoder
4831 %}
4832 
4833 // Conditional move reg-mem
4834 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4835     single_instruction;
4836     dst    : S4(write);
4837     src    : S3(read);
4838     cr     : S3(read);
4839     DECODE : S0;        // any decoder
4840     MEM    : S3;
4841 %}
4842 
4843 // Conditional move reg-reg long
4844 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4845     single_instruction;
4846     dst    : S4(write);
4847     src    : S3(read);
4848     cr     : S3(read);
4849     DECODE : S0(2);     // any 2 decoders
4850 %}
4851 
4852 // Conditional move double reg-reg
4853 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4854     single_instruction;
4855     dst    : S4(write);
4856     src    : S3(read);
4857     cr     : S3(read);
4858     DECODE : S0;        // any decoder
4859 %}
4860 
// Float reg operation
4862 pipe_class fpu_reg(regDPR dst) %{
4863     instruction_count(2);
4864     dst    : S3(read);
4865     DECODE : S0(2);     // any 2 decoders
4866     FPU    : S3;
4867 %}
4868 
4869 // Float reg-reg operation
4870 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4871     instruction_count(2);
4872     dst    : S4(write);
4873     src    : S3(read);
4874     DECODE : S0(2);     // any 2 decoders
4875     FPU    : S3;
4876 %}
4877 
4878 // Float reg-reg operation
4879 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4880     instruction_count(3);
4881     dst    : S4(write);
4882     src1   : S3(read);
4883     src2   : S3(read);
4884     DECODE : S0(3);     // any 3 decoders
4885     FPU    : S3(2);
4886 %}
4887 
4888 // Float reg-reg operation
4889 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4890     instruction_count(4);
4891     dst    : S4(write);
4892     src1   : S3(read);
4893     src2   : S3(read);
4894     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
4896     FPU    : S3(2);
4897 %}
4898 
4899 // Float reg-reg operation
4900 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4901     instruction_count(4);
4902     dst    : S4(write);
4903     src1   : S3(read);
4904     src2   : S3(read);
4905     src3   : S3(read);
4906     DECODE : S1(3);     // any 3 decoders
4907     D0     : S0;        // Big decoder only
4908     FPU    : S3(2);
4909     MEM    : S3;
4910 %}
4911 
4912 // Float reg-mem operation
4913 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4914     instruction_count(2);
4915     dst    : S5(write);
4916     mem    : S3(read);
4917     D0     : S0;        // big decoder only
4918     DECODE : S1;        // any decoder for FPU POP
4919     FPU    : S4;
4920     MEM    : S3;        // any mem
4921 %}
4922 
4923 // Float reg-mem operation
4924 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4925     instruction_count(3);
4926     dst    : S5(write);
4927     src1   : S3(read);
4928     mem    : S3(read);
4929     D0     : S0;        // big decoder only
4930     DECODE : S1(2);     // any decoder for FPU POP
4931     FPU    : S4;
4932     MEM    : S3;        // any mem
4933 %}
4934 
4935 // Float mem-reg operation
4936 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4937     instruction_count(2);
4938     src    : S5(read);
4939     mem    : S3(read);
4940     DECODE : S0;        // any decoder for FPU PUSH
4941     D0     : S1;        // big decoder only
4942     FPU    : S4;
4943     MEM    : S3;        // any mem
4944 %}
4945 
4946 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4947     instruction_count(3);
4948     src1   : S3(read);
4949     src2   : S3(read);
4950     mem    : S3(read);
4951     DECODE : S0(2);     // any decoder for FPU PUSH
4952     D0     : S1;        // big decoder only
4953     FPU    : S4;
4954     MEM    : S3;        // any mem
4955 %}
4956 
4957 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4958     instruction_count(3);
4959     src1   : S3(read);
4960     src2   : S3(read);
4961     mem    : S4(read);
4962     DECODE : S0;        // any decoder for FPU PUSH
4963     D0     : S0(2);     // big decoder only
4964     FPU    : S4;
4965     MEM    : S3(2);     // any mem
4966 %}
4967 
4968 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4969     instruction_count(2);
4970     src1   : S3(read);
4971     dst    : S4(read);
4972     D0     : S0(2);     // big decoder only
4973     MEM    : S3(2);     // any mem
4974 %}
4975 
4976 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4977     instruction_count(3);
4978     src1   : S3(read);
4979     src2   : S3(read);
4980     dst    : S4(read);
4981     D0     : S0(3);     // big decoder only
4982     FPU    : S4;
4983     MEM    : S3(3);     // any mem
4984 %}
4985 
4986 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4987     instruction_count(3);
4988     src1   : S4(read);
4989     mem    : S4(read);
4990     DECODE : S0;        // any decoder for FPU PUSH
4991     D0     : S0(2);     // big decoder only
4992     FPU    : S4;
4993     MEM    : S3(2);     // any mem
4994 %}
4995 
4996 // Float load constant
4997 pipe_class fpu_reg_con(regDPR dst) %{
4998     instruction_count(2);
4999     dst    : S5(write);
5000     D0     : S0;        // big decoder only for the load
5001     DECODE : S1;        // any decoder for FPU POP
5002     FPU    : S4;
5003     MEM    : S3;        // any mem
5004 %}
5005 
5006 // Float load constant
5007 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5008     instruction_count(3);
5009     dst    : S5(write);
5010     src    : S3(read);
5011     D0     : S0;        // big decoder only for the load
5012     DECODE : S1(2);     // any decoder for FPU POP
5013     FPU    : S4;
5014     MEM    : S3;        // any mem
5015 %}
5016 
// Unconditional branch
5018 pipe_class pipe_jmp( label labl ) %{
5019     single_instruction;
5020     BR   : S3;
5021 %}
5022 
5023 // Conditional branch
5024 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5025     single_instruction;
5026     cr    : S1(read);
5027     BR    : S3;
5028 %}
5029 
5030 // Allocation idiom
5031 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5032     instruction_count(1); force_serialization;
5033     fixed_latency(6);
5034     heap_ptr : S3(read);
5035     DECODE   : S0(3);
5036     D0       : S2;
5037     MEM      : S3;
5038     ALU      : S3(2);
5039     dst      : S5(write);
5040     BR       : S5;
5041 %}
5042 
5043 // Generic big/slow expanded idiom
5044 pipe_class pipe_slow(  ) %{
5045     instruction_count(10); multiple_bundles; force_serialization;
5046     fixed_latency(100);
5047     D0  : S0(2);
5048     MEM : S3(2);
5049 %}
5050 
5051 // The real do-nothing guy
5052 pipe_class empty( ) %{
5053     instruction_count(0);
5054 %}
5055 
5056 // Define the class for the Nop node
5057 define %{
5058    MachNop = empty;
5059 %}
5060 
5061 %}
5062 
5063 //----------INSTRUCTIONS-------------------------------------------------------
5064 //
5065 // match      -- States which machine-independent subtree may be replaced
5066 //               by this instruction.
5067 // ins_cost   -- The estimated cost of this instruction is used by instruction
5068 //               selection to identify a minimum cost tree of machine
5069 //               instructions that matches a tree of machine-independent
5070 //               instructions.
5071 // format     -- A string providing the disassembly for this instruction.
5072 //               The value of an instruction's operand may be inserted
5073 //               by referring to it with a '$' prefix.
5074 // opcode     -- Three instruction opcodes may be provided.  These are referred
5075 //               to within an encode class as $primary, $secondary, and $tertiary
5076 //               respectively.  The primary opcode is commonly used to
5077 //               indicate the type of machine instruction, while secondary
5078 //               and tertiary are often used for prefix options or addressing
5079 //               modes.
5080 // ins_encode -- A list of encode classes with parameters. The encode class
5081 //               name must have been defined in an 'enc_class' specification
5082 //               in the encode section of the architecture description.
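//
// Putting these pieces together, a typical definition has the following shape
// (an illustrative sketch only; see the real definitions below):
//
//   instruct addI_eReg_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));        // ideal subtree being replaced
//     effect(KILL cr);                      // extra effects not in the match
//     ins_cost(125);                        // relative cost for selection
//     format %{ "ADD    $dst,$src" %}       // disassembly string
//     opcode(0x03);                         // $primary opcode
//     ins_encode( OpcP, RegReg(dst,src) );  // encode classes from the encode section
//     ins_pipe( ialu_reg_reg );             // pipeline class defined above
//   %}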
5083 
5084 //----------BSWAP-Instruction--------------------------------------------------
5085 instruct bytes_reverse_int(rRegI dst) %{
5086   match(Set dst (ReverseBytesI dst));
5087 
5088   format %{ "BSWAP  $dst" %}
5089   opcode(0x0F, 0xC8);
5090   ins_encode( OpcP, OpcSReg(dst) );
5091   ins_pipe( ialu_reg );
5092 %}
5093 
5094 instruct bytes_reverse_long(eRegL dst) %{
5095   match(Set dst (ReverseBytesL dst));
5096 
5097   format %{ "BSWAP  $dst.lo\n\t"
5098             "BSWAP  $dst.hi\n\t"
5099             "XCHG   $dst.lo $dst.hi" %}
5100 
5101   ins_cost(125);
5102   ins_encode( bswap_long_bytes(dst) );
5103   ins_pipe( ialu_reg_reg);
5104 %}
5105 
5106 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5107   match(Set dst (ReverseBytesUS dst));
5108   effect(KILL cr);
5109 
5110   format %{ "BSWAP  $dst\n\t"
5111             "SHR    $dst,16\n\t" %}
5112   ins_encode %{
5113     __ bswapl($dst$$Register);
5114     __ shrl($dst$$Register, 16);
5115   %}
5116   ins_pipe( ialu_reg );
5117 %}
5118 
5119 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5120   match(Set dst (ReverseBytesS dst));
5121   effect(KILL cr);
5122 
5123   format %{ "BSWAP  $dst\n\t"
5124             "SAR    $dst,16\n\t" %}
5125   ins_encode %{
5126     __ bswapl($dst$$Register);
5127     __ sarl($dst$$Register, 16);
5128   %}
5129   ins_pipe( ialu_reg );
5130 %}
5131 
5132 
5133 //---------- Zeros Count Instructions ------------------------------------------
5134 
5135 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5136   predicate(UseCountLeadingZerosInstruction);
5137   match(Set dst (CountLeadingZerosI src));
5138   effect(KILL cr);
5139 
5140   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5141   ins_encode %{
5142     __ lzcntl($dst$$Register, $src$$Register);
5143   %}
5144   ins_pipe(ialu_reg);
5145 %}
5146 
5147 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5148   predicate(!UseCountLeadingZerosInstruction);
5149   match(Set dst (CountLeadingZerosI src));
5150   effect(KILL cr);
5151 
5152   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5153             "JNZ    skip\n\t"
5154             "MOV    $dst, -1\n"
5155       "skip:\n\t"
5156             "NEG    $dst\n\t"
5157             "ADD    $dst, 31" %}
5158   ins_encode %{
5159     Register Rdst = $dst$$Register;
5160     Register Rsrc = $src$$Register;
5161     Label skip;
5162     __ bsrl(Rdst, Rsrc);
5163     __ jccb(Assembler::notZero, skip);
5164     __ movl(Rdst, -1);
5165     __ bind(skip);
5166     __ negl(Rdst);
5167     __ addl(Rdst, BitsPerInt - 1);
5168   %}
5169   ins_pipe(ialu_reg);
5170 %}
5171 
5172 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5173   predicate(UseCountLeadingZerosInstruction);
5174   match(Set dst (CountLeadingZerosL src));
5175   effect(TEMP dst, KILL cr);
5176 
5177   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5178             "JNC    done\n\t"
5179             "LZCNT  $dst, $src.lo\n\t"
5180             "ADD    $dst, 32\n"
5181       "done:" %}
5182   ins_encode %{
5183     Register Rdst = $dst$$Register;
5184     Register Rsrc = $src$$Register;
5185     Label done;
5186     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5187     __ jccb(Assembler::carryClear, done);
5188     __ lzcntl(Rdst, Rsrc);
5189     __ addl(Rdst, BitsPerInt);
5190     __ bind(done);
5191   %}
5192   ins_pipe(ialu_reg);
5193 %}
5194 
5195 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5196   predicate(!UseCountLeadingZerosInstruction);
5197   match(Set dst (CountLeadingZerosL src));
5198   effect(TEMP dst, KILL cr);
5199 
5200   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5201             "JZ     msw_is_zero\n\t"
5202             "ADD    $dst, 32\n\t"
5203             "JMP    not_zero\n"
5204       "msw_is_zero:\n\t"
5205             "BSR    $dst, $src.lo\n\t"
5206             "JNZ    not_zero\n\t"
5207             "MOV    $dst, -1\n"
5208       "not_zero:\n\t"
5209             "NEG    $dst\n\t"
5210             "ADD    $dst, 63\n" %}
  ins_encode %{
5212     Register Rdst = $dst$$Register;
5213     Register Rsrc = $src$$Register;
5214     Label msw_is_zero;
5215     Label not_zero;
5216     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5217     __ jccb(Assembler::zero, msw_is_zero);
5218     __ addl(Rdst, BitsPerInt);
5219     __ jmpb(not_zero);
5220     __ bind(msw_is_zero);
5221     __ bsrl(Rdst, Rsrc);
5222     __ jccb(Assembler::notZero, not_zero);
5223     __ movl(Rdst, -1);
5224     __ bind(not_zero);
5225     __ negl(Rdst);
5226     __ addl(Rdst, BitsPerLong - 1);
5227   %}
5228   ins_pipe(ialu_reg);
5229 %}
5230 
5231 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5232   predicate(UseCountTrailingZerosInstruction);
5233   match(Set dst (CountTrailingZerosI src));
5234   effect(KILL cr);
5235 
5236   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5237   ins_encode %{
5238     __ tzcntl($dst$$Register, $src$$Register);
5239   %}
5240   ins_pipe(ialu_reg);
5241 %}
5242 
5243 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5244   predicate(!UseCountTrailingZerosInstruction);
5245   match(Set dst (CountTrailingZerosI src));
5246   effect(KILL cr);
5247 
5248   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5249             "JNZ    done\n\t"
5250             "MOV    $dst, 32\n"
5251       "done:" %}
5252   ins_encode %{
5253     Register Rdst = $dst$$Register;
5254     Label done;
5255     __ bsfl(Rdst, $src$$Register);
5256     __ jccb(Assembler::notZero, done);
5257     __ movl(Rdst, BitsPerInt);
5258     __ bind(done);
5259   %}
5260   ins_pipe(ialu_reg);
5261 %}
5262 
5263 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5264   predicate(UseCountTrailingZerosInstruction);
5265   match(Set dst (CountTrailingZerosL src));
5266   effect(TEMP dst, KILL cr);
5267 
5268   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5269             "JNC    done\n\t"
5270             "TZCNT  $dst, $src.hi\n\t"
5271             "ADD    $dst, 32\n"
5272             "done:" %}
5273   ins_encode %{
5274     Register Rdst = $dst$$Register;
5275     Register Rsrc = $src$$Register;
5276     Label done;
5277     __ tzcntl(Rdst, Rsrc);
5278     __ jccb(Assembler::carryClear, done);
5279     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5280     __ addl(Rdst, BitsPerInt);
5281     __ bind(done);
5282   %}
5283   ins_pipe(ialu_reg);
5284 %}
5285 
5286 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5287   predicate(!UseCountTrailingZerosInstruction);
5288   match(Set dst (CountTrailingZerosL src));
5289   effect(TEMP dst, KILL cr);
5290 
5291   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5292             "JNZ    done\n\t"
5293             "BSF    $dst, $src.hi\n\t"
5294             "JNZ    msw_not_zero\n\t"
5295             "MOV    $dst, 32\n"
5296       "msw_not_zero:\n\t"
5297             "ADD    $dst, 32\n"
5298       "done:" %}
5299   ins_encode %{
5300     Register Rdst = $dst$$Register;
5301     Register Rsrc = $src$$Register;
5302     Label msw_not_zero;
5303     Label done;
5304     __ bsfl(Rdst, Rsrc);
5305     __ jccb(Assembler::notZero, done);
5306     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5307     __ jccb(Assembler::notZero, msw_not_zero);
5308     __ movl(Rdst, BitsPerInt);
5309     __ bind(msw_not_zero);
5310     __ addl(Rdst, BitsPerInt);
5311     __ bind(done);
5312   %}
5313   ins_pipe(ialu_reg);
5314 %}
5315 
5316 
5317 //---------- Population Count Instructions -------------------------------------
5318 
5319 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5320   predicate(UsePopCountInstruction);
5321   match(Set dst (PopCountI src));
5322   effect(KILL cr);
5323 
5324   format %{ "POPCNT $dst, $src" %}
5325   ins_encode %{
5326     __ popcntl($dst$$Register, $src$$Register);
5327   %}
5328   ins_pipe(ialu_reg);
5329 %}
5330 
5331 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5332   predicate(UsePopCountInstruction);
5333   match(Set dst (PopCountI (LoadI mem)));
5334   effect(KILL cr);
5335 
5336   format %{ "POPCNT $dst, $mem" %}
5337   ins_encode %{
5338     __ popcntl($dst$$Register, $mem$$Address);
5339   %}
5340   ins_pipe(ialu_reg);
5341 %}
5342 
5343 // Note: Long.bitCount(long) returns an int.
5344 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5345   predicate(UsePopCountInstruction);
5346   match(Set dst (PopCountL src));
5347   effect(KILL cr, TEMP tmp, TEMP dst);
5348 
5349   format %{ "POPCNT $dst, $src.lo\n\t"
5350             "POPCNT $tmp, $src.hi\n\t"
5351             "ADD    $dst, $tmp" %}
5352   ins_encode %{
5353     __ popcntl($dst$$Register, $src$$Register);
5354     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5355     __ addl($dst$$Register, $tmp$$Register);
5356   %}
5357   ins_pipe(ialu_reg);
5358 %}
5359 
5360 // Note: Long.bitCount(long) returns an int.
5361 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5362   predicate(UsePopCountInstruction);
5363   match(Set dst (PopCountL (LoadL mem)));
5364   effect(KILL cr, TEMP tmp, TEMP dst);
5365 
5366   format %{ "POPCNT $dst, $mem\n\t"
5367             "POPCNT $tmp, $mem+4\n\t"
5368             "ADD    $dst, $tmp" %}
5369   ins_encode %{
5370     //__ popcntl($dst$$Register, $mem$$Address$$first);
5371     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5372     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5373     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5374     __ addl($dst$$Register, $tmp$$Register);
5375   %}
5376   ins_pipe(ialu_reg);
5377 %}
5378 
5379 
5380 //----------Load/Store/Move Instructions---------------------------------------
5381 //----------Load Instructions--------------------------------------------------
5382 // Load Byte (8bit signed)
5383 instruct loadB(xRegI dst, memory mem) %{
5384   match(Set dst (LoadB mem));
5385 
5386   ins_cost(125);
5387   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5388 
5389   ins_encode %{
5390     __ movsbl($dst$$Register, $mem$$Address);
5391   %}
5392 
5393   ins_pipe(ialu_reg_mem);
5394 %}
5395 
5396 // Load Byte (8bit signed) into Long Register
5397 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5398   match(Set dst (ConvI2L (LoadB mem)));
5399   effect(KILL cr);
5400 
5401   ins_cost(375);
5402   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5403             "MOV    $dst.hi,$dst.lo\n\t"
5404             "SAR    $dst.hi,7" %}
5405 
5406   ins_encode %{
5407     __ movsbl($dst$$Register, $mem$$Address);
5408     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5410   %}
5411 
5412   ins_pipe(ialu_reg_mem);
5413 %}
5414 
5415 // Load Unsigned Byte (8bit UNsigned)
5416 instruct loadUB(xRegI dst, memory mem) %{
5417   match(Set dst (LoadUB mem));
5418 
5419   ins_cost(125);
5420   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5421 
5422   ins_encode %{
5423     __ movzbl($dst$$Register, $mem$$Address);
5424   %}
5425 
5426   ins_pipe(ialu_reg_mem);
5427 %}
5428 
5429 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5430 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5431   match(Set dst (ConvI2L (LoadUB mem)));
5432   effect(KILL cr);
5433 
5434   ins_cost(250);
5435   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5436             "XOR    $dst.hi,$dst.hi" %}
5437 
5438   ins_encode %{
5439     Register Rdst = $dst$$Register;
5440     __ movzbl(Rdst, $mem$$Address);
5441     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5442   %}
5443 
5444   ins_pipe(ialu_reg_mem);
5445 %}
5446 
5447 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5448 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5449   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5450   effect(KILL cr);
5451 
5452   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5453             "XOR    $dst.hi,$dst.hi\n\t"
5454             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5455   ins_encode %{
5456     Register Rdst = $dst$$Register;
5457     __ movzbl(Rdst, $mem$$Address);
5458     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5459     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5460   %}
5461   ins_pipe(ialu_reg_mem);
5462 %}
5463 
5464 // Load Short (16bit signed)
5465 instruct loadS(rRegI dst, memory mem) %{
5466   match(Set dst (LoadS mem));
5467 
5468   ins_cost(125);
5469   format %{ "MOVSX  $dst,$mem\t# short" %}
5470 
5471   ins_encode %{
5472     __ movswl($dst$$Register, $mem$$Address);
5473   %}
5474 
5475   ins_pipe(ialu_reg_mem);
5476 %}
5477 
5478 // Load Short (16 bit signed) to Byte (8 bit signed)
5479 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5480   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5481 
5482   ins_cost(125);
5483   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5484   ins_encode %{
5485     __ movsbl($dst$$Register, $mem$$Address);
5486   %}
5487   ins_pipe(ialu_reg_mem);
5488 %}
5489 
5490 // Load Short (16bit signed) into Long Register
5491 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5492   match(Set dst (ConvI2L (LoadS mem)));
5493   effect(KILL cr);
5494 
5495   ins_cost(375);
5496   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5497             "MOV    $dst.hi,$dst.lo\n\t"
5498             "SAR    $dst.hi,15" %}
5499 
5500   ins_encode %{
5501     __ movswl($dst$$Register, $mem$$Address);
5502     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5504   %}
5505 
5506   ins_pipe(ialu_reg_mem);
5507 %}
5508 
5509 // Load Unsigned Short/Char (16bit unsigned)
5510 instruct loadUS(rRegI dst, memory mem) %{
5511   match(Set dst (LoadUS mem));
5512 
5513   ins_cost(125);
5514   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5515 
5516   ins_encode %{
5517     __ movzwl($dst$$Register, $mem$$Address);
5518   %}
5519 
5520   ins_pipe(ialu_reg_mem);
5521 %}
5522 
5523 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5524 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5525   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5526 
5527   ins_cost(125);
5528   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5529   ins_encode %{
5530     __ movsbl($dst$$Register, $mem$$Address);
5531   %}
5532   ins_pipe(ialu_reg_mem);
5533 %}
5534 
5535 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5536 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5537   match(Set dst (ConvI2L (LoadUS mem)));
5538   effect(KILL cr);
5539 
5540   ins_cost(250);
5541   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5542             "XOR    $dst.hi,$dst.hi" %}
5543 
5544   ins_encode %{
5545     __ movzwl($dst$$Register, $mem$$Address);
5546     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5547   %}
5548 
5549   ins_pipe(ialu_reg_mem);
5550 %}
5551 
5552 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5553 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5554   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5555   effect(KILL cr);
5556 
5557   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5558             "XOR    $dst.hi,$dst.hi" %}
5559   ins_encode %{
5560     Register Rdst = $dst$$Register;
5561     __ movzbl(Rdst, $mem$$Address);
5562     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5563   %}
5564   ins_pipe(ialu_reg_mem);
5565 %}
5566 
5567 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5568 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5569   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5570   effect(KILL cr);
5571 
5572   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5573             "XOR    $dst.hi,$dst.hi\n\t"
5574             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5575   ins_encode %{
5576     Register Rdst = $dst$$Register;
5577     __ movzwl(Rdst, $mem$$Address);
5578     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5579     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5580   %}
5581   ins_pipe(ialu_reg_mem);
5582 %}
5583 
5584 // Load Integer
5585 instruct loadI(rRegI dst, memory mem) %{
5586   match(Set dst (LoadI mem));
5587 
5588   ins_cost(125);
5589   format %{ "MOV    $dst,$mem\t# int" %}
5590 
5591   ins_encode %{
5592     __ movl($dst$$Register, $mem$$Address);
5593   %}
5594 
5595   ins_pipe(ialu_reg_mem);
5596 %}
5597 
5598 // Load Integer (32 bit signed) to Byte (8 bit signed)
5599 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5600   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5601 
5602   ins_cost(125);
5603   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5604   ins_encode %{
5605     __ movsbl($dst$$Register, $mem$$Address);
5606   %}
5607   ins_pipe(ialu_reg_mem);
5608 %}
5609 
5610 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5611 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5612   match(Set dst (AndI (LoadI mem) mask));
5613 
5614   ins_cost(125);
5615   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5616   ins_encode %{
5617     __ movzbl($dst$$Register, $mem$$Address);
5618   %}
5619   ins_pipe(ialu_reg_mem);
5620 %}
5621 
5622 // Load Integer (32 bit signed) to Short (16 bit signed)
5623 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5624   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5625 
5626   ins_cost(125);
5627   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5628   ins_encode %{
5629     __ movswl($dst$$Register, $mem$$Address);
5630   %}
5631   ins_pipe(ialu_reg_mem);
5632 %}
5633 
5634 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5635 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5636   match(Set dst (AndI (LoadI mem) mask));
5637 
5638   ins_cost(125);
5639   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5640   ins_encode %{
5641     __ movzwl($dst$$Register, $mem$$Address);
5642   %}
5643   ins_pipe(ialu_reg_mem);
5644 %}
5645 
5646 // Load Integer into Long Register
5647 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5648   match(Set dst (ConvI2L (LoadI mem)));
5649   effect(KILL cr);
5650 
5651   ins_cost(375);
5652   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5653             "MOV    $dst.hi,$dst.lo\n\t"
5654             "SAR    $dst.hi,31" %}
5655 
5656   ins_encode %{
5657     __ movl($dst$$Register, $mem$$Address);
5658     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5659     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5660   %}
5661 
5662   ins_pipe(ialu_reg_mem);
5663 %}
5664 
5665 // Load Integer with mask 0xFF into Long Register
5666 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5667   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5668   effect(KILL cr);
5669 
5670   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5671             "XOR    $dst.hi,$dst.hi" %}
5672   ins_encode %{
5673     Register Rdst = $dst$$Register;
5674     __ movzbl(Rdst, $mem$$Address);
5675     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5676   %}
5677   ins_pipe(ialu_reg_mem);
5678 %}
5679 
5680 // Load Integer with mask 0xFFFF into Long Register
5681 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5682   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5683   effect(KILL cr);
5684 
5685   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5686             "XOR    $dst.hi,$dst.hi" %}
5687   ins_encode %{
5688     Register Rdst = $dst$$Register;
5689     __ movzwl(Rdst, $mem$$Address);
5690     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5691   %}
5692   ins_pipe(ialu_reg_mem);
5693 %}
5694 
5695 // Load Integer with 31-bit mask into Long Register
5696 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5697   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5698   effect(KILL cr);
5699 
5700   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5701             "XOR    $dst.hi,$dst.hi\n\t"
5702             "AND    $dst.lo,$mask" %}
5703   ins_encode %{
5704     Register Rdst = $dst$$Register;
5705     __ movl(Rdst, $mem$$Address);
5706     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5707     __ andl(Rdst, $mask$$constant);
5708   %}
5709   ins_pipe(ialu_reg_mem);
5710 %}
5711 
5712 // Load Unsigned Integer into Long Register
5713 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5714   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5715   effect(KILL cr);
5716 
5717   ins_cost(250);
5718   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5719             "XOR    $dst.hi,$dst.hi" %}
5720 
5721   ins_encode %{
5722     __ movl($dst$$Register, $mem$$Address);
5723     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5724   %}
5725 
5726   ins_pipe(ialu_reg_mem);
5727 %}
5728 
5729 // Load Long.  Cannot clobber address while loading, so restrict address
5730 // register to ESI
5731 instruct loadL(eRegL dst, load_long_memory mem) %{
5732   predicate(!((LoadLNode*)n)->require_atomic_access());
5733   match(Set dst (LoadL mem));
5734 
5735   ins_cost(250);
5736   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5737             "MOV    $dst.hi,$mem+4" %}
5738 
5739   ins_encode %{
5740     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5741     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5742     __ movl($dst$$Register, Amemlo);
5743     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5744   %}
5745 
5746   ins_pipe(ialu_reg_long_mem);
5747 %}
5748 
5749 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5750 // then store it down to the stack and reload on the int
5751 // side.
5752 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5753   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5754   match(Set dst (LoadL mem));
5755 
5756   ins_cost(200);
5757   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5758             "FISTp  $dst" %}
5759   ins_encode(enc_loadL_volatile(mem,dst));
5760   ins_pipe( fpu_reg_mem );
5761 %}
5762 
5763 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5764   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5765   match(Set dst (LoadL mem));
5766   effect(TEMP tmp);
5767   ins_cost(180);
5768   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5769             "MOVSD  $dst,$tmp" %}
5770   ins_encode %{
5771     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5772     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5773   %}
5774   ins_pipe( pipe_slow );
5775 %}
5776 
5777 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5778   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5779   match(Set dst (LoadL mem));
5780   effect(TEMP tmp);
5781   ins_cost(160);
5782   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5783             "MOVD   $dst.lo,$tmp\n\t"
5784             "PSRLQ  $tmp,32\n\t"
5785             "MOVD   $dst.hi,$tmp" %}
5786   ins_encode %{
5787     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5788     __ movdl($dst$$Register, $tmp$$XMMRegister);
5789     __ psrlq($tmp$$XMMRegister, 32);
5790     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5791   %}
5792   ins_pipe( pipe_slow );
5793 %}
5794 
5795 // Load Range
5796 instruct loadRange(rRegI dst, memory mem) %{
5797   match(Set dst (LoadRange mem));
5798 
5799   ins_cost(125);
5800   format %{ "MOV    $dst,$mem" %}
5801   opcode(0x8B);
5802   ins_encode( OpcP, RegMem(dst,mem));
5803   ins_pipe( ialu_reg_mem );
5804 %}
5805 
5806 
5807 // Load Pointer
5808 instruct loadP(eRegP dst, memory mem) %{
5809   match(Set dst (LoadP mem));
5810 
5811   ins_cost(125);
5812   format %{ "MOV    $dst,$mem" %}
5813   opcode(0x8B);
5814   ins_encode( OpcP, RegMem(dst,mem));
5815   ins_pipe( ialu_reg_mem );
5816 %}
5817 
5818 // Load Klass Pointer
5819 instruct loadKlass(eRegP dst, memory mem) %{
5820   match(Set dst (LoadKlass mem));
5821 
5822   ins_cost(125);
5823   format %{ "MOV    $dst,$mem" %}
5824   opcode(0x8B);
5825   ins_encode( OpcP, RegMem(dst,mem));
5826   ins_pipe( ialu_reg_mem );
5827 %}
5828 
5829 // Load Double
5830 instruct loadDPR(regDPR dst, memory mem) %{
5831   predicate(UseSSE<=1);
5832   match(Set dst (LoadD mem));
5833 
5834   ins_cost(150);
5835   format %{ "FLD_D  ST,$mem\n\t"
5836             "FSTP   $dst" %}
5837   opcode(0xDD);               /* DD /0 */
5838   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5839               Pop_Reg_DPR(dst) );
5840   ins_pipe( fpu_reg_mem );
5841 %}
5842 
5843 // Load Double to XMM
5844 instruct loadD(regD dst, memory mem) %{
5845   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5846   match(Set dst (LoadD mem));
5847   ins_cost(145);
5848   format %{ "MOVSD  $dst,$mem" %}
5849   ins_encode %{
5850     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5851   %}
5852   ins_pipe( pipe_slow );
5853 %}
5854 
5855 instruct loadD_partial(regD dst, memory mem) %{
5856   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5857   match(Set dst (LoadD mem));
5858   ins_cost(145);
5859   format %{ "MOVLPD $dst,$mem" %}
5860   ins_encode %{
5861     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5862   %}
5863   ins_pipe( pipe_slow );
5864 %}
5865 
5866 // Load to XMM register (single-precision floating point)
5867 // MOVSS instruction
5868 instruct loadF(regF dst, memory mem) %{
5869   predicate(UseSSE>=1);
5870   match(Set dst (LoadF mem));
5871   ins_cost(145);
5872   format %{ "MOVSS  $dst,$mem" %}
5873   ins_encode %{
5874     __ movflt ($dst$$XMMRegister, $mem$$Address);
5875   %}
5876   ins_pipe( pipe_slow );
5877 %}
5878 
5879 // Load Float
5880 instruct loadFPR(regFPR dst, memory mem) %{
5881   predicate(UseSSE==0);
5882   match(Set dst (LoadF mem));
5883 
5884   ins_cost(150);
5885   format %{ "FLD_S  ST,$mem\n\t"
5886             "FSTP   $dst" %}
5887   opcode(0xD9);               /* D9 /0 */
5888   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5889               Pop_Reg_FPR(dst) );
5890   ins_pipe( fpu_reg_mem );
5891 %}
5892 
5893 // Load Effective Address
5894 instruct leaP8(eRegP dst, indOffset8 mem) %{
5895   match(Set dst mem);
5896 
5897   ins_cost(110);
5898   format %{ "LEA    $dst,$mem" %}
5899   opcode(0x8D);
5900   ins_encode( OpcP, RegMem(dst,mem));
5901   ins_pipe( ialu_reg_reg_fat );
5902 %}
5903 
5904 instruct leaP32(eRegP dst, indOffset32 mem) %{
5905   match(Set dst mem);
5906 
5907   ins_cost(110);
5908   format %{ "LEA    $dst,$mem" %}
5909   opcode(0x8D);
5910   ins_encode( OpcP, RegMem(dst,mem));
5911   ins_pipe( ialu_reg_reg_fat );
5912 %}
5913 
5914 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5915   match(Set dst mem);
5916 
5917   ins_cost(110);
5918   format %{ "LEA    $dst,$mem" %}
5919   opcode(0x8D);
5920   ins_encode( OpcP, RegMem(dst,mem));
5921   ins_pipe( ialu_reg_reg_fat );
5922 %}
5923 
5924 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5925   match(Set dst mem);
5926 
5927   ins_cost(110);
5928   format %{ "LEA    $dst,$mem" %}
5929   opcode(0x8D);
5930   ins_encode( OpcP, RegMem(dst,mem));
5931   ins_pipe( ialu_reg_reg_fat );
5932 %}
5933 
5934 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5935   match(Set dst mem);
5936 
5937   ins_cost(110);
5938   format %{ "LEA    $dst,$mem" %}
5939   opcode(0x8D);
5940   ins_encode( OpcP, RegMem(dst,mem));
5941   ins_pipe( ialu_reg_reg_fat );
5942 %}
5943 
5944 // Load Constant
5945 instruct loadConI(rRegI dst, immI src) %{
5946   match(Set dst src);
5947 
5948   format %{ "MOV    $dst,$src" %}
5949   ins_encode( LdImmI(dst, src) );
5950   ins_pipe( ialu_reg_fat );
5951 %}
5952 
5953 // Load Constant zero
5954 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5955   match(Set dst src);
5956   effect(KILL cr);
5957 
5958   ins_cost(50);
5959   format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* XOR r32,r/m32 */
5961   ins_encode( OpcP, RegReg( dst, dst ) );
5962   ins_pipe( ialu_reg );
5963 %}
5964 
5965 instruct loadConP(eRegP dst, immP src) %{
5966   match(Set dst src);
5967 
5968   format %{ "MOV    $dst,$src" %}
5969   opcode(0xB8);  /* + rd */
5970   ins_encode( LdImmP(dst, src) );
5971   ins_pipe( ialu_reg_fat );
5972 %}
5973 
5974 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5975   match(Set dst src);
5976   effect(KILL cr);
5977   ins_cost(200);
5978   format %{ "MOV    $dst.lo,$src.lo\n\t"
5979             "MOV    $dst.hi,$src.hi" %}
5980   opcode(0xB8);
5981   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5982   ins_pipe( ialu_reg_long_fat );
5983 %}
5984 
5985 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5986   match(Set dst src);
5987   effect(KILL cr);
5988   ins_cost(150);
5989   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5990             "XOR    $dst.hi,$dst.hi" %}
5991   opcode(0x33,0x33);
5992   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5993   ins_pipe( ialu_reg_long );
5994 %}
5995 
5996 // The instruction usage is guarded by predicate in operand immFPR().
5997 instruct loadConFPR(regFPR dst, immFPR con) %{
5998   match(Set dst con);
5999   ins_cost(125);
6000   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6001             "FSTP   $dst" %}
6002   ins_encode %{
6003     __ fld_s($constantaddress($con));
6004     __ fstp_d($dst$$reg);
6005   %}
6006   ins_pipe(fpu_reg_con);
6007 %}
6008 
6009 // The instruction usage is guarded by predicate in operand immFPR0().
6010 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6011   match(Set dst con);
6012   ins_cost(125);
6013   format %{ "FLDZ   ST\n\t"
6014             "FSTP   $dst" %}
6015   ins_encode %{
6016     __ fldz();
6017     __ fstp_d($dst$$reg);
6018   %}
6019   ins_pipe(fpu_reg_con);
6020 %}
6021 
6022 // The instruction usage is guarded by predicate in operand immFPR1().
6023 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6024   match(Set dst con);
6025   ins_cost(125);
6026   format %{ "FLD1   ST\n\t"
6027             "FSTP   $dst" %}
6028   ins_encode %{
6029     __ fld1();
6030     __ fstp_d($dst$$reg);
6031   %}
6032   ins_pipe(fpu_reg_con);
6033 %}
6034 
6035 // The instruction usage is guarded by predicate in operand immF().
6036 instruct loadConF(regF dst, immF con) %{
6037   match(Set dst con);
6038   ins_cost(125);
6039   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6040   ins_encode %{
6041     __ movflt($dst$$XMMRegister, $constantaddress($con));
6042   %}
6043   ins_pipe(pipe_slow);
6044 %}
6045 
6046 // The instruction usage is guarded by predicate in operand immF0().
6047 instruct loadConF0(regF dst, immF0 src) %{
6048   match(Set dst src);
6049   ins_cost(100);
6050   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6051   ins_encode %{
6052     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6053   %}
6054   ins_pipe(pipe_slow);
6055 %}
6056 
6057 // The instruction usage is guarded by predicate in operand immDPR().
6058 instruct loadConDPR(regDPR dst, immDPR con) %{
6059   match(Set dst con);
6060   ins_cost(125);
6061 
6062   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6063             "FSTP   $dst" %}
6064   ins_encode %{
6065     __ fld_d($constantaddress($con));
6066     __ fstp_d($dst$$reg);
6067   %}
6068   ins_pipe(fpu_reg_con);
6069 %}
6070 
6071 // The instruction usage is guarded by predicate in operand immDPR0().
6072 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6073   match(Set dst con);
6074   ins_cost(125);
6075 
6076   format %{ "FLDZ   ST\n\t"
6077             "FSTP   $dst" %}
6078   ins_encode %{
6079     __ fldz();
6080     __ fstp_d($dst$$reg);
6081   %}
6082   ins_pipe(fpu_reg_con);
6083 %}
6084 
6085 // The instruction usage is guarded by predicate in operand immDPR1().
6086 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6087   match(Set dst con);
6088   ins_cost(125);
6089 
6090   format %{ "FLD1   ST\n\t"
6091             "FSTP   $dst" %}
6092   ins_encode %{
6093     __ fld1();
6094     __ fstp_d($dst$$reg);
6095   %}
6096   ins_pipe(fpu_reg_con);
6097 %}
6098 
6099 // The instruction usage is guarded by predicate in operand immD().
6100 instruct loadConD(regD dst, immD con) %{
6101   match(Set dst con);
6102   ins_cost(125);
6103   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6104   ins_encode %{
6105     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6106   %}
6107   ins_pipe(pipe_slow);
6108 %}
6109 
6110 // The instruction usage is guarded by predicate in operand immD0().
6111 instruct loadConD0(regD dst, immD0 src) %{
6112   match(Set dst src);
6113   ins_cost(100);
6114   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6115   ins_encode %{
6116     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6117   %}
6118   ins_pipe( pipe_slow );
6119 %}
6120 
6121 // Load Stack Slot
6122 instruct loadSSI(rRegI dst, stackSlotI src) %{
6123   match(Set dst src);
6124   ins_cost(125);
6125 
6126   format %{ "MOV    $dst,$src" %}
6127   opcode(0x8B);
6128   ins_encode( OpcP, RegMem(dst,src));
6129   ins_pipe( ialu_reg_mem );
6130 %}
6131 
6132 instruct loadSSL(eRegL dst, stackSlotL src) %{
6133   match(Set dst src);
6134 
6135   ins_cost(200);
6136   format %{ "MOV    $dst,$src.lo\n\t"
6137             "MOV    $dst+4,$src.hi" %}
6138   opcode(0x8B, 0x8B);
6139   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6140   ins_pipe( ialu_mem_long_reg );
6141 %}
6142 
6143 // Load Stack Slot
6144 instruct loadSSP(eRegP dst, stackSlotP src) %{
6145   match(Set dst src);
6146   ins_cost(125);
6147 
6148   format %{ "MOV    $dst,$src" %}
6149   opcode(0x8B);
6150   ins_encode( OpcP, RegMem(dst,src));
6151   ins_pipe( ialu_reg_mem );
6152 %}
6153 
6154 // Load Stack Slot
6155 instruct loadSSF(regFPR dst, stackSlotF src) %{
6156   match(Set dst src);
6157   ins_cost(125);
6158 
6159   format %{ "FLD_S  $src\n\t"
6160             "FSTP   $dst" %}
6161   opcode(0xD9);               /* D9 /0, FLD m32real */
6162   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6163               Pop_Reg_FPR(dst) );
6164   ins_pipe( fpu_reg_mem );
6165 %}
6166 
6167 // Load Stack Slot
6168 instruct loadSSD(regDPR dst, stackSlotD src) %{
6169   match(Set dst src);
6170   ins_cost(125);
6171 
6172   format %{ "FLD_D  $src\n\t"
6173             "FSTP   $dst" %}
6174   opcode(0xDD);               /* DD /0, FLD m64real */
6175   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6176               Pop_Reg_DPR(dst) );
6177   ins_pipe( fpu_reg_mem );
6178 %}
6179 
6180 // Prefetch instructions for allocation.
6181 // Must be safe to execute with invalid address (cannot fault).
6182 
6183 instruct prefetchAlloc0( memory mem ) %{
6184   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6185   match(PrefetchAllocation mem);
6186   ins_cost(0);
6187   size(0);
6188   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6189   ins_encode();
6190   ins_pipe(empty);
6191 %}
6192 
6193 instruct prefetchAlloc( memory mem ) %{
6194   predicate(AllocatePrefetchInstr==3);
6195   match( PrefetchAllocation mem );
6196   ins_cost(100);
6197 
6198   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6199   ins_encode %{
6200     __ prefetchw($mem$$Address);
6201   %}
6202   ins_pipe(ialu_mem);
6203 %}
6204 
6205 instruct prefetchAllocNTA( memory mem ) %{
6206   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6207   match(PrefetchAllocation mem);
6208   ins_cost(100);
6209 
6210   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6211   ins_encode %{
6212     __ prefetchnta($mem$$Address);
6213   %}
6214   ins_pipe(ialu_mem);
6215 %}
6216 
6217 instruct prefetchAllocT0( memory mem ) %{
6218   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6219   match(PrefetchAllocation mem);
6220   ins_cost(100);
6221 
6222   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6223   ins_encode %{
6224     __ prefetcht0($mem$$Address);
6225   %}
6226   ins_pipe(ialu_mem);
6227 %}
6228 
6229 instruct prefetchAllocT2( memory mem ) %{
6230   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6231   match(PrefetchAllocation mem);
6232   ins_cost(100);
6233 
6234   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6235   ins_encode %{
6236     __ prefetcht2($mem$$Address);
6237   %}
6238   ins_pipe(ialu_mem);
6239 %}
6240 
6241 //----------Store Instructions-------------------------------------------------
6242 
6243 // Store Byte
6244 instruct storeB(memory mem, xRegI src) %{
6245   match(Set mem (StoreB mem src));
6246 
6247   ins_cost(125);
6248   format %{ "MOV8   $mem,$src" %}
6249   opcode(0x88);
6250   ins_encode( OpcP, RegMem( src, mem ) );
6251   ins_pipe( ialu_mem_reg );
6252 %}
6253 
6254 // Store Char/Short
6255 instruct storeC(memory mem, rRegI src) %{
6256   match(Set mem (StoreC mem src));
6257 
6258   ins_cost(125);
6259   format %{ "MOV16  $mem,$src" %}
6260   opcode(0x89, 0x66);
6261   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6262   ins_pipe( ialu_mem_reg );
6263 %}
6264 
6265 // Store Integer
6266 instruct storeI(memory mem, rRegI src) %{
6267   match(Set mem (StoreI mem src));
6268 
6269   ins_cost(125);
6270   format %{ "MOV    $mem,$src" %}
6271   opcode(0x89);
6272   ins_encode( OpcP, RegMem( src, mem ) );
6273   ins_pipe( ialu_mem_reg );
6274 %}
6275 
6276 // Store Long
6277 instruct storeL(long_memory mem, eRegL src) %{
6278   predicate(!((StoreLNode*)n)->require_atomic_access());
6279   match(Set mem (StoreL mem src));
6280 
6281   ins_cost(200);
6282   format %{ "MOV    $mem,$src.lo\n\t"
6283             "MOV    $mem+4,$src.hi" %}
6284   opcode(0x89, 0x89);
6285   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6286   ins_pipe( ialu_mem_long_reg );
6287 %}
6288 
6289 // Store Long to Integer
6290 instruct storeL2I(memory mem, eRegL src) %{
6291   match(Set mem (StoreI mem (ConvL2I src)));
6292 
6293   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6294   ins_encode %{
6295     __ movl($mem$$Address, $src$$Register);
6296   %}
6297   ins_pipe(ialu_mem_reg);
6298 %}
6299 
6300 // Volatile Store Long.  Must be atomic, so move it into
6301 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6302 // target address before the store (for null-ptr checks)
6303 // so the memory operand is used twice in the encoding.
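// (A plain 64-bit store would be split into two 32-bit MOVs on x86_32 and
// would not be atomic; the x87 FILD/FISTP pair moves all 64 bits in a single
// memory access, which is what provides the required atomicity.)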
6304 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6305   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6306   match(Set mem (StoreL mem src));
6307   effect( KILL cr );
6308   ins_cost(400);
6309   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6310             "FILD   $src\n\t"
6311             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6312   opcode(0x3B);
6313   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6314   ins_pipe( fpu_reg_mem );
6315 %}
6316 
6317 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6318   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6319   match(Set mem (StoreL mem src));
6320   effect( TEMP tmp, KILL cr );
6321   ins_cost(380);
6322   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6323             "MOVSD  $tmp,$src\n\t"
6324             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6325   ins_encode %{
6326     __ cmpl(rax, $mem$$Address);
6327     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6328     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6329   %}
6330   ins_pipe( pipe_slow );
6331 %}
6332 
6333 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6334   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6335   match(Set mem (StoreL mem src));
6336   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6337   ins_cost(360);
6338   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6339             "MOVD   $tmp,$src.lo\n\t"
6340             "MOVD   $tmp2,$src.hi\n\t"
6341             "PUNPCKLDQ $tmp,$tmp2\n\t"
6342             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6343   ins_encode %{
6344     __ cmpl(rax, $mem$$Address);
6345     __ movdl($tmp$$XMMRegister, $src$$Register);
6346     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6347     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6348     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6349   %}
6350   ins_pipe( pipe_slow );
6351 %}
6352 
6353 // Store Pointer; for storing unknown oops and raw pointers
6354 instruct storeP(memory mem, anyRegP src) %{
6355   match(Set mem (StoreP mem src));
6356 
6357   ins_cost(125);
6358   format %{ "MOV    $mem,$src" %}
6359   opcode(0x89);
6360   ins_encode( OpcP, RegMem( src, mem ) );
6361   ins_pipe( ialu_mem_reg );
6362 %}
6363 
6364 // Store Integer Immediate
6365 instruct storeImmI(memory mem, immI src) %{
6366   match(Set mem (StoreI mem src));
6367 
6368   ins_cost(150);
6369   format %{ "MOV    $mem,$src" %}
6370   opcode(0xC7);               /* C7 /0 */
6371   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6372   ins_pipe( ialu_mem_imm );
6373 %}
6374 
6375 // Store Short/Char Immediate
6376 instruct storeImmI16(memory mem, immI16 src) %{
6377   predicate(UseStoreImmI16);
6378   match(Set mem (StoreC mem src));
6379 
6380   ins_cost(150);
6381   format %{ "MOV16  $mem,$src" %}
6382   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6383   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6384   ins_pipe( ialu_mem_imm );
6385 %}
6386 
6387 // Store Pointer Immediate; null pointers or constant oops that do not
6388 // need card-mark barriers.
6389 instruct storeImmP(memory mem, immP src) %{
6390   match(Set mem (StoreP mem src));
6391 
6392   ins_cost(150);
6393   format %{ "MOV    $mem,$src" %}
6394   opcode(0xC7);               /* C7 /0 */
6395   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6396   ins_pipe( ialu_mem_imm );
6397 %}
6398 
6399 // Store Byte Immediate
6400 instruct storeImmB(memory mem, immI8 src) %{
6401   match(Set mem (StoreB mem src));
6402 
6403   ins_cost(150);
6404   format %{ "MOV8   $mem,$src" %}
6405   opcode(0xC6);               /* C6 /0 */
6406   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6407   ins_pipe( ialu_mem_imm );
6408 %}
6409 
6410 // Store CMS card-mark Immediate
6411 instruct storeImmCM(memory mem, immI8 src) %{
6412   match(Set mem (StoreCM mem src));
6413 
6414   ins_cost(150);
6415   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6416   opcode(0xC6);               /* C6 /0 */
6417   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6418   ins_pipe( ialu_mem_imm );
6419 %}
6420 
6421 // Store Double
6422 instruct storeDPR( memory mem, regDPR1 src) %{
6423   predicate(UseSSE<=1);
6424   match(Set mem (StoreD mem src));
6425 
6426   ins_cost(100);
6427   format %{ "FST_D  $mem,$src" %}
6428   opcode(0xDD);       /* DD /2 */
6429   ins_encode( enc_FPR_store(mem,src) );
6430   ins_pipe( fpu_mem_reg );
6431 %}
6432 
6433 // Store double does rounding on x86
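// (The x87 stack holds values in 80-bit extended precision; FST_D rounds to
// 64 bits on the way out, so the explicit RoundDouble folds into the store.)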
6434 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6435   predicate(UseSSE<=1);
6436   match(Set mem (StoreD mem (RoundDouble src)));
6437 
6438   ins_cost(100);
6439   format %{ "FST_D  $mem,$src\t# round" %}
6440   opcode(0xDD);       /* DD /2 */
6441   ins_encode( enc_FPR_store(mem,src) );
6442   ins_pipe( fpu_mem_reg );
6443 %}
6444 
// Store XMM register to memory (double-precision floating point)
6446 // MOVSD instruction
6447 instruct storeD(memory mem, regD src) %{
6448   predicate(UseSSE>=2);
6449   match(Set mem (StoreD mem src));
6450   ins_cost(95);
6451   format %{ "MOVSD  $mem,$src" %}
6452   ins_encode %{
6453     __ movdbl($mem$$Address, $src$$XMMRegister);
6454   %}
6455   ins_pipe( pipe_slow );
6456 %}
6457 
6458 // Store XMM register to memory (single-precision floating point)
6459 // MOVSS instruction
6460 instruct storeF(memory mem, regF src) %{
6461   predicate(UseSSE>=1);
6462   match(Set mem (StoreF mem src));
6463   ins_cost(95);
6464   format %{ "MOVSS  $mem,$src" %}
6465   ins_encode %{
6466     __ movflt($mem$$Address, $src$$XMMRegister);
6467   %}
6468   ins_pipe( pipe_slow );
6469 %}
6470 
6471 // Store Float
6472 instruct storeFPR( memory mem, regFPR1 src) %{
6473   predicate(UseSSE==0);
6474   match(Set mem (StoreF mem src));
6475 
6476   ins_cost(100);
6477   format %{ "FST_S  $mem,$src" %}
6478   opcode(0xD9);       /* D9 /2 */
6479   ins_encode( enc_FPR_store(mem,src) );
6480   ins_pipe( fpu_mem_reg );
6481 %}
6482 
6483 // Store Float does rounding on x86
6484 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6485   predicate(UseSSE==0);
6486   match(Set mem (StoreF mem (RoundFloat src)));
6487 
6488   ins_cost(100);
6489   format %{ "FST_S  $mem,$src\t# round" %}
6490   opcode(0xD9);       /* D9 /2 */
6491   ins_encode( enc_FPR_store(mem,src) );
6492   ins_pipe( fpu_mem_reg );
6493 %}
6494 
// Store Float from a Double rounds on x86 (the ConvD2F is folded into the FST_S store)
6496 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6497   predicate(UseSSE<=1);
6498   match(Set mem (StoreF mem (ConvD2F src)));
6499 
6500   ins_cost(100);
6501   format %{ "FST_S  $mem,$src\t# D-round" %}
6502   opcode(0xD9);       /* D9 /2 */
6503   ins_encode( enc_FPR_store(mem,src) );
6504   ins_pipe( fpu_mem_reg );
6505 %}
6506 
6507 // Store immediate Float value (it is faster than store from FPU register)
6508 // The instruction usage is guarded by predicate in operand immFPR().
6509 instruct storeFPR_imm( memory mem, immFPR src) %{
6510   match(Set mem (StoreF mem src));
6511 
6512   ins_cost(50);
6513   format %{ "MOV    $mem,$src\t# store float" %}
6514   opcode(0xC7);               /* C7 /0 */
6515   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6516   ins_pipe( ialu_mem_imm );
6517 %}
6518 
6519 // Store immediate Float value (it is faster than store from XMM register)
6520 // The instruction usage is guarded by predicate in operand immF().
6521 instruct storeF_imm( memory mem, immF src) %{
6522   match(Set mem (StoreF mem src));
6523 
6524   ins_cost(50);
6525   format %{ "MOV    $mem,$src\t# store float" %}
6526   opcode(0xC7);               /* C7 /0 */
6527   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6528   ins_pipe( ialu_mem_imm );
6529 %}
6530 
6531 // Store Integer to stack slot
6532 instruct storeSSI(stackSlotI dst, rRegI src) %{
6533   match(Set dst src);
6534 
6535   ins_cost(100);
6536   format %{ "MOV    $dst,$src" %}
6537   opcode(0x89);
6538   ins_encode( OpcPRegSS( dst, src ) );
6539   ins_pipe( ialu_mem_reg );
6540 %}
6541 
// Store Pointer to stack slot
6543 instruct storeSSP(stackSlotP dst, eRegP src) %{
6544   match(Set dst src);
6545 
6546   ins_cost(100);
6547   format %{ "MOV    $dst,$src" %}
6548   opcode(0x89);
6549   ins_encode( OpcPRegSS( dst, src ) );
6550   ins_pipe( ialu_mem_reg );
6551 %}
6552 
6553 // Store Long to stack slot
6554 instruct storeSSL(stackSlotL dst, eRegL src) %{
6555   match(Set dst src);
6556 
6557   ins_cost(200);
6558   format %{ "MOV    $dst,$src.lo\n\t"
6559             "MOV    $dst+4,$src.hi" %}
6560   opcode(0x89, 0x89);
6561   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6562   ins_pipe( ialu_mem_long_reg );
6563 %}
6564 
6565 //----------MemBar Instructions-----------------------------------------------
6566 // Memory barrier flavors
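// On x86 ordinary loads already have acquire semantics and ordinary stores
// already have release semantics, so MEMBAR-acquire/release need no code.
// Only StoreLoad reordering is possible, which membar_volatile below prevents
// with a locked ADD to the top-of-stack word.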
6567 
6568 instruct membar_acquire() %{
6569   match(MemBarAcquire);
6570   match(LoadFence);
6571   ins_cost(400);
6572 
6573   size(0);
6574   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6575   ins_encode();
6576   ins_pipe(empty);
6577 %}
6578 
6579 instruct membar_acquire_lock() %{
6580   match(MemBarAcquireLock);
6581   ins_cost(0);
6582 
6583   size(0);
6584   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6585   ins_encode( );
6586   ins_pipe(empty);
6587 %}
6588 
6589 instruct membar_release() %{
6590   match(MemBarRelease);
6591   match(StoreFence);
6592   ins_cost(400);
6593 
6594   size(0);
6595   format %{ "MEMBAR-release ! (empty encoding)" %}
6596   ins_encode( );
6597   ins_pipe(empty);
6598 %}
6599 
6600 instruct membar_release_lock() %{
6601   match(MemBarReleaseLock);
6602   ins_cost(0);
6603 
6604   size(0);
6605   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6606   ins_encode( );
6607   ins_pipe(empty);
6608 %}
6609 
6610 instruct membar_volatile(eFlagsReg cr) %{
6611   match(MemBarVolatile);
6612   effect(KILL cr);
6613   ins_cost(400);
6614 
6615   format %{
6616     $$template
6617     if (os::is_MP()) {
6618       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6619     } else {
6620       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6621     }
6622   %}
6623   ins_encode %{
6624     __ membar(Assembler::StoreLoad);
6625   %}
6626   ins_pipe(pipe_slow);
6627 %}
6628 
6629 instruct unnecessary_membar_volatile() %{
6630   match(MemBarVolatile);
6631   predicate(Matcher::post_store_load_barrier(n));
6632   ins_cost(0);
6633 
6634   size(0);
6635   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6636   ins_encode( );
6637   ins_pipe(empty);
6638 %}
6639 
6640 instruct membar_storestore() %{
6641   match(MemBarStoreStore);
6642   ins_cost(0);
6643 
6644   size(0);
6645   format %{ "MEMBAR-storestore (empty encoding)" %}
6646   ins_encode( );
6647   ins_pipe(empty);
6648 %}
6649 
6650 //----------Move Instructions--------------------------------------------------
6651 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6652   match(Set dst (CastX2P src));
6653   format %{ "# X2P  $dst, $src" %}
6654   ins_encode( /*empty encoding*/ );
6655   ins_cost(0);
6656   ins_pipe(empty);
6657 %}
6658 
6659 instruct castP2X(rRegI dst, eRegP src ) %{
6660   match(Set dst (CastP2X src));
6661   ins_cost(50);
6662   format %{ "MOV    $dst, $src\t# CastP2X" %}
6663   ins_encode( enc_Copy( dst, src) );
6664   ins_pipe( ialu_reg_reg );
6665 %}
6666 
6667 //----------Conditional Move---------------------------------------------------
6668 // Conditional move
6669 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6670   predicate(!VM_Version::supports_cmov() );
6671   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6672   ins_cost(200);
6673   format %{ "J$cop,us skip\t# signed cmove\n\t"
6674             "MOV    $dst,$src\n"
6675       "skip:" %}
6676   ins_encode %{
6677     Label Lskip;
6678     // Invert sense of branch from sense of CMOV
6679     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6680     __ movl($dst$$Register, $src$$Register);
6681     __ bind(Lskip);
6682   %}
6683   ins_pipe( pipe_cmov_reg );
6684 %}
6685 
6686 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6687   predicate(!VM_Version::supports_cmov() );
6688   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6689   ins_cost(200);
6690   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6691             "MOV    $dst,$src\n"
6692       "skip:" %}
6693   ins_encode %{
6694     Label Lskip;
6695     // Invert sense of branch from sense of CMOV
6696     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6697     __ movl($dst$$Register, $src$$Register);
6698     __ bind(Lskip);
6699   %}
6700   ins_pipe( pipe_cmov_reg );
6701 %}
6702 
6703 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6704   predicate(VM_Version::supports_cmov() );
6705   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6706   ins_cost(200);
6707   format %{ "CMOV$cop $dst,$src" %}
6708   opcode(0x0F,0x40);
6709   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6710   ins_pipe( pipe_cmov_reg );
6711 %}
6712 
6713 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6714   predicate(VM_Version::supports_cmov() );
6715   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6716   ins_cost(200);
6717   format %{ "CMOV$cop $dst,$src" %}
6718   opcode(0x0F,0x40);
6719   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6720   ins_pipe( pipe_cmov_reg );
6721 %}
6722 
6723 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6724   predicate(VM_Version::supports_cmov() );
6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6726   ins_cost(200);
6727   expand %{
6728     cmovI_regU(cop, cr, dst, src);
6729   %}
6730 %}
6731 
6732 // Conditional move
6733 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6734   predicate(VM_Version::supports_cmov() );
6735   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6736   ins_cost(250);
6737   format %{ "CMOV$cop $dst,$src" %}
6738   opcode(0x0F,0x40);
6739   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6740   ins_pipe( pipe_cmov_mem );
6741 %}
6742 
6743 // Conditional move
6744 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6745   predicate(VM_Version::supports_cmov() );
6746   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6747   ins_cost(250);
6748   format %{ "CMOV$cop $dst,$src" %}
6749   opcode(0x0F,0x40);
6750   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6751   ins_pipe( pipe_cmov_mem );
6752 %}
6753 
6754 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6755   predicate(VM_Version::supports_cmov() );
6756   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6757   ins_cost(250);
6758   expand %{
6759     cmovI_memU(cop, cr, dst, src);
6760   %}
6761 %}
6762 
6763 // Conditional move
6764 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6765   predicate(VM_Version::supports_cmov() );
6766   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6767   ins_cost(200);
6768   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6769   opcode(0x0F,0x40);
6770   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6771   ins_pipe( pipe_cmov_reg );
6772 %}
6773 
// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
//       regardless of whether we are on a P6, so we
//       emulate a cmov here
6778 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6779   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6780   ins_cost(300);
6781   format %{ "Jn$cop   skip\n\t"
6782           "MOV    $dst,$src\t# pointer\n"
6783       "skip:" %}
6784   opcode(0x8b);
6785   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6786   ins_pipe( pipe_cmov_reg );
6787 %}
6788 
6789 // Conditional move
6790 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6791   predicate(VM_Version::supports_cmov() );
6792   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6793   ins_cost(200);
6794   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6795   opcode(0x0F,0x40);
6796   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6797   ins_pipe( pipe_cmov_reg );
6798 %}
6799 
6800 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6801   predicate(VM_Version::supports_cmov() );
6802   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6803   ins_cost(200);
6804   expand %{
6805     cmovP_regU(cop, cr, dst, src);
6806   %}
6807 %}
6808 
6809 // DISABLED: Requires the ADLC to emit a bottom_type call that
6810 // correctly meets the two pointer arguments; one is an incoming
6811 // register but the other is a memory operand.  ALSO appears to
6812 // be buggy with implicit null checks.
6813 //
6814 //// Conditional move
6815 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6816 //  predicate(VM_Version::supports_cmov() );
6817 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6818 //  ins_cost(250);
6819 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6820 //  opcode(0x0F,0x40);
6821 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6822 //  ins_pipe( pipe_cmov_mem );
6823 //%}
6824 //
6825 //// Conditional move
6826 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6827 //  predicate(VM_Version::supports_cmov() );
6828 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6829 //  ins_cost(250);
6830 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6831 //  opcode(0x0F,0x40);
6832 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6833 //  ins_pipe( pipe_cmov_mem );
6834 //%}
6835 
6836 // Conditional move
6837 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6838   predicate(UseSSE<=1);
6839   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6840   ins_cost(200);
6841   format %{ "FCMOV$cop $dst,$src\t# double" %}
6842   opcode(0xDA);
6843   ins_encode( enc_cmov_dpr(cop,src) );
6844   ins_pipe( pipe_cmovDPR_reg );
6845 %}
6846 
6847 // Conditional move
6848 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6849   predicate(UseSSE==0);
6850   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6851   ins_cost(200);
6852   format %{ "FCMOV$cop $dst,$src\t# float" %}
6853   opcode(0xDA);
6854   ins_encode( enc_cmov_dpr(cop,src) );
6855   ins_pipe( pipe_cmovDPR_reg );
6856 %}
6857 
6858 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
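// (FCMOVcc only tests the CF/ZF/PF conditions produced by an unsigned-style
// compare, so a signed compare has to fall back to a branch around a copy.)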
6859 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6860   predicate(UseSSE<=1);
6861   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   format %{ "Jn$cop   skip\n\t"
6864             "MOV    $dst,$src\t# double\n"
6865       "skip:" %}
6866   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6867   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6868   ins_pipe( pipe_cmovDPR_reg );
6869 %}
6870 
6871 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6872 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6873   predicate(UseSSE==0);
6874   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6875   ins_cost(200);
6876   format %{ "Jn$cop    skip\n\t"
6877             "MOV    $dst,$src\t# float\n"
6878       "skip:" %}
6879   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6880   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6881   ins_pipe( pipe_cmovDPR_reg );
6882 %}
6883 
// There is no CMOV for XMM (SSE/SSE2) registers, so branch around the move
6885 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6886   predicate (UseSSE>=1);
6887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6888   ins_cost(200);
6889   format %{ "Jn$cop   skip\n\t"
6890             "MOVSS  $dst,$src\t# float\n"
6891       "skip:" %}
6892   ins_encode %{
6893     Label skip;
6894     // Invert sense of branch from sense of CMOV
6895     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6896     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6897     __ bind(skip);
6898   %}
6899   ins_pipe( pipe_slow );
6900 %}
6901 
// There is no CMOV for XMM (SSE/SSE2) registers, so branch around the move
6903 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6904   predicate (UseSSE>=2);
6905   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6906   ins_cost(200);
6907   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
6909       "skip:" %}
6910   ins_encode %{
6911     Label skip;
6912     // Invert sense of branch from sense of CMOV
6913     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6914     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6915     __ bind(skip);
6916   %}
6917   ins_pipe( pipe_slow );
6918 %}
6919 
6920 // unsigned version
6921 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6922   predicate (UseSSE>=1);
6923   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6924   ins_cost(200);
6925   format %{ "Jn$cop   skip\n\t"
6926             "MOVSS  $dst,$src\t# float\n"
6927       "skip:" %}
6928   ins_encode %{
6929     Label skip;
6930     // Invert sense of branch from sense of CMOV
6931     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6932     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6933     __ bind(skip);
6934   %}
6935   ins_pipe( pipe_slow );
6936 %}
6937 
6938 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6939   predicate (UseSSE>=1);
6940   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6941   ins_cost(200);
6942   expand %{
6943     fcmovF_regU(cop, cr, dst, src);
6944   %}
6945 %}
6946 
6947 // unsigned version
6948 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6949   predicate (UseSSE>=2);
6950   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6951   ins_cost(200);
6952   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
6954       "skip:" %}
6955   ins_encode %{
6956     Label skip;
6957     // Invert sense of branch from sense of CMOV
6958     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6959     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6960     __ bind(skip);
6961   %}
6962   ins_pipe( pipe_slow );
6963 %}
6964 
6965 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6966   predicate (UseSSE>=2);
6967   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6968   ins_cost(200);
6969   expand %{
6970     fcmovD_regU(cop, cr, dst, src);
6971   %}
6972 %}
6973 
6974 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6975   predicate(VM_Version::supports_cmov() );
6976   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6977   ins_cost(200);
6978   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6979             "CMOV$cop $dst.hi,$src.hi" %}
6980   opcode(0x0F,0x40);
6981   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6982   ins_pipe( pipe_cmov_reg_long );
6983 %}
6984 
6985 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6986   predicate(VM_Version::supports_cmov() );
6987   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6988   ins_cost(200);
6989   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6990             "CMOV$cop $dst.hi,$src.hi" %}
6991   opcode(0x0F,0x40);
6992   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6993   ins_pipe( pipe_cmov_reg_long );
6994 %}
6995 
6996 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6997   predicate(VM_Version::supports_cmov() );
6998   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6999   ins_cost(200);
7000   expand %{
7001     cmovL_regU(cop, cr, dst, src);
7002   %}
7003 %}
7004 
7005 //----------Arithmetic Instructions--------------------------------------------
7006 //----------Addition Instructions----------------------------------------------
7007 
7008 // Integer Addition Instructions
7009 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7010   match(Set dst (AddI dst src));
7011   effect(KILL cr);
7012 
7013   size(2);
7014   format %{ "ADD    $dst,$src" %}
7015   opcode(0x03);
7016   ins_encode( OpcP, RegReg( dst, src) );
7017   ins_pipe( ialu_reg_reg );
7018 %}
7019 
7020 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7021   match(Set dst (AddI dst src));
7022   effect(KILL cr);
7023 
7024   format %{ "ADD    $dst,$src" %}
7025   opcode(0x81, 0x00); /* /0 id */
7026   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7027   ins_pipe( ialu_reg );
7028 %}
7029 
7030 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7031   predicate(UseIncDec);
7032   match(Set dst (AddI dst src));
7033   effect(KILL cr);
7034 
7035   size(1);
7036   format %{ "INC    $dst" %}
  opcode(0x40); /* 40+rd, INC r32 */
7038   ins_encode( Opc_plus( primary, dst ) );
7039   ins_pipe( ialu_reg );
7040 %}
7041 
7042 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7043   match(Set dst (AddI src0 src1));
7044   ins_cost(110);
7045 
7046   format %{ "LEA    $dst,[$src0 + $src1]" %}
7047   opcode(0x8D); /* 0x8D /r */
7048   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7049   ins_pipe( ialu_reg_reg );
7050 %}
7051 
7052 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7053   match(Set dst (AddP src0 src1));
7054   ins_cost(110);
7055 
7056   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7057   opcode(0x8D); /* 0x8D /r */
7058   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7059   ins_pipe( ialu_reg_reg );
7060 %}
7061 
7062 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7063   predicate(UseIncDec);
7064   match(Set dst (AddI dst src));
7065   effect(KILL cr);
7066 
7067   size(1);
7068   format %{ "DEC    $dst" %}
  opcode(0x48); /* 48+rd, DEC r32 */
7070   ins_encode( Opc_plus( primary, dst ) );
7071   ins_pipe( ialu_reg );
7072 %}
7073 
7074 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7075   match(Set dst (AddP dst src));
7076   effect(KILL cr);
7077 
7078   size(2);
7079   format %{ "ADD    $dst,$src" %}
7080   opcode(0x03);
7081   ins_encode( OpcP, RegReg( dst, src) );
7082   ins_pipe( ialu_reg_reg );
7083 %}
7084 
7085 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7086   match(Set dst (AddP dst src));
7087   effect(KILL cr);
7088 
7089   format %{ "ADD    $dst,$src" %}
7090   opcode(0x81,0x00); /* Opcode 81 /0 id */
7091   // ins_encode( RegImm( dst, src) );
7092   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7093   ins_pipe( ialu_reg );
7094 %}
7095 
7096 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7097   match(Set dst (AddI dst (LoadI src)));
7098   effect(KILL cr);
7099 
7100   ins_cost(125);
7101   format %{ "ADD    $dst,$src" %}
7102   opcode(0x03);
7103   ins_encode( OpcP, RegMem( dst, src) );
7104   ins_pipe( ialu_reg_mem );
7105 %}
7106 
7107 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7108   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7109   effect(KILL cr);
7110 
7111   ins_cost(150);
7112   format %{ "ADD    $dst,$src" %}
7113   opcode(0x01);  /* Opcode 01 /r */
7114   ins_encode( OpcP, RegMem( src, dst ) );
7115   ins_pipe( ialu_mem_reg );
7116 %}
7117 
7118 // Add Memory with Immediate
7119 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7120   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7121   effect(KILL cr);
7122 
7123   ins_cost(125);
7124   format %{ "ADD    $dst,$src" %}
7125   opcode(0x81);               /* Opcode 81 /0 id */
7126   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7127   ins_pipe( ialu_mem_imm );
7128 %}
7129 
7130 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7131   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7132   effect(KILL cr);
7133 
7134   ins_cost(125);
7135   format %{ "INC    $dst" %}
7136   opcode(0xFF);               /* Opcode FF /0 */
7137   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7138   ins_pipe( ialu_mem_imm );
7139 %}
7140 
7141 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7142   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7143   effect(KILL cr);
7144 
7145   ins_cost(125);
7146   format %{ "DEC    $dst" %}
7147   opcode(0xFF);               /* Opcode FF /1 */
7148   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7149   ins_pipe( ialu_mem_imm );
7150 %}
7151 
7152 
7153 instruct checkCastPP( eRegP dst ) %{
7154   match(Set dst (CheckCastPP dst));
7155 
7156   size(0);
7157   format %{ "#checkcastPP of $dst" %}
7158   ins_encode( /*empty encoding*/ );
7159   ins_pipe( empty );
7160 %}
7161 
7162 instruct castPP( eRegP dst ) %{
7163   match(Set dst (CastPP dst));
7164   format %{ "#castPP of $dst" %}
7165   ins_encode( /*empty encoding*/ );
7166   ins_pipe( empty );
7167 %}
7168 
7169 instruct castII( rRegI dst ) %{
7170   match(Set dst (CastII dst));
7171   format %{ "#castII of $dst" %}
7172   ins_encode( /*empty encoding*/ );
7173   ins_cost(0);
7174   ins_pipe( empty );
7175 %}
7176 
7177 
7178 // Load-locked - same as a regular pointer load when used with compare-swap
7179 instruct loadPLocked(eRegP dst, memory mem) %{
7180   match(Set dst (LoadPLocked mem));
7181 
7182   ins_cost(125);
7183   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7184   opcode(0x8B);
7185   ins_encode( OpcP, RegMem(dst,mem));
7186   ins_pipe( ialu_reg_mem );
7187 %}
7188 
7189 // Conditional-store of the updated heap-top.
7190 // Used during allocation of the shared heap.
7191 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
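// Illustrative semantics: CMPXCHG compares EAX (the expected top) with
// $heap_top_ptr; if they are equal it stores $newval and sets ZF, otherwise
// it loads the current top into EAX and clears ZF so the allocation path can
// retry.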
7192 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7193   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7194   // EAX is killed if there is contention, but then it's also unused.
7195   // In the common case of no contention, EAX holds the new oop address.
7196   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7197   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7198   ins_pipe( pipe_cmpxchg );
7199 %}
7200 
7201 // Conditional-store of an int value.
7202 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7203 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7204   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7205   effect(KILL oldval);
7206   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7207   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7208   ins_pipe( pipe_cmpxchg );
7209 %}
7210 
7211 // Conditional-store of a long value.
7212 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7213 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7214   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7215   effect(KILL oldval);
7216   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7217             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7218             "XCHG   EBX,ECX"
7219   %}
7220   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7222     //       cmpxchg8 instruction because the instruction uses
7223     //       rcx as the high order word of the new value to store but
7224     //       our register encoding uses rbx.
7225     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7226     if( os::is_MP() )
7227       __ lock();
7228     __ cmpxchg8($mem$$Address);
7229     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7230   %}
7231   ins_pipe( pipe_cmpxchg );
7232 %}
7233 
7234 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7235 
7236 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7237   predicate(VM_Version::supports_cx8());
7238   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7239   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7240   effect(KILL cr, KILL oldval);
7241   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7242             "MOV    $res,0\n\t"
7243             "JNE,s  fail\n\t"
7244             "MOV    $res,1\n"
7245           "fail:" %}
7246   ins_encode( enc_cmpxchg8(mem_ptr),
7247               enc_flags_ne_to_boolean(res) );
7248   ins_pipe( pipe_cmpxchg );
7249 %}
7250 
7251 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7252   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7253   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7254   effect(KILL cr, KILL oldval);
7255   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7256             "MOV    $res,0\n\t"
7257             "JNE,s  fail\n\t"
7258             "MOV    $res,1\n"
7259           "fail:" %}
7260   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7261   ins_pipe( pipe_cmpxchg );
7262 %}
7263 
7264 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7265   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7266   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7267   effect(KILL cr, KILL oldval);
7268   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7269             "MOV    $res,0\n\t"
7270             "JNE,s  fail\n\t"
7271             "MOV    $res,1\n"
7272           "fail:" %}
7273   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7274   ins_pipe( pipe_cmpxchg );
7275 %}
7276 
7277 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7278   predicate(VM_Version::supports_cx8());
7279   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7280   effect(KILL cr);
7281   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7282   ins_encode( enc_cmpxchg8(mem_ptr) );
7283   ins_pipe( pipe_cmpxchg );
7284 %}
7285 
7286 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7287   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7288   effect(KILL cr);
7289   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7290   ins_encode( enc_cmpxchg(mem_ptr) );
7291   ins_pipe( pipe_cmpxchg );
7292 %}
7293 
7294 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7295   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7296   effect(KILL cr);
7297   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7298   ins_encode( enc_cmpxchg(mem_ptr) );
7299   ins_pipe( pipe_cmpxchg );
7300 %}
7301 
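// When the fetched value of a GetAndAddI is not used (result_not_used()),
// a locked ADD is enough; there is no need for XADD to return the old value.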
7302 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7303   predicate(n->as_LoadStore()->result_not_used());
7304   match(Set dummy (GetAndAddI mem add));
7305   effect(KILL cr);
7306   format %{ "ADDL  [$mem],$add" %}
7307   ins_encode %{
7308     if (os::is_MP()) { __ lock(); }
7309     __ addl($mem$$Address, $add$$constant);
7310   %}
7311   ins_pipe( pipe_cmpxchg );
7312 %}
7313 
7314 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7315   match(Set newval (GetAndAddI mem newval));
7316   effect(KILL cr);
7317   format %{ "XADDL  [$mem],$newval" %}
7318   ins_encode %{
7319     if (os::is_MP()) { __ lock(); }
7320     __ xaddl($mem$$Address, $newval$$Register);
7321   %}
7322   ins_pipe( pipe_cmpxchg );
7323 %}
7324 
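// XCHG with a memory operand is implicitly locked, so no LOCK prefix (and no
// os::is_MP() check) is needed here, unlike the XADD/ADD forms above.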
7325 instruct xchgI( memory mem, rRegI newval) %{
7326   match(Set newval (GetAndSetI mem newval));
7327   format %{ "XCHGL  $newval,[$mem]" %}
7328   ins_encode %{
7329     __ xchgl($newval$$Register, $mem$$Address);
7330   %}
7331   ins_pipe( pipe_cmpxchg );
7332 %}
7333 
7334 instruct xchgP( memory mem, pRegP newval) %{
7335   match(Set newval (GetAndSetP mem newval));
7336   format %{ "XCHGL  $newval,[$mem]" %}
7337   ins_encode %{
7338     __ xchgl($newval$$Register, $mem$$Address);
7339   %}
7340   ins_pipe( pipe_cmpxchg );
7341 %}
7342 
7343 //----------Subtraction Instructions-------------------------------------------
7344 
7345 // Integer Subtraction Instructions
7346 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7347   match(Set dst (SubI dst src));
7348   effect(KILL cr);
7349 
7350   size(2);
7351   format %{ "SUB    $dst,$src" %}
7352   opcode(0x2B);
7353   ins_encode( OpcP, RegReg( dst, src) );
7354   ins_pipe( ialu_reg_reg );
7355 %}
7356 
7357 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7358   match(Set dst (SubI dst src));
7359   effect(KILL cr);
7360 
7361   format %{ "SUB    $dst,$src" %}
7362   opcode(0x81,0x05);  /* Opcode 81 /5 */
7363   // ins_encode( RegImm( dst, src) );
7364   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7365   ins_pipe( ialu_reg );
7366 %}
7367 
7368 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7369   match(Set dst (SubI dst (LoadI src)));
7370   effect(KILL cr);
7371 
7372   ins_cost(125);
7373   format %{ "SUB    $dst,$src" %}
7374   opcode(0x2B);
7375   ins_encode( OpcP, RegMem( dst, src) );
7376   ins_pipe( ialu_reg_mem );
7377 %}
7378 
7379 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7380   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7381   effect(KILL cr);
7382 
7383   ins_cost(150);
7384   format %{ "SUB    $dst,$src" %}
7385   opcode(0x29);  /* Opcode 29 /r */
7386   ins_encode( OpcP, RegMem( src, dst ) );
7387   ins_pipe( ialu_mem_reg );
7388 %}
7389 
7390 // Subtract from a pointer
7391 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7392   match(Set dst (AddP dst (SubI zero src)));
7393   effect(KILL cr);
7394 
7395   size(2);
7396   format %{ "SUB    $dst,$src" %}
7397   opcode(0x2B);
7398   ins_encode( OpcP, RegReg( dst, src) );
7399   ins_pipe( ialu_reg_reg );
7400 %}
7401 
7402 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7403   match(Set dst (SubI zero dst));
7404   effect(KILL cr);
7405 
7406   size(2);
7407   format %{ "NEG    $dst" %}
7408   opcode(0xF7,0x03);  // Opcode F7 /3
7409   ins_encode( OpcP, RegOpc( dst ) );
7410   ins_pipe( ialu_reg );
7411 %}
7412 
7413 //----------Multiplication/Division Instructions-------------------------------
7414 // Integer Multiplication Instructions
7415 // Multiply Register
7416 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7417   match(Set dst (MulI dst src));
7418   effect(KILL cr);
7419 
7420   size(3);
7421   ins_cost(300);
7422   format %{ "IMUL   $dst,$src" %}
7423   opcode(0xAF, 0x0F);
7424   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7425   ins_pipe( ialu_reg_reg_alu0 );
7426 %}
7427 
7428 // Multiply 32-bit Immediate
7429 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7430   match(Set dst (MulI src imm));
7431   effect(KILL cr);
7432 
7433   ins_cost(300);
7434   format %{ "IMUL   $dst,$src,$imm" %}
7435   opcode(0x69);  /* 69 /r id */
7436   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7437   ins_pipe( ialu_reg_reg_alu0 );
7438 %}
7439 
7440 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7441   match(Set dst src);
7442   effect(KILL cr);
7443 
7444   // Note that this is artificially increased to make it more expensive than loadConL
7445   ins_cost(250);
7446   format %{ "MOV    EAX,$src\t// low word only" %}
7447   opcode(0xB8);
7448   ins_encode( LdImmL_Lo(dst, src) );
7449   ins_pipe( ialu_reg_fat );
7450 %}
7451 
7452 // Multiply by 32-bit Immediate, taking the shifted high order results
7453 //  (special case for shift by 32)
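// In effect the matched tree computes
//   dst = (int)(((jlong)src1 * con) >> 32)
// where con is the 32-bit constant held in the low word (EAX) of $src2;
// the one-operand IMUL leaves that high half directly in EDX.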
7454 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7455   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7456   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7457              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7458              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7459   effect(USE src1, KILL cr);
7460 
7461   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7462   ins_cost(0*100 + 1*400 - 150);
7463   format %{ "IMUL   EDX:EAX,$src1" %}
7464   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7465   ins_pipe( pipe_slow );
7466 %}
7467 
7468 // Multiply by 32-bit Immediate, taking the shifted high order results
7469 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7470   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7471   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7472              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7473              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7474   effect(USE src1, KILL cr);
7475 
7476   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7477   ins_cost(1*100 + 1*400 - 150);
7478   format %{ "IMUL   EDX:EAX,$src1\n\t"
7479             "SAR    EDX,$cnt-32" %}
7480   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7481   ins_pipe( pipe_slow );
7482 %}
7483 
7484 // Multiply Memory 32-bit Immediate
7485 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7486   match(Set dst (MulI (LoadI src) imm));
7487   effect(KILL cr);
7488 
7489   ins_cost(300);
7490   format %{ "IMUL   $dst,$src,$imm" %}
7491   opcode(0x69);  /* 69 /r id */
7492   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7493   ins_pipe( ialu_reg_mem_alu0 );
7494 %}
7495 
7496 // Multiply Memory
7497 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7498   match(Set dst (MulI dst (LoadI src)));
7499   effect(KILL cr);
7500 
7501   ins_cost(350);
7502   format %{ "IMUL   $dst,$src" %}
7503   opcode(0xAF, 0x0F);
7504   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7505   ins_pipe( ialu_reg_mem_alu0 );
7506 %}
7507 
7508 // Multiply Register Int to Long
7509 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7510   // Basic Idea: long = (long)int * (long)int
7511   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7512   effect(DEF dst, USE src, USE src1, KILL flags);
7513 
7514   ins_cost(300);
7515   format %{ "IMUL   $dst,$src1" %}
7516 
7517   ins_encode( long_int_multiply( dst, src1 ) );
7518   ins_pipe( ialu_reg_reg_alu0 );
7519 %}
7520 
7521 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7522   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7523   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7524   effect(KILL flags);
7525 
7526   ins_cost(300);
7527   format %{ "MUL    $dst,$src1" %}
7528 
7529   ins_encode( long_uint_multiply(dst, src1) );
7530   ins_pipe( ialu_reg_reg_alu0 );
7531 %}
7532 
7533 // Multiply Register Long
7534 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7535   match(Set dst (MulL dst src));
7536   effect(KILL cr, TEMP tmp);
7537   ins_cost(4*100+3*400);
7538 // Basic idea: lo(result) = lo(x_lo * y_lo)
7539 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7540   format %{ "MOV    $tmp,$src.lo\n\t"
7541             "IMUL   $tmp,EDX\n\t"
7542             "MOV    EDX,$src.hi\n\t"
7543             "IMUL   EDX,EAX\n\t"
7544             "ADD    $tmp,EDX\n\t"
7545             "MUL    EDX:EAX,$src.lo\n\t"
7546             "ADD    EDX,$tmp" %}
7547   ins_encode( long_multiply( dst, src, tmp ) );
7548   ins_pipe( pipe_slow );
7549 %}
7550 
7551 // Multiply Register Long where the left operand's high 32 bits are zero
7552 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7553   predicate(is_operand_hi32_zero(n->in(1)));
7554   match(Set dst (MulL dst src));
7555   effect(KILL cr, TEMP tmp);
7556   ins_cost(2*100+2*400);
7557 // Basic idea: lo(result) = lo(x_lo * y_lo)
7558 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7559   format %{ "MOV    $tmp,$src.hi\n\t"
7560             "IMUL   $tmp,EAX\n\t"
7561             "MUL    EDX:EAX,$src.lo\n\t"
7562             "ADD    EDX,$tmp" %}
7563   ins_encode %{
7564     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7565     __ imull($tmp$$Register, rax);
7566     __ mull($src$$Register);
7567     __ addl(rdx, $tmp$$Register);
7568   %}
7569   ins_pipe( pipe_slow );
7570 %}
7571 
7572 // Multiply Register Long where the right operand's high 32 bits are zero
7573 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7574   predicate(is_operand_hi32_zero(n->in(2)));
7575   match(Set dst (MulL dst src));
7576   effect(KILL cr, TEMP tmp);
7577   ins_cost(2*100+2*400);
7578 // Basic idea: lo(result) = lo(x_lo * y_lo)
7579 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7580   format %{ "MOV    $tmp,$src.lo\n\t"
7581             "IMUL   $tmp,EDX\n\t"
7582             "MUL    EDX:EAX,$src.lo\n\t"
7583             "ADD    EDX,$tmp" %}
7584   ins_encode %{
7585     __ movl($tmp$$Register, $src$$Register);
7586     __ imull($tmp$$Register, rdx);
7587     __ mull($src$$Register);
7588     __ addl(rdx, $tmp$$Register);
7589   %}
7590   ins_pipe( pipe_slow );
7591 %}
7592 
7593 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7594 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7595   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7596   match(Set dst (MulL dst src));
7597   effect(KILL cr);
7598   ins_cost(1*400);
7599 // Basic idea: lo(result) = lo(x_lo * y_lo)
7600 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7601   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7602   ins_encode %{
7603     __ mull($src$$Register);
7604   %}
7605   ins_pipe( pipe_slow );
7606 %}
7607 
7608 // Multiply Register Long by small constant
7609 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7610   match(Set dst (MulL dst src));
7611   effect(KILL cr, TEMP tmp);
7612   ins_cost(2*100+2*400);
7613   size(12);
7614 // Basic idea: lo(result) = lo(src * EAX)
7615 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7616   format %{ "IMUL   $tmp,EDX,$src\n\t"
7617             "MOV    EDX,$src\n\t"
7618             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7619             "ADD    EDX,$tmp" %}
7620   ins_encode( long_multiply_con( dst, src, tmp ) );
7621   ins_pipe( pipe_slow );
7622 %}
7623 
7624 // Integer DIV with Register
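// The 0x80000000 / -1 special case below avoids the #DE fault that IDIV
// raises on quotient overflow (min_jint / -1); Java requires that case to
// produce min_jint with a zero remainder, which the short-circuit path
// (EDX=0, EAX left as min_jint) gives.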
7625 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7626   match(Set rax (DivI rax div));
7627   effect(KILL rdx, KILL cr);
7628   size(26);
7629   ins_cost(30*100+10*100);
7630   format %{ "CMP    EAX,0x80000000\n\t"
7631             "JNE,s  normal\n\t"
7632             "XOR    EDX,EDX\n\t"
7633             "CMP    ECX,-1\n\t"
7634             "JE,s   done\n"
7635     "normal: CDQ\n\t"
7636             "IDIV   $div\n\t"
7637     "done:"        %}
7638   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7639   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7640   ins_pipe( ialu_reg_reg_alu0 );
7641 %}
7642 
7643 // Divide Register Long
7644 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7645   match(Set dst (DivL src1 src2));
7646   effect( KILL cr, KILL cx, KILL bx );
7647   ins_cost(10000);
7648   format %{ "PUSH   $src1.hi\n\t"
7649             "PUSH   $src1.lo\n\t"
7650             "PUSH   $src2.hi\n\t"
7651             "PUSH   $src2.lo\n\t"
7652             "CALL   SharedRuntime::ldiv\n\t"
7653             "ADD    ESP,16" %}
7654   ins_encode( long_div(src1,src2) );
7655   ins_pipe( pipe_slow );
7656 %}
7657 
7658 // Integer DIVMOD with Register, both quotient and mod results
7659 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7660   match(DivModI rax div);
7661   effect(KILL cr);
7662   size(26);
7663   ins_cost(30*100+10*100);
7664   format %{ "CMP    EAX,0x80000000\n\t"
7665             "JNE,s  normal\n\t"
7666             "XOR    EDX,EDX\n\t"
7667             "CMP    ECX,-1\n\t"
7668             "JE,s   done\n"
7669     "normal: CDQ\n\t"
7670             "IDIV   $div\n\t"
7671     "done:"        %}
7672   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7673   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7674   ins_pipe( pipe_slow );
7675 %}
7676 
7677 // Integer MOD with Register
7678 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7679   match(Set rdx (ModI rax div));
7680   effect(KILL rax, KILL cr);
7681 
7682   size(26);
7683   ins_cost(300);
7684   format %{ "CDQ\n\t"
7685             "IDIV   $div" %}
7686   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7687   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7688   ins_pipe( ialu_reg_reg_alu0 );
7689 %}
7690 
7691 // Remainder Register Long
7692 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7693   match(Set dst (ModL src1 src2));
7694   effect( KILL cr, KILL cx, KILL bx );
7695   ins_cost(10000);
7696   format %{ "PUSH   $src1.hi\n\t"
7697             "PUSH   $src1.lo\n\t"
7698             "PUSH   $src2.hi\n\t"
7699             "PUSH   $src2.lo\n\t"
7700             "CALL   SharedRuntime::lrem\n\t"
7701             "ADD    ESP,16" %}
7702   ins_encode( long_mod(src1,src2) );
7703   ins_pipe( pipe_slow );
7704 %}
7705 
7706 // Divide Register Long (no special case since divisor != -1)
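// Sketch of the approach used in the encoding below: since |imm| fits in 32
// bits and imm != -1, the 64/32 division is done with at most two unsigned
// 32-bit DIVs -- the high word first, its remainder feeding the low-word
// divide -- with a negative dividend negated before and its quotient negated
// after, plus a final negation when $imm itself is negative.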
7707 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7708   match(Set dst (DivL dst imm));
7709   effect( TEMP tmp, TEMP tmp2, KILL cr );
7710   ins_cost(1000);
7711   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7712             "XOR    $tmp2,$tmp2\n\t"
7713             "CMP    $tmp,EDX\n\t"
7714             "JA,s   fast\n\t"
7715             "MOV    $tmp2,EAX\n\t"
7716             "MOV    EAX,EDX\n\t"
7717             "MOV    EDX,0\n\t"
7718             "JLE,s  pos\n\t"
7719             "LNEG   EAX : $tmp2\n\t"
7720             "DIV    $tmp # unsigned division\n\t"
7721             "XCHG   EAX,$tmp2\n\t"
7722             "DIV    $tmp\n\t"
7723             "LNEG   $tmp2 : EAX\n\t"
7724             "JMP,s  done\n"
7725     "pos:\n\t"
7726             "DIV    $tmp\n\t"
7727             "XCHG   EAX,$tmp2\n"
7728     "fast:\n\t"
7729             "DIV    $tmp\n"
7730     "done:\n\t"
7731             "MOV    EDX,$tmp2\n\t"
7732             "NEG    EDX:EAX # if $imm < 0" %}
7733   ins_encode %{
7734     int con = (int)$imm$$constant;
7735     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7736     int pcon = (con > 0) ? con : -con;
7737     Label Lfast, Lpos, Ldone;
7738 
7739     __ movl($tmp$$Register, pcon);
7740     __ xorl($tmp2$$Register,$tmp2$$Register);
7741     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7742     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7743 
7744     __ movl($tmp2$$Register, $dst$$Register); // save
7745     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7746     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7747     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7748 
7749     // Negative dividend.
7750     // convert value to positive to use unsigned division
7751     __ lneg($dst$$Register, $tmp2$$Register);
7752     __ divl($tmp$$Register);
7753     __ xchgl($dst$$Register, $tmp2$$Register);
7754     __ divl($tmp$$Register);
7755     // revert result back to negative
7756     __ lneg($tmp2$$Register, $dst$$Register);
7757     __ jmpb(Ldone);
7758 
7759     __ bind(Lpos);
7760     __ divl($tmp$$Register); // Use unsigned division
7761     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
7763 
7764     __ bind(Lfast);
7765     // fast path: src is positive
7766     __ divl($tmp$$Register); // Use unsigned division
7767 
7768     __ bind(Ldone);
7769     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7770     if (con < 0) {
7771       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7772     }
7773   %}
7774   ins_pipe( pipe_slow );
7775 %}
7776 
// Remainder Register Long (remainder fits into 32 bits)
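// Same two-DIV scheme as divL_eReg_imm32 above, except the value kept is the
// remainder, which is known to fit in 32 bits and is sign-extended into EDX
// at the end.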
7778 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7779   match(Set dst (ModL dst imm));
7780   effect( TEMP tmp, TEMP tmp2, KILL cr );
7781   ins_cost(1000);
7782   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7783             "CMP    $tmp,EDX\n\t"
7784             "JA,s   fast\n\t"
7785             "MOV    $tmp2,EAX\n\t"
7786             "MOV    EAX,EDX\n\t"
7787             "MOV    EDX,0\n\t"
7788             "JLE,s  pos\n\t"
7789             "LNEG   EAX : $tmp2\n\t"
7790             "DIV    $tmp # unsigned division\n\t"
7791             "MOV    EAX,$tmp2\n\t"
7792             "DIV    $tmp\n\t"
7793             "NEG    EDX\n\t"
7794             "JMP,s  done\n"
7795     "pos:\n\t"
7796             "DIV    $tmp\n\t"
7797             "MOV    EAX,$tmp2\n"
7798     "fast:\n\t"
7799             "DIV    $tmp\n"
7800     "done:\n\t"
7801             "MOV    EAX,EDX\n\t"
7802             "SAR    EDX,31\n\t" %}
7803   ins_encode %{
7804     int con = (int)$imm$$constant;
7805     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7806     int pcon = (con > 0) ? con : -con;
7807     Label  Lfast, Lpos, Ldone;
7808 
7809     __ movl($tmp$$Register, pcon);
7810     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7811     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7812 
7813     __ movl($tmp2$$Register, $dst$$Register); // save
7814     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7815     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7816     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7817 
7818     // Negative dividend.
7819     // convert value to positive to use unsigned division
7820     __ lneg($dst$$Register, $tmp2$$Register);
7821     __ divl($tmp$$Register);
7822     __ movl($dst$$Register, $tmp2$$Register);
7823     __ divl($tmp$$Register);
7824     // revert remainder back to negative
7825     __ negl(HIGH_FROM_LOW($dst$$Register));
7826     __ jmpb(Ldone);
7827 
7828     __ bind(Lpos);
7829     __ divl($tmp$$Register);
7830     __ movl($dst$$Register, $tmp2$$Register);
7831 
7832     __ bind(Lfast);
7833     // fast path: src is positive
7834     __ divl($tmp$$Register);
7835 
7836     __ bind(Ldone);
7837     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7838     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7839 
7840   %}
7841   ins_pipe( pipe_slow );
7842 %}
7843 
7844 // Integer Shift Instructions
7845 // Shift Left by one
7846 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7847   match(Set dst (LShiftI dst shift));
7848   effect(KILL cr);
7849 
7850   size(2);
7851   format %{ "SHL    $dst,$shift" %}
7852   opcode(0xD1, 0x4);  /* D1 /4 */
7853   ins_encode( OpcP, RegOpc( dst ) );
7854   ins_pipe( ialu_reg );
7855 %}
7856 
7857 // Shift Left by 8-bit immediate
7858 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7859   match(Set dst (LShiftI dst shift));
7860   effect(KILL cr);
7861 
7862   size(3);
7863   format %{ "SHL    $dst,$shift" %}
7864   opcode(0xC1, 0x4);  /* C1 /4 ib */
7865   ins_encode( RegOpcImm( dst, shift) );
7866   ins_pipe( ialu_reg );
7867 %}
7868 
7869 // Shift Left by variable
7870 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7871   match(Set dst (LShiftI dst shift));
7872   effect(KILL cr);
7873 
7874   size(2);
7875   format %{ "SHL    $dst,$shift" %}
7876   opcode(0xD3, 0x4);  /* D3 /4 */
7877   ins_encode( OpcP, RegOpc( dst ) );
7878   ins_pipe( ialu_reg_reg );
7879 %}
7880 
7881 // Arithmetic shift right by one
7882 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7883   match(Set dst (RShiftI dst shift));
7884   effect(KILL cr);
7885 
7886   size(2);
7887   format %{ "SAR    $dst,$shift" %}
7888   opcode(0xD1, 0x7);  /* D1 /7 */
7889   ins_encode( OpcP, RegOpc( dst ) );
7890   ins_pipe( ialu_reg );
7891 %}
7892 
// Arithmetic shift right by one (memory operand)
7894 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7895   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7896   effect(KILL cr);
7897   format %{ "SAR    $dst,$shift" %}
7898   opcode(0xD1, 0x7);  /* D1 /7 */
7899   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7900   ins_pipe( ialu_mem_imm );
7901 %}
7902 
7903 // Arithmetic Shift Right by 8-bit immediate
7904 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7905   match(Set dst (RShiftI dst shift));
7906   effect(KILL cr);
7907 
7908   size(3);
7909   format %{ "SAR    $dst,$shift" %}
7910   opcode(0xC1, 0x7);  /* C1 /7 ib */
7911   ins_encode( RegOpcImm( dst, shift ) );
7912   ins_pipe( ialu_mem_imm );
7913 %}
7914 
// Arithmetic Shift Right by 8-bit immediate (memory operand)
7916 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7917   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7918   effect(KILL cr);
7919 
7920   format %{ "SAR    $dst,$shift" %}
7921   opcode(0xC1, 0x7);  /* C1 /7 ib */
7922   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7923   ins_pipe( ialu_mem_imm );
7924 %}
7925 
7926 // Arithmetic Shift Right by variable
7927 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7928   match(Set dst (RShiftI dst shift));
7929   effect(KILL cr);
7930 
7931   size(2);
7932   format %{ "SAR    $dst,$shift" %}
7933   opcode(0xD3, 0x7);  /* D3 /7 */
7934   ins_encode( OpcP, RegOpc( dst ) );
7935   ins_pipe( ialu_reg_reg );
7936 %}
7937 
7938 // Logical shift right by one
7939 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7940   match(Set dst (URShiftI dst shift));
7941   effect(KILL cr);
7942 
7943   size(2);
7944   format %{ "SHR    $dst,$shift" %}
7945   opcode(0xD1, 0x5);  /* D1 /5 */
7946   ins_encode( OpcP, RegOpc( dst ) );
7947   ins_pipe( ialu_reg );
7948 %}
7949 
7950 // Logical Shift Right by 8-bit immediate
7951 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7952   match(Set dst (URShiftI dst shift));
7953   effect(KILL cr);
7954 
7955   size(3);
7956   format %{ "SHR    $dst,$shift" %}
7957   opcode(0xC1, 0x5);  /* C1 /5 ib */
7958   ins_encode( RegOpcImm( dst, shift) );
7959   ins_pipe( ialu_reg );
7960 %}
7961 
7962 
7963 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7964 // This idiom is used by the compiler for the i2b bytecode.
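// e.g. a Java cast such as "(byte) x" reaches the matcher as (x << 24) >> 24
// and collapses here to a single sign-extending byte move.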
7965 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7966   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7967 
7968   size(3);
7969   format %{ "MOVSX  $dst,$src :8" %}
7970   ins_encode %{
7971     __ movsbl($dst$$Register, $src$$Register);
7972   %}
7973   ins_pipe(ialu_reg_reg);
7974 %}
7975 
7976 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
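// e.g. "(short) x" arrives as (x << 16) >> 16 and becomes one MOVSX of the
// low 16 bits.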
7978 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7979   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7980 
7981   size(3);
7982   format %{ "MOVSX  $dst,$src :16" %}
7983   ins_encode %{
7984     __ movswl($dst$$Register, $src$$Register);
7985   %}
7986   ins_pipe(ialu_reg_reg);
7987 %}
7988 
7989 
7990 // Logical Shift Right by variable
7991 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7992   match(Set dst (URShiftI dst shift));
7993   effect(KILL cr);
7994 
7995   size(2);
7996   format %{ "SHR    $dst,$shift" %}
7997   opcode(0xD3, 0x5);  /* D3 /5 */
7998   ins_encode( OpcP, RegOpc( dst ) );
7999   ins_pipe( ialu_reg_reg );
8000 %}
8001 
8002 
8003 //----------Logical Instructions-----------------------------------------------
8004 //----------Integer Logical Instructions---------------------------------------
8005 // And Instructions
8006 // And Register with Register
8007 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8008   match(Set dst (AndI dst src));
8009   effect(KILL cr);
8010 
8011   size(2);
8012   format %{ "AND    $dst,$src" %}
8013   opcode(0x23);
8014   ins_encode( OpcP, RegReg( dst, src) );
8015   ins_pipe( ialu_reg_reg );
8016 %}
8017 
8018 // And Register with Immediate
8019 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8020   match(Set dst (AndI dst src));
8021   effect(KILL cr);
8022 
8023   format %{ "AND    $dst,$src" %}
8024   opcode(0x81,0x04);  /* Opcode 81 /4 */
8025   // ins_encode( RegImm( dst, src) );
8026   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8027   ins_pipe( ialu_reg );
8028 %}
8029 
8030 // And Register with Memory
8031 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8032   match(Set dst (AndI dst (LoadI src)));
8033   effect(KILL cr);
8034 
8035   ins_cost(125);
8036   format %{ "AND    $dst,$src" %}
8037   opcode(0x23);
8038   ins_encode( OpcP, RegMem( dst, src) );
8039   ins_pipe( ialu_reg_mem );
8040 %}
8041 
8042 // And Memory with Register
8043 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8044   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8045   effect(KILL cr);
8046 
8047   ins_cost(150);
8048   format %{ "AND    $dst,$src" %}
8049   opcode(0x21);  /* Opcode 21 /r */
8050   ins_encode( OpcP, RegMem( src, dst ) );
8051   ins_pipe( ialu_mem_reg );
8052 %}
8053 
8054 // And Memory with Immediate
8055 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8056   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8057   effect(KILL cr);
8058 
8059   ins_cost(125);
8060   format %{ "AND    $dst,$src" %}
8061   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8062   // ins_encode( MemImm( dst, src) );
8063   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8064   ins_pipe( ialu_mem_imm );
8065 %}
8066 
8067 // BMI1 instructions
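// The match rules below recognize the canonical expression shapes of the
// BMI1 bit-manipulation instructions:
//   ANDN   dst = ~src1 & src2
//   BLSI   dst = src & -src        (isolate lowest set bit)
//   BLSMSK dst = src ^ (src - 1)   (mask up to and including lowest set bit)
//   BLSR   dst = src & (src - 1)   (clear lowest set bit)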
8068 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8069   match(Set dst (AndI (XorI src1 minus_1) src2));
8070   predicate(UseBMI1Instructions);
8071   effect(KILL cr);
8072 
8073   format %{ "ANDNL  $dst, $src1, $src2" %}
8074 
8075   ins_encode %{
8076     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8077   %}
8078   ins_pipe(ialu_reg);
8079 %}
8080 
8081 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8082   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8083   predicate(UseBMI1Instructions);
8084   effect(KILL cr);
8085 
8086   ins_cost(125);
8087   format %{ "ANDNL  $dst, $src1, $src2" %}
8088 
8089   ins_encode %{
8090     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8091   %}
8092   ins_pipe(ialu_reg_mem);
8093 %}
8094 
8095 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8096   match(Set dst (AndI (SubI imm_zero src) src));
8097   predicate(UseBMI1Instructions);
8098   effect(KILL cr);
8099 
8100   format %{ "BLSIL  $dst, $src" %}
8101 
8102   ins_encode %{
8103     __ blsil($dst$$Register, $src$$Register);
8104   %}
8105   ins_pipe(ialu_reg);
8106 %}
8107 
8108 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8109   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8110   predicate(UseBMI1Instructions);
8111   effect(KILL cr);
8112 
8113   ins_cost(125);
8114   format %{ "BLSIL  $dst, $src" %}
8115 
8116   ins_encode %{
8117     __ blsil($dst$$Register, $src$$Address);
8118   %}
8119   ins_pipe(ialu_reg_mem);
8120 %}
8121 
8122 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8123 %{
8124   match(Set dst (XorI (AddI src minus_1) src));
8125   predicate(UseBMI1Instructions);
8126   effect(KILL cr);
8127 
8128   format %{ "BLSMSKL $dst, $src" %}
8129 
8130   ins_encode %{
8131     __ blsmskl($dst$$Register, $src$$Register);
8132   %}
8133 
8134   ins_pipe(ialu_reg);
8135 %}
8136 
8137 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8138 %{
8139   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8140   predicate(UseBMI1Instructions);
8141   effect(KILL cr);
8142 
8143   ins_cost(125);
8144   format %{ "BLSMSKL $dst, $src" %}
8145 
8146   ins_encode %{
8147     __ blsmskl($dst$$Register, $src$$Address);
8148   %}
8149 
8150   ins_pipe(ialu_reg_mem);
8151 %}
8152 
8153 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8154 %{
8155   match(Set dst (AndI (AddI src minus_1) src) );
8156   predicate(UseBMI1Instructions);
8157   effect(KILL cr);
8158 
8159   format %{ "BLSRL  $dst, $src" %}
8160 
8161   ins_encode %{
8162     __ blsrl($dst$$Register, $src$$Register);
8163   %}
8164 
8165   ins_pipe(ialu_reg);
8166 %}
8167 
8168 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8169 %{
8170   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8171   predicate(UseBMI1Instructions);
8172   effect(KILL cr);
8173 
8174   ins_cost(125);
8175   format %{ "BLSRL  $dst, $src" %}
8176 
8177   ins_encode %{
8178     __ blsrl($dst$$Register, $src$$Address);
8179   %}
8180 
8181   ins_pipe(ialu_reg_mem);
8182 %}
8183 
8184 // Or Instructions
8185 // Or Register with Register
8186 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8187   match(Set dst (OrI dst src));
8188   effect(KILL cr);
8189 
8190   size(2);
8191   format %{ "OR     $dst,$src" %}
8192   opcode(0x0B);
8193   ins_encode( OpcP, RegReg( dst, src) );
8194   ins_pipe( ialu_reg_reg );
8195 %}
8196 
8197 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8198   match(Set dst (OrI dst (CastP2X src)));
8199   effect(KILL cr);
8200 
8201   size(2);
8202   format %{ "OR     $dst,$src" %}
8203   opcode(0x0B);
8204   ins_encode( OpcP, RegReg( dst, src) );
8205   ins_pipe( ialu_reg_reg );
8206 %}
8207 
8208 
8209 // Or Register with Immediate
8210 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8211   match(Set dst (OrI dst src));
8212   effect(KILL cr);
8213 
8214   format %{ "OR     $dst,$src" %}
8215   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8216   // ins_encode( RegImm( dst, src) );
8217   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8218   ins_pipe( ialu_reg );
8219 %}
8220 
8221 // Or Register with Memory
8222 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8223   match(Set dst (OrI dst (LoadI src)));
8224   effect(KILL cr);
8225 
8226   ins_cost(125);
8227   format %{ "OR     $dst,$src" %}
8228   opcode(0x0B);
8229   ins_encode( OpcP, RegMem( dst, src) );
8230   ins_pipe( ialu_reg_mem );
8231 %}
8232 
8233 // Or Memory with Register
8234 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8235   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8236   effect(KILL cr);
8237 
8238   ins_cost(150);
8239   format %{ "OR     $dst,$src" %}
8240   opcode(0x09);  /* Opcode 09 /r */
8241   ins_encode( OpcP, RegMem( src, dst ) );
8242   ins_pipe( ialu_mem_reg );
8243 %}
8244 
8245 // Or Memory with Immediate
8246 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8247   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8248   effect(KILL cr);
8249 
8250   ins_cost(125);
8251   format %{ "OR     $dst,$src" %}
8252   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8253   // ins_encode( MemImm( dst, src) );
8254   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8255   ins_pipe( ialu_mem_imm );
8256 %}
8257 
8258 // ROL/ROR
8259 // ROL expand
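// The ROL/ROR "expand" rules carry no match rule of their own; they are
// building blocks used by the rotate patterns further down, which recognize
// the (x << n) | (x >>> (32 - n)) idiom (and its variable-count forms) as a
// single rotate instruction.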
8260 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8261   effect(USE_DEF dst, USE shift, KILL cr);
8262 
8263   format %{ "ROL    $dst, $shift" %}
8264   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8265   ins_encode( OpcP, RegOpc( dst ));
8266   ins_pipe( ialu_reg );
8267 %}
8268 
8269 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8270   effect(USE_DEF dst, USE shift, KILL cr);
8271 
8272   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8274   ins_encode( RegOpcImm(dst, shift) );
8275   ins_pipe(ialu_reg);
8276 %}
8277 
8278 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8279   effect(USE_DEF dst, USE shift, KILL cr);
8280 
8281   format %{ "ROL    $dst, $shift" %}
8282   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8283   ins_encode(OpcP, RegOpc(dst));
8284   ins_pipe( ialu_reg_reg );
8285 %}
8286 // end of ROL expand
8287 
8288 // ROL 32bit by one once
8289 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8290   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8291 
8292   expand %{
8293     rolI_eReg_imm1(dst, lshift, cr);
8294   %}
8295 %}
8296 
8297 // ROL 32bit var by imm8 once
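// The predicate requires the two shift counts to sum to 0 mod 32, so the OR
// of the left and right shifts really is a rotate.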
8298 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8299   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8300   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8301 
8302   expand %{
8303     rolI_eReg_imm8(dst, lshift, cr);
8304   %}
8305 %}
8306 
8307 // ROL 32bit var by var once
8308 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8309   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8310 
8311   expand %{
8312     rolI_eReg_CL(dst, shift, cr);
8313   %}
8314 %}
8315 
8316 // ROL 32bit var by var once
8317 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8318   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8319 
8320   expand %{
8321     rolI_eReg_CL(dst, shift, cr);
8322   %}
8323 %}
8324 
8325 // ROR expand
8326 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8327   effect(USE_DEF dst, USE shift, KILL cr);
8328 
8329   format %{ "ROR    $dst, $shift" %}
8330   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8331   ins_encode( OpcP, RegOpc( dst ) );
8332   ins_pipe( ialu_reg );
8333 %}
8334 
8335 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8336   effect (USE_DEF dst, USE shift, KILL cr);
8337 
8338   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8340   ins_encode( RegOpcImm(dst, shift) );
8341   ins_pipe( ialu_reg );
8342 %}
8343 
8344 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8345   effect(USE_DEF dst, USE shift, KILL cr);
8346 
8347   format %{ "ROR    $dst, $shift" %}
8348   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8349   ins_encode(OpcP, RegOpc(dst));
8350   ins_pipe( ialu_reg_reg );
8351 %}
8352 // end of ROR expand
8353 
8354 // ROR right once
8355 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8356   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8357 
8358   expand %{
8359     rorI_eReg_imm1(dst, rshift, cr);
8360   %}
8361 %}
8362 
8363 // ROR 32bit by immI8 once
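// As in the ROL case above, the shift counts must sum to 0 mod 32.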
8364 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8365   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8366   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8367 
8368   expand %{
8369     rorI_eReg_imm8(dst, rshift, cr);
8370   %}
8371 %}
8372 
8373 // ROR 32bit var by var once
8374 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8375   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8376 
8377   expand %{
8378     rorI_eReg_CL(dst, shift, cr);
8379   %}
8380 %}
8381 
8382 // ROR 32bit var by var once
8383 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8384   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8385 
8386   expand %{
8387     rorI_eReg_CL(dst, shift, cr);
8388   %}
8389 %}
8390 
8391 // Xor Instructions
8392 // Xor Register with Register
8393 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8394   match(Set dst (XorI dst src));
8395   effect(KILL cr);
8396 
8397   size(2);
8398   format %{ "XOR    $dst,$src" %}
8399   opcode(0x33);
8400   ins_encode( OpcP, RegReg( dst, src) );
8401   ins_pipe( ialu_reg_reg );
8402 %}
8403 
8404 // Xor Register with Immediate -1
8405 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8406   match(Set dst (XorI dst imm));
8407 
8408   size(2);
8409   format %{ "NOT    $dst" %}
8410   ins_encode %{
8411      __ notl($dst$$Register);
8412   %}
8413   ins_pipe( ialu_reg );
8414 %}
8415 
8416 // Xor Register with Immediate
8417 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8418   match(Set dst (XorI dst src));
8419   effect(KILL cr);
8420 
8421   format %{ "XOR    $dst,$src" %}
8422   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8423   // ins_encode( RegImm( dst, src) );
8424   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8425   ins_pipe( ialu_reg );
8426 %}
8427 
8428 // Xor Register with Memory
8429 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8430   match(Set dst (XorI dst (LoadI src)));
8431   effect(KILL cr);
8432 
8433   ins_cost(125);
8434   format %{ "XOR    $dst,$src" %}
8435   opcode(0x33);
8436   ins_encode( OpcP, RegMem(dst, src) );
8437   ins_pipe( ialu_reg_mem );
8438 %}
8439 
8440 // Xor Memory with Register
8441 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8442   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8443   effect(KILL cr);
8444 
8445   ins_cost(150);
8446   format %{ "XOR    $dst,$src" %}
8447   opcode(0x31);  /* Opcode 31 /r */
8448   ins_encode( OpcP, RegMem( src, dst ) );
8449   ins_pipe( ialu_mem_reg );
8450 %}
8451 
8452 // Xor Memory with Immediate
8453 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8454   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8455   effect(KILL cr);
8456 
8457   ins_cost(125);
8458   format %{ "XOR    $dst,$src" %}
8459   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8460   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8461   ins_pipe( ialu_mem_imm );
8462 %}
8463 
8464 //----------Convert Int to Boolean---------------------------------------------
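// Conv2B turns an int (or pointer) into 0 or 1.  The expand rules below copy
// the source and then rely on NEG setting CF exactly when the value is
// non-zero: the following ADC dst,src computes (-src) + src + CF, i.e. 0 or 1.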
8465 
8466 instruct movI_nocopy(rRegI dst, rRegI src) %{
8467   effect( DEF dst, USE src );
8468   format %{ "MOV    $dst,$src" %}
8469   ins_encode( enc_Copy( dst, src) );
8470   ins_pipe( ialu_reg_reg );
8471 %}
8472 
8473 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8474   effect( USE_DEF dst, USE src, KILL cr );
8475 
8476   size(4);
8477   format %{ "NEG    $dst\n\t"
8478             "ADC    $dst,$src" %}
8479   ins_encode( neg_reg(dst),
8480               OpcRegReg(0x13,dst,src) );
8481   ins_pipe( ialu_reg_reg_long );
8482 %}
8483 
8484 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8485   match(Set dst (Conv2B src));
8486 
8487   expand %{
8488     movI_nocopy(dst,src);
8489     ci2b(dst,src,cr);
8490   %}
8491 %}
8492 
8493 instruct movP_nocopy(rRegI dst, eRegP src) %{
8494   effect( DEF dst, USE src );
8495   format %{ "MOV    $dst,$src" %}
8496   ins_encode( enc_Copy( dst, src) );
8497   ins_pipe( ialu_reg_reg );
8498 %}
8499 
8500 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8501   effect( USE_DEF dst, USE src, KILL cr );
8502   format %{ "NEG    $dst\n\t"
8503             "ADC    $dst,$src" %}
8504   ins_encode( neg_reg(dst),
8505               OpcRegReg(0x13,dst,src) );
8506   ins_pipe( ialu_reg_reg_long );
8507 %}
8508 
8509 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8510   match(Set dst (Conv2B src));
8511 
8512   expand %{
8513     movP_nocopy(dst,src);
8514     cp2b(dst,src,cr);
8515   %}
8516 %}
8517 
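// CmpLTMask produces an all-ones mask (-1) when p < q (signed) and 0
// otherwise: SETlt materializes the 0/1 result and NEG turns 1 into -1.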
8518 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8519   match(Set dst (CmpLTMask p q));
8520   effect(KILL cr);
8521   ins_cost(400);
8522 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8524   format %{ "XOR    $dst,$dst\n\t"
8525             "CMP    $p,$q\n\t"
8526             "SETlt  $dst\n\t"
8527             "NEG    $dst" %}
8528   ins_encode %{
8529     Register Rp = $p$$Register;
8530     Register Rq = $q$$Register;
8531     Register Rd = $dst$$Register;
8532     Label done;
8533     __ xorl(Rd, Rd);
8534     __ cmpl(Rp, Rq);
8535     __ setb(Assembler::less, Rd);
8536     __ negl(Rd);
8537   %}
8538 
8539   ins_pipe(pipe_slow);
8540 %}
8541 
8542 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8543   match(Set dst (CmpLTMask dst zero));
8544   effect(DEF dst, KILL cr);
8545   ins_cost(100);
8546 
8547   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8548   ins_encode %{
    __ sarl($dst$$Register, 31);
8550   %}
8551   ins_pipe(ialu_reg);
8552 %}
8553 
8554 /* better to save a register than avoid a branch */
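// Matches ((p < q ? -1 : 0) & y) + (p - q), i.e. p = (p - q) + (p < q ? y : 0),
// and implements it with a subtract and a short branch instead of building
// the mask.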
8555 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8556   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8557   effect(KILL cr);
8558   ins_cost(400);
8559   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8560             "JGE    done\n\t"
8561             "ADD    $p,$y\n"
8562             "done:  " %}
8563   ins_encode %{
8564     Register Rp = $p$$Register;
8565     Register Rq = $q$$Register;
8566     Register Ry = $y$$Register;
8567     Label done;
8568     __ subl(Rp, Rq);
8569     __ jccb(Assembler::greaterEqual, done);
8570     __ addl(Rp, Ry);
8571     __ bind(done);
8572   %}
8573 
8574   ins_pipe(pipe_cmplt);
8575 %}
8576 
8577 /* better to save a register than avoid a branch */
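// Matches y = (p < q ? -1 : 0) & y, i.e. keep y only when p < q; implemented
// as a compare and a conditionally skipped clear of y.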
8578 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8579   match(Set y (AndI (CmpLTMask p q) y));
8580   effect(KILL cr);
8581 
8582   ins_cost(300);
8583 
8584   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8585             "JLT      done\n\t"
8586             "XORL     $y, $y\n"
8587             "done:  " %}
8588   ins_encode %{
8589     Register Rp = $p$$Register;
8590     Register Rq = $q$$Register;
8591     Register Ry = $y$$Register;
8592     Label done;
8593     __ cmpl(Rp, Rq);
8594     __ jccb(Assembler::less, done);
8595     __ xorl(Ry, Ry);
8596     __ bind(done);
8597   %}
8598 
8599   ins_pipe(pipe_cmplt);
8600 %}
8601 
8602 /* If I enable this, I encourage spilling in the inner loop of compress.
8603 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8604   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8605 */
8606 //----------Overflow Math Instructions-----------------------------------------
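// These rules produce only a flags result: the arithmetic is executed for its
// overflow (OF/CF) side effect and a following conditional branch consumes
// the flags.  They back the overflow-checking math intrinsics
// (e.g. Math.addExact).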
8607 
8608 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8609 %{
8610   match(Set cr (OverflowAddI op1 op2));
8611   effect(DEF cr, USE_KILL op1, USE op2);
8612 
8613   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8614 
8615   ins_encode %{
8616     __ addl($op1$$Register, $op2$$Register);
8617   %}
8618   ins_pipe(ialu_reg_reg);
8619 %}
8620 
8621 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8622 %{
8623   match(Set cr (OverflowAddI op1 op2));
8624   effect(DEF cr, USE_KILL op1, USE op2);
8625 
8626   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8627 
8628   ins_encode %{
8629     __ addl($op1$$Register, $op2$$constant);
8630   %}
8631   ins_pipe(ialu_reg_reg);
8632 %}
8633 
8634 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8635 %{
8636   match(Set cr (OverflowSubI op1 op2));
8637 
8638   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8639   ins_encode %{
8640     __ cmpl($op1$$Register, $op2$$Register);
8641   %}
8642   ins_pipe(ialu_reg_reg);
8643 %}
8644 
8645 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8646 %{
8647   match(Set cr (OverflowSubI op1 op2));
8648 
8649   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8650   ins_encode %{
8651     __ cmpl($op1$$Register, $op2$$constant);
8652   %}
8653   ins_pipe(ialu_reg_reg);
8654 %}
8655 
8656 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8657 %{
8658   match(Set cr (OverflowSubI zero op2));
8659   effect(DEF cr, USE_KILL op2);
8660 
8661   format %{ "NEG    $op2\t# overflow check int" %}
8662   ins_encode %{
8663     __ negl($op2$$Register);
8664   %}
8665   ins_pipe(ialu_reg_reg);
8666 %}
8667 
8668 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8669 %{
8670   match(Set cr (OverflowMulI op1 op2));
8671   effect(DEF cr, USE_KILL op1, USE op2);
8672 
8673   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8674   ins_encode %{
8675     __ imull($op1$$Register, $op2$$Register);
8676   %}
8677   ins_pipe(ialu_reg_reg_alu0);
8678 %}
8679 
8680 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8681 %{
8682   match(Set cr (OverflowMulI op1 op2));
8683   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8684 
8685   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8686   ins_encode %{
8687     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8688   %}
8689   ins_pipe(ialu_reg_reg_alu0);
8690 %}
8691 
8692 //----------Long Instructions------------------------------------------------
8693 // Add Long Register with Register
8694 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8695   match(Set dst (AddL dst src));
8696   effect(KILL cr);
8697   ins_cost(200);
8698   format %{ "ADD    $dst.lo,$src.lo\n\t"
8699             "ADC    $dst.hi,$src.hi" %}
8700   opcode(0x03, 0x13);
8701   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8702   ins_pipe( ialu_reg_reg_long );
8703 %}
8704 
8705 // Add Long Register with Immediate
8706 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8707   match(Set dst (AddL dst src));
8708   effect(KILL cr);
8709   format %{ "ADD    $dst.lo,$src.lo\n\t"
8710             "ADC    $dst.hi,$src.hi" %}
8711   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8712   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8713   ins_pipe( ialu_reg_long );
8714 %}
8715 
8716 // Add Long Register with Memory
8717 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8718   match(Set dst (AddL dst (LoadL mem)));
8719   effect(KILL cr);
8720   ins_cost(125);
8721   format %{ "ADD    $dst.lo,$mem\n\t"
8722             "ADC    $dst.hi,$mem+4" %}
8723   opcode(0x03, 0x13);
8724   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8725   ins_pipe( ialu_reg_long_mem );
8726 %}
8727 
8728 // Subtract Long Register with Register.
8729 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8730   match(Set dst (SubL dst src));
8731   effect(KILL cr);
8732   ins_cost(200);
8733   format %{ "SUB    $dst.lo,$src.lo\n\t"
8734             "SBB    $dst.hi,$src.hi" %}
8735   opcode(0x2B, 0x1B);
8736   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8737   ins_pipe( ialu_reg_reg_long );
8738 %}
8739 
8740 // Subtract Long Register with Immediate
8741 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8742   match(Set dst (SubL dst src));
8743   effect(KILL cr);
8744   format %{ "SUB    $dst.lo,$src.lo\n\t"
8745             "SBB    $dst.hi,$src.hi" %}
8746   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8747   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8748   ins_pipe( ialu_reg_long );
8749 %}
8750 
8751 // Subtract Long Register with Memory
8752 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8753   match(Set dst (SubL dst (LoadL mem)));
8754   effect(KILL cr);
8755   ins_cost(125);
8756   format %{ "SUB    $dst.lo,$mem\n\t"
8757             "SBB    $dst.hi,$mem+4" %}
8758   opcode(0x2B, 0x1B);
8759   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8760   ins_pipe( ialu_reg_long_mem );
8761 %}
8762 
8763 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8764   match(Set dst (SubL zero dst));
8765   effect(KILL cr);
8766   ins_cost(300);
8767   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8768   ins_encode( neg_long(dst) );
8769   ins_pipe( ialu_reg_reg_long );
8770 %}
8771 
8772 // And Long Register with Register
8773 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8774   match(Set dst (AndL dst src));
8775   effect(KILL cr);
8776   format %{ "AND    $dst.lo,$src.lo\n\t"
8777             "AND    $dst.hi,$src.hi" %}
8778   opcode(0x23,0x23);
8779   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8780   ins_pipe( ialu_reg_reg_long );
8781 %}
8782 
8783 // And Long Register with Immediate
8784 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8785   match(Set dst (AndL dst src));
8786   effect(KILL cr);
8787   format %{ "AND    $dst.lo,$src.lo\n\t"
8788             "AND    $dst.hi,$src.hi" %}
8789   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8790   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8791   ins_pipe( ialu_reg_long );
8792 %}
8793 
8794 // And Long Register with Memory
8795 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8796   match(Set dst (AndL dst (LoadL mem)));
8797   effect(KILL cr);
8798   ins_cost(125);
8799   format %{ "AND    $dst.lo,$mem\n\t"
8800             "AND    $dst.hi,$mem+4" %}
8801   opcode(0x23, 0x23);
8802   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8803   ins_pipe( ialu_reg_long_mem );
8804 %}
8805 
8806 // BMI1 instructions
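// The long forms emulate the 32-bit BLS* instructions pairwise: the low word
// is processed first, and the flag it produces (ZF for BLSI, CF for
// BLSMSK/BLSR) says whether the low word was zero; only in that case does the
// high word need the same treatment.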
8807 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8808   match(Set dst (AndL (XorL src1 minus_1) src2));
8809   predicate(UseBMI1Instructions);
8810   effect(KILL cr, TEMP dst);
8811 
8812   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8813             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8814          %}
8815 
8816   ins_encode %{
8817     Register Rdst = $dst$$Register;
8818     Register Rsrc1 = $src1$$Register;
8819     Register Rsrc2 = $src2$$Register;
8820     __ andnl(Rdst, Rsrc1, Rsrc2);
8821     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8822   %}
8823   ins_pipe(ialu_reg_reg_long);
8824 %}
8825 
8826 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8827   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8828   predicate(UseBMI1Instructions);
8829   effect(KILL cr, TEMP dst);
8830 
8831   ins_cost(125);
8832   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8833             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8834          %}
8835 
8836   ins_encode %{
8837     Register Rdst = $dst$$Register;
8838     Register Rsrc1 = $src1$$Register;
8839     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8840 
8841     __ andnl(Rdst, Rsrc1, $src2$$Address);
8842     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8843   %}
8844   ins_pipe(ialu_reg_mem);
8845 %}
8846 
8847 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8848   match(Set dst (AndL (SubL imm_zero src) src));
8849   predicate(UseBMI1Instructions);
8850   effect(KILL cr, TEMP dst);
8851 
8852   format %{ "MOVL   $dst.hi, 0\n\t"
8853             "BLSIL  $dst.lo, $src.lo\n\t"
8854             "JNZ    done\n\t"
8855             "BLSIL  $dst.hi, $src.hi\n"
8856             "done:"
8857          %}
8858 
8859   ins_encode %{
8860     Label done;
8861     Register Rdst = $dst$$Register;
8862     Register Rsrc = $src$$Register;
8863     __ movl(HIGH_FROM_LOW(Rdst), 0);
8864     __ blsil(Rdst, Rsrc);
8865     __ jccb(Assembler::notZero, done);
8866     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8867     __ bind(done);
8868   %}
8869   ins_pipe(ialu_reg);
8870 %}
8871 
8872 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8873   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8874   predicate(UseBMI1Instructions);
8875   effect(KILL cr, TEMP dst);
8876 
8877   ins_cost(125);
8878   format %{ "MOVL   $dst.hi, 0\n\t"
8879             "BLSIL  $dst.lo, $src\n\t"
8880             "JNZ    done\n\t"
8881             "BLSIL  $dst.hi, $src+4\n"
8882             "done:"
8883          %}
8884 
8885   ins_encode %{
8886     Label done;
8887     Register Rdst = $dst$$Register;
8888     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8889 
8890     __ movl(HIGH_FROM_LOW(Rdst), 0);
8891     __ blsil(Rdst, $src$$Address);
8892     __ jccb(Assembler::notZero, done);
8893     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8894     __ bind(done);
8895   %}
8896   ins_pipe(ialu_reg_mem);
8897 %}
8898 
8899 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8900 %{
8901   match(Set dst (XorL (AddL src minus_1) src));
8902   predicate(UseBMI1Instructions);
8903   effect(KILL cr, TEMP dst);
8904 
8905   format %{ "MOVL    $dst.hi, 0\n\t"
8906             "BLSMSKL $dst.lo, $src.lo\n\t"
8907             "JNC     done\n\t"
8908             "BLSMSKL $dst.hi, $src.hi\n"
8909             "done:"
8910          %}
8911 
8912   ins_encode %{
8913     Label done;
8914     Register Rdst = $dst$$Register;
8915     Register Rsrc = $src$$Register;
8916     __ movl(HIGH_FROM_LOW(Rdst), 0);
8917     __ blsmskl(Rdst, Rsrc);
8918     __ jccb(Assembler::carryClear, done);
8919     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8920     __ bind(done);
8921   %}
8922 
8923   ins_pipe(ialu_reg);
8924 %}
8925 
8926 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8927 %{
8928   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8929   predicate(UseBMI1Instructions);
8930   effect(KILL cr, TEMP dst);
8931 
8932   ins_cost(125);
8933   format %{ "MOVL    $dst.hi, 0\n\t"
8934             "BLSMSKL $dst.lo, $src\n\t"
8935             "JNC     done\n\t"
8936             "BLSMSKL $dst.hi, $src+4\n"
8937             "done:"
8938          %}
8939 
8940   ins_encode %{
8941     Label done;
8942     Register Rdst = $dst$$Register;
8943     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8944 
8945     __ movl(HIGH_FROM_LOW(Rdst), 0);
8946     __ blsmskl(Rdst, $src$$Address);
8947     __ jccb(Assembler::carryClear, done);
8948     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8949     __ bind(done);
8950   %}
8951 
8952   ins_pipe(ialu_reg_mem);
8953 %}
8954 
8955 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8956 %{
8957   match(Set dst (AndL (AddL src minus_1) src) );
8958   predicate(UseBMI1Instructions);
8959   effect(KILL cr, TEMP dst);
8960 
8961   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8962             "BLSRL  $dst.lo, $src.lo\n\t"
8963             "JNC    done\n\t"
8964             "BLSRL  $dst.hi, $src.hi\n"
8965             "done:"
8966   %}
8967 
8968   ins_encode %{
8969     Label done;
8970     Register Rdst = $dst$$Register;
8971     Register Rsrc = $src$$Register;
8972     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8973     __ blsrl(Rdst, Rsrc);
8974     __ jccb(Assembler::carryClear, done);
8975     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8976     __ bind(done);
8977   %}
8978 
8979   ins_pipe(ialu_reg);
8980 %}
8981 
8982 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8983 %{
8984   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8985   predicate(UseBMI1Instructions);
8986   effect(KILL cr, TEMP dst);
8987 
8988   ins_cost(125);
8989   format %{ "MOVL   $dst.hi, $src+4\n\t"
8990             "BLSRL  $dst.lo, $src\n\t"
8991             "JNC    done\n\t"
8992             "BLSRL  $dst.hi, $src+4\n"
8993             "done:"
8994   %}
8995 
8996   ins_encode %{
8997     Label done;
8998     Register Rdst = $dst$$Register;
8999     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9000     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9001     __ blsrl(Rdst, $src$$Address);
9002     __ jccb(Assembler::carryClear, done);
9003     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9004     __ bind(done);
9005   %}
9006 
9007   ins_pipe(ialu_reg_mem);
9008 %}
9009 
9010 // Or Long Register with Register
9011 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9012   match(Set dst (OrL dst src));
9013   effect(KILL cr);
9014   format %{ "OR     $dst.lo,$src.lo\n\t"
9015             "OR     $dst.hi,$src.hi" %}
9016   opcode(0x0B,0x0B);
9017   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9018   ins_pipe( ialu_reg_reg_long );
9019 %}
9020 
9021 // Or Long Register with Immediate
9022 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9023   match(Set dst (OrL dst src));
9024   effect(KILL cr);
9025   format %{ "OR     $dst.lo,$src.lo\n\t"
9026             "OR     $dst.hi,$src.hi" %}
9027   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9028   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9029   ins_pipe( ialu_reg_long );
9030 %}
9031 
9032 // Or Long Register with Memory
9033 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9034   match(Set dst (OrL dst (LoadL mem)));
9035   effect(KILL cr);
9036   ins_cost(125);
9037   format %{ "OR     $dst.lo,$mem\n\t"
9038             "OR     $dst.hi,$mem+4" %}
9039   opcode(0x0B,0x0B);
9040   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9041   ins_pipe( ialu_reg_long_mem );
9042 %}
9043 
9044 // Xor Long Register with Register
9045 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9046   match(Set dst (XorL dst src));
9047   effect(KILL cr);
9048   format %{ "XOR    $dst.lo,$src.lo\n\t"
9049             "XOR    $dst.hi,$src.hi" %}
9050   opcode(0x33,0x33);
9051   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9052   ins_pipe( ialu_reg_reg_long );
9053 %}
9054 
9055 // Xor Long Register with Immediate -1
9056 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9057   match(Set dst (XorL dst imm));
9058   format %{ "NOT    $dst.lo\n\t"
9059             "NOT    $dst.hi" %}
9060   ins_encode %{
9061      __ notl($dst$$Register);
9062      __ notl(HIGH_FROM_LOW($dst$$Register));
9063   %}
9064   ins_pipe( ialu_reg_long );
9065 %}
9066 
9067 // Xor Long Register with Immediate
9068 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9069   match(Set dst (XorL dst src));
9070   effect(KILL cr);
9071   format %{ "XOR    $dst.lo,$src.lo\n\t"
9072             "XOR    $dst.hi,$src.hi" %}
9073   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9074   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9075   ins_pipe( ialu_reg_long );
9076 %}
9077 
9078 // Xor Long Register with Memory
9079 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9080   match(Set dst (XorL dst (LoadL mem)));
9081   effect(KILL cr);
9082   ins_cost(125);
9083   format %{ "XOR    $dst.lo,$mem\n\t"
9084             "XOR    $dst.hi,$mem+4" %}
9085   opcode(0x33,0x33);
9086   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9087   ins_pipe( ialu_reg_long_mem );
9088 %}
9089 
9090 // Shift Left Long by 1
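// A 64-bit shift left by one is an add of the register pair to itself with
// carry propagation (ADD lo,lo / ADC hi,hi); the by-2 and by-3 rules below
// simply repeat the pair.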
9091 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9092   predicate(UseNewLongLShift);
9093   match(Set dst (LShiftL dst cnt));
9094   effect(KILL cr);
9095   ins_cost(100);
9096   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9097             "ADC    $dst.hi,$dst.hi" %}
9098   ins_encode %{
9099     __ addl($dst$$Register,$dst$$Register);
9100     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9101   %}
9102   ins_pipe( ialu_reg_long );
9103 %}
9104 
9105 // Shift Left Long by 2
9106 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9107   predicate(UseNewLongLShift);
9108   match(Set dst (LShiftL dst cnt));
9109   effect(KILL cr);
9110   ins_cost(100);
9111   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9112             "ADC    $dst.hi,$dst.hi\n\t"
9113             "ADD    $dst.lo,$dst.lo\n\t"
9114             "ADC    $dst.hi,$dst.hi" %}
9115   ins_encode %{
9116     __ addl($dst$$Register,$dst$$Register);
9117     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9118     __ addl($dst$$Register,$dst$$Register);
9119     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9120   %}
9121   ins_pipe( ialu_reg_long );
9122 %}
9123 
9124 // Shift Left Long by 3
9125 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9126   predicate(UseNewLongLShift);
9127   match(Set dst (LShiftL dst cnt));
9128   effect(KILL cr);
9129   ins_cost(100);
9130   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9131             "ADC    $dst.hi,$dst.hi\n\t"
9132             "ADD    $dst.lo,$dst.lo\n\t"
9133             "ADC    $dst.hi,$dst.hi\n\t"
9134             "ADD    $dst.lo,$dst.lo\n\t"
9135             "ADC    $dst.hi,$dst.hi" %}
9136   ins_encode %{
9137     __ addl($dst$$Register,$dst$$Register);
9138     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9139     __ addl($dst$$Register,$dst$$Register);
9140     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9141     __ addl($dst$$Register,$dst$$Register);
9142     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9143   %}
9144   ins_pipe( ialu_reg_long );
9145 %}
9146 
9147 // Shift Left Long by 1-31
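// For constant counts of 1-31 the SHLD/SHRD forms move bits across the word
// boundary; for counts of 32-63 (below) one word is copied wholesale and the
// other is cleared, or sign-filled for the arithmetic right shift.  The
// variable-count rules test bit 5 of the count at run time to choose between
// the two cases.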
9148 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9149   match(Set dst (LShiftL dst cnt));
9150   effect(KILL cr);
9151   ins_cost(200);
9152   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9153             "SHL    $dst.lo,$cnt" %}
9154   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9155   ins_encode( move_long_small_shift(dst,cnt) );
9156   ins_pipe( ialu_reg_long );
9157 %}
9158 
9159 // Shift Left Long by 32-63
9160 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9161   match(Set dst (LShiftL dst cnt));
9162   effect(KILL cr);
9163   ins_cost(300);
9164   format %{ "MOV    $dst.hi,$dst.lo\n"
9165           "\tSHL    $dst.hi,$cnt-32\n"
9166           "\tXOR    $dst.lo,$dst.lo" %}
9167   opcode(0xC1, 0x4);  /* C1 /4 ib */
9168   ins_encode( move_long_big_shift_clr(dst,cnt) );
9169   ins_pipe( ialu_reg_long );
9170 %}
9171 
9172 // Shift Left Long by variable
9173 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9174   match(Set dst (LShiftL dst shift));
9175   effect(KILL cr);
9176   ins_cost(500+200);
9177   size(17);
9178   format %{ "TEST   $shift,32\n\t"
9179             "JEQ,s  small\n\t"
9180             "MOV    $dst.hi,$dst.lo\n\t"
9181             "XOR    $dst.lo,$dst.lo\n"
9182     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9183             "SHL    $dst.lo,$shift" %}
9184   ins_encode( shift_left_long( dst, shift ) );
9185   ins_pipe( pipe_slow );
9186 %}
9187 
9188 // Shift Right Long by 1-31
9189 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9190   match(Set dst (URShiftL dst cnt));
9191   effect(KILL cr);
9192   ins_cost(200);
9193   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9194             "SHR    $dst.hi,$cnt" %}
9195   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9196   ins_encode( move_long_small_shift(dst,cnt) );
9197   ins_pipe( ialu_reg_long );
9198 %}
9199 
9200 // Shift Right Long by 32-63
9201 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9202   match(Set dst (URShiftL dst cnt));
9203   effect(KILL cr);
9204   ins_cost(300);
9205   format %{ "MOV    $dst.lo,$dst.hi\n"
9206           "\tSHR    $dst.lo,$cnt-32\n"
9207           "\tXOR    $dst.hi,$dst.hi" %}
9208   opcode(0xC1, 0x5);  /* C1 /5 ib */
9209   ins_encode( move_long_big_shift_clr(dst,cnt) );
9210   ins_pipe( ialu_reg_long );
9211 %}
9212 
9213 // Shift Right Long by variable
9214 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9215   match(Set dst (URShiftL dst shift));
9216   effect(KILL cr);
9217   ins_cost(600);
9218   size(17);
9219   format %{ "TEST   $shift,32\n\t"
9220             "JEQ,s  small\n\t"
9221             "MOV    $dst.lo,$dst.hi\n\t"
9222             "XOR    $dst.hi,$dst.hi\n"
9223     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9224             "SHR    $dst.hi,$shift" %}
9225   ins_encode( shift_right_long( dst, shift ) );
9226   ins_pipe( pipe_slow );
9227 %}
9228 
9229 // Shift Right Long by 1-31
9230 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9231   match(Set dst (RShiftL dst cnt));
9232   effect(KILL cr);
9233   ins_cost(200);
9234   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9235             "SAR    $dst.hi,$cnt" %}
9236   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9237   ins_encode( move_long_small_shift(dst,cnt) );
9238   ins_pipe( ialu_reg_long );
9239 %}
9240 
9241 // Shift Right Long by 32-63
9242 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9243   match(Set dst (RShiftL dst cnt));
9244   effect(KILL cr);
9245   ins_cost(300);
9246   format %{ "MOV    $dst.lo,$dst.hi\n"
9247           "\tSAR    $dst.lo,$cnt-32\n"
9248           "\tSAR    $dst.hi,31" %}
9249   opcode(0xC1, 0x7);  /* C1 /7 ib */
9250   ins_encode( move_long_big_shift_sign(dst,cnt) );
9251   ins_pipe( ialu_reg_long );
9252 %}
9253 
9254 // Shift Right arithmetic Long by variable
9255 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9256   match(Set dst (RShiftL dst shift));
9257   effect(KILL cr);
9258   ins_cost(600);
9259   size(18);
9260   format %{ "TEST   $shift,32\n\t"
9261             "JEQ,s  small\n\t"
9262             "MOV    $dst.lo,$dst.hi\n\t"
9263             "SAR    $dst.hi,31\n"
9264     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9265             "SAR    $dst.hi,$shift" %}
9266   ins_encode( shift_right_arith_long( dst, shift ) );
9267   ins_pipe( pipe_slow );
9268 %}
9269 
9270 
9271 //----------Double Instructions------------------------------------------------
9272 // Double Math
9273 
9274 // Compare & branch
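// An unordered (NaN) compare sets ZF, PF and CF.  The fixup sequences below
// test PF and force the flags into the "below" state, so NaN is treated as
// less-than.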
9275 
// P6 version of double compare, sets condition codes in EFLAGS
9277 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9278   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9279   match(Set cr (CmpD src1 src2));
9280   effect(KILL rax);
9281   ins_cost(150);
9282   format %{ "FLD    $src1\n\t"
9283             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9284             "JNP    exit\n\t"
9285             "MOV    ah,1       // saw a NaN, set CF\n\t"
9286             "SAHF\n"
9287      "exit:\tNOP               // avoid branch to branch" %}
9288   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9289   ins_encode( Push_Reg_DPR(src1),
9290               OpcP, RegOpc(src2),
9291               cmpF_P6_fixup );
9292   ins_pipe( pipe_slow );
9293 %}
9294 
9295 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9296   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9297   match(Set cr (CmpD src1 src2));
9298   ins_cost(150);
9299   format %{ "FLD    $src1\n\t"
9300             "FUCOMIP ST,$src2  // P6 instruction" %}
9301   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9302   ins_encode( Push_Reg_DPR(src1),
9303               OpcP, RegOpc(src2));
9304   ins_pipe( pipe_slow );
9305 %}
9306 
9307 // Compare & branch
9308 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9309   predicate(UseSSE<=1);
9310   match(Set cr (CmpD src1 src2));
9311   effect(KILL rax);
9312   ins_cost(200);
9313   format %{ "FLD    $src1\n\t"
9314             "FCOMp  $src2\n\t"
9315             "FNSTSW AX\n\t"
9316             "TEST   AX,0x400\n\t"
9317             "JZ,s   flags\n\t"
9318             "MOV    AH,1\t# unordered treat as LT\n"
9319     "flags:\tSAHF" %}
9320   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9321   ins_encode( Push_Reg_DPR(src1),
9322               OpcP, RegOpc(src2),
9323               fpu_flags);
9324   ins_pipe( pipe_slow );
9325 %}
9326 
9327 // Compare vs zero into -1,0,1
9328 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9329   predicate(UseSSE<=1);
9330   match(Set dst (CmpD3 src1 zero));
9331   effect(KILL cr, KILL rax);
9332   ins_cost(280);
9333   format %{ "FTSTD  $dst,$src1" %}
9334   opcode(0xE4, 0xD9);
9335   ins_encode( Push_Reg_DPR(src1),
9336               OpcS, OpcP, PopFPU,
9337               CmpF_Result(dst));
9338   ins_pipe( pipe_slow );
9339 %}
9340 
9341 // Compare into -1,0,1
9342 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9343   predicate(UseSSE<=1);
9344   match(Set dst (CmpD3 src1 src2));
9345   effect(KILL cr, KILL rax);
9346   ins_cost(300);
9347   format %{ "FCMPD  $dst,$src1,$src2" %}
9348   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9349   ins_encode( Push_Reg_DPR(src1),
9350               OpcP, RegOpc(src2),
9351               CmpF_Result(dst));
9352   ins_pipe( pipe_slow );
9353 %}
9354 
// double compare and set condition codes in EFLAGS by XMM regs
9356 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9357   predicate(UseSSE>=2);
9358   match(Set cr (CmpD src1 src2));
9359   ins_cost(145);
9360   format %{ "UCOMISD $src1,$src2\n\t"
9361             "JNP,s   exit\n\t"
9362             "PUSHF\t# saw NaN, set CF\n\t"
9363             "AND     [rsp], #0xffffff2b\n\t"
9364             "POPF\n"
9365     "exit:" %}
9366   ins_encode %{
9367     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9368     emit_cmpfp_fixup(_masm);
9369   %}
9370   ins_pipe( pipe_slow );
9371 %}
9372 
9373 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9374   predicate(UseSSE>=2);
9375   match(Set cr (CmpD src1 src2));
9376   ins_cost(100);
9377   format %{ "UCOMISD $src1,$src2" %}
9378   ins_encode %{
9379     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9380   %}
9381   ins_pipe( pipe_slow );
9382 %}
9383 
// double compare and set condition codes in EFLAGS by XMM regs
9385 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9386   predicate(UseSSE>=2);
9387   match(Set cr (CmpD src1 (LoadD src2)));
9388   ins_cost(145);
9389   format %{ "UCOMISD $src1,$src2\n\t"
9390             "JNP,s   exit\n\t"
9391             "PUSHF\t# saw NaN, set CF\n\t"
9392             "AND     [rsp], #0xffffff2b\n\t"
9393             "POPF\n"
9394     "exit:" %}
9395   ins_encode %{
9396     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9397     emit_cmpfp_fixup(_masm);
9398   %}
9399   ins_pipe( pipe_slow );
9400 %}
9401 
9402 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9403   predicate(UseSSE>=2);
9404   match(Set cr (CmpD src1 (LoadD src2)));
9405   ins_cost(100);
9406   format %{ "UCOMISD $src1,$src2" %}
9407   ins_encode %{
9408     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9409   %}
9410   ins_pipe( pipe_slow );
9411 %}
9412 
9413 // Compare into -1,0,1 in XMM
9414 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9415   predicate(UseSSE>=2);
9416   match(Set dst (CmpD3 src1 src2));
9417   effect(KILL cr);
9418   ins_cost(255);
9419   format %{ "UCOMISD $src1, $src2\n\t"
9420             "MOV     $dst, #-1\n\t"
9421             "JP,s    done\n\t"
9422             "JB,s    done\n\t"
9423             "SETNE   $dst\n\t"
9424             "MOVZB   $dst, $dst\n"
9425     "done:" %}
9426   ins_encode %{
9427     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9428     emit_cmpfp3(_masm, $dst$$Register);
9429   %}
9430   ins_pipe( pipe_slow );
9431 %}
9432 
9433 // Compare into -1,0,1 in XMM and memory
9434 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9435   predicate(UseSSE>=2);
9436   match(Set dst (CmpD3 src1 (LoadD src2)));
9437   effect(KILL cr);
9438   ins_cost(275);
9439   format %{ "UCOMISD $src1, $src2\n\t"
9440             "MOV     $dst, #-1\n\t"
9441             "JP,s    done\n\t"
9442             "JB,s    done\n\t"
9443             "SETNE   $dst\n\t"
9444             "MOVZB   $dst, $dst\n"
9445     "done:" %}
9446   ins_encode %{
9447     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9448     emit_cmpfp3(_masm, $dst$$Register);
9449   %}
9450   ins_pipe( pipe_slow );
9451 %}
9452 
9453 
9454 instruct subDPR_reg(regDPR dst, regDPR src) %{
9455   predicate (UseSSE <=1);
9456   match(Set dst (SubD dst src));
9457 
9458   format %{ "FLD    $src\n\t"
9459             "DSUBp  $dst,ST" %}
9460   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9461   ins_cost(150);
9462   ins_encode( Push_Reg_DPR(src),
9463               OpcP, RegOpc(dst) );
9464   ins_pipe( fpu_reg_reg );
9465 %}
9466 
9467 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9468   predicate (UseSSE <=1);
9469   match(Set dst (RoundDouble (SubD src1 src2)));
9470   ins_cost(250);
9471 
9472   format %{ "FLD    $src2\n\t"
9473             "DSUB   ST,$src1\n\t"
9474             "FSTP_D $dst\t# D-round" %}
9475   opcode(0xD8, 0x5);
9476   ins_encode( Push_Reg_DPR(src2),
9477               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9478   ins_pipe( fpu_mem_reg_reg );
9479 %}
9480 
9481 
9482 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9483   predicate (UseSSE <=1);
9484   match(Set dst (SubD dst (LoadD src)));
9485   ins_cost(150);
9486 
9487   format %{ "FLD    $src\n\t"
9488             "DSUBp  $dst,ST" %}
9489   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9490   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9491               OpcP, RegOpc(dst) );
9492   ins_pipe( fpu_reg_mem );
9493 %}
9494 
9495 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9496   predicate (UseSSE<=1);
9497   match(Set dst (AbsD src));
9498   ins_cost(100);
9499   format %{ "FABS" %}
9500   opcode(0xE1, 0xD9);
9501   ins_encode( OpcS, OpcP );
9502   ins_pipe( fpu_reg_reg );
9503 %}
9504 
9505 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9506   predicate(UseSSE<=1);
9507   match(Set dst (NegD src));
9508   ins_cost(100);
9509   format %{ "FCHS" %}
9510   opcode(0xE0, 0xD9);
9511   ins_encode( OpcS, OpcP );
9512   ins_pipe( fpu_reg_reg );
9513 %}
9514 
9515 instruct addDPR_reg(regDPR dst, regDPR src) %{
9516   predicate(UseSSE<=1);
9517   match(Set dst (AddD dst src));
9518   format %{ "FLD    $src\n\t"
9519             "DADD   $dst,ST" %}
9520   size(4);
9521   ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0 */
9523   ins_encode( Push_Reg_DPR(src),
9524               OpcP, RegOpc(dst) );
9525   ins_pipe( fpu_reg_reg );
9526 %}
9527 
9528 
9529 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9530   predicate(UseSSE<=1);
9531   match(Set dst (RoundDouble (AddD src1 src2)));
9532   ins_cost(250);
9533 
9534   format %{ "FLD    $src2\n\t"
9535             "DADD   ST,$src1\n\t"
9536             "FSTP_D $dst\t# D-round" %}
9537   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9538   ins_encode( Push_Reg_DPR(src2),
9539               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9540   ins_pipe( fpu_mem_reg_reg );
9541 %}
9542 
9543 
9544 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9545   predicate(UseSSE<=1);
9546   match(Set dst (AddD dst (LoadD src)));
9547   ins_cost(150);
9548 
9549   format %{ "FLD    $src\n\t"
9550             "DADDp  $dst,ST" %}
9551   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9552   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9553               OpcP, RegOpc(dst) );
9554   ins_pipe( fpu_reg_mem );
9555 %}
9556 
9557 // add-to-memory
9558 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9559   predicate(UseSSE<=1);
9560   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9561   ins_cost(150);
9562 
9563   format %{ "FLD_D  $dst\n\t"
9564             "DADD   ST,$src\n\t"
9565             "FST_D  $dst" %}
9566   opcode(0xDD, 0x0);
9567   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9568               Opcode(0xD8), RegOpc(src),
9569               set_instruction_start,
9570               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9571   ins_pipe( fpu_reg_mem );
9572 %}
9573 
9574 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9575   predicate(UseSSE<=1);
9576   match(Set dst (AddD dst con));
9577   ins_cost(125);
9578   format %{ "FLD1\n\t"
9579             "DADDp  $dst,ST" %}
9580   ins_encode %{
9581     __ fld1();
9582     __ faddp($dst$$reg);
9583   %}
9584   ins_pipe(fpu_reg);
9585 %}
9586 
9587 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9588   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9589   match(Set dst (AddD dst con));
9590   ins_cost(200);
9591   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9592             "DADDp  $dst,ST" %}
9593   ins_encode %{
9594     __ fld_d($constantaddress($con));
9595     __ faddp($dst$$reg);
9596   %}
9597   ins_pipe(fpu_reg_mem);
9598 %}
9599 
9600 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9601   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9602   match(Set dst (RoundDouble (AddD src con)));
9603   ins_cost(200);
9604   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9605             "DADD   ST,$src\n\t"
9606             "FSTP_D $dst\t# D-round" %}
9607   ins_encode %{
9608     __ fld_d($constantaddress($con));
9609     __ fadd($src$$reg);
9610     __ fstp_d(Address(rsp, $dst$$disp));
9611   %}
9612   ins_pipe(fpu_mem_reg_con);
9613 %}
9614 
9615 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9616   predicate(UseSSE<=1);
9617   match(Set dst (MulD dst src));
9618   format %{ "FLD    $src\n\t"
9619             "DMULp  $dst,ST" %}
9620   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9621   ins_cost(150);
9622   ins_encode( Push_Reg_DPR(src),
9623               OpcP, RegOpc(dst) );
9624   ins_pipe( fpu_reg_reg );
9625 %}
9626 
9627 // Strict FP instruction biases argument before multiply then
9628 // biases result to avoid double rounding of subnormals.
9629 //
9630 // scale arg1 by multiplying arg1 by 2^(-15360)
9631 // load arg2
9632 // multiply scaled arg1 by arg2
9633 // rescale product by 2^(15360)
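//
// The bias constant 15360 is the difference between the x87 extended
// exponent bias (16383) and the IEEE double bias (1023): pre-scaling by
// 2^(-15360) pushes a would-be subnormal result to the bottom of the
// extended exponent range so it is rounded to 53 bits only once, and the
// rescale by 2^(15360) restores its true magnitude.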
9634 //
9635 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9636   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9637   match(Set dst (MulD dst src));
9638   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9639 
9640   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9641             "DMULp  $dst,ST\n\t"
9642             "FLD    $src\n\t"
9643             "DMULp  $dst,ST\n\t"
9644             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9645             "DMULp  $dst,ST\n\t" %}
9646   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9647   ins_encode( strictfp_bias1(dst),
9648               Push_Reg_DPR(src),
9649               OpcP, RegOpc(dst),
9650               strictfp_bias2(dst) );
9651   ins_pipe( fpu_reg_reg );
9652 %}
9653 
9654 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9655   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9656   match(Set dst (MulD dst con));
9657   ins_cost(200);
9658   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9659             "DMULp  $dst,ST" %}
9660   ins_encode %{
9661     __ fld_d($constantaddress($con));
9662     __ fmulp($dst$$reg);
9663   %}
9664   ins_pipe(fpu_reg_mem);
9665 %}
9666 
9667 
9668 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9669   predicate( UseSSE<=1 );
9670   match(Set dst (MulD dst (LoadD src)));
9671   ins_cost(200);
9672   format %{ "FLD_D  $src\n\t"
9673             "DMULp  $dst,ST" %}
9674   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9675   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9676               OpcP, RegOpc(dst) );
9677   ins_pipe( fpu_reg_mem );
9678 %}
9679 
9680 //
9681 // Cisc-alternate to reg-reg multiply
9682 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9683   predicate( UseSSE<=1 );
9684   match(Set dst (MulD src (LoadD mem)));
9685   ins_cost(250);
9686   format %{ "FLD_D  $mem\n\t"
9687             "DMUL   ST,$src\n\t"
9688             "FSTP_D $dst" %}
9689   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9690   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9691               OpcReg_FPR(src),
9692               Pop_Reg_DPR(dst) );
9693   ins_pipe( fpu_reg_reg_mem );
9694 %}
9695 
9696 
9697 // MACRO3 -- addDPR a mulDPR
9698 // This instruction is a '2-address' instruction in that the result goes
9699 // back to src2.  This eliminates a move from the macro; possibly the
9700 // register allocator will have to add it back (and maybe not).
9701 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9702   predicate( UseSSE<=1 );
9703   match(Set src2 (AddD (MulD src0 src1) src2));
9704   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9705             "DMUL   ST,$src1\n\t"
9706             "DADDp  $src2,ST" %}
9707   ins_cost(250);
9708   opcode(0xDD); /* LoadD DD /0 */
9709   ins_encode( Push_Reg_FPR(src0),
9710               FMul_ST_reg(src1),
9711               FAddP_reg_ST(src2) );
9712   ins_pipe( fpu_reg_reg_reg );
9713 %}
9714 
9715 
9716 // MACRO3 -- subDPR a mulDPR
9717 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9718   predicate( UseSSE<=1 );
9719   match(Set src2 (SubD (MulD src0 src1) src2));
9720   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9721             "DMUL   ST,$src1\n\t"
9722             "DSUBRp $src2,ST" %}
9723   ins_cost(250);
9724   ins_encode( Push_Reg_FPR(src0),
9725               FMul_ST_reg(src1),
9726               Opcode(0xDE), Opc_plus(0xE0,src2));
9727   ins_pipe( fpu_reg_reg_reg );
9728 %}
9729 
9730 
9731 instruct divDPR_reg(regDPR dst, regDPR src) %{
9732   predicate( UseSSE<=1 );
9733   match(Set dst (DivD dst src));
9734 
9735   format %{ "FLD    $src\n\t"
9736             "FDIVp  $dst,ST" %}
9737   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9738   ins_cost(150);
9739   ins_encode( Push_Reg_DPR(src),
9740               OpcP, RegOpc(dst) );
9741   ins_pipe( fpu_reg_reg );
9742 %}
9743 
9744 // Strict FP instruction biases argument before division then
9745 // biases result, to avoid double rounding of subnormals.
9746 //
9747 // scale dividend by multiplying dividend by 2^(-15360)
9748 // load divisor
9749 // divide scaled dividend by divisor
9750 // rescale quotient by 2^(15360)
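//
// Only the dividend needs the bias: (x * 2^(-15360)) / y == (x / y) * 2^(-15360),
// so the quotient is simply rescaled by the matching factor afterwards.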
9751 //
9752 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9757 
9758   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9759             "DMULp  $dst,ST\n\t"
9760             "FLD    $src\n\t"
9761             "FDIVp  $dst,ST\n\t"
9762             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9763             "DMULp  $dst,ST\n\t" %}
9764   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9765   ins_encode( strictfp_bias1(dst),
9766               Push_Reg_DPR(src),
9767               OpcP, RegOpc(dst),
9768               strictfp_bias2(dst) );
9769   ins_pipe( fpu_reg_reg );
9770 %}
9771 
9772 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9773   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9774   match(Set dst (RoundDouble (DivD src1 src2)));
9775 
9776   format %{ "FLD    $src1\n\t"
9777             "FDIV   ST,$src2\n\t"
9778             "FSTP_D $dst\t# D-round" %}
9779   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9780   ins_encode( Push_Reg_DPR(src1),
9781               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9782   ins_pipe( fpu_mem_reg_reg );
9783 %}
9784 
9785 
9786 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9787   predicate(UseSSE<=1);
9788   match(Set dst (ModD dst src));
9789   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9790 
9791   format %{ "DMOD   $dst,$src" %}
9792   ins_cost(250);
9793   ins_encode(Push_Reg_Mod_DPR(dst, src),
9794               emitModDPR(),
9795               Push_Result_Mod_DPR(src),
9796               Pop_Reg_DPR(dst));
9797   ins_pipe( pipe_slow );
9798 %}
9799 
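// FPREM computes only a partial remainder and sets the C2 status flag while
// the reduction is incomplete; FNSTSW AX / SAHF copies C2 into PF, so the
// "JP loop" below repeats FPREM until the remainder is final.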
9800 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9801   predicate(UseSSE>=2);
9802   match(Set dst (ModD src0 src1));
9803   effect(KILL rax, KILL cr);
9804 
9805   format %{ "SUB    ESP,8\t # DMOD\n"
9806           "\tMOVSD  [ESP+0],$src1\n"
9807           "\tFLD_D  [ESP+0]\n"
9808           "\tMOVSD  [ESP+0],$src0\n"
9809           "\tFLD_D  [ESP+0]\n"
9810      "loop:\tFPREM\n"
9811           "\tFWAIT\n"
9812           "\tFNSTSW AX\n"
9813           "\tSAHF\n"
9814           "\tJP     loop\n"
9815           "\tFSTP_D [ESP+0]\n"
9816           "\tMOVSD  $dst,[ESP+0]\n"
9817           "\tADD    ESP,8\n"
9818           "\tFSTP   ST0\t # Restore FPU Stack"
9819     %}
9820   ins_cost(250);
9821   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9822   ins_pipe( pipe_slow );
9823 %}
9824 
9825 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9826   predicate (UseSSE<=1);
9827   match(Set dst(TanD src));
9828   format %{ "DTAN   $dst" %}
9829   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9830               Opcode(0xDD), Opcode(0xD8));   // fstp st
9831   ins_pipe( pipe_slow );
9832 %}
9833 
9834 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9835   predicate (UseSSE>=2);
9836   match(Set dst(TanD dst));
9837   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9838   format %{ "DTAN   $dst" %}
9839   ins_encode( Push_SrcD(dst),
9840               Opcode(0xD9), Opcode(0xF2),    // fptan
9841               Opcode(0xDD), Opcode(0xD8),   // fstp st
9842               Push_ResultD(dst) );
9843   ins_pipe( pipe_slow );
9844 %}
9845 
9846 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9847   predicate (UseSSE<=1);
9848   match(Set dst(AtanD dst src));
9849   format %{ "DATA   $dst,$src" %}
9850   opcode(0xD9, 0xF3);
9851   ins_encode( Push_Reg_DPR(src),
9852               OpcP, OpcS, RegOpc(dst) );
9853   ins_pipe( pipe_slow );
9854 %}
9855 
9856 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9857   predicate (UseSSE>=2);
9858   match(Set dst(AtanD dst src));
9859   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9860   format %{ "DATA   $dst,$src" %}
9861   opcode(0xD9, 0xF3);
9862   ins_encode( Push_SrcD(src),
9863               OpcP, OpcS, Push_ResultD(dst) );
9864   ins_pipe( pipe_slow );
9865 %}
9866 
9867 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9868   predicate (UseSSE<=1);
9869   match(Set dst (SqrtD src));
9870   format %{ "DSQRT  $dst,$src" %}
9871   opcode(0xFA, 0xD9);
9872   ins_encode( Push_Reg_DPR(src),
9873               OpcS, OpcP, Pop_Reg_DPR(dst) );
9874   ins_pipe( pipe_slow );
9875 %}
9876 
9877 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9878   predicate (UseSSE<=1);
9879   // The source Double operand on FPU stack
9880   match(Set dst (Log10D src));
9881   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9882   // fxch         ; swap ST(0) with ST(1)
9883   // fyl2x        ; compute log_10(2) * log_2(x)
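  // fyl2x computes ST(1) * log_2(ST(0)) and pops the stack, so with
  // log_10(2) in ST(1) this yields log_10(x) = log_10(2) * log_2(x).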
9884   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9885             "FXCH   \n\t"
9886             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9887          %}
9888   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9889               Opcode(0xD9), Opcode(0xC9),   // fxch
9890               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9891 
9892   ins_pipe( pipe_slow );
9893 %}
9894 
9895 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9896   predicate (UseSSE>=2);
9897   effect(KILL cr);
9898   match(Set dst (Log10D src));
9899   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9900   // fyl2x        ; compute log_10(2) * log_2(x)
9901   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9902             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9903          %}
9904   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9905               Push_SrcD(src),
9906               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9907               Push_ResultD(dst));
9908 
9909   ins_pipe( pipe_slow );
9910 %}
9911 
9912 //-------------Float Instructions-------------------------------
9913 // Float Math
9914 
9915 // Code for float compare:
9916 //     fcompp();
9917 //     fwait(); fnstsw_ax();
9918 //     sahf();
9919 //     movl(dst, unordered_result);
9920 //     jcc(Assembler::parity, exit);
9921 //     movl(dst, less_result);
9922 //     jcc(Assembler::below, exit);
9923 //     movl(dst, equal_result);
9924 //     jcc(Assembler::equal, exit);
9925 //     movl(dst, greater_result);
9926 //   exit:
9927 
9928 // P6 version of float compare, sets condition codes in EFLAGS
9929 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9930   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9931   match(Set cr (CmpF src1 src2));
9932   effect(KILL rax);
9933   ins_cost(150);
9934   format %{ "FLD    $src1\n\t"
9935             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9936             "JNP    exit\n\t"
9937             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9938             "SAHF\n"
9939      "exit:\tNOP               // avoid branch to branch" %}
9940   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9941   ins_encode( Push_Reg_DPR(src1),
9942               OpcP, RegOpc(src2),
9943               cmpF_P6_fixup );
9944   ins_pipe( pipe_slow );
9945 %}
9946 
9947 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9948   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9949   match(Set cr (CmpF src1 src2));
9950   ins_cost(100);
9951   format %{ "FLD    $src1\n\t"
9952             "FUCOMIP ST,$src2  // P6 instruction" %}
9953   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9954   ins_encode( Push_Reg_DPR(src1),
9955               OpcP, RegOpc(src2));
9956   ins_pipe( pipe_slow );
9957 %}
9958 
9959 
9960 // Compare & branch
9961 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9962   predicate(UseSSE == 0);
9963   match(Set cr (CmpF src1 src2));
9964   effect(KILL rax);
9965   ins_cost(200);
9966   format %{ "FLD    $src1\n\t"
9967             "FCOMp  $src2\n\t"
9968             "FNSTSW AX\n\t"
9969             "TEST   AX,0x400\n\t"
9970             "JZ,s   flags\n\t"
9971             "MOV    AH,1\t# unordered treat as LT\n"
9972     "flags:\tSAHF" %}
9973   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9974   ins_encode( Push_Reg_DPR(src1),
9975               OpcP, RegOpc(src2),
9976               fpu_flags);
9977   ins_pipe( pipe_slow );
9978 %}
9979 
9980 // Compare vs zero into -1,0,1
9981 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9982   predicate(UseSSE == 0);
9983   match(Set dst (CmpF3 src1 zero));
9984   effect(KILL cr, KILL rax);
9985   ins_cost(280);
9986   format %{ "FTSTF  $dst,$src1" %}
9987   opcode(0xE4, 0xD9);
9988   ins_encode( Push_Reg_DPR(src1),
9989               OpcS, OpcP, PopFPU,
9990               CmpF_Result(dst));
9991   ins_pipe( pipe_slow );
9992 %}
9993 
9994 // Compare into -1,0,1
9995 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9996   predicate(UseSSE == 0);
9997   match(Set dst (CmpF3 src1 src2));
9998   effect(KILL cr, KILL rax);
9999   ins_cost(300);
10000   format %{ "FCMPF  $dst,$src1,$src2" %}
10001   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10002   ins_encode( Push_Reg_DPR(src1),
10003               OpcP, RegOpc(src2),
10004               CmpF_Result(dst));
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 // float compare and set condition codes in EFLAGS by XMM regs
10009 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10010   predicate(UseSSE>=1);
10011   match(Set cr (CmpF src1 src2));
10012   ins_cost(145);
10013   format %{ "UCOMISS $src1,$src2\n\t"
10014             "JNP,s   exit\n\t"
10015             "PUSHF\t# saw NaN, set CF\n\t"
10016             "AND     [rsp], #0xffffff2b\n\t"
10017             "POPF\n"
10018     "exit:" %}
10019   ins_encode %{
10020     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10021     emit_cmpfp_fixup(_masm);
10022   %}
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10027   predicate(UseSSE>=1);
10028   match(Set cr (CmpF src1 src2));
10029   ins_cost(100);
10030   format %{ "UCOMISS $src1,$src2" %}
10031   ins_encode %{
10032     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10033   %}
10034   ins_pipe( pipe_slow );
10035 %}
10036 
10037 // float compare and set condition codes in EFLAGS by XMM regs
10038 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10039   predicate(UseSSE>=1);
10040   match(Set cr (CmpF src1 (LoadF src2)));
10041   ins_cost(165);
10042   format %{ "UCOMISS $src1,$src2\n\t"
10043             "JNP,s   exit\n\t"
10044             "PUSHF\t# saw NaN, set CF\n\t"
10045             "AND     [rsp], #0xffffff2b\n\t"
10046             "POPF\n"
10047     "exit:" %}
10048   ins_encode %{
10049     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10050     emit_cmpfp_fixup(_masm);
10051   %}
10052   ins_pipe( pipe_slow );
10053 %}
10054 
10055 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10056   predicate(UseSSE>=1);
10057   match(Set cr (CmpF src1 (LoadF src2)));
10058   ins_cost(100);
10059   format %{ "UCOMISS $src1,$src2" %}
10060   ins_encode %{
10061     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10062   %}
10063   ins_pipe( pipe_slow );
10064 %}
10065 
10066 // Compare into -1,0,1 in XMM
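// emit_cmpfp3() emits the sequence shown in the format string: $dst is preset
// to -1 and left there when the compare is unordered (JP) or below (JB);
// otherwise SETNE/MOVZB produce 0 for equal and 1 for greater.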
10067 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10068   predicate(UseSSE>=1);
10069   match(Set dst (CmpF3 src1 src2));
10070   effect(KILL cr);
10071   ins_cost(255);
10072   format %{ "UCOMISS $src1, $src2\n\t"
10073             "MOV     $dst, #-1\n\t"
10074             "JP,s    done\n\t"
10075             "JB,s    done\n\t"
10076             "SETNE   $dst\n\t"
10077             "MOVZB   $dst, $dst\n"
10078     "done:" %}
10079   ins_encode %{
10080     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10081     emit_cmpfp3(_masm, $dst$$Register);
10082   %}
10083   ins_pipe( pipe_slow );
10084 %}
10085 
10086 // Compare into -1,0,1 in XMM and memory
10087 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10088   predicate(UseSSE>=1);
10089   match(Set dst (CmpF3 src1 (LoadF src2)));
10090   effect(KILL cr);
10091   ins_cost(275);
10092   format %{ "UCOMISS $src1, $src2\n\t"
10093             "MOV     $dst, #-1\n\t"
10094             "JP,s    done\n\t"
10095             "JB,s    done\n\t"
10096             "SETNE   $dst\n\t"
10097             "MOVZB   $dst, $dst\n"
10098     "done:" %}
10099   ins_encode %{
10100     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10101     emit_cmpfp3(_masm, $dst$$Register);
10102   %}
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 // Spill to obtain 24-bit precision
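// (The spill works because FSTP to a 32-bit stack slot rounds the
// extended-precision register value down to IEEE single precision.)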
10107 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10108   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10109   match(Set dst (SubF src1 src2));
10110 
10111   format %{ "FSUB   $dst,$src1 - $src2" %}
10112   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10113   ins_encode( Push_Reg_FPR(src1),
10114               OpcReg_FPR(src2),
10115               Pop_Mem_FPR(dst) );
10116   ins_pipe( fpu_mem_reg_reg );
10117 %}
10118 //
10119 // This instruction does not round to 24-bits
10120 instruct subFPR_reg(regFPR dst, regFPR src) %{
10121   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10122   match(Set dst (SubF dst src));
10123 
10124   format %{ "FSUB   $dst,$src" %}
10125   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10126   ins_encode( Push_Reg_FPR(src),
10127               OpcP, RegOpc(dst) );
10128   ins_pipe( fpu_reg_reg );
10129 %}
10130 
10131 // Spill to obtain 24-bit precision
10132 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10133   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10134   match(Set dst (AddF src1 src2));
10135 
10136   format %{ "FADD   $dst,$src1,$src2" %}
10137   opcode(0xD8, 0x0); /* D8 C0+i */
10138   ins_encode( Push_Reg_FPR(src2),
10139               OpcReg_FPR(src1),
10140               Pop_Mem_FPR(dst) );
10141   ins_pipe( fpu_mem_reg_reg );
10142 %}
10143 //
10144 // This instruction does not round to 24-bits
10145 instruct addFPR_reg(regFPR dst, regFPR src) %{
10146   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10147   match(Set dst (AddF dst src));
10148 
10149   format %{ "FLD    $src\n\t"
10150             "FADDp  $dst,ST" %}
10151   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10152   ins_encode( Push_Reg_FPR(src),
10153               OpcP, RegOpc(dst) );
10154   ins_pipe( fpu_reg_reg );
10155 %}
10156 
10157 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10158   predicate(UseSSE==0);
10159   match(Set dst (AbsF src));
10160   ins_cost(100);
10161   format %{ "FABS" %}
10162   opcode(0xE1, 0xD9);
10163   ins_encode( OpcS, OpcP );
10164   ins_pipe( fpu_reg_reg );
10165 %}
10166 
10167 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10168   predicate(UseSSE==0);
10169   match(Set dst (NegF src));
10170   ins_cost(100);
10171   format %{ "FCHS" %}
10172   opcode(0xE0, 0xD9);
10173   ins_encode( OpcS, OpcP );
10174   ins_pipe( fpu_reg_reg );
10175 %}
10176 
10177 // Cisc-alternate to addFPR_reg
10178 // Spill to obtain 24-bit precision
10179 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10180   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181   match(Set dst (AddF src1 (LoadF src2)));
10182 
10183   format %{ "FLD    $src2\n\t"
10184             "FADD   ST,$src1\n\t"
10185             "FSTP_S $dst" %}
10186   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10187   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10188               OpcReg_FPR(src1),
10189               Pop_Mem_FPR(dst) );
10190   ins_pipe( fpu_mem_reg_mem );
10191 %}
10192 //
10193 // Cisc-alternate to addFPR_reg
10194 // This instruction does not round to 24-bits
10195 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10196   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10197   match(Set dst (AddF dst (LoadF src)));
10198 
10199   format %{ "FADD   $dst,$src" %}
10200   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10201   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10202               OpcP, RegOpc(dst) );
10203   ins_pipe( fpu_reg_mem );
10204 %}
10205 
// The following two instructions are for _222_mpegaudio
10207 // Spill to obtain 24-bit precision
10208 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10209   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10210   match(Set dst (AddF src1 src2));
10211 
10212   format %{ "FADD   $dst,$src1,$src2" %}
10213   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10214   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10215               OpcReg_FPR(src2),
10216               Pop_Mem_FPR(dst) );
10217   ins_pipe( fpu_mem_reg_mem );
10218 %}
10219 
10220 // Cisc-spill variant
10221 // Spill to obtain 24-bit precision
10222 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10223   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224   match(Set dst (AddF src1 (LoadF src2)));
10225 
10226   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10227   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10228   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10229               set_instruction_start,
10230               OpcP, RMopc_Mem(secondary,src1),
10231               Pop_Mem_FPR(dst) );
10232   ins_pipe( fpu_mem_mem_mem );
10233 %}
10234 
10235 // Spill to obtain 24-bit precision
10236 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10237   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10238   match(Set dst (AddF src1 src2));
10239 
10240   format %{ "FADD   $dst,$src1,$src2" %}
10241   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10242   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10243               set_instruction_start,
10244               OpcP, RMopc_Mem(secondary,src1),
10245               Pop_Mem_FPR(dst) );
10246   ins_pipe( fpu_mem_mem_mem );
10247 %}
10248 
10249 
10250 // Spill to obtain 24-bit precision
10251 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10252   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10253   match(Set dst (AddF src con));
10254   format %{ "FLD    $src\n\t"
10255             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10256             "FSTP_S $dst"  %}
10257   ins_encode %{
10258     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10259     __ fadd_s($constantaddress($con));
10260     __ fstp_s(Address(rsp, $dst$$disp));
10261   %}
10262   ins_pipe(fpu_mem_reg_con);
10263 %}
10264 //
10265 // This instruction does not round to 24-bits
10266 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10267   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10268   match(Set dst (AddF src con));
10269   format %{ "FLD    $src\n\t"
10270             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10271             "FSTP   $dst"  %}
10272   ins_encode %{
10273     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10274     __ fadd_s($constantaddress($con));
10275     __ fstp_d($dst$$reg);
10276   %}
10277   ins_pipe(fpu_reg_reg_con);
10278 %}
10279 
10280 // Spill to obtain 24-bit precision
10281 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10282   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10283   match(Set dst (MulF src1 src2));
10284 
10285   format %{ "FLD    $src1\n\t"
10286             "FMUL   $src2\n\t"
10287             "FSTP_S $dst"  %}
10288   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10289   ins_encode( Push_Reg_FPR(src1),
10290               OpcReg_FPR(src2),
10291               Pop_Mem_FPR(dst) );
10292   ins_pipe( fpu_mem_reg_reg );
10293 %}
10294 //
10295 // This instruction does not round to 24-bits
10296 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10297   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10298   match(Set dst (MulF src1 src2));
10299 
10300   format %{ "FLD    $src1\n\t"
10301             "FMUL   $src2\n\t"
10302             "FSTP_S $dst"  %}
10303   opcode(0xD8, 0x1); /* D8 C8+i */
10304   ins_encode( Push_Reg_FPR(src2),
10305               OpcReg_FPR(src1),
10306               Pop_Reg_FPR(dst) );
10307   ins_pipe( fpu_reg_reg_reg );
10308 %}
10309 
10310 
10311 // Spill to obtain 24-bit precision
10312 // Cisc-alternate to reg-reg multiply
10313 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10314   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315   match(Set dst (MulF src1 (LoadF src2)));
10316 
10317   format %{ "FLD_S  $src2\n\t"
10318             "FMUL   $src1\n\t"
10319             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10321   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10322               OpcReg_FPR(src1),
10323               Pop_Mem_FPR(dst) );
10324   ins_pipe( fpu_mem_reg_mem );
10325 %}
10326 //
10327 // This instruction does not round to 24-bits
10328 // Cisc-alternate to reg-reg multiply
10329 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10330   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10331   match(Set dst (MulF src1 (LoadF src2)));
10332 
10333   format %{ "FMUL   $dst,$src1,$src2" %}
10334   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10335   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10336               OpcReg_FPR(src1),
10337               Pop_Reg_FPR(dst) );
10338   ins_pipe( fpu_reg_reg_mem );
10339 %}
10340 
10341 // Spill to obtain 24-bit precision
10342 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10343   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10344   match(Set dst (MulF src1 src2));
10345 
10346   format %{ "FMUL   $dst,$src1,$src2" %}
10347   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10348   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10349               set_instruction_start,
10350               OpcP, RMopc_Mem(secondary,src1),
10351               Pop_Mem_FPR(dst) );
10352   ins_pipe( fpu_mem_mem_mem );
10353 %}
10354 
10355 // Spill to obtain 24-bit precision
10356 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10357   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10358   match(Set dst (MulF src con));
10359 
10360   format %{ "FLD    $src\n\t"
10361             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10362             "FSTP_S $dst"  %}
10363   ins_encode %{
10364     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10365     __ fmul_s($constantaddress($con));
10366     __ fstp_s(Address(rsp, $dst$$disp));
10367   %}
10368   ins_pipe(fpu_mem_reg_con);
10369 %}
10370 //
10371 // This instruction does not round to 24-bits
10372 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10373   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10374   match(Set dst (MulF src con));
10375 
10376   format %{ "FLD    $src\n\t"
10377             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10378             "FSTP   $dst"  %}
10379   ins_encode %{
10380     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10381     __ fmul_s($constantaddress($con));
10382     __ fstp_d($dst$$reg);
10383   %}
10384   ins_pipe(fpu_reg_reg_con);
10385 %}
10386 
10387 
10388 //
10389 // MACRO1 -- subsume unshared load into mulFPR
10390 // This instruction does not round to 24-bits
10391 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10392   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10393   match(Set dst (MulF (LoadF mem1) src));
10394 
10395   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10396             "FMUL   ST,$src\n\t"
10397             "FSTP   $dst" %}
10398   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10399   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10400               OpcReg_FPR(src),
10401               Pop_Reg_FPR(dst) );
10402   ins_pipe( fpu_reg_reg_mem );
10403 %}
10404 //
10405 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10406 // This instruction does not round to 24-bits
10407 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10408   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10409   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10410   ins_cost(95);
10411 
10412   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10413             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10414             "FADD   ST,$src2\n\t"
10415             "FSTP   $dst" %}
10416   opcode(0xD9); /* LoadF D9 /0 */
10417   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10418               FMul_ST_reg(src1),
10419               FAdd_ST_reg(src2),
10420               Pop_Reg_FPR(dst) );
10421   ins_pipe( fpu_reg_mem_reg_reg );
10422 %}
10423 
10424 // MACRO3 -- addFPR a mulFPR
10425 // This instruction does not round to 24-bits.  It is a '2-address'
10426 // instruction in that the result goes back to src2.  This eliminates
10427 // a move from the macro; possibly the register allocator will have
10428 // to add it back (and maybe not).
10429 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10430   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10431   match(Set src2 (AddF (MulF src0 src1) src2));
10432 
10433   format %{ "FLD    $src0     ===MACRO3===\n\t"
10434             "FMUL   ST,$src1\n\t"
10435             "FADDP  $src2,ST" %}
10436   opcode(0xD9); /* LoadF D9 /0 */
10437   ins_encode( Push_Reg_FPR(src0),
10438               FMul_ST_reg(src1),
10439               FAddP_reg_ST(src2) );
10440   ins_pipe( fpu_reg_reg_reg );
10441 %}
10442 
10443 // MACRO4 -- divFPR subFPR
10444 // This instruction does not round to 24-bits
10445 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10446   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447   match(Set dst (DivF (SubF src2 src1) src3));
10448 
10449   format %{ "FLD    $src2   ===MACRO4===\n\t"
10450             "FSUB   ST,$src1\n\t"
10451             "FDIV   ST,$src3\n\t"
10452             "FSTP  $dst" %}
10453   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10454   ins_encode( Push_Reg_FPR(src2),
10455               subFPR_divFPR_encode(src1,src3),
10456               Pop_Reg_FPR(dst) );
10457   ins_pipe( fpu_reg_reg_reg_reg );
10458 %}
10459 
10460 // Spill to obtain 24-bit precision
10461 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10462   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10463   match(Set dst (DivF src1 src2));
10464 
10465   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10467   ins_encode( Push_Reg_FPR(src1),
10468               OpcReg_FPR(src2),
10469               Pop_Mem_FPR(dst) );
10470   ins_pipe( fpu_mem_reg_reg );
10471 %}
10472 //
10473 // This instruction does not round to 24-bits
10474 instruct divFPR_reg(regFPR dst, regFPR src) %{
10475   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10476   match(Set dst (DivF dst src));
10477 
10478   format %{ "FDIV   $dst,$src" %}
10479   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10480   ins_encode( Push_Reg_FPR(src),
10481               OpcP, RegOpc(dst) );
10482   ins_pipe( fpu_reg_reg );
10483 %}
10484 
10485 
10486 // Spill to obtain 24-bit precision
10487 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10488   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10489   match(Set dst (ModF src1 src2));
10490   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10491 
10492   format %{ "FMOD   $dst,$src1,$src2" %}
10493   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10494               emitModDPR(),
10495               Push_Result_Mod_DPR(src2),
10496               Pop_Mem_FPR(dst));
10497   ins_pipe( pipe_slow );
10498 %}
10499 //
10500 // This instruction does not round to 24-bits
10501 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10502   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10503   match(Set dst (ModF dst src));
10504   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10505 
10506   format %{ "FMOD   $dst,$src" %}
10507   ins_encode(Push_Reg_Mod_DPR(dst, src),
10508               emitModDPR(),
10509               Push_Result_Mod_DPR(src),
10510               Pop_Reg_FPR(dst));
10511   ins_pipe( pipe_slow );
10512 %}
10513 
10514 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10515   predicate(UseSSE>=1);
10516   match(Set dst (ModF src0 src1));
10517   effect(KILL rax, KILL cr);
10518   format %{ "SUB    ESP,4\t # FMOD\n"
10519           "\tMOVSS  [ESP+0],$src1\n"
10520           "\tFLD_S  [ESP+0]\n"
10521           "\tMOVSS  [ESP+0],$src0\n"
10522           "\tFLD_S  [ESP+0]\n"
10523      "loop:\tFPREM\n"
10524           "\tFWAIT\n"
10525           "\tFNSTSW AX\n"
10526           "\tSAHF\n"
10527           "\tJP     loop\n"
10528           "\tFSTP_S [ESP+0]\n"
10529           "\tMOVSS  $dst,[ESP+0]\n"
10530           "\tADD    ESP,4\n"
10531           "\tFSTP   ST0\t # Restore FPU Stack"
10532     %}
10533   ins_cost(250);
10534   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10535   ins_pipe( pipe_slow );
10536 %}
10537 
10538 
10539 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10541 
10542 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10543   predicate(UseSSE==0);
10544   match(Set dst (RoundFloat src));
10545   ins_cost(125);
10546   format %{ "FST_S  $dst,$src\t# F-round" %}
10547   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10548   ins_pipe( fpu_mem_reg );
10549 %}
10550 
10551 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10552   predicate(UseSSE<=1);
10553   match(Set dst (RoundDouble src));
10554   ins_cost(125);
10555   format %{ "FST_D  $dst,$src\t# D-round" %}
10556   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10557   ins_pipe( fpu_mem_reg );
10558 %}
10559 
// Force rounding to 24-bit precision and 8-bit exponent
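// (The FPU precision-control field narrows only the significand; the store
// to a 32-bit memory slot is what narrows the exponent range as well.)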
10561 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10562   predicate(UseSSE==0);
10563   match(Set dst (ConvD2F src));
10564   format %{ "FST_S  $dst,$src\t# F-round" %}
10565   expand %{
10566     roundFloat_mem_reg(dst,src);
10567   %}
10568 %}
10569 
// Force rounding to 24-bit precision and 8-bit exponent
10571 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10572   predicate(UseSSE==1);
10573   match(Set dst (ConvD2F src));
10574   effect( KILL cr );
10575   format %{ "SUB    ESP,4\n\t"
10576             "FST_S  [ESP],$src\t# F-round\n\t"
10577             "MOVSS  $dst,[ESP]\n\t"
10578             "ADD ESP,4" %}
10579   ins_encode %{
10580     __ subptr(rsp, 4);
10581     if ($src$$reg != FPR1L_enc) {
10582       __ fld_s($src$$reg-1);
10583       __ fstp_s(Address(rsp, 0));
10584     } else {
10585       __ fst_s(Address(rsp, 0));
10586     }
10587     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10588     __ addptr(rsp, 4);
10589   %}
10590   ins_pipe( pipe_slow );
10591 %}
10592 
10593 // Force rounding double precision to single precision
10594 instruct convD2F_reg(regF dst, regD src) %{
10595   predicate(UseSSE>=2);
10596   match(Set dst (ConvD2F src));
10597   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10598   ins_encode %{
10599     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10600   %}
10601   ins_pipe( pipe_slow );
10602 %}
10603 
10604 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10605   predicate(UseSSE==0);
10606   match(Set dst (ConvF2D src));
10607   format %{ "FST_S  $dst,$src\t# D-round" %}
10608   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10609   ins_pipe( fpu_reg_reg );
10610 %}
10611 
10612 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10613   predicate(UseSSE==1);
10614   match(Set dst (ConvF2D src));
10615   format %{ "FST_D  $dst,$src\t# D-round" %}
10616   expand %{
10617     roundDouble_mem_reg(dst,src);
10618   %}
10619 %}
10620 
10621 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10622   predicate(UseSSE==1);
10623   match(Set dst (ConvF2D src));
10624   effect( KILL cr );
10625   format %{ "SUB    ESP,4\n\t"
10626             "MOVSS  [ESP] $src\n\t"
10627             "FLD_S  [ESP]\n\t"
10628             "ADD    ESP,4\n\t"
10629             "FSTP   $dst\t# D-round" %}
10630   ins_encode %{
10631     __ subptr(rsp, 4);
10632     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10633     __ fld_s(Address(rsp, 0));
10634     __ addptr(rsp, 4);
10635     __ fstp_d($dst$$reg);
10636   %}
10637   ins_pipe( pipe_slow );
10638 %}
10639 
10640 instruct convF2D_reg(regD dst, regF src) %{
10641   predicate(UseSSE>=2);
10642   match(Set dst (ConvF2D src));
10643   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10644   ins_encode %{
10645     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10646   %}
10647   ins_pipe( pipe_slow );
10648 %}
10649 
10650 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10651 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10652   predicate(UseSSE<=1);
10653   match(Set dst (ConvD2I src));
10654   effect( KILL tmp, KILL cr );
10655   format %{ "FLD    $src\t# Convert double to int \n\t"
10656             "FLDCW  trunc mode\n\t"
10657             "SUB    ESP,4\n\t"
10658             "FISTp  [ESP + #0]\n\t"
10659             "FLDCW  std/24-bit mode\n\t"
10660             "POP    EAX\n\t"
10661             "CMP    EAX,0x80000000\n\t"
10662             "JNE,s  fast\n\t"
10663             "FLD_D  $src\n\t"
10664             "CALL   d2i_wrapper\n"
10665       "fast:" %}
10666   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10667   ins_pipe( pipe_slow );
10668 %}
10669 
10670 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
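// CVTTSD2SI writes the "integer indefinite" value 0x80000000 when the input
// is a NaN or out of int range, so that value is what sends us down the slow
// call into StubRoutines::d2i_wrapper() below.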
10671 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10672   predicate(UseSSE>=2);
10673   match(Set dst (ConvD2I src));
10674   effect( KILL tmp, KILL cr );
10675   format %{ "CVTTSD2SI $dst, $src\n\t"
10676             "CMP    $dst,0x80000000\n\t"
10677             "JNE,s  fast\n\t"
10678             "SUB    ESP, 8\n\t"
10679             "MOVSD  [ESP], $src\n\t"
10680             "FLD_D  [ESP]\n\t"
10681             "ADD    ESP, 8\n\t"
10682             "CALL   d2i_wrapper\n"
10683       "fast:" %}
10684   ins_encode %{
10685     Label fast;
10686     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10687     __ cmpl($dst$$Register, 0x80000000);
10688     __ jccb(Assembler::notEqual, fast);
10689     __ subptr(rsp, 8);
10690     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10691     __ fld_d(Address(rsp, 0));
10692     __ addptr(rsp, 8);
10693     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10694     __ bind(fast);
10695   %}
10696   ins_pipe( pipe_slow );
10697 %}
10698 
10699 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10700   predicate(UseSSE<=1);
10701   match(Set dst (ConvD2L src));
10702   effect( KILL cr );
10703   format %{ "FLD    $src\t# Convert double to long\n\t"
10704             "FLDCW  trunc mode\n\t"
10705             "SUB    ESP,8\n\t"
10706             "FISTp  [ESP + #0]\n\t"
10707             "FLDCW  std/24-bit mode\n\t"
10708             "POP    EAX\n\t"
10709             "POP    EDX\n\t"
10710             "CMP    EDX,0x80000000\n\t"
10711             "JNE,s  fast\n\t"
10712             "TEST   EAX,EAX\n\t"
10713             "JNE,s  fast\n\t"
10714             "FLD    $src\n\t"
10715             "CALL   d2l_wrapper\n"
10716       "fast:" %}
10717   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10718   ins_pipe( pipe_slow );
10719 %}
10720 
10721 // XMM lacks a float/double->long conversion, so use the old FPU stack.
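// FISTP rounds according to the RC field of the FPU control word, while the
// Java cast to long must truncate toward zero; hence the switch to the
// truncating control word and the restore to std/24-bit mode afterwards.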
10722 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10723   predicate (UseSSE>=2);
10724   match(Set dst (ConvD2L src));
10725   effect( KILL cr );
10726   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10727             "MOVSD  [ESP],$src\n\t"
10728             "FLD_D  [ESP]\n\t"
10729             "FLDCW  trunc mode\n\t"
10730             "FISTp  [ESP + #0]\n\t"
10731             "FLDCW  std/24-bit mode\n\t"
10732             "POP    EAX\n\t"
10733             "POP    EDX\n\t"
10734             "CMP    EDX,0x80000000\n\t"
10735             "JNE,s  fast\n\t"
10736             "TEST   EAX,EAX\n\t"
10737             "JNE,s  fast\n\t"
10738             "SUB    ESP,8\n\t"
10739             "MOVSD  [ESP],$src\n\t"
10740             "FLD_D  [ESP]\n\t"
10741             "ADD    ESP,8\n\t"
10742             "CALL   d2l_wrapper\n"
10743       "fast:" %}
10744   ins_encode %{
10745     Label fast;
10746     __ subptr(rsp, 8);
10747     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10748     __ fld_d(Address(rsp, 0));
10749     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10750     __ fistp_d(Address(rsp, 0));
10751     // Restore the rounding mode, mask the exception
10752     if (Compile::current()->in_24_bit_fp_mode()) {
10753       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10754     } else {
10755       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10756     }
10757     // Load the converted long, adjust CPU stack
10758     __ pop(rax);
10759     __ pop(rdx);
10760     __ cmpl(rdx, 0x80000000);
10761     __ jccb(Assembler::notEqual, fast);
10762     __ testl(rax, rax);
10763     __ jccb(Assembler::notEqual, fast);
10764     __ subptr(rsp, 8);
10765     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10766     __ fld_d(Address(rsp, 0));
10767     __ addptr(rsp, 8);
10768     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10769     __ bind(fast);
10770   %}
10771   ins_pipe( pipe_slow );
10772 %}
10773 
10774 // Convert a double to an int.  Java semantics require we do complex
10775 // manglations in the corner cases.  So we set the rounding mode to
10776 // 'zero', store the darned double down as an int, and reset the
10777 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we overflow or convert a NaN; we check for this and
// go the slow path if needed.
10780 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10781   predicate(UseSSE==0);
10782   match(Set dst (ConvF2I src));
10783   effect( KILL tmp, KILL cr );
10784   format %{ "FLD    $src\t# Convert float to int \n\t"
10785             "FLDCW  trunc mode\n\t"
10786             "SUB    ESP,4\n\t"
10787             "FISTp  [ESP + #0]\n\t"
10788             "FLDCW  std/24-bit mode\n\t"
10789             "POP    EAX\n\t"
10790             "CMP    EAX,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "FLD    $src\n\t"
10793             "CALL   d2i_wrapper\n"
10794       "fast:" %}
10795   // DPR2I_encoding works for FPR2I
10796   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10797   ins_pipe( pipe_slow );
10798 %}
10799 
10800 // Convert a float in xmm to an int reg.
10801 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10802   predicate(UseSSE>=1);
10803   match(Set dst (ConvF2I src));
10804   effect( KILL tmp, KILL cr );
10805   format %{ "CVTTSS2SI $dst, $src\n\t"
10806             "CMP    $dst,0x80000000\n\t"
10807             "JNE,s  fast\n\t"
10808             "SUB    ESP, 4\n\t"
10809             "MOVSS  [ESP], $src\n\t"
10810             "FLD    [ESP]\n\t"
10811             "ADD    ESP, 4\n\t"
10812             "CALL   d2i_wrapper\n"
10813       "fast:" %}
10814   ins_encode %{
10815     Label fast;
10816     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10817     __ cmpl($dst$$Register, 0x80000000);
10818     __ jccb(Assembler::notEqual, fast);
10819     __ subptr(rsp, 4);
10820     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10821     __ fld_s(Address(rsp, 0));
10822     __ addptr(rsp, 4);
10823     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10824     __ bind(fast);
10825   %}
10826   ins_pipe( pipe_slow );
10827 %}
10828 
10829 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10830   predicate(UseSSE==0);
10831   match(Set dst (ConvF2L src));
10832   effect( KILL cr );
10833   format %{ "FLD    $src\t# Convert float to long\n\t"
10834             "FLDCW  trunc mode\n\t"
10835             "SUB    ESP,8\n\t"
10836             "FISTp  [ESP + #0]\n\t"
10837             "FLDCW  std/24-bit mode\n\t"
10838             "POP    EAX\n\t"
10839             "POP    EDX\n\t"
10840             "CMP    EDX,0x80000000\n\t"
10841             "JNE,s  fast\n\t"
10842             "TEST   EAX,EAX\n\t"
10843             "JNE,s  fast\n\t"
10844             "FLD    $src\n\t"
10845             "CALL   d2l_wrapper\n"
10846       "fast:" %}
10847   // DPR2L_encoding works for FPR2L
10848   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10849   ins_pipe( pipe_slow );
10850 %}
10851 
10852 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10853 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10854   predicate (UseSSE>=1);
10855   match(Set dst (ConvF2L src));
10856   effect( KILL cr );
10857   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10858             "MOVSS  [ESP],$src\n\t"
10859             "FLD_S  [ESP]\n\t"
10860             "FLDCW  trunc mode\n\t"
10861             "FISTp  [ESP + #0]\n\t"
10862             "FLDCW  std/24-bit mode\n\t"
10863             "POP    EAX\n\t"
10864             "POP    EDX\n\t"
10865             "CMP    EDX,0x80000000\n\t"
10866             "JNE,s  fast\n\t"
10867             "TEST   EAX,EAX\n\t"
10868             "JNE,s  fast\n\t"
10869             "SUB    ESP,4\t# Convert float to long\n\t"
10870             "MOVSS  [ESP],$src\n\t"
10871             "FLD_S  [ESP]\n\t"
10872             "ADD    ESP,4\n\t"
10873             "CALL   d2l_wrapper\n"
10874       "fast:" %}
10875   ins_encode %{
10876     Label fast;
10877     __ subptr(rsp, 8);
10878     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10879     __ fld_s(Address(rsp, 0));
10880     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10881     __ fistp_d(Address(rsp, 0));
10882     // Restore the rounding mode, mask the exception
10883     if (Compile::current()->in_24_bit_fp_mode()) {
10884       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10885     } else {
10886       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10887     }
10888     // Load the converted long, adjust CPU stack
10889     __ pop(rax);
10890     __ pop(rdx);
10891     __ cmpl(rdx, 0x80000000);
10892     __ jccb(Assembler::notEqual, fast);
10893     __ testl(rax, rax);
10894     __ jccb(Assembler::notEqual, fast);
10895     __ subptr(rsp, 4);
10896     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10897     __ fld_s(Address(rsp, 0));
10898     __ addptr(rsp, 4);
10899     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10900     __ bind(fast);
10901   %}
10902   ins_pipe( pipe_slow );
10903 %}
10904 
10905 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10906   predicate( UseSSE<=1 );
10907   match(Set dst (ConvI2D src));
10908   format %{ "FILD   $src\n\t"
10909             "FSTP   $dst" %}
10910   opcode(0xDB, 0x0);  /* DB /0 */
10911   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10912   ins_pipe( fpu_reg_mem );
10913 %}
10914 
10915 instruct convI2D_reg(regD dst, rRegI src) %{
10916   predicate( UseSSE>=2 && !UseXmmI2D );
10917   match(Set dst (ConvI2D src));
10918   format %{ "CVTSI2SD $dst,$src" %}
10919   ins_encode %{
10920     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10921   %}
10922   ins_pipe( pipe_slow );
10923 %}
10924 
10925 instruct convI2D_mem(regD dst, memory mem) %{
10926   predicate( UseSSE>=2 );
10927   match(Set dst (ConvI2D (LoadI mem)));
10928   format %{ "CVTSI2SD $dst,$mem" %}
10929   ins_encode %{
10930     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10931   %}
10932   ins_pipe( pipe_slow );
10933 %}
10934 
10935 instruct convXI2D_reg(regD dst, rRegI src)
10936 %{
10937   predicate( UseSSE>=2 && UseXmmI2D );
10938   match(Set dst (ConvI2D src));
10939 
10940   format %{ "MOVD  $dst,$src\n\t"
10941             "CVTDQ2PD $dst,$dst\t# i2d" %}
10942   ins_encode %{
10943     __ movdl($dst$$XMMRegister, $src$$Register);
10944     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10945   %}
10946   ins_pipe(pipe_slow); // XXX
10947 %}
10948 
10949 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10950   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10951   match(Set dst (ConvI2D (LoadI mem)));
10952   format %{ "FILD   $mem\n\t"
10953             "FSTP   $dst" %}
10954   opcode(0xDB);      /* DB /0 */
10955   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10956               Pop_Reg_DPR(dst));
10957   ins_pipe( fpu_reg_mem );
10958 %}
10959 
10960 // Convert a byte to a float; no rounding step needed.
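// (Every value in [0,255] is exactly representable in a 24-bit significand,
// so no rounding spill is required even in 24-bit mode.)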
10961 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10962   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10963   match(Set dst (ConvI2F src));
10964   format %{ "FILD   $src\n\t"
10965             "FSTP   $dst" %}
10966 
10967   opcode(0xDB, 0x0);  /* DB /0 */
10968   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10969   ins_pipe( fpu_reg_mem );
10970 %}
10971 
10972 // In 24-bit mode, force exponent rounding by storing back out
10973 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10974   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10975   match(Set dst (ConvI2F src));
10976   ins_cost(200);
10977   format %{ "FILD   $src\n\t"
10978             "FSTP_S $dst" %}
10979   opcode(0xDB, 0x0);  /* DB /0 */
10980   ins_encode( Push_Mem_I(src),
10981               Pop_Mem_FPR(dst));
10982   ins_pipe( fpu_mem_mem );
10983 %}
10984 
10985 // In 24-bit mode, force exponent rounding by storing back out
10986 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10987   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10988   match(Set dst (ConvI2F (LoadI mem)));
10989   ins_cost(200);
10990   format %{ "FILD   $mem\n\t"
10991             "FSTP_S $dst" %}
10992   opcode(0xDB);  /* DB /0 */
10993   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10994               Pop_Mem_FPR(dst));
10995   ins_pipe( fpu_mem_mem );
10996 %}
10997 
10998 // This instruction does not round to 24-bits
10999 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11000   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11001   match(Set dst (ConvI2F src));
11002   format %{ "FILD   $src\n\t"
11003             "FSTP   $dst" %}
11004   opcode(0xDB, 0x0);  /* DB /0 */
11005   ins_encode( Push_Mem_I(src),
11006               Pop_Reg_FPR(dst));
11007   ins_pipe( fpu_reg_mem );
11008 %}
11009 
11010 // This instruction does not round to 24-bits
11011 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11012   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11013   match(Set dst (ConvI2F (LoadI mem)));
11014   format %{ "FILD   $mem\n\t"
11015             "FSTP   $dst" %}
11016   opcode(0xDB);      /* DB /0 */
11017   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11018               Pop_Reg_FPR(dst));
11019   ins_pipe( fpu_reg_mem );
11020 %}
11021 
11022 // Convert an int to a float in xmm; no rounding step needed.
11023 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11025   match(Set dst (ConvI2F src));
11026   format %{ "CVTSI2SS $dst, $src" %}
11027   ins_encode %{
11028     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11029   %}
11030   ins_pipe( pipe_slow );
11031 %}
11032 
instruct convXI2F_reg(regF dst, rRegI src)
11034 %{
11035   predicate( UseSSE>=2 && UseXmmI2F );
11036   match(Set dst (ConvI2F src));
11037 
11038   format %{ "MOVD  $dst,$src\n\t"
11039             "CVTDQ2PS $dst,$dst\t# i2f" %}
11040   ins_encode %{
11041     __ movdl($dst$$XMMRegister, $src$$Register);
11042     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11043   %}
11044   ins_pipe(pipe_slow); // XXX
11045 %}
11046 
11047 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11048   match(Set dst (ConvI2L src));
11049   effect(KILL cr);
11050   ins_cost(375);
11051   format %{ "MOV    $dst.lo,$src\n\t"
11052             "MOV    $dst.hi,$src\n\t"
11053             "SAR    $dst.hi,31" %}
11054   ins_encode(convert_int_long(dst,src));
11055   ins_pipe( ialu_reg_reg_long );
11056 %}
11057 
11058 // Zero-extend convert int to long
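// (Matching the AndL with the 0xFFFFFFFF mask directly lets the zero extension
// be a plain MOV of the low word plus an XOR of the high word, instead of the
// sign-extending MOV/SAR sequence used by convI2L_reg above.)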
11059 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11060   match(Set dst (AndL (ConvI2L src) mask) );
11061   effect( KILL flags );
11062   ins_cost(250);
11063   format %{ "MOV    $dst.lo,$src\n\t"
11064             "XOR    $dst.hi,$dst.hi" %}
11065   opcode(0x33); // XOR
11066   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11067   ins_pipe( ialu_reg_reg_long );
11068 %}
11069 
11070 // Zero-extend long
11071 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11072   match(Set dst (AndL src mask) );
11073   effect( KILL flags );
11074   ins_cost(250);
11075   format %{ "MOV    $dst.lo,$src.lo\n\t"
11076             "XOR    $dst.hi,$dst.hi" %}
11077   opcode(0x33); // XOR
11078   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11079   ins_pipe( ialu_reg_reg_long );
11080 %}
11081 
11082 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11083   predicate (UseSSE<=1);
11084   match(Set dst (ConvL2D src));
11085   effect( KILL cr );
11086   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11087             "PUSH   $src.lo\n\t"
11088             "FILD   ST,[ESP + #0]\n\t"
11089             "ADD    ESP,8\n\t"
11090             "FSTP_D $dst\t# D-round" %}
11091   opcode(0xDF, 0x5);  /* DF /5 */
11092   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11093   ins_pipe( pipe_slow );
11094 %}
11095 
11096 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11097   predicate (UseSSE>=2);
11098   match(Set dst (ConvL2D src));
11099   effect( KILL cr );
11100   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11101             "PUSH   $src.lo\n\t"
11102             "FILD_D [ESP]\n\t"
11103             "FSTP_D [ESP]\n\t"
11104             "MOVSD  $dst,[ESP]\n\t"
11105             "ADD    ESP,8" %}
11106   opcode(0xDF, 0x5);  /* DF /5 */
11107   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11108   ins_pipe( pipe_slow );
11109 %}
11110 
11111 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11112   predicate (UseSSE>=1);
11113   match(Set dst (ConvL2F src));
11114   effect( KILL cr );
11115   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11116             "PUSH   $src.lo\n\t"
11117             "FILD_D [ESP]\n\t"
11118             "FSTP_S [ESP]\n\t"
11119             "MOVSS  $dst,[ESP]\n\t"
11120             "ADD    ESP,8" %}
11121   opcode(0xDF, 0x5);  /* DF /5 */
11122   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11123   ins_pipe( pipe_slow );
11124 %}
11125 
11126 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11127   match(Set dst (ConvL2F src));
11128   effect( KILL cr );
11129   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11130             "PUSH   $src.lo\n\t"
11131             "FILD   ST,[ESP + #0]\n\t"
11132             "ADD    ESP,8\n\t"
11133             "FSTP_S $dst\t# F-round" %}
11134   opcode(0xDF, 0x5);  /* DF /5 */
11135   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11136   ins_pipe( pipe_slow );
11137 %}
11138 
11139 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11140   match(Set dst (ConvL2I src));
11141   effect( DEF dst, USE src );
11142   format %{ "MOV    $dst,$src.lo" %}
11143   ins_encode(enc_CopyL_Lo(dst,src));
11144   ins_pipe( ialu_reg_reg );
11145 %}
11146 
11147 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11148   match(Set dst (MoveF2I src));
11149   effect( DEF dst, USE src );
11150   ins_cost(100);
11151   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11152   ins_encode %{
11153     __ movl($dst$$Register, Address(rsp, $src$$disp));
11154   %}
11155   ins_pipe( ialu_reg_mem );
11156 %}
11157 
11158 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11159   predicate(UseSSE==0);
11160   match(Set dst (MoveF2I src));
11161   effect( DEF dst, USE src );
11162 
11163   ins_cost(125);
11164   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11165   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11166   ins_pipe( fpu_mem_reg );
11167 %}
11168 
11169 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11170   predicate(UseSSE>=1);
11171   match(Set dst (MoveF2I src));
11172   effect( DEF dst, USE src );
11173 
11174   ins_cost(95);
11175   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11176   ins_encode %{
11177     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11178   %}
11179   ins_pipe( pipe_slow );
11180 %}
11181 
11182 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11183   predicate(UseSSE>=2);
11184   match(Set dst (MoveF2I src));
11185   effect( DEF dst, USE src );
11186   ins_cost(85);
11187   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11188   ins_encode %{
11189     __ movdl($dst$$Register, $src$$XMMRegister);
11190   %}
11191   ins_pipe( pipe_slow );
11192 %}
11193 
11194 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11195   match(Set dst (MoveI2F src));
11196   effect( DEF dst, USE src );
11197 
11198   ins_cost(100);
11199   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11200   ins_encode %{
11201     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11202   %}
11203   ins_pipe( ialu_mem_reg );
11204 %}
11205 
11206 
11207 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11208   predicate(UseSSE==0);
11209   match(Set dst (MoveI2F src));
11210   effect(DEF dst, USE src);
11211 
11212   ins_cost(125);
11213   format %{ "FLD_S  $src\n\t"
11214             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11215   opcode(0xD9);               /* D9 /0, FLD m32real */
11216   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11217               Pop_Reg_FPR(dst) );
11218   ins_pipe( fpu_reg_mem );
11219 %}
11220 
11221 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11222   predicate(UseSSE>=1);
11223   match(Set dst (MoveI2F src));
11224   effect( DEF dst, USE src );
11225 
11226   ins_cost(95);
11227   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11228   ins_encode %{
11229     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11230   %}
11231   ins_pipe( pipe_slow );
11232 %}
11233 
11234 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11235   predicate(UseSSE>=2);
11236   match(Set dst (MoveI2F src));
11237   effect( DEF dst, USE src );
11238 
11239   ins_cost(85);
11240   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11241   ins_encode %{
11242     __ movdl($dst$$XMMRegister, $src$$Register);
11243   %}
11244   ins_pipe( pipe_slow );
11245 %}
11246 
11247 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11248   match(Set dst (MoveD2L src));
11249   effect(DEF dst, USE src);
11250 
11251   ins_cost(250);
11252   format %{ "MOV    $dst.lo,$src\n\t"
11253             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11254   opcode(0x8B, 0x8B);
11255   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11256   ins_pipe( ialu_mem_long_reg );
11257 %}
11258 
11259 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11260   predicate(UseSSE<=1);
11261   match(Set dst (MoveD2L src));
11262   effect(DEF dst, USE src);
11263 
11264   ins_cost(125);
11265   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11266   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11267   ins_pipe( fpu_mem_reg );
11268 %}
11269 
11270 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11271   predicate(UseSSE>=2);
11272   match(Set dst (MoveD2L src));
11273   effect(DEF dst, USE src);
11274   ins_cost(95);
11275   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11276   ins_encode %{
11277     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11278   %}
11279   ins_pipe( pipe_slow );
11280 %}
11281 
11282 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11283   predicate(UseSSE>=2);
11284   match(Set dst (MoveD2L src));
11285   effect(DEF dst, USE src, TEMP tmp);
11286   ins_cost(85);
11287   format %{ "MOVD   $dst.lo,$src\n\t"
11288             "PSHUFLW $tmp,$src,0x4E\n\t"
11289             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11290   ins_encode %{
11291     __ movdl($dst$$Register, $src$$XMMRegister);
11292     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11293     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11294   %}
11295   ins_pipe( pipe_slow );
11296 %}
11297 
11298 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11299   match(Set dst (MoveL2D src));
11300   effect(DEF dst, USE src);
11301 
11302   ins_cost(200);
11303   format %{ "MOV    $dst,$src.lo\n\t"
11304             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11305   opcode(0x89, 0x89);
11306   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11307   ins_pipe( ialu_mem_long_reg );
11308 %}
11309 
11310 
11311 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11312   predicate(UseSSE<=1);
11313   match(Set dst (MoveL2D src));
11314   effect(DEF dst, USE src);
11315   ins_cost(125);
11316 
11317   format %{ "FLD_D  $src\n\t"
11318             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11319   opcode(0xDD);               /* DD /0, FLD m64real */
11320   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11321               Pop_Reg_DPR(dst) );
11322   ins_pipe( fpu_reg_mem );
11323 %}
11324 
11325 
11326 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11327   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11328   match(Set dst (MoveL2D src));
11329   effect(DEF dst, USE src);
11330 
11331   ins_cost(95);
11332   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11333   ins_encode %{
11334     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11335   %}
11336   ins_pipe( pipe_slow );
11337 %}
11338 
11339 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11340   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11341   match(Set dst (MoveL2D src));
11342   effect(DEF dst, USE src);
11343 
11344   ins_cost(95);
11345   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11346   ins_encode %{
11347     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11348   %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
11352 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11353   predicate(UseSSE>=2);
11354   match(Set dst (MoveL2D src));
11355   effect(TEMP dst, USE src, TEMP tmp);
11356   ins_cost(85);
11357   format %{ "MOVD   $dst,$src.lo\n\t"
11358             "MOVD   $tmp,$src.hi\n\t"
11359             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11360   ins_encode %{
11361     __ movdl($dst$$XMMRegister, $src$$Register);
11362     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11363     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 
11369 // =======================================================================
11370 // fast clearing of an array
11371 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11372   predicate(!UseFastStosb);
11373   match(Set dummy (ClearArray cnt base));
11374   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11375   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11376             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11377             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11378   ins_encode %{
11379     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11380   %}
11381   ins_pipe( pipe_slow );
11382 %}
11383 
11384 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11385   predicate(UseFastStosb);
11386   match(Set dummy (ClearArray cnt base));
11387   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11388   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11389             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11390             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11391   ins_encode %{
11392     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11393   %}
11394   ins_pipe( pipe_slow );
11395 %}
11396 
11397 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11398                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11399   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11400   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11401   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11402 
11403   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11404   ins_encode %{
11405     __ string_compare($str1$$Register, $str2$$Register,
11406                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11407                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11408   %}
11409   ins_pipe( pipe_slow );
11410 %}
11411 
11412 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11413                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11414   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11415   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11416   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11417 
11418   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11419   ins_encode %{
11420     __ string_compare($str1$$Register, $str2$$Register,
11421                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11422                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11423   %}
11424   ins_pipe( pipe_slow );
11425 %}
11426 
11427 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11428                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11429   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11430   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11431   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11432 
11433   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11434   ins_encode %{
11435     __ string_compare($str1$$Register, $str2$$Register,
11436                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11437                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11438   %}
11439   ins_pipe( pipe_slow );
11440 %}
11441 
11442 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11443                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11444   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11445   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11446   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11447 
11448   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11449   ins_encode %{
11450     __ string_compare($str2$$Register, $str1$$Register,
11451                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11452                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11453   %}
11454   ins_pipe( pipe_slow );
11455 %}
11456 
11457 // fast string equals
11458 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11459                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11460   match(Set result (StrEquals (Binary str1 str2) cnt));
11461   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11462 
11463   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11464   ins_encode %{
11465     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11466                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11467                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11468   %}
11469 
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 // fast search of substring with known size.
11474 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11475                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11476   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11477   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11478   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11479 
11480   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11481   ins_encode %{
11482     int icnt2 = (int)$int_cnt2$$constant;
11483     if (icnt2 >= 16) {
11484       // IndexOf for constant substrings with size >= 16 elements
11485       // which don't need to be loaded through the stack.
11486       __ string_indexofC8($str1$$Register, $str2$$Register,
11487                           $cnt1$$Register, $cnt2$$Register,
11488                           icnt2, $result$$Register,
11489                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11490     } else {
11491       // Small strings are loaded through the stack if they cross a page boundary.
11492       __ string_indexof($str1$$Register, $str2$$Register,
11493                         $cnt1$$Register, $cnt2$$Register,
11494                         icnt2, $result$$Register,
11495                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11496     }
11497   %}
11498   ins_pipe( pipe_slow );
11499 %}
11500 
11501 // fast search of substring with known size.
11502 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11503                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11504   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11505   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11506   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11507 
11508   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11509   ins_encode %{
11510     int icnt2 = (int)$int_cnt2$$constant;
11511     if (icnt2 >= 8) {
11512       // IndexOf for constant substrings with size >= 8 elements
11513       // which don't need to be loaded through the stack.
11514       __ string_indexofC8($str1$$Register, $str2$$Register,
11515                           $cnt1$$Register, $cnt2$$Register,
11516                           icnt2, $result$$Register,
11517                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11518     } else {
11519       // Small strings are loaded through the stack if they cross a page boundary.
11520       __ string_indexof($str1$$Register, $str2$$Register,
11521                         $cnt1$$Register, $cnt2$$Register,
11522                         icnt2, $result$$Register,
11523                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11524     }
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
11528 
11529 // fast search of substring with known size.
11530 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11531                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11532   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11533   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11534   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11535 
11536   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11537   ins_encode %{
11538     int icnt2 = (int)$int_cnt2$$constant;
11539     if (icnt2 >= 8) {
11540       // IndexOf for constant substrings with size >= 8 elements
11541       // which don't need to be loaded through the stack.
11542       __ string_indexofC8($str1$$Register, $str2$$Register,
11543                           $cnt1$$Register, $cnt2$$Register,
11544                           icnt2, $result$$Register,
11545                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11546     } else {
11547       // Small strings are loaded through the stack if they cross a page boundary.
11548       __ string_indexof($str1$$Register, $str2$$Register,
11549                         $cnt1$$Register, $cnt2$$Register,
11550                         icnt2, $result$$Register,
11551                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11552     }
11553   %}
11554   ins_pipe( pipe_slow );
11555 %}
11556 
11557 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11558                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11559   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11560   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11561   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11562 
11563   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11564   ins_encode %{
11565     __ string_indexof($str1$$Register, $str2$$Register,
11566                       $cnt1$$Register, $cnt2$$Register,
11567                       (-1), $result$$Register,
11568                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11569   %}
11570   ins_pipe( pipe_slow );
11571 %}
11572 
11573 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11574                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11575   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11576   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11577   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11578 
11579   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11580   ins_encode %{
11581     __ string_indexof($str1$$Register, $str2$$Register,
11582                       $cnt1$$Register, $cnt2$$Register,
11583                       (-1), $result$$Register,
11584                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11585   %}
11586   ins_pipe( pipe_slow );
11587 %}
11588 
11589 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11590                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11591   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11592   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11593   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11594 
11595   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11596   ins_encode %{
11597     __ string_indexof($str1$$Register, $str2$$Register,
11598                       $cnt1$$Register, $cnt2$$Register,
11599                       (-1), $result$$Register,
11600                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11601   %}
11602   ins_pipe( pipe_slow );
11603 %}
11604 
11605 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11606                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11607   predicate(UseSSE42Intrinsics);
11608   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11609   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11610   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11611   ins_encode %{
11612     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11613                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11614   %}
11615   ins_pipe( pipe_slow );
11616 %}
11617 
11618 // fast array equals
11619 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11620                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11621 %{
11622   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11623   match(Set result (AryEq ary1 ary2));
11624   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11625   //ins_cost(300);
11626 
11627   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11628   ins_encode %{
11629     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11630                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11631                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11632   %}
11633   ins_pipe( pipe_slow );
11634 %}
11635 
11636 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11637                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11638 %{
11639   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11640   match(Set result (AryEq ary1 ary2));
11641   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11642   //ins_cost(300);
11643 
11644   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11645   ins_encode %{
11646     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11647                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11648                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11649   %}
11650   ins_pipe( pipe_slow );
11651 %}
11652 
11653 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11654                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11655 %{
11656   match(Set result (HasNegatives ary1 len));
11657   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11658 
11659   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11660   ins_encode %{
11661     __ has_negatives($ary1$$Register, $len$$Register,
11662                      $result$$Register, $tmp3$$Register,
11663                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11664   %}
11665   ins_pipe( pipe_slow );
11666 %}
11667 
11668 // fast char[] to byte[] compression
11669 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11670                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11671   match(Set result (StrCompressedCopy src (Binary dst len)));
11672   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11673 
11674   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11675   ins_encode %{
11676     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11677                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11678                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11679   %}
11680   ins_pipe( pipe_slow );
11681 %}
11682 
11683 // fast byte[] to char[] inflation
11684 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11685                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11686   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11687   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11688 
11689   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11690   ins_encode %{
11691     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11692                           $tmp1$$XMMRegister, $tmp2$$Register);
11693   %}
11694   ins_pipe( pipe_slow );
11695 %}
11696 
11697 // encode char[] to byte[] in ISO_8859_1
11698 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11699                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11700                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11701   match(Set result (EncodeISOArray src (Binary dst len)));
11702   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11703 
11704   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11705   ins_encode %{
11706     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11707                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11708                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11709   %}
11710   ins_pipe( pipe_slow );
11711 %}
11712 
11713 
11714 //----------Control Flow Instructions------------------------------------------
11715 // Signed compare Instructions
11716 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11717   match(Set cr (CmpI op1 op2));
11718   effect( DEF cr, USE op1, USE op2 );
11719   format %{ "CMP    $op1,$op2" %}
11720   opcode(0x3B);  /* Opcode 3B /r */
11721   ins_encode( OpcP, RegReg( op1, op2) );
11722   ins_pipe( ialu_cr_reg_reg );
11723 %}
11724 
11725 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11726   match(Set cr (CmpI op1 op2));
11727   effect( DEF cr, USE op1 );
11728   format %{ "CMP    $op1,$op2" %}
11729   opcode(0x81,0x07);  /* Opcode 81 /7 */
11730   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11731   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11732   ins_pipe( ialu_cr_reg_imm );
11733 %}
11734 
11735 // Cisc-spilled version of cmpI_eReg
11736 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11737   match(Set cr (CmpI op1 (LoadI op2)));
11738 
11739   format %{ "CMP    $op1,$op2" %}
11740   ins_cost(500);
11741   opcode(0x3B);  /* Opcode 3B /r */
11742   ins_encode( OpcP, RegMem( op1, op2) );
11743   ins_pipe( ialu_cr_reg_mem );
11744 %}
11745 
11746 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11747   match(Set cr (CmpI src zero));
11748   effect( DEF cr, USE src );
11749 
11750   format %{ "TEST   $src,$src" %}
11751   opcode(0x85);
11752   ins_encode( OpcP, RegReg( src, src ) );
11753   ins_pipe( ialu_cr_reg_imm );
11754 %}
11755 
11756 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11757   match(Set cr (CmpI (AndI src con) zero));
11758 
11759   format %{ "TEST   $src,$con" %}
11760   opcode(0xF7,0x00);
11761   ins_encode( OpcP, RegOpc(src), Con32(con) );
11762   ins_pipe( ialu_cr_reg_imm );
11763 %}
11764 
11765 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11766   match(Set cr (CmpI (AndI src mem) zero));
11767 
11768   format %{ "TEST   $src,$mem" %}
11769   opcode(0x85);
11770   ins_encode( OpcP, RegMem( src, mem ) );
11771   ins_pipe( ialu_cr_reg_mem );
11772 %}
11773 
11774 // Unsigned compare Instructions; really, same as signed except they
11775 // produce an eFlagsRegU instead of eFlagsReg.
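// (The eFlagsRegU result is what steers the matcher toward the unsigned branch
// forms further below (the "J$cop,u" variants), so consumers emit
// JB/JA/JBE/JAE rather than the signed JL/JG family.)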
11776 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11777   match(Set cr (CmpU op1 op2));
11778 
11779   format %{ "CMPu   $op1,$op2" %}
11780   opcode(0x3B);  /* Opcode 3B /r */
11781   ins_encode( OpcP, RegReg( op1, op2) );
11782   ins_pipe( ialu_cr_reg_reg );
11783 %}
11784 
11785 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11786   match(Set cr (CmpU op1 op2));
11787 
11788   format %{ "CMPu   $op1,$op2" %}
11789   opcode(0x81,0x07);  /* Opcode 81 /7 */
11790   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11791   ins_pipe( ialu_cr_reg_imm );
11792 %}
11793 
11794 // Cisc-spilled version of cmpU_eReg
11795 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11796   match(Set cr (CmpU op1 (LoadI op2)));
11797 
11798   format %{ "CMPu   $op1,$op2" %}
11799   ins_cost(500);
11800   opcode(0x3B);  /* Opcode 3B /r */
11801   ins_encode( OpcP, RegMem( op1, op2) );
11802   ins_pipe( ialu_cr_reg_mem );
11803 %}
11804 
11805 // // Cisc-spilled version of cmpU_eReg
11806 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11807 //  match(Set cr (CmpU (LoadI op1) op2));
11808 //
11809 //  format %{ "CMPu   $op1,$op2" %}
11810 //  ins_cost(500);
11811 //  opcode(0x39);  /* Opcode 39 /r */
11812 //  ins_encode( OpcP, RegMem( op1, op2) );
11813 //%}
11814 
11815 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11816   match(Set cr (CmpU src zero));
11817 
11818   format %{ "TESTu  $src,$src" %}
11819   opcode(0x85);
11820   ins_encode( OpcP, RegReg( src, src ) );
11821   ins_pipe( ialu_cr_reg_imm );
11822 %}
11823 
11824 // Unsigned pointer compare Instructions
11825 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11826   match(Set cr (CmpP op1 op2));
11827 
11828   format %{ "CMPu   $op1,$op2" %}
11829   opcode(0x3B);  /* Opcode 3B /r */
11830   ins_encode( OpcP, RegReg( op1, op2) );
11831   ins_pipe( ialu_cr_reg_reg );
11832 %}
11833 
11834 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11835   match(Set cr (CmpP op1 op2));
11836 
11837   format %{ "CMPu   $op1,$op2" %}
11838   opcode(0x81,0x07);  /* Opcode 81 /7 */
11839   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11840   ins_pipe( ialu_cr_reg_imm );
11841 %}
11842 
11843 // Cisc-spilled version of cmpP_eReg
11844 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11845   match(Set cr (CmpP op1 (LoadP op2)));
11846 
11847   format %{ "CMPu   $op1,$op2" %}
11848   ins_cost(500);
11849   opcode(0x3B);  /* Opcode 3B /r */
11850   ins_encode( OpcP, RegMem( op1, op2) );
11851   ins_pipe( ialu_cr_reg_mem );
11852 %}
11853 
11854 // // Cisc-spilled version of cmpP_eReg
11855 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11856 //  match(Set cr (CmpP (LoadP op1) op2));
11857 //
11858 //  format %{ "CMPu   $op1,$op2" %}
11859 //  ins_cost(500);
11860 //  opcode(0x39);  /* Opcode 39 /r */
11861 //  ins_encode( OpcP, RegMem( op1, op2) );
11862 //%}
11863 
11864 // Compare raw pointer (used in out-of-heap check).
11865 // Only works because non-oop pointers must be raw pointers
11866 // and raw pointers have no anti-dependencies.
11867 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11868   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11869   match(Set cr (CmpP op1 (LoadP op2)));
11870 
11871   format %{ "CMPu   $op1,$op2" %}
11872   opcode(0x3B);  /* Opcode 3B /r */
11873   ins_encode( OpcP, RegMem( op1, op2) );
11874   ins_pipe( ialu_cr_reg_mem );
11875 %}
11876 
11877 //
11878 // This will generate a signed flags result. This should be ok
11879 // since any compare against zero should be eq/neq.
11880 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11881   match(Set cr (CmpP src zero));
11882 
11883   format %{ "TEST   $src,$src" %}
11884   opcode(0x85);
11885   ins_encode( OpcP, RegReg( src, src ) );
11886   ins_pipe( ialu_cr_reg_imm );
11887 %}
11888 
11889 // Cisc-spilled version of testP_reg
11890 // This will generate a signed flags result. This should be ok
11891 // since any compare against zero should be eq/neq.
11892 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11893   match(Set cr (CmpP (LoadP op) zero));
11894 
11895   format %{ "TEST   $op,0xFFFFFFFF" %}
11896   ins_cost(500);
11897   opcode(0xF7);               /* Opcode F7 /0 */
11898   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11899   ins_pipe( ialu_cr_reg_imm );
11900 %}
11901 
11902 // Yanked all unsigned pointer compare operations.
11903 // Pointer compares are done with CmpP which is already unsigned.
11904 
11905 //----------Max and Min--------------------------------------------------------
11906 // Min Instructions
11907 ////
11908 //   *** Min and Max using the conditional move are slower than the
11909 //   *** branch version on a Pentium III.
11910 // // Conditional move for min
11911 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11912 //  effect( USE_DEF op2, USE op1, USE cr );
11913 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11914 //  opcode(0x4C,0x0F);
11915 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11916 //  ins_pipe( pipe_cmov_reg );
11917 //%}
11918 //
11919 //// Min Register with Register (P6 version)
11920 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11921 //  predicate(VM_Version::supports_cmov() );
11922 //  match(Set op2 (MinI op1 op2));
11923 //  ins_cost(200);
11924 //  expand %{
11925 //    eFlagsReg cr;
11926 //    compI_eReg(cr,op1,op2);
11927 //    cmovI_reg_lt(op2,op1,cr);
11928 //  %}
11929 //%}
11930 
11931 // Min Register with Register (generic version)
11932 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11933   match(Set dst (MinI dst src));
11934   effect(KILL flags);
11935   ins_cost(300);
11936 
11937   format %{ "MIN    $dst,$src" %}
11938   opcode(0xCC);
11939   ins_encode( min_enc(dst,src) );
11940   ins_pipe( pipe_slow );
11941 %}
11942 
11943 // Max Register with Register
11944 //   *** Min and Max using the conditional move are slower than the
11945 //   *** branch version on a Pentium III.
11946 // // Conditional move for max
11947 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11948 //  effect( USE_DEF op2, USE op1, USE cr );
11949 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11950 //  opcode(0x4F,0x0F);
11951 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11952 //  ins_pipe( pipe_cmov_reg );
11953 //%}
11954 //
11955 // // Max Register with Register (P6 version)
11956 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11957 //  predicate(VM_Version::supports_cmov() );
11958 //  match(Set op2 (MaxI op1 op2));
11959 //  ins_cost(200);
11960 //  expand %{
11961 //    eFlagsReg cr;
11962 //    compI_eReg(cr,op1,op2);
11963 //    cmovI_reg_gt(op2,op1,cr);
11964 //  %}
11965 //%}
11966 
11967 // Max Register with Register (generic version)
11968 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11969   match(Set dst (MaxI dst src));
11970   effect(KILL flags);
11971   ins_cost(300);
11972 
11973   format %{ "MAX    $dst,$src" %}
11974   opcode(0xCC);
11975   ins_encode( max_enc(dst,src) );
11976   ins_pipe( pipe_slow );
11977 %}
11978 
11979 // ============================================================================
11980 // Counted Loop limit node which represents exact final iterator value.
11981 // Note: the resulting value should fit into the integer range, since
11982 // counted loops have a limit check for overflow.
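// For example, with init = 0, limit = 10 and stride = 3 the trip count is
// (10 - 0 + 3 - 1) / 3 = 4 iterations (i = 0, 3, 6, 9), so the exact final
// iterator value computed here is 0 + 3 * 4 = 12.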
11983 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11984   match(Set limit (LoopLimit (Binary init limit) stride));
11985   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11986   ins_cost(300);
11987 
11988   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11989   ins_encode %{
11990     int strd = (int)$stride$$constant;
11991     assert(strd != 1 && strd != -1, "sanity");
11992     int m1 = (strd > 0) ? 1 : -1;
11993     // Convert limit to long (EAX:EDX)
11994     __ cdql();
11995     // Convert init to long (init:tmp)
11996     __ movl($tmp$$Register, $init$$Register);
11997     __ sarl($tmp$$Register, 31);
11998     // $limit - $init
11999     __ subl($limit$$Register, $init$$Register);
12000     __ sbbl($limit_hi$$Register, $tmp$$Register);
12001     // + ($stride - 1)
12002     if (strd > 0) {
12003       __ addl($limit$$Register, (strd - 1));
12004       __ adcl($limit_hi$$Register, 0);
12005       __ movl($tmp$$Register, strd);
12006     } else {
12007       __ addl($limit$$Register, (strd + 1));
12008       __ adcl($limit_hi$$Register, -1);
12009       __ lneg($limit_hi$$Register, $limit$$Register);
12010       __ movl($tmp$$Register, -strd);
12011     }
12012     // signed division: (EAX:EDX) / pos_stride
12013     __ idivl($tmp$$Register);
12014     if (strd < 0) {
12015       // restore sign
12016       __ negl($tmp$$Register);
12017     }
12018     // (EAX) * stride
12019     __ mull($tmp$$Register);
12020     // + init (ignore upper bits)
12021     __ addl($limit$$Register, $init$$Register);
12022   %}
12023   ins_pipe( pipe_slow );
12024 %}
12025 
12026 // ============================================================================
12027 // Branch Instructions
12028 // Jump Table
12029 instruct jumpXtnd(rRegI switch_val) %{
12030   match(Jump switch_val);
12031   ins_cost(350);
12032   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12033   ins_encode %{
12034     // Jump to Address(table_base + switch_reg)
12035     Address index(noreg, $switch_val$$Register, Address::times_1);
12036     __ jump(ArrayAddress($constantaddress, index));
12037   %}
12038   ins_pipe(pipe_jmp);
12039 %}
12040 
12041 // Jump Direct - Label defines a relative address from JMP+1
12042 instruct jmpDir(label labl) %{
12043   match(Goto);
12044   effect(USE labl);
12045 
12046   ins_cost(300);
12047   format %{ "JMP    $labl" %}
12048   size(5);
12049   ins_encode %{
12050     Label* L = $labl$$label;
12051     __ jmp(*L, false); // Always long jump
12052   %}
12053   ins_pipe( pipe_jmp );
12054 %}
12055 
12056 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12057 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12058   match(If cop cr);
12059   effect(USE labl);
12060 
12061   ins_cost(300);
12062   format %{ "J$cop    $labl" %}
12063   size(6);
12064   ins_encode %{
12065     Label* L = $labl$$label;
12066     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12067   %}
12068   ins_pipe( pipe_jcc );
12069 %}
12070 
12071 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12072 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12073   match(CountedLoopEnd cop cr);
12074   effect(USE labl);
12075 
12076   ins_cost(300);
12077   format %{ "J$cop    $labl\t# Loop end" %}
12078   size(6);
12079   ins_encode %{
12080     Label* L = $labl$$label;
12081     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12082   %}
12083   ins_pipe( pipe_jcc );
12084 %}
12085 
12086 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12087 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12088   match(CountedLoopEnd cop cmp);
12089   effect(USE labl);
12090 
12091   ins_cost(300);
12092   format %{ "J$cop,u  $labl\t# Loop end" %}
12093   size(6);
12094   ins_encode %{
12095     Label* L = $labl$$label;
12096     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12097   %}
12098   ins_pipe( pipe_jcc );
12099 %}
12100 
12101 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12102   match(CountedLoopEnd cop cmp);
12103   effect(USE labl);
12104 
12105   ins_cost(200);
12106   format %{ "J$cop,u  $labl\t# Loop end" %}
12107   size(6);
12108   ins_encode %{
12109     Label* L = $labl$$label;
12110     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12111   %}
12112   ins_pipe( pipe_jcc );
12113 %}
12114 
12115 // Jump Direct Conditional - using unsigned comparison
12116 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12117   match(If cop cmp);
12118   effect(USE labl);
12119 
12120   ins_cost(300);
12121   format %{ "J$cop,u  $labl" %}
12122   size(6);
12123   ins_encode %{
12124     Label* L = $labl$$label;
12125     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12126   %}
12127   ins_pipe(pipe_jcc);
12128 %}
12129 
12130 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12131   match(If cop cmp);
12132   effect(USE labl);
12133 
12134   ins_cost(200);
12135   format %{ "J$cop,u  $labl" %}
12136   size(6);
12137   ins_encode %{
12138     Label* L = $labl$$label;
12139     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12140   %}
12141   ins_pipe(pipe_jcc);
12142 %}
12143 
12144 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12145   match(If cop cmp);
12146   effect(USE labl);
12147 
12148   ins_cost(200);
12149   format %{ $$template
12150     if ($cop$$cmpcode == Assembler::notEqual) {
12151       $$emit$$"JP,u   $labl\n\t"
12152       $$emit$$"J$cop,u   $labl"
12153     } else {
12154       $$emit$$"JP,u   done\n\t"
12155       $$emit$$"J$cop,u   $labl\n\t"
12156       $$emit$$"done:"
12157     }
12158   %}
12159   ins_encode %{
12160     Label* l = $labl$$label;
12161     if ($cop$$cmpcode == Assembler::notEqual) {
12162       __ jcc(Assembler::parity, *l, false);
12163       __ jcc(Assembler::notEqual, *l, false);
12164     } else if ($cop$$cmpcode == Assembler::equal) {
12165       Label done;
12166       __ jccb(Assembler::parity, done);
12167       __ jcc(Assembler::equal, *l, false);
12168       __ bind(done);
12169     } else {
12170        ShouldNotReachHere();
12171     }
12172   %}
12173   ins_pipe(pipe_jcc);
12174 %}
12175 
12176 // ============================================================================
12177 // The second, slow half of a subtype check.  Scan the subklass's secondary
12178 // superklass array for an instance of the superklass.  Set a hidden internal
12179 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12180 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
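// Roughly, the scan encoded below does the following (a sketch only, not the
// exact generated code):
//
//   for (i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers[i] == super) {
//       sub->secondary_super_cache = super;   // hit: result/EDI = 0, flags Z
//       return 0;
//     }
//   }
//   return non-zero;                          // miss: result/EDI != 0, flags NZ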
12181 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12182   match(Set result (PartialSubtypeCheck sub super));
12183   effect( KILL rcx, KILL cr );
12184 
12185   ins_cost(1100);  // slightly larger than the next version
12186   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12187             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12188             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12189             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12190             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12191             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12192             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12193      "miss:\t" %}
12194 
12195   opcode(0x1); // Force a XOR of EDI
12196   ins_encode( enc_PartialSubtypeCheck() );
12197   ins_pipe( pipe_slow );
12198 %}
12199 
12200 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12201   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12202   effect( KILL rcx, KILL result );
12203 
12204   ins_cost(1000);
12205   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12206             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12207             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12208             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12209             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12210             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12211      "miss:\t" %}
12212 
12213   opcode(0x0);  // No need to XOR EDI
12214   ins_encode( enc_PartialSubtypeCheck() );
12215   ins_pipe( pipe_slow );
12216 %}
12217 
12218 // ============================================================================
12219 // Branch Instructions -- short offset versions
12220 //
12221 // These instructions are used to replace jumps of a long offset (the default
12222 // match) with jumps of a shorter offset.  These instructions are all tagged
12223 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12224 // match rules in general matching.  Instead, the ADLC generates a conversion
12225 // method in the MachNode which can be used to do in-place replacement of the
12226 // long variant with the shorter variant.  The compiler determines whether a
12227 // branch can use the short form via the is_short_branch_offset() predicate in
12228 // the machine specific code section of the file.
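// For reference: JMP rel8 and Jcc rel8 are 2-byte encodings, versus 5 bytes for
// JMP rel32 and 6 bytes for the two-byte-opcode Jcc rel32 forms used by the
// long variants above; hence the size(2) declarations below.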
12229 
12230 // Jump Direct - Label defines a relative address from JMP+1
12231 instruct jmpDir_short(label labl) %{
12232   match(Goto);
12233   effect(USE labl);
12234 
12235   ins_cost(300);
12236   format %{ "JMP,s  $labl" %}
12237   size(2);
12238   ins_encode %{
12239     Label* L = $labl$$label;
12240     __ jmpb(*L);
12241   %}
12242   ins_pipe( pipe_jmp );
12243   ins_short_branch(1);
12244 %}
12245 
12246 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12247 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12248   match(If cop cr);
12249   effect(USE labl);
12250 
12251   ins_cost(300);
12252   format %{ "J$cop,s  $labl" %}
12253   size(2);
12254   ins_encode %{
12255     Label* L = $labl$$label;
12256     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12257   %}
12258   ins_pipe( pipe_jcc );
12259   ins_short_branch(1);
12260 %}
12261 
12262 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12263 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12264   match(CountedLoopEnd cop cr);
12265   effect(USE labl);
12266 
12267   ins_cost(300);
12268   format %{ "J$cop,s  $labl\t# Loop end" %}
12269   size(2);
12270   ins_encode %{
12271     Label* L = $labl$$label;
12272     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12273   %}
12274   ins_pipe( pipe_jcc );
12275   ins_short_branch(1);
12276 %}
12277 
12278 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12279 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12280   match(CountedLoopEnd cop cmp);
12281   effect(USE labl);
12282 
12283   ins_cost(300);
12284   format %{ "J$cop,us $labl\t# Loop end" %}
12285   size(2);
12286   ins_encode %{
12287     Label* L = $labl$$label;
12288     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12289   %}
12290   ins_pipe( pipe_jcc );
12291   ins_short_branch(1);
12292 %}
12293 
12294 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12295   match(CountedLoopEnd cop cmp);
12296   effect(USE labl);
12297 
12298   ins_cost(300);
12299   format %{ "J$cop,us $labl\t# Loop end" %}
12300   size(2);
12301   ins_encode %{
12302     Label* L = $labl$$label;
12303     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12304   %}
12305   ins_pipe( pipe_jcc );
12306   ins_short_branch(1);
12307 %}
12308 
12309 // Jump Direct Conditional - using unsigned comparison
12310 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12311   match(If cop cmp);
12312   effect(USE labl);
12313 
12314   ins_cost(300);
12315   format %{ "J$cop,us $labl" %}
12316   size(2);
12317   ins_encode %{
12318     Label* L = $labl$$label;
12319     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12320   %}
12321   ins_pipe( pipe_jcc );
12322   ins_short_branch(1);
12323 %}
12324 
12325 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12326   match(If cop cmp);
12327   effect(USE labl);
12328 
12329   ins_cost(300);
12330   format %{ "J$cop,us $labl" %}
12331   size(2);
12332   ins_encode %{
12333     Label* L = $labl$$label;
12334     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12335   %}
12336   ins_pipe( pipe_jcc );
12337   ins_short_branch(1);
12338 %}
12339 
12340 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12341   match(If cop cmp);
12342   effect(USE labl);
12343 
12344   ins_cost(300);
12345   format %{ $$template
12346     if ($cop$$cmpcode == Assembler::notEqual) {
12347       $$emit$$"JP,u,s   $labl\n\t"
12348       $$emit$$"J$cop,u,s   $labl"
12349     } else {
12350       $$emit$$"JP,u,s   done\n\t"
12351       $$emit$$"J$cop,u,s  $labl\n\t"
12352       $$emit$$"done:"
12353     }
12354   %}
12355   size(4);
12356   ins_encode %{
12357     Label* l = $labl$$label;
12358     if ($cop$$cmpcode == Assembler::notEqual) {
12359       __ jccb(Assembler::parity, *l);
12360       __ jccb(Assembler::notEqual, *l);
12361     } else if ($cop$$cmpcode == Assembler::equal) {
12362       Label done;
12363       __ jccb(Assembler::parity, done);
12364       __ jccb(Assembler::equal, *l);
12365       __ bind(done);
12366     } else {
12367        ShouldNotReachHere();
12368     }
12369   %}
12370   ins_pipe(pipe_jcc);
12371   ins_short_branch(1);
12372 %}
12373 
12374 // ============================================================================
12375 // Long Compare
12376 //
12377 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12378 // is tricky.  The flavor of compare used depends on whether we are testing
12379 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12380 // The GE test is the negated LT test.  The LE test can be had by commuting
12381 // the operands (yielding a GE test) and then negating; negate again for the
12382 // GT test.  The EQ test is done by OR'ing the high and low halves together
12383 // (and testing the result for zero); the NE test is the negation of that.
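// For example, the LT/GE flavor below effectively computes src1 - src2 with a
// CMP on the low halves followed by an SBB on the high halves; only the flags
// of the high-half subtract-with-borrow are needed, so its numeric result is
// discarded (see cmpL_reg_flags_LTGE).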
12384 
12385 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12386 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12387 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12388 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12389 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12390 // foo match ends up with the wrong leaf.  One fix is to not match both
12391 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12392 // both forms beat the trinary form of long-compare and both are very useful
12393 // on Intel which has so few registers.
12394 
12395 // Manifest a CmpL result in an integer register.  Very painful.
12396 // This is the test to avoid.
12397 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12398   match(Set dst (CmpL3 src1 src2));
12399   effect( KILL flags );
12400   ins_cost(1000);
12401   format %{ "XOR    $dst,$dst\n\t"
12402             "CMP    $src1.hi,$src2.hi\n\t"
12403             "JLT,s  m_one\n\t"
12404             "JGT,s  p_one\n\t"
12405             "CMP    $src1.lo,$src2.lo\n\t"
12406             "JB,s   m_one\n\t"
12407             "JEQ,s  done\n"
12408     "p_one:\tINC    $dst\n\t"
12409             "JMP,s  done\n"
12410     "m_one:\tDEC    $dst\n"
12411      "done:" %}
12412   ins_encode %{
12413     Label p_one, m_one, done;
12414     __ xorptr($dst$$Register, $dst$$Register);
12415     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12416     __ jccb(Assembler::less,    m_one);
12417     __ jccb(Assembler::greater, p_one);
12418     __ cmpl($src1$$Register, $src2$$Register);
12419     __ jccb(Assembler::below,   m_one);
12420     __ jccb(Assembler::equal,   done);
12421     __ bind(p_one);
12422     __ incrementl($dst$$Register);
12423     __ jmpb(done);
12424     __ bind(m_one);
12425     __ decrementl($dst$$Register);
12426     __ bind(done);
12427   %}
12428   ins_pipe( pipe_slow );
12429 %}
12430 
12431 //======
12432 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12433 // compares.  Can be used for LE or GT compares by reversing arguments.
12434 // NOT GOOD FOR EQ/NE tests.
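// (Testing only the high word determines the sign of the long, which is all
// that an LT/GE test against zero needs; it cannot distinguish 0 from a value
// that is nonzero only in the low word, hence the EQ/NE exclusion above.)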
12435 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12436   match( Set flags (CmpL src zero ));
12437   ins_cost(100);
12438   format %{ "TEST   $src.hi,$src.hi" %}
12439   opcode(0x85);
12440   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12441   ins_pipe( ialu_cr_reg_reg );
12442 %}
12443 
12444 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12445 // compares.  Can be used for LE or GT compares by reversing arguments.
12446 // NOT GOOD FOR EQ/NE tests.
12447 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12448   match( Set flags (CmpL src1 src2 ));
12449   effect( TEMP tmp );
12450   ins_cost(300);
12451   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12452             "MOV    $tmp,$src1.hi\n\t"
12453             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12454   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12455   ins_pipe( ialu_cr_reg_reg );
12456 %}
12457 
// Long compares reg < zero/reg OR reg >= zero/reg.
12459 // Just a wrapper for a normal branch, plus the predicate test.
12460 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12461   match(If cmp flags);
12462   effect(USE labl);
12463   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12464   expand %{
12465     jmpCon(cmp,flags,labl);    // JLT or JGE...
12466   %}
12467 %}
12468 
12469 // Compare 2 longs and CMOVE longs.
12470 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12471   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12472   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12473   ins_cost(400);
12474   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12475             "CMOV$cmp $dst.hi,$src.hi" %}
12476   opcode(0x0F,0x40);
12477   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12478   ins_pipe( pipe_cmov_reg_long );
12479 %}
12480 
12481 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12482   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12483   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12484   ins_cost(500);
12485   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12486             "CMOV$cmp $dst.hi,$src.hi" %}
12487   opcode(0x0F,0x40);
12488   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12489   ins_pipe( pipe_cmov_reg_long );
12490 %}
12491 
12492 // Compare 2 longs and CMOVE ints.
12493 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12494   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12495   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12496   ins_cost(200);
12497   format %{ "CMOV$cmp $dst,$src" %}
12498   opcode(0x0F,0x40);
12499   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12500   ins_pipe( pipe_cmov_reg );
12501 %}
12502 
12503 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12504   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12505   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12506   ins_cost(250);
12507   format %{ "CMOV$cmp $dst,$src" %}
12508   opcode(0x0F,0x40);
12509   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12510   ins_pipe( pipe_cmov_mem );
12511 %}
12512 
// Compare 2 longs and CMOVE ptrs.
12514 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12515   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12516   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12517   ins_cost(200);
12518   format %{ "CMOV$cmp $dst,$src" %}
12519   opcode(0x0F,0x40);
12520   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12521   ins_pipe( pipe_cmov_reg );
12522 %}
12523 
12524 // Compare 2 longs and CMOVE doubles
12525 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12527   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12528   ins_cost(200);
12529   expand %{
12530     fcmovDPR_regS(cmp,flags,dst,src);
12531   %}
12532 %}
12533 
12534 // Compare 2 longs and CMOVE doubles
12535 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12537   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12538   ins_cost(200);
12539   expand %{
12540     fcmovD_regS(cmp,flags,dst,src);
12541   %}
12542 %}
12543 
12544 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12546   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12547   ins_cost(200);
12548   expand %{
12549     fcmovFPR_regS(cmp,flags,dst,src);
12550   %}
12551 %}
12552 
12553 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12555   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12556   ins_cost(200);
12557   expand %{
12558     fcmovF_regS(cmp,flags,dst,src);
12559   %}
12560 %}
12561 
12562 //======
12563 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12564 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12565   match( Set flags (CmpL src zero ));
12566   effect(TEMP tmp);
12567   ins_cost(200);
12568   format %{ "MOV    $tmp,$src.lo\n\t"
12569             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12570   ins_encode( long_cmp_flags0( src, tmp ) );
12571   ins_pipe( ialu_reg_reg_long );
12572 %}
12573 
12574 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12575 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12576   match( Set flags (CmpL src1 src2 ));
12577   ins_cost(200+300);
12578   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12579             "JNE,s  skip\n\t"
12580             "CMP    $src1.hi,$src2.hi\n\t"
12581      "skip:\t" %}
12582   ins_encode( long_cmp_flags1( src1, src2 ) );
12583   ins_pipe( ialu_cr_reg_reg );
12584 %}
12585 
12586 // Long compare reg == zero/reg OR reg != zero/reg
12587 // Just a wrapper for a normal branch, plus the predicate test.
12588 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12589   match(If cmp flags);
12590   effect(USE labl);
12591   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12592   expand %{
12593     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12594   %}
12595 %}
12596 
12597 // Compare 2 longs and CMOVE longs.
12598 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12599   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12600   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12601   ins_cost(400);
12602   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12603             "CMOV$cmp $dst.hi,$src.hi" %}
12604   opcode(0x0F,0x40);
12605   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12606   ins_pipe( pipe_cmov_reg_long );
12607 %}
12608 
12609 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12610   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12611   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12612   ins_cost(500);
12613   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12614             "CMOV$cmp $dst.hi,$src.hi" %}
12615   opcode(0x0F,0x40);
12616   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12617   ins_pipe( pipe_cmov_reg_long );
12618 %}
12619 
12620 // Compare 2 longs and CMOVE ints.
12621 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12622   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12623   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12624   ins_cost(200);
12625   format %{ "CMOV$cmp $dst,$src" %}
12626   opcode(0x0F,0x40);
12627   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12628   ins_pipe( pipe_cmov_reg );
12629 %}
12630 
12631 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12632   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12633   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12634   ins_cost(250);
12635   format %{ "CMOV$cmp $dst,$src" %}
12636   opcode(0x0F,0x40);
12637   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12638   ins_pipe( pipe_cmov_mem );
12639 %}
12640 
// Compare 2 longs and CMOVE ptrs.
12642 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12643   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12644   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12645   ins_cost(200);
12646   format %{ "CMOV$cmp $dst,$src" %}
12647   opcode(0x0F,0x40);
12648   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12649   ins_pipe( pipe_cmov_reg );
12650 %}
12651 
12652 // Compare 2 longs and CMOVE doubles
12653 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12655   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12656   ins_cost(200);
12657   expand %{
12658     fcmovDPR_regS(cmp,flags,dst,src);
12659   %}
12660 %}
12661 
12662 // Compare 2 longs and CMOVE doubles
12663 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12665   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12666   ins_cost(200);
12667   expand %{
12668     fcmovD_regS(cmp,flags,dst,src);
12669   %}
12670 %}
12671 
12672 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12674   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12675   ins_cost(200);
12676   expand %{
12677     fcmovFPR_regS(cmp,flags,dst,src);
12678   %}
12679 %}
12680 
12681 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12683   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12684   ins_cost(200);
12685   expand %{
12686     fcmovF_regS(cmp,flags,dst,src);
12687   %}
12688 %}
12689 
12690 //======
12691 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12692 // Same as cmpL_reg_flags_LEGT except must negate src
12693 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12694   match( Set flags (CmpL src zero ));
12695   effect( TEMP tmp );
12696   ins_cost(300);
12697   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12698             "CMP    $tmp,$src.lo\n\t"
12699             "SBB    $tmp,$src.hi\n\t" %}
12700   ins_encode( long_cmp_flags3(src, tmp) );
12701   ins_pipe( ialu_reg_reg_long );
12702 %}
12703 
12704 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12705 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12706 // requires a commuted test to get the same result.
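//
// A short, purely illustrative sketch of why swapping the operands plus a
// commuted condition yields LE/GT: the flags below are computed for
// (src2 - src1), so the GE and LT conditions on those flags test
// src2 >= src1 and src2 < src1 respectively:
//
//   #include <cstdint>
//   static bool le(int64_t x, int64_t y) { return y >= x; }  // swapped operands, GE test
//   static bool gt(int64_t x, int64_t y) { return y <  x; }  // swapped operands, LT test
//
// The matching branch and cmov rules use cmpOp_commute so that an ideal "le"
// or "gt" Bool is emitted as the GE or LT condition code on these swapped
// flags.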
12707 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12708   match( Set flags (CmpL src1 src2 ));
12709   effect( TEMP tmp );
12710   ins_cost(300);
12711   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12712             "MOV    $tmp,$src2.hi\n\t"
12713             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12714   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12715   ins_pipe( ialu_cr_reg_reg );
12716 %}
12717 
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
12720 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12721   match(If cmp flags);
12722   effect(USE labl);
12723   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12724   ins_cost(300);
12725   expand %{
12726     jmpCon(cmp,flags,labl);    // JGT or JLE...
12727   %}
12728 %}
12729 
12730 // Compare 2 longs and CMOVE longs.
12731 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12732   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12733   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12734   ins_cost(400);
12735   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12736             "CMOV$cmp $dst.hi,$src.hi" %}
12737   opcode(0x0F,0x40);
12738   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12739   ins_pipe( pipe_cmov_reg_long );
12740 %}
12741 
12742 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12743   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12744   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12745   ins_cost(500);
12746   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12747             "CMOV$cmp $dst.hi,$src.hi+4" %}
12748   opcode(0x0F,0x40);
12749   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12750   ins_pipe( pipe_cmov_reg_long );
12751 %}
12752 
12753 // Compare 2 longs and CMOVE ints.
12754 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12755   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12756   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12757   ins_cost(200);
12758   format %{ "CMOV$cmp $dst,$src" %}
12759   opcode(0x0F,0x40);
12760   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12761   ins_pipe( pipe_cmov_reg );
12762 %}
12763 
12764 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12765   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12766   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12767   ins_cost(250);
12768   format %{ "CMOV$cmp $dst,$src" %}
12769   opcode(0x0F,0x40);
12770   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12771   ins_pipe( pipe_cmov_mem );
12772 %}
12773 
12774 // Compare 2 longs and CMOVE ptrs.
12775 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12776   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12777   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12778   ins_cost(200);
12779   format %{ "CMOV$cmp $dst,$src" %}
12780   opcode(0x0F,0x40);
12781   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12782   ins_pipe( pipe_cmov_reg );
12783 %}
12784 
12785 // Compare 2 longs and CMOVE doubles
12786 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12788   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12789   ins_cost(200);
12790   expand %{
12791     fcmovDPR_regS(cmp,flags,dst,src);
12792   %}
12793 %}
12794 
12795 // Compare 2 longs and CMOVE doubles
12796 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12798   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12799   ins_cost(200);
12800   expand %{
12801     fcmovD_regS(cmp,flags,dst,src);
12802   %}
12803 %}
12804 
12805 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12807   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12808   ins_cost(200);
12809   expand %{
12810     fcmovFPR_regS(cmp,flags,dst,src);
12811   %}
12812 %}
12813 
12814 
12815 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12817   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12818   ins_cost(200);
12819   expand %{
12820     fcmovF_regS(cmp,flags,dst,src);
12821   %}
12822 %}
12823 
12824 
12825 // ============================================================================
12826 // Procedure Call/Return Instructions
12827 // Call Java Static Instruction
12828 // Note: If this code changes, the corresponding ret_addr_offset() and
12829 //       compute_padding() functions will have to be adjusted.
12830 instruct CallStaticJavaDirect(method meth) %{
12831   match(CallStaticJava);
12832   effect(USE meth);
12833 
12834   ins_cost(300);
12835   format %{ "CALL,static " %}
12836   opcode(0xE8); /* E8 cd */
12837   ins_encode( pre_call_resets,
12838               Java_Static_Call( meth ),
12839               call_epilog,
12840               post_call_FPU );
12841   ins_pipe( pipe_slow );
12842   ins_alignment(4);
12843 %}
12844 
12845 // Call Java Dynamic Instruction
12846 // Note: If this code changes, the corresponding ret_addr_offset() and
12847 //       compute_padding() functions will have to be adjusted.
12848 instruct CallDynamicJavaDirect(method meth) %{
12849   match(CallDynamicJava);
12850   effect(USE meth);
12851 
12852   ins_cost(300);
12853   format %{ "MOV    EAX,(oop)-1\n\t"
12854             "CALL,dynamic" %}
12855   opcode(0xE8); /* E8 cd */
12856   ins_encode( pre_call_resets,
12857               Java_Dynamic_Call( meth ),
12858               call_epilog,
12859               post_call_FPU );
12860   ins_pipe( pipe_slow );
12861   ins_alignment(4);
12862 %}
12863 
12864 // Call Runtime Instruction
12865 instruct CallRuntimeDirect(method meth) %{
12866   match(CallRuntime );
12867   effect(USE meth);
12868 
12869   ins_cost(300);
12870   format %{ "CALL,runtime " %}
12871   opcode(0xE8); /* E8 cd */
12872   // Use FFREEs to clear entries in float stack
12873   ins_encode( pre_call_resets,
12874               FFree_Float_Stack_All,
12875               Java_To_Runtime( meth ),
12876               post_call_FPU );
12877   ins_pipe( pipe_slow );
12878 %}
12879 
12880 // Call runtime without safepoint
12881 instruct CallLeafDirect(method meth) %{
12882   match(CallLeaf);
12883   effect(USE meth);
12884 
12885   ins_cost(300);
12886   format %{ "CALL_LEAF,runtime " %}
12887   opcode(0xE8); /* E8 cd */
12888   ins_encode( pre_call_resets,
12889               FFree_Float_Stack_All,
12890               Java_To_Runtime( meth ),
12891               Verify_FPU_For_Leaf, post_call_FPU );
12892   ins_pipe( pipe_slow );
12893 %}
12894 
12895 instruct CallLeafNoFPDirect(method meth) %{
12896   match(CallLeafNoFP);
12897   effect(USE meth);
12898 
12899   ins_cost(300);
12900   format %{ "CALL_LEAF_NOFP,runtime " %}
12901   opcode(0xE8); /* E8 cd */
12902   ins_encode(Java_To_Runtime(meth));
12903   ins_pipe( pipe_slow );
12904 %}
12905 
12906 
12907 // Return Instruction
12908 // Remove the return address & jump to it.
12909 instruct Ret() %{
12910   match(Return);
12911   format %{ "RET" %}
12912   opcode(0xC3);
12913   ins_encode(OpcP);
12914   ins_pipe( pipe_jmp );
12915 %}
12916 
12917 // Tail Call; Jump from runtime stub to Java code.
12918 // Also known as an 'interprocedural jump'.
12919 // Target of jump will eventually return to caller.
12920 // TailJump below removes the return address.
12921 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12922   match(TailCall jump_target method_oop );
12923   ins_cost(300);
12924   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12925   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12926   ins_encode( OpcP, RegOpc(jump_target) );
12927   ins_pipe( pipe_jmp );
12928 %}
12929 
12930 
12931 // Tail Jump; remove the return address; jump to target.
12932 // TailCall above leaves the return address around.
12933 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12934   match( TailJump jump_target ex_oop );
12935   ins_cost(300);
12936   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12937             "JMP    $jump_target " %}
12938   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12939   ins_encode( enc_pop_rdx,
12940               OpcP, RegOpc(jump_target) );
12941   ins_pipe( pipe_jmp );
12942 %}
12943 
12944 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
12947 instruct CreateException( eAXRegP ex_oop )
12948 %{
12949   match(Set ex_oop (CreateEx));
12950 
12951   size(0);
12952   // use the following format syntax
12953   format %{ "# exception oop is in EAX; no code emitted" %}
12954   ins_encode();
12955   ins_pipe( empty );
12956 %}
12957 
12958 
12959 // Rethrow exception:
12960 // The exception oop will come in the first argument position.
12961 // Then JUMP (not call) to the rethrow stub code.
12962 instruct RethrowException()
12963 %{
12964   match(Rethrow);
12965 
12966   // use the following format syntax
12967   format %{ "JMP    rethrow_stub" %}
12968   ins_encode(enc_rethrow);
12969   ins_pipe( pipe_jmp );
12970 %}
12971 
12972 // inlined locking and unlocking
12973 
12974 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12975   predicate(Compile::current()->use_rtm());
12976   match(Set cr (FastLock object box));
12977   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12978   ins_cost(300);
12979   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12980   ins_encode %{
12981     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12982                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12983                  _counters, _rtm_counters, _stack_rtm_counters,
12984                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12985                  true, ra_->C->profile_rtm());
12986   %}
12987   ins_pipe(pipe_slow);
12988 %}
12989 
12990 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12991   predicate(!Compile::current()->use_rtm());
12992   match(Set cr (FastLock object box));
12993   effect(TEMP tmp, TEMP scr, USE_KILL box);
12994   ins_cost(300);
12995   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12996   ins_encode %{
12997     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12998                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12999   %}
13000   ins_pipe(pipe_slow);
13001 %}
13002 
13003 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13004   match(Set cr (FastUnlock object box));
13005   effect(TEMP tmp, USE_KILL box);
13006   ins_cost(300);
13007   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13008   ins_encode %{
13009     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13010   %}
13011   ins_pipe(pipe_slow);
13012 %}
13013 
13014 
13015 
13016 // ============================================================================
13017 // Safepoint Instruction
13018 instruct safePoint_poll(eFlagsReg cr) %{
13019   match(SafePoint);
13020   effect(KILL cr);
13021 
13022   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13023   // On SPARC that might be acceptable as we can generate the address with
13024   // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on set 0 of the data cache (D$).  Because of
  // alignment (just like the situation at hand) the lower cache indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last cache line in the polling page.
13029 
13030   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13031   ins_cost(125);
  size(6);
13033   ins_encode( Safepoint_Poll() );
13034   ins_pipe( ialu_reg_mem );
13035 %}
13036 
13037 
13038 // ============================================================================
13039 // This name is KNOWN by the ADLC and cannot be changed.
13040 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13041 // for this guy.
13042 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13043   match(Set dst (ThreadLocal));
13044   effect(DEF dst, KILL cr);
13045 
13046   format %{ "MOV    $dst, Thread::current()" %}
13047   ins_encode %{
13048     Register dstReg = as_Register($dst$$reg);
13049     __ get_thread(dstReg);
13050   %}
13051   ins_pipe( ialu_reg_fat );
13052 %}
13053 
13054 
13055 
13056 //----------PEEPHOLE RULES-----------------------------------------------------
13057 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13059 //
13060 // peepmatch ( root_instr_name [preceding_instruction]* );
13061 //
13062 // peepconstraint %{
13063 // (instruction_number.operand_name relational_op instruction_number.operand_name
13064 //  [, ...] );
13065 // // instruction numbers are zero-based using left to right order in peepmatch
13066 //
13067 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13068 // // provide an instruction_number.operand_name for each operand that appears
13069 // // in the replacement instruction's match rule
13070 //
13071 // ---------VM FLAGS---------------------------------------------------------
13072 //
13073 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13074 //
13075 // Each peephole rule is given an identifying number starting with zero and
13076 // increasing by one in the order seen by the parser.  An individual peephole
13077 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13078 // on the command-line.
13079 //
13080 // ---------CURRENT LIMITATIONS----------------------------------------------
13081 //
13082 // Only match adjacent instructions in same basic block
13083 // Only equality constraints
13084 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13085 // Only one replacement instruction
13086 //
13087 // ---------EXAMPLE----------------------------------------------------------
13088 //
13089 // // pertinent parts of existing instructions in architecture description
13090 // instruct movI(rRegI dst, rRegI src) %{
13091 //   match(Set dst (CopyI src));
13092 // %}
13093 //
13094 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13095 //   match(Set dst (AddI dst src));
13096 //   effect(KILL cr);
13097 // %}
13098 //
13099 // // Change (inc mov) to lea
13100 // peephole %{
//   // increment preceded by register-register move
13102 //   peepmatch ( incI_eReg movI );
13103 //   // require that the destination register of the increment
13104 //   // match the destination register of the move
13105 //   peepconstraint ( 0.dst == 1.dst );
13106 //   // construct a replacement instruction that sets
13107 //   // the destination to ( move's source register + one )
13108 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13109 // %}
13110 //
13111 // Implementation no longer uses movX instructions since
13112 // machine-independent system no longer uses CopyX nodes.
13113 //
13114 // peephole %{
13115 //   peepmatch ( incI_eReg movI );
13116 //   peepconstraint ( 0.dst == 1.dst );
13117 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13118 // %}
13119 //
13120 // peephole %{
13121 //   peepmatch ( decI_eReg movI );
13122 //   peepconstraint ( 0.dst == 1.dst );
13123 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13124 // %}
13125 //
13126 // peephole %{
13127 //   peepmatch ( addI_eReg_imm movI );
13128 //   peepconstraint ( 0.dst == 1.dst );
13129 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13130 // %}
13131 //
13132 // peephole %{
13133 //   peepmatch ( addP_eReg_imm movP );
13134 //   peepconstraint ( 0.dst == 1.dst );
13135 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13136 // %}
13137 
13138 // // Change load of spilled value to only a spill
13139 // instruct storeI(memory mem, rRegI src) %{
13140 //   match(Set mem (StoreI mem src));
13141 // %}
13142 //
13143 // instruct loadI(rRegI dst, memory mem) %{
13144 //   match(Set dst (LoadI mem));
13145 // %}
13146 //
13147 peephole %{
13148   peepmatch ( loadI storeI );
13149   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13150   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13151 %}
13152 
13153 //----------SMARTSPILL RULES---------------------------------------------------
13154 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.