1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
  64 // SOE was then turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst of
  82 // emission of assembly for a MachNode. During the emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering forces the
  86 // instruction encoding to play games with the register encoding to
  87 // correct for this 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
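
// The "+2" works because of how the Long pairs and the reg_def encodings line
// up: the pairs used here are EDX:EAX, EBX:ECX and EDI:EBP (see the register
// section above), and the encodings are EAX=0, ECX=1, EDX=2, EBX=3, EBP=5,
// EDI=7, so the high half's encoding is always the low half's encoding + 2.
// A minimal standalone sketch (illustration only, not HotSpot code):
#if 0
#include <cassert>
static void high_from_low_sketch() {
  const int eax = 0, ecx = 1, edx = 2, ebx = 3, ebp = 5, edi = 7;
  assert(edx == eax + 2);   // pair EDX:EAX, low half is EAX
  assert(ebx == ecx + 2);   // pair EBX:ECX, low half is ECX
  assert(edi == ebp + 2);   // pair EDI:EBP, low half is EBP
}
#endif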
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
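
// These pools supply the bitmasks for the sign tricks mentioned above: AND
// with 0x7FFFFFFF... clears the sign bit (AbsF/AbsD), XOR with 0x80000000...
// flips it (NegF/NegD), and double_quadword() simply rounds the address down
// to a 16-byte boundary so the memory operand satisfies the SSE alignment
// requirement (hence the extra 128 bits of slack in fp_signmask_pool).  A
// standalone sketch of the scalar bit trick (illustration only, not HotSpot code):
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>
static float apply_mask(float f, uint32_t mask, bool flip) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));        // avoid strict-aliasing issues
  bits = flip ? (bits ^ mask) : (bits & mask);
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}
static void signmask_sketch() {
  assert(apply_mask(-2.5f, 0x7FFFFFFFu, false) == 2.5f);   // AbsF
  assert(apply_mask( 2.5f, 0x80000000u, true ) == -2.5f);  // NegF
}
#endif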
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     if(UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
 301 // !!!!! Special hack to get all types of calls to specify the byte offset
 302 //       from the start of the call to the point where the return address
 303 //       will point.
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
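
// The 5/10 byte constants above come straight from the IA-32 encodings: a
// direct CALL rel32 is 1 opcode byte + 4 displacement bytes, and the dynamic
// (inline-cache) call is preceded by a 5-byte MOV reg,imm32 that loads the
// cached oop, so its return address lands 10 bytes into the sequence.  A
// standalone sketch of the arithmetic (hypothetical helper, not HotSpot code):
#if 0
#include <cassert>
static int ret_addr_offset_sketch(int pre_call_reset_bytes, bool is_dynamic) {
  const int mov_imm32_bytes  = 5;  // B8+rd imm32, emitted only for IC calls
  const int call_rel32_bytes = 5;  // E8 rel32
  return pre_call_reset_bytes + (is_dynamic ? mov_imm32_bytes : 0) + call_rel32_bytes;
}
static void ret_addr_offset_examples() {
  assert(ret_addr_offset_sketch(0, false) == 5);   // static call, no resets
  assert(ret_addr_offset_sketch(6, true)  == 16);  // dynamic call after a 6-byte FLDCW
}
#endif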
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
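
// Both compute_padding() methods count the bytes that precede the call's
// 4-byte displacement (the pre-call resets, the 5-byte MOV for dynamic calls,
// and the call opcode byte) and then pad up to the required alignment.  A
// standalone sketch of the rounding (illustration only, assumes the alignment
// is a power of two):
#if 0
#include <cassert>
static int call_padding_sketch(int current_offset, int leading_bytes, int alignment) {
  int disp_offset = current_offset + leading_bytes;        // where the displacement would start
  int rounded = (disp_offset + alignment - 1) & ~(alignment - 1);
  return rounded - disp_offset;                            // NOP bytes to emit before the call
}
static void call_padding_examples() {
  assert(call_padding_sketch(10, 1, 4) == 1);  // static call: opcode byte only
  assert(call_padding_sketch(10, 6, 4) == 0);  // dynamic call: MOV + opcode, already aligned
}
#endif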
 345 
 346 // EMIT_RM()
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
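
// emit_rm() packs the three ModRM fields into one byte: mod in bits 7:6, reg
// in bits 5:3 and r/m in bits 2:0; the same packing also serves for SIB bytes
// (scale/index/base).  A standalone sketch (illustration only, not HotSpot code):
#if 0
#include <cassert>
static unsigned char pack_modrm(int f1, int f2, int f3) {
  return (unsigned char)((f1 << 6) | (f2 << 3) | f3);
}
static void modrm_examples() {
  // MOV EAX,ECX is 8B /r with mod=0x3 (register direct), reg=EAX(0), r/m=ECX(1) -> 8B C1
  assert(pack_modrm(0x3, 0x0, 0x1) == 0xC1);
  // SIB byte for [ESP]: scale=0, index=none(0x4), base=ESP(0x4) -> 0x24
  assert(pack_modrm(0x0, 0x4, 0x4) == 0x24);
}
#endif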
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 414   }
 415 }
 416 
 417    // rRegI ereg, memory mem) %{    // emit_reg_mem
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // There is no index & no scale, use form without SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
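
// In short: mod=00 needs no displacement bytes (except for an EBP base, which
// has no such form), mod=01 adds a signed 8-bit displacement when there is no
// relocation, mod=10 adds a full 32-bit displacement, an ESP base always needs
// a SIB byte, and base == -1 selects the absolute [disp32] form.  A standalone
// sketch of the displacement-size choice for the simple no-index, non-ESP-base
// case (illustration only, not HotSpot code):
#if 0
#include <cassert>
static int regmem_bytes_sketch(int base_enc, int displace, bool has_reloc) {
  const int EBP = 5;
  if (displace == 0 && base_enc != EBP) return 1;                    // mod=00: ModRM only
  if (-128 <= displace && displace <= 127 && !has_reloc) return 2;   // mod=01: ModRM + disp8
  return 5;                                                          // mod=10: ModRM + disp32
}
static void regmem_examples() {
  assert(regmem_bytes_sketch(/*EAX*/ 0, 0,    false) == 1);  // [EAX]
  assert(regmem_bytes_sketch(/*EBP*/ 5, 0,    false) == 2);  // [EBP] needs an explicit disp8 of 0
  assert(regmem_bytes_sketch(/*EAX*/ 0, 1024, false) == 5);  // [EAX+1024]
}
#endif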
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
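
// emit_cmpfp3() turns the flags left by a UCOMISS/UCOMISD into a three-way
// integer: -1 for unordered (NaN) or below, 0 for equal, +1 for above.  A
// standalone sketch of the same semantics (illustration only, not HotSpot code):
#if 0
#include <cassert>
#include <cmath>
static int cmpfp3_sketch(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return -1;  // parity set -> keep the -1
  if (a < b)  return -1;                          // below      -> keep the -1
  if (a == b) return 0;                           // SETNE writes 0
  return 1;                                       // SETNE writes 1
}
static void cmpfp3_examples() {
  assert(cmpfp3_sketch(1.0, 2.0) == -1);
  assert(cmpfp3_sketch(2.0, 2.0) ==  0);
  assert(cmpfp3_sketch(3.0, 2.0) ==  1);
  assert(cmpfp3_sketch(std::nan(""), 2.0) == -1);
}
#endif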
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 584       if (framesize > 0) {
 585         st->print("\n\t");
 586         st->print("ADD    EBP, #%d", framesize);
 587       }
 588     }
 589   }
 590 
 591   if (VerifyStackAtCalls) {
 592     st->print("\n\t");
 593     framesize -= wordSize;
 594     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 595   }
 596 
 597   if( C->in_24_bit_fp_mode() ) {
 598     st->print("\n\t");
 599     st->print("FLDCW  \t# load 24 bit fpu control word");
 600   }
 601   if (UseSSE >= 2 && VerifyFPU) {
 602     st->print("\n\t");
 603     st->print("# verify FPU stack (must be clean on entry)");
 604   }
 605 
 606 #ifdef ASSERT
 607   if (VerifyStackAtCalls) {
 608     st->print("\n\t");
 609     st->print("# stack alignment check");
 610   }
 611 #endif
 612   st->cr();
 613 }
 614 #endif
 615 
 616 
 617 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 618   Compile* C = ra_->C;
 619   MacroAssembler _masm(&cbuf);
 620 
 621   int framesize = C->frame_size_in_bytes();
 622   int bangsize = C->bang_size_in_bytes();
 623 
 624   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 625 
 626   C->set_frame_complete(cbuf.insts_size());
 627 
 628   if (C->has_mach_constant_base_node()) {
 629     // NOTE: We set the table base offset here because users might be
 630     // emitted before MachConstantBaseNode.
 631     Compile::ConstantTable& constant_table = C->constant_table();
 632     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 633   }
 634 }
 635 
 636 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 637   return MachNode::size(ra_); // too many variables; just compute it the hard way
 638 }
 639 
 640 int MachPrologNode::reloc() const {
 641   return 0; // a large enough number
 642 }
 643 
 644 //=============================================================================
 645 #ifndef PRODUCT
 646 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 647   Compile *C = ra_->C;
 648   int framesize = C->frame_size_in_bytes();
 649   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 650   // Remove two words for the return addr and rbp.
 651   framesize -= 2*wordSize;
 652 
 653   if (C->max_vector_size() > 16) {
 654     st->print("VZEROUPPER");
 655     st->cr(); st->print("\t");
 656   }
 657   if (C->in_24_bit_fp_mode()) {
 658     st->print("FLDCW  standard control word");
 659     st->cr(); st->print("\t");
 660   }
 661   if (framesize) {
 662     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 663     st->cr(); st->print("\t");
 664   }
 665   st->print_cr("POPL   EBP"); st->print("\t");
 666   if (do_polling() && C->is_method_compilation()) {
 667     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 668     st->cr(); st->print("\t");
 669   }
 670 }
 671 #endif
 672 
 673 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 674   Compile *C = ra_->C;
 675   MacroAssembler _masm(&cbuf);
 676 
 677   if (C->max_vector_size() > 16) {
 678     // Clear upper bits of YMM registers when current compiled code uses
 679     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 680     _masm.vzeroupper();
 681   }
 682   // If method set FPU control word, restore to standard control word
 683   if (C->in_24_bit_fp_mode()) {
 684     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 685   }
 686 
 687   int framesize = C->frame_size_in_bytes();
 688   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 689   // Remove two words for the return addr and rbp.
 690   framesize -= 2*wordSize;
 691 
 692   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 693 
 694   if (framesize >= 128) {
 695     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 696     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 697     emit_d32(cbuf, framesize);
 698   } else if (framesize) {
 699     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 700     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 701     emit_d8(cbuf, framesize);
 702   }
 703 
 704   emit_opcode(cbuf, 0x58 | EBP_enc);
 705 
 706   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 707     __ reserved_stack_check();
 708   }
 709 
 710   if (do_polling() && C->is_method_compilation()) {
 711     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 712     emit_opcode(cbuf,0x85);
 713     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 714     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 715   }
 716 }
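
// Ignoring the optional VZEROUPPER/FLDCW prefixes and the reserved-stack
// check, the frame teardown above costs: 6 bytes for ADD ESP,imm32 (81 C4 id)
// or 3 bytes for ADD ESP,imm8 (83 C4 ib), 1 byte for POP EBP (58+r), and 6
// bytes for the TEST EAX,[polling page] return poll.  A standalone sketch of
// that bookkeeping (illustration only, not HotSpot code):
#if 0
#include <cassert>
static int epilog_bytes_sketch(int framesize, bool emit_poll) {
  int size = 0;
  if (framesize >= 128)    size += 6;  // 81 C4 imm32 : ADD ESP, imm32
  else if (framesize != 0) size += 3;  // 83 C4 imm8  : ADD ESP, imm8
  size += 1;                           // 58+r        : POP EBP
  if (emit_poll) size += 6;            // 85 05 disp32: TEST EAX, [polling page]
  return size;
}
static void epilog_examples() {
  assert(epilog_bytes_sketch(24,  true)  == 10);
  assert(epilog_bytes_sketch(512, false) == 7);
}
#endif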
 717 
 718 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 719   Compile *C = ra_->C;
 720   // If method set FPU control word, restore to standard control word
 721   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 722   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 723   if (do_polling() && C->is_method_compilation()) size += 6;
 724 
 725   int framesize = C->frame_size_in_bytes();
 726   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 727   // Remove two words for the return addr and rbp.
 728   framesize -= 2*wordSize;
 729 
 730   size++; // popl rbp
 731 
 732   if (framesize >= 128) {
 733     size += 6;
 734   } else {
 735     size += framesize ? 3 : 0;
 736   }
 737   size += 64; // added to support ReservedStackAccess
 738   return size;
 739 }
 740 
 741 int MachEpilogNode::reloc() const {
 742   return 0; // a large enough number
 743 }
 744 
 745 const Pipeline * MachEpilogNode::pipeline() const {
 746   return MachNode::pipeline_class();
 747 }
 748 
 749 int MachEpilogNode::safepoint_offset() const { return 0; }
 750 
 751 //=============================================================================
 752 
 753 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 754 static enum RC rc_class( OptoReg::Name reg ) {
 755 
 756   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 757   if (OptoReg::is_stack(reg)) return rc_stack;
 758 
 759   VMReg r = OptoReg::as_VMReg(reg);
 760   if (r->is_Register()) return rc_int;
 761   if (r->is_FloatRegister()) {
 762     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 763     return rc_float;
 764   }
 765   assert(r->is_XMMRegister(), "must be");
 766   return rc_xmm;
 767 }
 768 
 769 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 770                         int opcode, const char *op_str, int size, outputStream* st ) {
 771   if( cbuf ) {
 772     emit_opcode  (*cbuf, opcode );
 773     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 774 #ifndef PRODUCT
 775   } else if( !do_size ) {
 776     if( size != 0 ) st->print("\n\t");
 777     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 778       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 779       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 780     } else { // FLD, FST, PUSH, POP
 781       st->print("%s [ESP + #%d]",op_str,offset);
 782     }
 783 #endif
 784   }
 785   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 786   return size+3+offset_size;
 787 }
 788 
 789 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 790 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 791                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 792   int in_size_in_bits = Assembler::EVEX_32bit;
 793   int evex_encoding = 0;
 794   if (reg_lo+1 == reg_hi) {
 795     in_size_in_bits = Assembler::EVEX_64bit;
 796     evex_encoding = Assembler::VEX_W;
 797   }
 798   if (cbuf) {
 799     MacroAssembler _masm(cbuf);
 800     if (reg_lo+1 == reg_hi) { // double move?
 801       if (is_load) {
 802         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 803       } else {
 804         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 805       }
 806     } else {
 807       if (is_load) {
 808         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 809       } else {
 810         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 811       }
 812     }
 813 #ifndef PRODUCT
 814   } else if (!do_size) {
 815     if (size != 0) st->print("\n\t");
 816     if (reg_lo+1 == reg_hi) { // double move?
 817       if (is_load) st->print("%s %s,[ESP + #%d]",
 818                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 819                               Matcher::regName[reg_lo], offset);
 820       else         st->print("MOVSD  [ESP + #%d],%s",
 821                               offset, Matcher::regName[reg_lo]);
 822     } else {
 823       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 824                               Matcher::regName[reg_lo], offset);
 825       else         st->print("MOVSS  [ESP + #%d],%s",
 826                               offset, Matcher::regName[reg_lo]);
 827     }
 828 #endif
 829   }
 830   bool is_single_byte = false;
 831   if ((UseAVX > 2) && (offset != 0)) {
 832     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 833   }
 834   int offset_size = 0;
 835   if (UseAVX > 2 ) {
 836     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 837   } else {
 838     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 839   }
 840   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 841   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 842   return size+5+offset_size;
 843 }
 844 
 845 
 846 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 847                             int src_hi, int dst_hi, int size, outputStream* st ) {
 848   if (cbuf) {
 849     MacroAssembler _masm(cbuf);
 850     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 851       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 852                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 853     } else {
 854       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     }
 857 #ifndef PRODUCT
 858   } else if (!do_size) {
 859     if (size != 0) st->print("\n\t");
 860     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 861       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 862         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     } else {
 867       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 868         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 869       } else {
 870         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 871       }
 872     }
 873 #endif
 874   }
 875   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 876   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 877   int sz = (UseAVX > 2) ? 6 : 4;
 878   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 879       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 880   return size + sz;
 881 }
 882 
 883 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 884                             int src_hi, int dst_hi, int size, outputStream* st ) {
 885   // 32-bit
 886   if (cbuf) {
 887     MacroAssembler _masm(cbuf);
 888     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 889              as_Register(Matcher::_regEncode[src_lo]));
 890 #ifndef PRODUCT
 891   } else if (!do_size) {
 892     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 893 #endif
 894   }
 895   return (UseAVX> 2) ? 6 : 4;
 896 }
 897 
 898 
 899 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 900                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 901   // 32-bit
 902   if (cbuf) {
 903     MacroAssembler _masm(cbuf);
 904     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 905              as_XMMRegister(Matcher::_regEncode[src_lo]));
 906 #ifndef PRODUCT
 907   } else if (!do_size) {
 908     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 909 #endif
 910   }
 911   return (UseAVX> 2) ? 6 : 4;
 912 }
 913 
 914 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 915   if( cbuf ) {
 916     emit_opcode(*cbuf, 0x8B );
 917     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 918 #ifndef PRODUCT
 919   } else if( !do_size ) {
 920     if( size != 0 ) st->print("\n\t");
 921     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 922 #endif
 923   }
 924   return size+2;
 925 }
 926 
 927 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 928                                  int offset, int size, outputStream* st ) {
 929   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 930     if( cbuf ) {
 931       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 932       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 933 #ifndef PRODUCT
 934     } else if( !do_size ) {
 935       if( size != 0 ) st->print("\n\t");
 936       st->print("FLD    %s",Matcher::regName[src_lo]);
 937 #endif
 938     }
 939     size += 2;
 940   }
 941 
 942   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 943   const char *op_str;
 944   int op;
 945   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 946     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 947     op = 0xDD;
 948   } else {                   // 32-bit store
 949     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 950     op = 0xD9;
 951     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 952   }
 953 
 954   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 955 }
 956 
 957 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 958 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 959                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 960 
 961 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 962                             int stack_offset, int reg, uint ireg, outputStream* st);
 963 
 964 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 965                                      int dst_offset, uint ireg, outputStream* st) {
 966   int calc_size = 0;
 967   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 968   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 969   switch (ireg) {
 970   case Op_VecS:
 971     calc_size = 3+src_offset_size + 3+dst_offset_size;
 972     break;
 973   case Op_VecD:
 974     calc_size = 3+src_offset_size + 3+dst_offset_size;
 975     src_offset += 4;
 976     dst_offset += 4;
 977     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 978     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 979     calc_size += 3+src_offset_size + 3+dst_offset_size;
 980     break;
 981   case Op_VecX:
 982   case Op_VecY:
 983   case Op_VecZ:
 984     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 985     break;
 986   default:
 987     ShouldNotReachHere();
 988   }
 989   if (cbuf) {
 990     MacroAssembler _masm(cbuf);
 991     int offset = __ offset();
 992     switch (ireg) {
 993     case Op_VecS:
 994       __ pushl(Address(rsp, src_offset));
 995       __ popl (Address(rsp, dst_offset));
 996       break;
 997     case Op_VecD:
 998       __ pushl(Address(rsp, src_offset));
 999       __ popl (Address(rsp, dst_offset));
1000       __ pushl(Address(rsp, src_offset+4));
1001       __ popl (Address(rsp, dst_offset+4));
1002       break;
1003     case Op_VecX:
1004       __ movdqu(Address(rsp, -16), xmm0);
1005       __ movdqu(xmm0, Address(rsp, src_offset));
1006       __ movdqu(Address(rsp, dst_offset), xmm0);
1007       __ movdqu(xmm0, Address(rsp, -16));
1008       break;
1009     case Op_VecY:
1010       __ vmovdqu(Address(rsp, -32), xmm0);
1011       __ vmovdqu(xmm0, Address(rsp, src_offset));
1012       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1013       __ vmovdqu(xmm0, Address(rsp, -32));
           break;
1014     case Op_VecZ:
1015       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1016       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popl    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
           break;
1054     case Op_VecZ:
1055       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1056                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1057                 "vmovdqu [rsp + #%d], xmm0\n\t"
1058                 "vmovdqu xmm0, [rsp - #64]",
1059                 src_offset, dst_offset);
1060       break;
1061     default:
1062       ShouldNotReachHere();
1063     }
1064 #endif
1065   }
1066   return calc_size;
1067 }
1068 
1069 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1070   // Get registers to move
1071   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1072   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1073   OptoReg::Name dst_second = ra_->get_reg_second(this );
1074   OptoReg::Name dst_first = ra_->get_reg_first(this );
1075 
1076   enum RC src_second_rc = rc_class(src_second);
1077   enum RC src_first_rc = rc_class(src_first);
1078   enum RC dst_second_rc = rc_class(dst_second);
1079   enum RC dst_first_rc = rc_class(dst_first);
1080 
1081   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1082 
1083   // Generate spill code!
1084   int size = 0;
1085 
1086   if( src_first == dst_first && src_second == dst_second )
1087     return size;            // Self copy, no move
1088 
1089   if (bottom_type()->isa_vect() != NULL) {
1090     uint ireg = ideal_reg();
1091     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1092     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1093     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1094     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1095       // mem -> mem
1096       int src_offset = ra_->reg2offset(src_first);
1097       int dst_offset = ra_->reg2offset(dst_first);
1098       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1099     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1100       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1101     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1102       int stack_offset = ra_->reg2offset(dst_first);
1103       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1104     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1105       int stack_offset = ra_->reg2offset(src_first);
1106       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1107     } else {
1108       ShouldNotReachHere();
1109     }
1110   }
1111 
1112   // --------------------------------------
1113   // Check for mem-mem move.  push/pop to move.
1114   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1115     if( src_second == dst_first ) { // overlapping stack copy ranges
1116       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1117       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1118       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1119       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1120     }
1121     // move low bits
1122     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1123     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1124     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1125       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1126       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1127     }
1128     return size;
1129   }
1130 
1131   // --------------------------------------
1132   // Check for integer reg-reg copy
1133   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1134     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1135 
1136   // Check for integer store
1137   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1138     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1139 
1140   // Check for integer load
1141   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1142     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1143 
1144   // Check for integer reg-xmm reg copy
1145   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1146     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1147             "no 64 bit integer-float reg moves" );
1148     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1149   }
1150   // --------------------------------------
1151   // Check for float reg-reg copy
1152   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1153     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1154             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1155     if( cbuf ) {
1156 
1157       // Note the mucking with the register encode to compensate for the 0/1
1158       // indexing issue mentioned in a comment in the reg_def sections
1159       // for FPR registers many lines above here.
1160 
1161       if( src_first != FPR1L_num ) {
1162         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1163         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1164         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1165         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1166      } else {
1167         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1168         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1169      }
1170 #ifndef PRODUCT
1171     } else if( !do_size ) {
1172       if( size != 0 ) st->print("\n\t");
1173       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1174       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1175 #endif
1176     }
1177     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1178   }
1179 
1180   // Check for float store
1181   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1182     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1183   }
1184 
1185   // Check for float load
1186   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1187     int offset = ra_->reg2offset(src_first);
1188     const char *op_str;
1189     int op;
1190     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1191       op_str = "FLD_D";
1192       op = 0xDD;
1193     } else {                   // 32-bit load
1194       op_str = "FLD_S";
1195       op = 0xD9;
1196       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1197     }
1198     if( cbuf ) {
1199       emit_opcode  (*cbuf, op );
1200       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1201       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1202       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1203 #ifndef PRODUCT
1204     } else if( !do_size ) {
1205       if( size != 0 ) st->print("\n\t");
1206       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1207 #endif
1208     }
1209     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1210     return size + 3+offset_size+2;
1211   }
1212 
1213   // Check for xmm reg-reg copy
1214   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1215     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1216             (src_first+1 == src_second && dst_first+1 == dst_second),
1217             "no non-adjacent float-moves" );
1218     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1219   }
1220 
1221   // Check for xmm reg-integer reg copy
1222   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1223     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1224             "no 64 bit float-integer reg moves" );
1225     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1226   }
1227 
1228   // Check for xmm store
1229   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1230     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1231   }
1232 
1233   // Check for float xmm load
1234   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1235     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1236   }
1237 
1238   // Copy from float reg to xmm reg
1239   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1240     // copy to the top of stack from floating point reg
1241     // and use LEA to preserve flags
1242     if( cbuf ) {
1243       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1244       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1245       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1246       emit_d8(*cbuf,0xF8);
1247 #ifndef PRODUCT
1248     } else if( !do_size ) {
1249       if( size != 0 ) st->print("\n\t");
1250       st->print("LEA    ESP,[ESP-8]");
1251 #endif
1252     }
1253     size += 4;
1254 
1255     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1256 
1257     // Copy from the temp memory to the xmm reg.
1258     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1259 
1260     if( cbuf ) {
1261       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1262       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1263       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1264       emit_d8(*cbuf,0x08);
1265 #ifndef PRODUCT
1266     } else if( !do_size ) {
1267       if( size != 0 ) st->print("\n\t");
1268       st->print("LEA    ESP,[ESP+8]");
1269 #endif
1270     }
1271     size += 4;
1272     return size;
1273   }
1274 
1275   assert( size > 0, "missed a case" );
1276 
1277   // --------------------------------------------------------------------
1278   // Check for second bits still needing moving.
1279   if( src_second == dst_second )
1280     return size;               // Self copy; no move
1281   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1282 
1283   // Check for second word int-int move
1284   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1285     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1286 
1287   // Check for second word integer store
1288   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1289     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1290 
1291   // Check for second word integer load
1292   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1293     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1294 
1295 
1296   Unimplemented();
1297   return 0; // Mute compiler
1298 }
1299 
1300 #ifndef PRODUCT
1301 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1302   implementation( NULL, ra_, false, st );
1303 }
1304 #endif
1305 
1306 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1307   implementation( &cbuf, ra_, false, NULL );
1308 }
1309 
1310 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1311   return implementation( NULL, ra_, true, NULL );
1312 }
1313 
1314 
1315 //=============================================================================
1316 #ifndef PRODUCT
1317 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1318   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1319   int reg = ra_->get_reg_first(this);
1320   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1321 }
1322 #endif
1323 
1324 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1325   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1326   int reg = ra_->get_encode(this);
1327   if( offset >= 128 ) {
1328     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1329     emit_rm(cbuf, 0x2, reg, 0x04);
1330     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1331     emit_d32(cbuf, offset);
1332   }
1333   else {
1334     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1335     emit_rm(cbuf, 0x1, reg, 0x04);
1336     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1337     emit_d8(cbuf, offset);
1338   }
1339 }
1340 
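     // A note on the sizes below: the long form of the LEA above is opcode 0x8D
     // plus ModRM, SIB and a 32-bit displacement (7 bytes), while the short form
     // uses an 8-bit displacement (4 bytes), so size() must stay in step with
     // the "offset >= 128" test in emit().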
1341 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1343   if( offset >= 128 ) {
1344     return 7;
1345   }
1346   else {
1347     return 4;
1348   }
1349 }
1350 
1351 //=============================================================================
1352 #ifndef PRODUCT
1353 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1354   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1355   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1356   st->print_cr("\tNOP");
1357   st->print_cr("\tNOP");
1358   if( !OptoBreakpoint )
1359     st->print_cr("\tNOP");
1360 }
1361 #endif
1362 
1363 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1364   MacroAssembler masm(&cbuf);
1365 #ifdef ASSERT
1366   uint insts_size = cbuf.insts_size();
1367 #endif
1368   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1369   masm.jump_cc(Assembler::notEqual,
1370                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1371   /* WARNING: these NOPs are critical so that the verified entry point is
1372      properly aligned for patching by NativeJump::patch_verified_entry() */
1373   int nops_cnt = 2;
1374   if( !OptoBreakpoint ) // Leave space for int3
1375      nops_cnt += 1;
1376   masm.nop(nops_cnt);
1377 
1378   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1379 }
1380 
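     // The fixed sizes below follow from the code emitted above: a 3-byte
     // CMP EAX,[ECX+disp8], a 6-byte JNE rel32 to the inline-cache miss stub,
     // plus the 2 or 3 NOPs.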
1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1382   return OptoBreakpoint ? 11 : 12;
1383 }
1384 
1385 
1386 //=============================================================================
1387 
1388 int Matcher::regnum_to_fpu_offset(int regnum) {
1389   return regnum - 32; // The FP registers are in the second chunk
1390 }
1391 
1392 // This is UltraSparc specific; true just means we have a fast l2f conversion.
1393 const bool Matcher::convL2FSupported(void) {
1394   return true;
1395 }
1396 
1397 // Is this branch offset short enough that a short branch can be used?
1398 //
1399 // NOTE: If the platform does not provide any short branch variants, then
1400 //       this method should return false for offset 0.
1401 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1402   // The passed offset is relative to address of the branch.
1403   // On x86 a branch displacement is calculated relative to the address
1404   // of the next instruction.
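       // For example, a 2-byte short branch that targets its own address arrives
       // here with offset 0; after the adjustment below the displacement is -2,
       // which easily fits the signed-byte range tested underneath.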
1405   offset -= br_size;
1406 
1407   // The short version of jmpConUCF2 contains multiple branches,
1408   // making the reach slightly smaller.
1409   if (rule == jmpConUCF2_rule)
1410     return (-126 <= offset && offset <= 125);
1411   return (-128 <= offset && offset <= 127);
1412 }
1413 
1414 const bool Matcher::isSimpleConstant64(jlong value) {
1415   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1416   return false;
1417 }
1418 
1419 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1420 const bool Matcher::init_array_count_is_in_bytes = false;
1421 
1422 // Threshold size for cleararray.
1423 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1424 
1425 // Needs 2 CMOVs for longs.
1426 const int Matcher::long_cmove_cost() { return 1; }
1427 
1428 // No CMOVF/CMOVD with SSE/SSE2
1429 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1430 
1431 // Does the CPU require late expand (see block.cpp for description of late expand)?
1432 const bool Matcher::require_postalloc_expand = false;
1433 
1434 // Should the Matcher clone shifts on addressing modes, expecting them to
1435 // be subsumed into complex addressing expressions or compute them into
1436 // registers?  True for Intel but false for most RISCs.
1437 const bool Matcher::clone_shift_expressions = true;
1438 
1439 // Do we need to mask the count passed to shift instructions or does
1440 // the cpu only look at the lower 5/6 bits anyway?
1441 const bool Matcher::need_masked_shift_count = false;
1442 
1443 bool Matcher::narrow_oop_use_complex_address() {
1444   ShouldNotCallThis();
1445   return true;
1446 }
1447 
1448 bool Matcher::narrow_klass_use_complex_address() {
1449   ShouldNotCallThis();
1450   return true;
1451 }
1452 
1453 
1454 // Is it better to copy float constants, or load them directly from memory?
1455 // Intel can load a float constant from a direct address, requiring no
1456 // extra registers.  Most RISCs will have to materialize an address into a
1457 // register first, so they would do better to copy the constant from stack.
1458 const bool Matcher::rematerialize_float_constants = true;
1459 
1460 // If CPU can load and store mis-aligned doubles directly then no fixup is
1461 // needed.  Else we split the double into 2 integer pieces and move it
1462 // piece-by-piece.  Only happens when passing doubles into C code as the
1463 // Java calling convention forces doubles to be aligned.
1464 const bool Matcher::misaligned_doubles_ok = true;
1465 
1466 
1467 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1468   // Get the memory operand from the node
1469   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1470   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1471   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1472   uint opcnt     = 1;                 // First operand
1473   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1474   while( idx >= skipped+num_edges ) {
1475     skipped += num_edges;
1476     opcnt++;                          // Bump operand count
1477     assert( opcnt < numopnds, "Accessing non-existent operand" );
1478     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1479   }
1480 
1481   MachOper *memory = node->_opnds[opcnt];
1482   MachOper *new_memory = NULL;
1483   switch (memory->opcode()) {
1484   case DIRECT:
1485   case INDOFFSET32X:
1486     // No transformation necessary.
1487     return;
1488   case INDIRECT:
1489     new_memory = new indirect_win95_safeOper( );
1490     break;
1491   case INDOFFSET8:
1492     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1493     break;
1494   case INDOFFSET32:
1495     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1496     break;
1497   case INDINDEXOFFSET:
1498     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1499     break;
1500   case INDINDEXSCALE:
1501     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1502     break;
1503   case INDINDEXSCALEOFFSET:
1504     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1505     break;
1506   case LOAD_LONG_INDIRECT:
1507   case LOAD_LONG_INDOFFSET32:
1508     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1509     return;
1510   default:
1511     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1512     return;
1513   }
1514   node->_opnds[opcnt] = new_memory;
1515 }
1516 
1517 // Advertise here if the CPU requires explicit rounding operations
1518 // to implement the UseStrictFP mode.
1519 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1520 
1521 // Are floats converted to doubles when stored to the stack during deoptimization?
1522 // On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1523 bool Matcher::float_in_double() { return (UseSSE == 0); }
1524 
1525 // Do ints take an entire long register or just half?
1526 const bool Matcher::int_in_long = false;
1527 
1528 // Return whether or not this register is ever used as an argument.  This
1529 // function is used on startup to build the trampoline stubs in generateOptoStub.
1530 // Registers not mentioned will be killed by the VM call in the trampoline, and
1531 // arguments in those registers will not be available to the callee.
1532 bool Matcher::can_be_java_arg( int reg ) {
1533   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1534   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1535   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1536   return false;
1537 }
1538 
1539 bool Matcher::is_spillable_arg( int reg ) {
1540   return can_be_java_arg(reg);
1541 }
1542 
1543 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1544   // Use the hardware integer DIV instruction when
1545   // it is faster than code which uses a multiply,
1546   // and only when the constant divisor fits into 32 bits
1547   // (min_jint is excluded because negating it does not
1548   // yield a correct positive 32-bit value).
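       // For example, a constant divisor of 7 passes both tests, so on CPUs with
       // fast IDIV the hardware divide is emitted; a divisor such as 0x100000000L
       // fails the (int)divisor check and the magic-number multiply is used
       // instead.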
1549   return VM_Version::has_fast_idiv() &&
1550          (divisor == (int)divisor && divisor != min_jint);
1551 }
1552 
1553 // Register for DIVI projection of divmodI
1554 RegMask Matcher::divI_proj_mask() {
1555   return EAX_REG_mask();
1556 }
1557 
1558 // Register for MODI projection of divmodI
1559 RegMask Matcher::modI_proj_mask() {
1560   return EDX_REG_mask();
1561 }
1562 
1563 // Register for DIVL projection of divmodL
1564 RegMask Matcher::divL_proj_mask() {
1565   ShouldNotReachHere();
1566   return RegMask();
1567 }
1568 
1569 // Register for MODL projection of divmodL
1570 RegMask Matcher::modL_proj_mask() {
1571   ShouldNotReachHere();
1572   return RegMask();
1573 }
1574 
1575 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1576   return NO_REG_mask();
1577 }
1578 
1579 // Returns true if the high 32 bits of the value are known to be zero.
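     // For example, (AndL x, ConL 0x00000000FFFFFFFF) and ConL 0x7F both qualify,
     // while ConL 0x100000000 does not.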
1580 bool is_operand_hi32_zero(Node* n) {
1581   int opc = n->Opcode();
1582   if (opc == Op_AndL) {
1583     Node* o2 = n->in(2);
1584     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1585       return true;
1586     }
1587   }
1588   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1589     return true;
1590   }
1591   return false;
1592 }
1593 
1594 %}
1595 
1596 //----------ENCODING BLOCK-----------------------------------------------------
1597 // This block specifies the encoding classes used by the compiler to output
1598 // byte streams.  Encoding classes generate functions which are called by
1599 // Machine Instruction Nodes in order to generate the bit encoding of the
1600 // instruction.  Operands specify their base encoding interface with the
1601 // interface keyword.  Four interfaces are currently supported:
1602 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1603 // operand to generate a function which returns its register number when
1604 // queried.   CONST_INTER causes an operand to generate a function which
1605 // returns the value of the constant when queried.  MEMORY_INTER causes an
1606 // operand to generate four functions which return the Base Register, the
1607 // Index Register, the Scale Value, and the Offset Value of the operand when
1608 // queried.  COND_INTER causes an operand to generate six functions which
1609 // return the encoding code (i.e. the encoding bits for the instruction)
1610 // associated with each basic boolean condition for a conditional instruction.
1611 // Instructions specify two basic values for encoding.  They use the
1612 // ins_encode keyword to specify their encoding class (which must be one of
1613 // the class names specified in the encoding block), and they use the
1614 // opcode keyword to specify, in order, their primary, secondary, and
1615 // tertiary opcode.  Only the opcode sections which a particular instruction
1616 // needs for encoding need to be specified.
1617 encode %{
1618   // Build emit functions for each basic byte or larger field in the intel
1619   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1620   // code in the enc_class source block.  Emit functions will live in the
1621   // main source block for now.  In future, we can generalize this by
1622   // adding a syntax that specifies the sizes of fields in an order,
1623   // so that the adlc can build the emit functions automagically
1624 
1625   // Emit primary opcode
1626   enc_class OpcP %{
1627     emit_opcode(cbuf, $primary);
1628   %}
1629 
1630   // Emit secondary opcode
1631   enc_class OpcS %{
1632     emit_opcode(cbuf, $secondary);
1633   %}
1634 
1635   // Emit opcode directly
1636   enc_class Opcode(immI d8) %{
1637     emit_opcode(cbuf, $d8$$constant);
1638   %}
1639 
1640   enc_class SizePrefix %{
1641     emit_opcode(cbuf,0x66);
1642   %}
1643 
1644   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1645     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1646   %}
1647 
1648   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1649     emit_opcode(cbuf,$opcode$$constant);
1650     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1651   %}
1652 
1653   enc_class mov_r32_imm0( rRegI dst ) %{
1654     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1655     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1656   %}
1657 
1658   enc_class cdq_enc %{
1659     // Full implementation of Java idiv and irem; checks for
1660     // special case as described in JVM spec., p.243 & p.271.
1661     //
1662     //         normal case                           special case
1663     //
1664     // input : rax: dividend                          min_int
1665     //         reg: divisor                           -1
1666     //
1667     // output: rax: quotient  (= rax idiv reg)        min_int
1668     //         rdx: remainder (= rax irem reg)        0
1669     //
1670     //  Code sequence:
1671     //
1672     //  81 F8 00 00 00 80    cmp         rax,80000000h
1673     //  0F 85 0B 00 00 00    jne         normal_case
1674     //  33 D2                xor         rdx,edx
1675     //  83 F9 FF             cmp         rcx,0FFh
1676     //  0F 84 03 00 00 00    je          done
1677     //                  normal_case:
1678     //  99                   cdq
1679     //  F7 F9                idiv        rax,ecx
1680     //                  done:
1681     //
1682     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1683     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1685     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1686     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1687     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1688     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1689     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1690     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1691     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1692     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1693     // normal_case:
1694     emit_opcode(cbuf,0x99);                                         // cdq
1695     // idiv (note: must be emitted by the user of this rule)
1696     // normal:
1697   %}
1698 
1699   // Dense encoding for older common ops
1700   enc_class Opc_plus(immI opcode, rRegI reg) %{
1701     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1702   %}
1703 
1704 
1705   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1706   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1707     // Check for 8-bit immediate, and set sign extend bit in opcode
1708     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1709       emit_opcode(cbuf, $primary | 0x02);
1710     }
1711     else {                          // If 32-bit immediate
1712       emit_opcode(cbuf, $primary);
1713     }
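         // Note: for the immediate-group opcodes used here (e.g. a $primary of
         // 0x81, op r/m32,imm32), OR-ing in 0x02 gives 0x83, the 8-bit
         // sign-extended form, which is what the branch above selects for
         // small constants.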
1714   %}
1715 
1716   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1717     // Emit primary opcode and set sign-extend bit
1718     // Check for 8-bit immediate, and set sign extend bit in opcode
1719     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1720       emit_opcode(cbuf, $primary | 0x02);    }
1721     else {                          // If 32-bit immediate
1722       emit_opcode(cbuf, $primary);
1723     }
1724     // Emit r/m byte with secondary opcode, after primary opcode.
1725     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1726   %}
1727 
1728   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1729     // Check for 8-bit immediate, and set sign extend bit in opcode
1730     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1731       $$$emit8$imm$$constant;
1732     }
1733     else {                          // If 32-bit immediate
1734       // Output immediate
1735       $$$emit32$imm$$constant;
1736     }
1737   %}
1738 
1739   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1740     // Emit primary opcode and set sign-extend bit
1741     // Check for 8-bit immediate, and set sign extend bit in opcode
1742     int con = (int)$imm$$constant; // Throw away top bits
1743     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1744     // Emit r/m byte with secondary opcode, after primary opcode.
1745     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1746     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1747     else                               emit_d32(cbuf,con);
1748   %}
1749 
1750   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1751     // Emit primary opcode and set sign-extend bit
1752     // Check for 8-bit immediate, and set sign extend bit in opcode
1753     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1754     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1755     // Emit r/m byte with tertiary opcode, after primary opcode.
1756     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1757     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1758     else                               emit_d32(cbuf,con);
1759   %}
1760 
1761   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1762     emit_cc(cbuf, $secondary, $dst$$reg );
1763   %}
1764 
1765   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1766     int destlo = $dst$$reg;
1767     int desthi = HIGH_FROM_LOW(destlo);
1768     // bswap lo
1769     emit_opcode(cbuf, 0x0F);
1770     emit_cc(cbuf, 0xC8, destlo);
1771     // bswap hi
1772     emit_opcode(cbuf, 0x0F);
1773     emit_cc(cbuf, 0xC8, desthi);
1774     // xchg lo and hi
1775     emit_opcode(cbuf, 0x87);
1776     emit_rm(cbuf, 0x3, destlo, desthi);
1777   %}
1778 
1779   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1780     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1781   %}
1782 
1783   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1784     $$$emit8$primary;
1785     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1786   %}
1787 
1788   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1789     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1790     emit_d8(cbuf, op >> 8 );
1791     emit_d8(cbuf, op & 255);
1792   %}
1793 
1794   // emulate a CMOV with a conditional branch around a MOV
1795   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1796     // Invert sense of branch from sense of CMOV
1797     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1798     emit_d8( cbuf, $brOffs$$constant );
1799   %}
1800 
1801   enc_class enc_PartialSubtypeCheck( ) %{
1802     Register Redi = as_Register(EDI_enc); // result register
1803     Register Reax = as_Register(EAX_enc); // super class
1804     Register Recx = as_Register(ECX_enc); // killed
1805     Register Resi = as_Register(ESI_enc); // sub class
1806     Label miss;
1807 
1808     MacroAssembler _masm(&cbuf);
1809     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1810                                      NULL, &miss,
1811                                      /*set_cond_codes:*/ true);
1812     if ($primary) {
1813       __ xorptr(Redi, Redi);
1814     }
1815     __ bind(miss);
1816   %}
1817 
1818   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1819     MacroAssembler masm(&cbuf);
1820     int start = masm.offset();
1821     if (UseSSE >= 2) {
1822       if (VerifyFPU) {
1823         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1824       }
1825     } else {
1826       // External c_calling_convention expects the FPU stack to be 'clean'.
1827       // Compiled code leaves it dirty.  Do cleanup now.
1828       masm.empty_FPU_stack();
1829     }
1830     if (sizeof_FFree_Float_Stack_All == -1) {
1831       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1832     } else {
1833       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1834     }
1835   %}
1836 
1837   enc_class Verify_FPU_For_Leaf %{
1838     if( VerifyFPU ) {
1839       MacroAssembler masm(&cbuf);
1840       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1841     }
1842   %}
1843 
1844   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1845     // This is the instruction starting address for relocation info.
1846     cbuf.set_insts_mark();
1847     $$$emit8$primary;
1848     // CALL directly to the runtime
1849     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1850                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1851 
1852     if (UseSSE >= 2) {
1853       MacroAssembler _masm(&cbuf);
1854       BasicType rt = tf()->return_type();
1855 
1856       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1857         // A C runtime call where the return value is unused.  In SSE2+
1858         // mode the result needs to be removed from the FPU stack.  It's
1859         // likely that this function call could be removed by the
1860         // optimizer if the C function is a pure function.
1861         __ ffree(0);
1862       } else if (rt == T_FLOAT) {
1863         __ lea(rsp, Address(rsp, -4));
1864         __ fstp_s(Address(rsp, 0));
1865         __ movflt(xmm0, Address(rsp, 0));
1866         __ lea(rsp, Address(rsp,  4));
1867       } else if (rt == T_DOUBLE) {
1868         __ lea(rsp, Address(rsp, -8));
1869         __ fstp_d(Address(rsp, 0));
1870         __ movdbl(xmm0, Address(rsp, 0));
1871         __ lea(rsp, Address(rsp,  8));
1872       }
1873     }
1874   %}
1875 
1876 
1877   enc_class pre_call_resets %{
1878     // If method sets FPU control word restore it here
1879     debug_only(int off0 = cbuf.insts_size());
1880     if (ra_->C->in_24_bit_fp_mode()) {
1881       MacroAssembler _masm(&cbuf);
1882       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1883     }
1884     if (ra_->C->max_vector_size() > 16) {
1885       // Clear upper bits of YMM registers when current compiled code uses
1886       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887       MacroAssembler _masm(&cbuf);
1888       __ vzeroupper();
1889     }
1890     debug_only(int off1 = cbuf.insts_size());
1891     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1892   %}
1893 
1894   enc_class post_call_FPU %{
1895     // If method sets FPU control word do it here also
1896     if (Compile::current()->in_24_bit_fp_mode()) {
1897       MacroAssembler masm(&cbuf);
1898       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1899     }
1900   %}
1901 
1902   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1903     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1904     // who we intended to call.
1905     cbuf.set_insts_mark();
1906     $$$emit8$primary;
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1910     } else if (_optimized_virtual) {
1911       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1912                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1913     } else {
1914       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1915                      static_call_Relocation::spec(), RELOC_IMM32 );
1916     }
1917     if (_method) {  // Emit stub for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       } 
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method);
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
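           // (presumably chosen because XOR dst,dst is only 2 bytes, versus 5
           //  for a MOV with an explicit zero immediate)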
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     if( os::is_MP() )
2091       emit_opcode(cbuf,0xF0);         // [Lock]
2092   %}
2093 
2094   // Cmp-xchg long value.
2095   // Note: we need to swap rbx and rcx before and after the
2096   //       cmpxchg8 instruction because the instruction uses
2097   //       rcx as the high order word of the new value to store but
2098   //       our register encoding uses rbx.
2099   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2100 
2101     // XCHG  rbx,ecx
2102     emit_opcode(cbuf,0x87);
2103     emit_opcode(cbuf,0xD9);
2104     // [Lock]
2105     if( os::is_MP() )
2106       emit_opcode(cbuf,0xF0);
2107     // CMPXCHG8 [Eptr]
2108     emit_opcode(cbuf,0x0F);
2109     emit_opcode(cbuf,0xC7);
2110     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2111     // XCHG  rbx,ecx
2112     emit_opcode(cbuf,0x87);
2113     emit_opcode(cbuf,0xD9);
2114   %}
2115 
2116   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2117     // [Lock]
2118     if( os::is_MP() )
2119       emit_opcode(cbuf,0xF0);
2120 
2121     // CMPXCHG [Eptr]
2122     emit_opcode(cbuf,0x0F);
2123     emit_opcode(cbuf,0xB1);
2124     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2125   %}
2126 
2127   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2128     int res_encoding = $res$$reg;
2129 
2130     // MOV  res,0
2131     emit_opcode( cbuf, 0xB8 + res_encoding);
2132     emit_d32( cbuf, 0 );
2133     // JNE,s  fail
2134     emit_opcode(cbuf,0x75);
2135     emit_d8(cbuf, 5 );
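         // The 5 skipped bytes are exactly the MOV r32,imm32 below (opcode
         // 0xB8+rd plus a 32-bit immediate), so res stays 0 on not-equal and
         // becomes 1 otherwise.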
2136     // MOV  res,1
2137     emit_opcode( cbuf, 0xB8 + res_encoding);
2138     emit_d32( cbuf, 1 );
2139     // fail:
2140   %}
2141 
2142   enc_class set_instruction_start( ) %{
2143     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2144   %}
2145 
2146   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2147     int reg_encoding = $ereg$$reg;
2148     int base  = $mem$$base;
2149     int index = $mem$$index;
2150     int scale = $mem$$scale;
2151     int displace = $mem$$disp;
2152     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2153     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2154   %}
2155 
2156   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2157     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2158     int base  = $mem$$base;
2159     int index = $mem$$index;
2160     int scale = $mem$$scale;
2161     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2162     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2163     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2164   %}
2165 
2166   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
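         // $tertiary selects the double-precision shift: 0x0F 0xA4 is SHLD, so
         // the pair below is SHLD hi,lo,cnt followed by a plain shift of lo;
         // the else branch serves the SHRD (0x0F 0xAC) right shifts, which use
         // lo,hi and then shift hi.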
2167     int r1, r2;
2168     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2169     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2170     emit_opcode(cbuf,0x0F);
2171     emit_opcode(cbuf,$tertiary);
2172     emit_rm(cbuf, 0x3, r1, r2);
2173     emit_d8(cbuf,$cnt$$constant);
2174     emit_d8(cbuf,$primary);
2175     emit_rm(cbuf, 0x3, $secondary, r1);
2176     emit_d8(cbuf,$cnt$$constant);
2177   %}
2178 
2179   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2180     emit_opcode( cbuf, 0x8B ); // Move
2181     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2182     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2183       emit_d8(cbuf,$primary);
2184       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2185       emit_d8(cbuf,$cnt$$constant-32);
2186     }
2187     emit_d8(cbuf,$primary);
2188     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2189     emit_d8(cbuf,31);
2190   %}
2191 
2192   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2193     int r1, r2;
2194     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2195     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2196 
2197     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2198     emit_rm(cbuf, 0x3, r1, r2);
2199     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2200       emit_opcode(cbuf,$primary);
2201       emit_rm(cbuf, 0x3, $secondary, r1);
2202       emit_d8(cbuf,$cnt$$constant-32);
2203     }
2204     emit_opcode(cbuf,0x33);  // XOR r2,r2
2205     emit_rm(cbuf, 0x3, r2, r2);
2206   %}
2207 
2208   // Clone of RegMem but accepts an extra parameter to access each
2209   // half of a double in memory; it never needs relocation info.
2210   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2211     emit_opcode(cbuf,$opcode$$constant);
2212     int reg_encoding = $rm_reg$$reg;
2213     int base     = $mem$$base;
2214     int index    = $mem$$index;
2215     int scale    = $mem$$scale;
2216     int displace = $mem$$disp + $disp_for_half$$constant;
2217     relocInfo::relocType disp_reloc = relocInfo::none;
2218     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2219   %}
2220 
2221   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2222   //
2223   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2224   // and it never needs relocation information.
2225   // Frequently used to move data between FPU's Stack Top and memory.
2226   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2227     int rm_byte_opcode = $rm_opcode$$constant;
2228     int base     = $mem$$base;
2229     int index    = $mem$$index;
2230     int scale    = $mem$$scale;
2231     int displace = $mem$$disp;
2232     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2233     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2234   %}
2235 
2236   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2237     int rm_byte_opcode = $rm_opcode$$constant;
2238     int base     = $mem$$base;
2239     int index    = $mem$$index;
2240     int scale    = $mem$$scale;
2241     int displace = $mem$$disp;
2242     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2243     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2244   %}
2245 
2246   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2247     int reg_encoding = $dst$$reg;
2248     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2249     int index        = 0x04;            // 0x04 indicates no index
2250     int scale        = 0x00;            // 0x00 indicates no scale
2251     int displace     = $src1$$constant; // 0x00 indicates no displacement
2252     relocInfo::relocType disp_reloc = relocInfo::none;
2253     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2254   %}
2255 
2256   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2257     // Compare dst,src
2258     emit_opcode(cbuf,0x3B);
2259     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2260     // jmp dst < src around move
2261     emit_opcode(cbuf,0x7C);
2262     emit_d8(cbuf,2);
2263     // move dst,src
2264     emit_opcode(cbuf,0x8B);
2265     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2266   %}
2267 
2268   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2269     // Compare dst,src
2270     emit_opcode(cbuf,0x3B);
2271     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2272     // jmp dst > src around move
2273     emit_opcode(cbuf,0x7F);
2274     emit_d8(cbuf,2);
2275     // move dst,src
2276     emit_opcode(cbuf,0x8B);
2277     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2278   %}
2279 
2280   enc_class enc_FPR_store(memory mem, regDPR src) %{
2281     // If src is FPR1, we can just FST to store it.
2282     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2283     int reg_encoding = 0x2; // Just store
2284     int base  = $mem$$base;
2285     int index = $mem$$index;
2286     int scale = $mem$$scale;
2287     int displace = $mem$$disp;
2288     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2289     if( $src$$reg != FPR1L_enc ) {
2290       reg_encoding = 0x3;  // Store & pop
2291       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2292       emit_d8( cbuf, 0xC0-1+$src$$reg );
2293     }
2294     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2295     emit_opcode(cbuf,$primary);
2296     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2297   %}
2298 
2299   enc_class neg_reg(rRegI dst) %{
2300     // NEG $dst
2301     emit_opcode(cbuf,0xF7);
2302     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2303   %}
2304 
2305   enc_class setLT_reg(eCXRegI dst) %{
2306     // SETLT $dst
2307     emit_opcode(cbuf,0x0F);
2308     emit_opcode(cbuf,0x9C);
2309     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2310   %}
2311 
2312   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2313     int tmpReg = $tmp$$reg;
2314 
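         // Branch-free conditional add: the SBB below turns the borrow from the
         // SUB into an all-ones mask (or zero), so $y is added to $p-$q only
         // when the subtract borrowed.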
2315     // SUB $p,$q
2316     emit_opcode(cbuf,0x2B);
2317     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2318     // SBB $tmp,$tmp
2319     emit_opcode(cbuf,0x1B);
2320     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2321     // AND $tmp,$y
2322     emit_opcode(cbuf,0x23);
2323     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2324     // ADD $p,$tmp
2325     emit_opcode(cbuf,0x03);
2326     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2327   %}
2328 
2329   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
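         // For shift counts >= 32 the low word is first moved into the high word
         // and then cleared; the SHLD/SHL pair below then shifts by the count
         // modulo 32, since the hardware masks CL to 5 bits for 32-bit operands.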
2330     // TEST shift,32
2331     emit_opcode(cbuf,0xF7);
2332     emit_rm(cbuf, 0x3, 0, ECX_enc);
2333     emit_d32(cbuf,0x20);
2334     // JEQ,s small
2335     emit_opcode(cbuf, 0x74);
2336     emit_d8(cbuf, 0x04);
2337     // MOV    $dst.hi,$dst.lo
2338     emit_opcode( cbuf, 0x8B );
2339     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2340     // CLR    $dst.lo
2341     emit_opcode(cbuf, 0x33);
2342     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2343 // small:
2344     // SHLD   $dst.hi,$dst.lo,$shift
2345     emit_opcode(cbuf,0x0F);
2346     emit_opcode(cbuf,0xA5);
2347     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2348     // SHL    $dst.lo,$shift"
2349     emit_opcode(cbuf,0xD3);
2350     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2351   %}
2352 
2353   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2354     // TEST shift,32
2355     emit_opcode(cbuf,0xF7);
2356     emit_rm(cbuf, 0x3, 0, ECX_enc);
2357     emit_d32(cbuf,0x20);
2358     // JEQ,s small
2359     emit_opcode(cbuf, 0x74);
2360     emit_d8(cbuf, 0x04);
2361     // MOV    $dst.lo,$dst.hi
2362     emit_opcode( cbuf, 0x8B );
2363     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2364     // CLR    $dst.hi
2365     emit_opcode(cbuf, 0x33);
2366     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2367 // small:
2368     // SHRD   $dst.lo,$dst.hi,$shift
2369     emit_opcode(cbuf,0x0F);
2370     emit_opcode(cbuf,0xAD);
2371     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2372     // SHR    $dst.hi,$shift"
2373     emit_opcode(cbuf,0xD3);
2374     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2375   %}
2376 
2377   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2378     // TEST shift,32
2379     emit_opcode(cbuf,0xF7);
2380     emit_rm(cbuf, 0x3, 0, ECX_enc);
2381     emit_d32(cbuf,0x20);
2382     // JEQ,s small
2383     emit_opcode(cbuf, 0x74);
2384     emit_d8(cbuf, 0x05);
2385     // MOV    $dst.lo,$dst.hi
2386     emit_opcode( cbuf, 0x8B );
2387     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2388     // SAR    $dst.hi,31
2389     emit_opcode(cbuf, 0xC1);
2390     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2391     emit_d8(cbuf, 0x1F );
2392 // small:
2393     // SHRD   $dst.lo,$dst.hi,$shift
2394     emit_opcode(cbuf,0x0F);
2395     emit_opcode(cbuf,0xAD);
2396     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2397     // SAR    $dst.hi,$shift"
2398     emit_opcode(cbuf,0xD3);
2399     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2400   %}
2401 
2402 
2403   // ----------------- Encodings for floating point unit -----------------
2404   // May leave result in FPU-TOS or FPU reg depending on opcodes
2405   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2406     $$$emit8$primary;
2407     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2408   %}
2409 
2410   // Pop argument in FPR0 with FSTP ST(0)
2411   enc_class PopFPU() %{
2412     emit_opcode( cbuf, 0xDD );
2413     emit_d8( cbuf, 0xD8 );
2414   %}
2415 
2416   // !!!!! equivalent to Pop_Reg_F
2417   enc_class Pop_Reg_DPR( regDPR dst ) %{
2418     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2419     emit_d8( cbuf, 0xD8+$dst$$reg );
2420   %}
2421 
2422   enc_class Push_Reg_DPR( regDPR dst ) %{
2423     emit_opcode( cbuf, 0xD9 );
2424     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2425   %}
2426 
2427   enc_class strictfp_bias1( regDPR dst ) %{
2428     emit_opcode( cbuf, 0xDB );           // FLD m80real
2429     emit_opcode( cbuf, 0x2D );
2430     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2431     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2432     emit_opcode( cbuf, 0xC8+$dst$$reg );
2433   %}
2434 
2435   enc_class strictfp_bias2( regDPR dst ) %{
2436     emit_opcode( cbuf, 0xDB );           // FLD m80real
2437     emit_opcode( cbuf, 0x2D );
2438     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2439     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2440     emit_opcode( cbuf, 0xC8+$dst$$reg );
2441   %}
2442 
2443   // Special case for moving an integer register to a stack slot.
2444   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2445     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2446   %}
2447 
2448   // Special case for moving a register to a stack slot.
2449   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2450     // Opcode already emitted
2451     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2452     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2453     emit_d32(cbuf, $dst$$disp);   // Displacement
2454   %}
2455 
2456   // Push the integer in stackSlot 'src' onto FP-stack
2457   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2458     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2459   %}
2460 
2461   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2462   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2463     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2464   %}
2465 
2466   // Same as Pop_Mem_F except for opcode
2467   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2468   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2469     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2470   %}
2471 
2472   enc_class Pop_Reg_FPR( regFPR dst ) %{
2473     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2474     emit_d8( cbuf, 0xD8+$dst$$reg );
2475   %}
2476 
2477   enc_class Push_Reg_FPR( regFPR dst ) %{
2478     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2479     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2480   %}
2481 
2482   // Push FPU's float to a stack-slot, and pop FPU-stack
2483   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2484     int pop = 0x02;
2485     if ($src$$reg != FPR1L_enc) {
2486       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2487       emit_d8( cbuf, 0xC0-1+$src$$reg );
2488       pop = 0x03;
2489     }
2490     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2491   %}
2492 
2493   // Push FPU's double to a stack-slot, and pop FPU-stack
2494   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2495     int pop = 0x02;
2496     if ($src$$reg != FPR1L_enc) {
2497       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2498       emit_d8( cbuf, 0xC0-1+$src$$reg );
2499       pop = 0x03;
2500     }
2501     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2502   %}
2503 
2504   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2505   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2506     int pop = 0xD0 - 1; // -1 since we skip FLD
2507     if ($src$$reg != FPR1L_enc) {
2508       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2509       emit_d8( cbuf, 0xC0-1+$src$$reg );
2510       pop = 0xD8;
2511     }
2512     emit_opcode( cbuf, 0xDD );
2513     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2514   %}
2515 
2516 
2517   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2518     // load dst in FPR0
2519     emit_opcode( cbuf, 0xD9 );
2520     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2521     if ($src$$reg != FPR1L_enc) {
2522       // fincstp
2523       emit_opcode (cbuf, 0xD9);
2524       emit_opcode (cbuf, 0xF7);
2525       // swap src with FPR1:
2526       // FXCH FPR1 with src
2527       emit_opcode(cbuf, 0xD9);
2528       emit_d8(cbuf, 0xC8-1+$src$$reg );
2529       // fdecstp
2530       emit_opcode (cbuf, 0xD9);
2531       emit_opcode (cbuf, 0xF6);
2532     }
2533   %}
2534 
2535   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2536     MacroAssembler _masm(&cbuf);
2537     __ subptr(rsp, 8);
2538     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2539     __ fld_d(Address(rsp, 0));
2540     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2541     __ fld_d(Address(rsp, 0));
2542   %}
2543 
2544   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2545     MacroAssembler _masm(&cbuf);
2546     __ subptr(rsp, 4);
2547     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2548     __ fld_s(Address(rsp, 0));
2549     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2550     __ fld_s(Address(rsp, 0));
2551   %}
2552 
2553   enc_class Push_ResultD(regD dst) %{
2554     MacroAssembler _masm(&cbuf);
2555     __ fstp_d(Address(rsp, 0));
2556     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2557     __ addptr(rsp, 8);
2558   %}
2559 
2560   enc_class Push_ResultF(regF dst, immI d8) %{
2561     MacroAssembler _masm(&cbuf);
2562     __ fstp_s(Address(rsp, 0));
2563     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2564     __ addptr(rsp, $d8$$constant);
2565   %}
2566 
2567   enc_class Push_SrcD(regD src) %{
2568     MacroAssembler _masm(&cbuf);
2569     __ subptr(rsp, 8);
2570     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2571     __ fld_d(Address(rsp, 0));
2572   %}
2573 
2574   enc_class push_stack_temp_qword() %{
2575     MacroAssembler _masm(&cbuf);
2576     __ subptr(rsp, 8);
2577   %}
2578 
2579   enc_class pop_stack_temp_qword() %{
2580     MacroAssembler _masm(&cbuf);
2581     __ addptr(rsp, 8);
2582   %}
2583 
2584   enc_class push_xmm_to_fpr1(regD src) %{
2585     MacroAssembler _masm(&cbuf);
2586     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2587     __ fld_d(Address(rsp, 0));
2588   %}
2589 
2590   enc_class Push_Result_Mod_DPR( regDPR src) %{
2591     if ($src$$reg != FPR1L_enc) {
2592       // fincstp
2593       emit_opcode (cbuf, 0xD9);
2594       emit_opcode (cbuf, 0xF7);
2595       // FXCH FPR1 with src
2596       emit_opcode(cbuf, 0xD9);
2597       emit_d8(cbuf, 0xC8-1+$src$$reg );
2598       // fdecstp
2599       emit_opcode (cbuf, 0xD9);
2600       emit_opcode (cbuf, 0xF6);
2601     }
2602     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2603     // // FSTP   FPR$dst$$reg
2604     // emit_opcode( cbuf, 0xDD );
2605     // emit_d8( cbuf, 0xD8+$dst$$reg );
2606   %}
2607 
2608   enc_class fnstsw_sahf_skip_parity() %{
2609     // fnstsw ax
2610     emit_opcode( cbuf, 0xDF );
2611     emit_opcode( cbuf, 0xE0 );
2612     // sahf
2613     emit_opcode( cbuf, 0x9E );
2614     // jnp  ::skip
2615     emit_opcode( cbuf, 0x7B );
2616     emit_opcode( cbuf, 0x05 );
2617   %}
2618 
2619   enc_class emitModDPR() %{
2620     // fprem must be iterative
2621     // :: loop
2622     // fprem
2623     emit_opcode( cbuf, 0xD9 );
2624     emit_opcode( cbuf, 0xF8 );
2625     // wait
2626     emit_opcode( cbuf, 0x9b );
2627     // fnstsw ax
2628     emit_opcode( cbuf, 0xDF );
2629     emit_opcode( cbuf, 0xE0 );
2630     // sahf
2631     emit_opcode( cbuf, 0x9E );
2632     // jp  ::loop
2633     emit_opcode( cbuf, 0x0F );
2634     emit_opcode( cbuf, 0x8A );
2635     emit_opcode( cbuf, 0xF4 );
2636     emit_opcode( cbuf, 0xFF );
2637     emit_opcode( cbuf, 0xFF );
2638     emit_opcode( cbuf, 0xFF );
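         // The rel32 above (F4 FF FF FF = -12) branches back over the 6-byte JP
         // itself plus the 6 bytes of FPREM/WAIT/FNSTSW/SAHF, i.e. to ::loop.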
2639   %}
2640 
2641   enc_class fpu_flags() %{
2642     // fnstsw_ax
2643     emit_opcode( cbuf, 0xDF);
2644     emit_opcode( cbuf, 0xE0);
2645     // test ax,0x0400
2646     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2647     emit_opcode( cbuf, 0xA9 );
2648     emit_d16   ( cbuf, 0x0400 );
2649     // // // This sequence works, but stalls for 12-16 cycles on PPro
2650     // // test rax,0x0400
2651     // emit_opcode( cbuf, 0xA9 );
2652     // emit_d32   ( cbuf, 0x00000400 );
2653     //
2654     // jz exit (no unordered comparison)
2655     emit_opcode( cbuf, 0x74 );
2656     emit_d8    ( cbuf, 0x02 );
2657     // mov ah,1 - treat as LT case (set carry flag)
2658     emit_opcode( cbuf, 0xB4 );
2659     emit_d8    ( cbuf, 0x01 );
2660     // sahf
2661     emit_opcode( cbuf, 0x9E);
2662   %}
2663 
2664   enc_class cmpF_P6_fixup() %{
2665     // Fixup the integer flags in case comparison involved a NaN
2666     //
2667     // JNP exit (no unordered comparison, P-flag is set by NaN)
2668     emit_opcode( cbuf, 0x7B );
2669     emit_d8    ( cbuf, 0x03 );
2670     // MOV AH,1 - treat as LT case (set carry flag)
2671     emit_opcode( cbuf, 0xB4 );
2672     emit_d8    ( cbuf, 0x01 );
2673     // SAHF
2674     emit_opcode( cbuf, 0x9E);
2675     // NOP     // target for branch to avoid branch to branch
2676     emit_opcode( cbuf, 0x90);
2677   %}
2678 
2679 //     fnstsw_ax();
2680 //     sahf();
2681 //     movl(dst, nan_result);
2682 //     jcc(Assembler::parity, exit);
2683 //     movl(dst, less_result);
2684 //     jcc(Assembler::below, exit);
2685 //     movl(dst, equal_result);
2686 //     jcc(Assembler::equal, exit);
2687 //     movl(dst, greater_result);
2688 
2689 // less_result     =  1;
2690 // greater_result  = -1;
2691 // equal_result    = 0;
2692 // nan_result      = -1;
2693 
2694   enc_class CmpF_Result(rRegI dst) %{
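         // The short-branch displacements below are hand-computed: each
         // MOV r32,imm32 is 5 bytes and each short Jcc is 2, so 0x13 (19),
         // 0x0C (12) and 0x05 all land just past the final MOV.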
2695     // fnstsw_ax();
2696     emit_opcode( cbuf, 0xDF);
2697     emit_opcode( cbuf, 0xE0);
2698     // sahf
2699     emit_opcode( cbuf, 0x9E);
2700     // movl(dst, nan_result);
2701     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2702     emit_d32( cbuf, -1 );
2703     // jcc(Assembler::parity, exit);
2704     emit_opcode( cbuf, 0x7A );
2705     emit_d8    ( cbuf, 0x13 );
2706     // movl(dst, less_result);
2707     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2708     emit_d32( cbuf, -1 );
2709     // jcc(Assembler::below, exit);
2710     emit_opcode( cbuf, 0x72 );
2711     emit_d8    ( cbuf, 0x0C );
2712     // movl(dst, equal_result);
2713     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2714     emit_d32( cbuf, 0 );
2715     // jcc(Assembler::equal, exit);
2716     emit_opcode( cbuf, 0x74 );
2717     emit_d8    ( cbuf, 0x05 );
2718     // movl(dst, greater_result);
2719     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2720     emit_d32( cbuf, 1 );
2721   %}
2722 
2723 
2724   // Compare the longs and set flags
2725   // BROKEN!  Do Not use as-is
2726   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2727     // CMP    $src1.hi,$src2.hi
2728     emit_opcode( cbuf, 0x3B );
2729     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2730     // JNE,s  done
2731     emit_opcode(cbuf,0x75);
2732     emit_d8(cbuf, 2 );
2733     // CMP    $src1.lo,$src2.lo
2734     emit_opcode( cbuf, 0x3B );
2735     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2736 // done:
2737   %}
2738 
2739   enc_class convert_int_long( regL dst, rRegI src ) %{
2740     // mov $dst.lo,$src
2741     int dst_encoding = $dst$$reg;
2742     int src_encoding = $src$$reg;
2743     encode_Copy( cbuf, dst_encoding  , src_encoding );
2744     // mov $dst.hi,$src
2745     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2746     // sar $dst.hi,31
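         // (the arithmetic shift by 31 replicates the sign bit of $src through
         //  every bit of $dst.hi, completing the 64-bit sign extension)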
2747     emit_opcode( cbuf, 0xC1 );
2748     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2749     emit_d8(cbuf, 0x1F );
2750   %}
2751 
2752   enc_class convert_long_double( eRegL src ) %{
2753     // push $src.hi
2754     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2755     // push $src.lo
2756     emit_opcode(cbuf, 0x50+$src$$reg  );
2757     // fild 64-bits at [SP]
2758     emit_opcode(cbuf,0xdf);
2759     emit_d8(cbuf, 0x6C);
2760     emit_d8(cbuf, 0x24);
2761     emit_d8(cbuf, 0x00);
2762     // pop stack
2763     emit_opcode(cbuf, 0x83); // add  SP, #8
2764     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2765     emit_d8(cbuf, 0x8);
2766   %}
2767 
2768   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2769     // IMUL   EDX:EAX,$src1
2770     emit_opcode( cbuf, 0xF7 );
2771     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2772     // SAR    EDX,$cnt-32
2773     int shift_count = ((int)$cnt$$constant) - 32;
2774     if (shift_count > 0) {
2775       emit_opcode(cbuf, 0xC1);
2776       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2777       emit_d8(cbuf, shift_count);
2778     }
2779   %}
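
       // Note (illustration only): IMUL leaves the full 64-bit product in
       // EDX:EAX, so a shift by $cnt in [32,63] reduces to SAR EDX,$cnt-32.
       // For $cnt == 32 the wanted bits are already in EDX, which is why the
       // SAR above is only emitted when shift_count > 0.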
2780 
2781   // this version doesn't have add sp, 8
2782   enc_class convert_long_double2( eRegL src ) %{
2783     // push $src.hi
2784     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2785     // push $src.lo
2786     emit_opcode(cbuf, 0x50+$src$$reg  );
2787     // fild 64-bits at [SP]
2788     emit_opcode(cbuf,0xdf);
2789     emit_d8(cbuf, 0x6C);
2790     emit_d8(cbuf, 0x24);
2791     emit_d8(cbuf, 0x00);
2792   %}
2793 
2794   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2795     // Basic idea: long = (long)int * (long)int
2796     // IMUL EDX:EAX, src
2797     emit_opcode( cbuf, 0xF7 );
2798     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2799   %}
2800 
2801   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2802     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2803     // MUL EDX:EAX, src
2804     emit_opcode( cbuf, 0xF7 );
2805     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2806   %}
2807 
2808   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2809     // Basic idea: lo(result) = lo(x_lo * y_lo)
2810     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2811     // MOV    $tmp,$src.lo
2812     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2813     // IMUL   $tmp,EDX
2814     emit_opcode( cbuf, 0x0F );
2815     emit_opcode( cbuf, 0xAF );
2816     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2817     // MOV    EDX,$src.hi
2818     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2819     // IMUL   EDX,EAX
2820     emit_opcode( cbuf, 0x0F );
2821     emit_opcode( cbuf, 0xAF );
2822     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2823     // ADD    $tmp,EDX
2824     emit_opcode( cbuf, 0x03 );
2825     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2826     // MUL   EDX:EAX,$src.lo
2827     emit_opcode( cbuf, 0xF7 );
2828     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2829     // ADD    EDX,$tmp
2830     emit_opcode( cbuf, 0x03 );
2831     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2832   %}
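
       // Quick sanity check of the decomposition (comment only): for
       // x = 2^32 + 3 (x_hi = 1, x_lo = 3) and y = 5 (y_hi = 0, y_lo = 5) the
       // product is 5*2^32 + 15; the formula gives lo = lo(3*5) = 15 and
       // hi = hi(3*5) + lo(1*5) + lo(3*0) = 0 + 5 + 0 = 5, as expected.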
2833 
2834   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2835     // Basic idea: lo(result) = lo(src * y_lo)
2836     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2837     // IMUL   $tmp,EDX,$src
2838     emit_opcode( cbuf, 0x6B );
2839     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2840     emit_d8( cbuf, (int)$src$$constant );
2841     // MOV    EDX,$src
2842     emit_opcode(cbuf, 0xB8 + EDX_enc);
2843     emit_d32( cbuf, (int)$src$$constant );
2844     // MUL   EDX:EAX,EDX
2845     emit_opcode( cbuf, 0xF7 );
2846     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2847     // ADD    EDX,$tmp
2848     emit_opcode( cbuf, 0x03 );
2849     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2850   %}
2851 
2852   enc_class long_div( eRegL src1, eRegL src2 ) %{
2853     // PUSH src1.hi
2854     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2855     // PUSH src1.lo
2856     emit_opcode(cbuf,               0x50+$src1$$reg  );
2857     // PUSH src2.hi
2858     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2859     // PUSH src2.lo
2860     emit_opcode(cbuf,               0x50+$src2$$reg  );
2861     // CALL directly to the runtime
2862     cbuf.set_insts_mark();
2863     emit_opcode(cbuf,0xE8);       // Call into runtime
2864     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2865     // Restore stack
2866     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2867     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2868     emit_d8(cbuf, 4*4);
2869   %}
2870 
2871   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2872     // PUSH src1.hi
2873     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2874     // PUSH src1.lo
2875     emit_opcode(cbuf,               0x50+$src1$$reg  );
2876     // PUSH src2.hi
2877     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2878     // PUSH src2.lo
2879     emit_opcode(cbuf,               0x50+$src2$$reg  );
2880     // CALL directly to the runtime
2881     cbuf.set_insts_mark();
2882     emit_opcode(cbuf,0xE8);       // Call into runtime
2883     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2884     // Restore stack
2885     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2886     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2887     emit_d8(cbuf, 4*4);
2888   %}
2889 
2890   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2891     // MOV   $tmp,$src.lo
2892     emit_opcode(cbuf, 0x8B);
2893     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2894     // OR    $tmp,$src.hi
2895     emit_opcode(cbuf, 0x0B);
2896     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2897   %}
2898 
2899   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2900     // CMP    $src1.lo,$src2.lo
2901     emit_opcode( cbuf, 0x3B );
2902     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2903     // JNE,s  skip
2904     emit_cc(cbuf, 0x70, 0x5);
2905     emit_d8(cbuf,2);
2906     // CMP    $src1.hi,$src2.hi
2907     emit_opcode( cbuf, 0x3B );
2908     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2909   %}
2910 
2911   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2912     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2913     emit_opcode( cbuf, 0x3B );
2914     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2915     // MOV    $tmp,$src1.hi
2916     emit_opcode( cbuf, 0x8B );
2917     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2918     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2919     emit_opcode( cbuf, 0x1B );
2920     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2921   %}
2922 
2923   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2924     // XOR    $tmp,$tmp
2925     emit_opcode(cbuf,0x33);  // XOR
2926     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2927     // CMP    $tmp,$src.lo
2928     emit_opcode( cbuf, 0x3B );
2929     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2930     // SBB    $tmp,$src.hi
2931     emit_opcode( cbuf, 0x1B );
2932     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2933   %}
2934 
2935  // Sniff, sniff... smells like Gnu Superoptimizer
2936   enc_class neg_long( eRegL dst ) %{
2937     emit_opcode(cbuf,0xF7);    // NEG hi
2938     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2939     emit_opcode(cbuf,0xF7);    // NEG lo
2940     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2941     emit_opcode(cbuf,0x83);    // SBB hi,0
2942     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2943     emit_d8    (cbuf,0 );
2944   %}
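
       // Worked example (comment only): negating the long 1 (hi = 0, lo = 1):
       // NEG hi leaves 0; NEG lo gives 0xFFFFFFFF and sets the carry; SBB hi,0
       // then subtracts the borrow, leaving hi = 0xFFFFFFFF.  The result is
       // 0xFFFFFFFFFFFFFFFF, i.e. -1, as required.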
2945 
2946   enc_class enc_pop_rdx() %{
2947     emit_opcode(cbuf,0x5A);
2948   %}
2949 
2950   enc_class enc_rethrow() %{
2951     cbuf.set_insts_mark();
2952     emit_opcode(cbuf, 0xE9);        // jmp    entry
2953     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2954                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2955   %}
2956 
2957 
2958   // Convert a double to an int.  Java semantics require we do complex
2959   // manglelations in the corner cases.  So we set the rounding mode to
2960   // 'zero', store the darned double down as an int, and reset the
2961   // rounding mode to 'nearest'.  The hardware throws an exception which
2962   // patches up the correct value directly to the stack.
2963   enc_class DPR2I_encoding( regDPR src ) %{
2964     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2965     // exceptions here, so that a NaN or other corner-case value will
2966     // throw an exception (but normal values get converted at full speed).
2967     // However, I2C adapters and other float-stack manglers leave pending
2968     // invalid-op exceptions hanging.  We would have to clear them before
2969     // enabling them and that is more expensive than just testing for the
2970     // invalid value Intel stores down in the corner cases.
2971     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2972     emit_opcode(cbuf,0x2D);
2973     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2974     // Allocate a word
2975     emit_opcode(cbuf,0x83);            // SUB ESP,4
2976     emit_opcode(cbuf,0xEC);
2977     emit_d8(cbuf,0x04);
2978     // Encoding assumes a double has been pushed into FPR0.
2979     // Store down the double as an int, popping the FPU stack
2980     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2981     emit_opcode(cbuf,0x1C);
2982     emit_d8(cbuf,0x24);
2983     // Restore the rounding mode; mask the exception
2984     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2985     emit_opcode(cbuf,0x2D);
2986     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2987         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2988         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2989 
2990     // Load the converted int; adjust CPU stack
2991     emit_opcode(cbuf,0x58);       // POP EAX
2992     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2993     emit_d32   (cbuf,0x80000000); //         0x80000000
2994     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2995     emit_d8    (cbuf,0x07);       // Size of slow_call
2996     // Push src onto stack slow-path
2997     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2998     emit_d8    (cbuf,0xC0-1+$src$$reg );
2999     // CALL directly to the runtime
3000     cbuf.set_insts_mark();
3001     emit_opcode(cbuf,0xE8);       // Call into runtime
3002     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3003     // Carry on here...
3004   %}
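
       // Note (illustration only): FISTP stores the integer-indefinite value
       // 0x80000000 for NaN and out-of-range inputs, so the CMP EAX,0x80000000
       // above routes exactly those corner cases to the d2i_wrapper slow path;
       // ordinary values never take the call.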
3005 
3006   enc_class DPR2L_encoding( regDPR src ) %{
3007     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3008     emit_opcode(cbuf,0x2D);
3009     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3010     // Allocate a word
3011     emit_opcode(cbuf,0x83);            // SUB ESP,8
3012     emit_opcode(cbuf,0xEC);
3013     emit_d8(cbuf,0x08);
3014     // Encoding assumes a double has been pushed into FPR0.
3015     // Store down the double as a long, popping the FPU stack
3016     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3017     emit_opcode(cbuf,0x3C);
3018     emit_d8(cbuf,0x24);
3019     // Restore the rounding mode; mask the exception
3020     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3021     emit_opcode(cbuf,0x2D);
3022     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3023         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3024         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3025 
3026     // Load the converted long; adjust CPU stack
3027     emit_opcode(cbuf,0x58);       // POP EAX
3028     emit_opcode(cbuf,0x5A);       // POP EDX
3029     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3030     emit_d8    (cbuf,0xFA);       // rdx
3031     emit_d32   (cbuf,0x80000000); //         0x80000000
3032     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3033     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3034     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3035     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3036     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3037     emit_d8    (cbuf,0x07);       // Size of slow_call
3038     // Push src onto stack slow-path
3039     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3040     emit_d8    (cbuf,0xC0-1+$src$$reg );
3041     // CALL directly to the runtime
3042     cbuf.set_insts_mark();
3043     emit_opcode(cbuf,0xE8);       // Call into runtime
3044     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3045     // Carry on here...
3046   %}
3047 
3048   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3049     // Operand was loaded from memory into fp ST (stack top)
3050     // FMUL   ST,$src  /* D8 C8+i */
3051     emit_opcode(cbuf, 0xD8);
3052     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3053   %}
3054 
3055   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3056     // FADD   ST,src2  /* D8 C0+i */
3057     emit_opcode(cbuf, 0xD8);
3058     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3059     // could use FADDP  src2,ST  /* DE C0+i */
3060   %}
3061 
3062   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3063     // FADDP  src2,ST  /* DE C0+i */
3064     emit_opcode(cbuf, 0xDE);
3065     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3066   %}
3067 
3068   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3069     // Operand has been loaded into fp ST (stack top)
3070       // FSUB   ST,$src1
3071       emit_opcode(cbuf, 0xD8);
3072       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3073 
3074       // FDIV
3075       emit_opcode(cbuf, 0xD8);
3076       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3077   %}
3078 
3079   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3080     // Operand was loaded from memory into fp ST (stack top)
3081     // FADD   ST,$src  /* D8 C0+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3084 
3085     // FMUL  ST,src2  /* D8 C8+i */
3086     emit_opcode(cbuf, 0xD8);
3087     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3088   %}
3089 
3090 
3091   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3092     // Operand was loaded from memory into fp ST (stack top)
3093     // FADD   ST,$src  /* D8 C0+i */
3094     emit_opcode(cbuf, 0xD8);
3095     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3096 
3097     // FMULP  src2,ST  /* DE C8+i */
3098     emit_opcode(cbuf, 0xDE);
3099     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3100   %}
3101 
3102   // Atomically load the volatile long
3103   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3104     emit_opcode(cbuf,0xDF);
3105     int rm_byte_opcode = 0x05;
3106     int base     = $mem$$base;
3107     int index    = $mem$$index;
3108     int scale    = $mem$$scale;
3109     int displace = $mem$$disp;
3110     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3111     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3112     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3113   %}
3114 
3115   // Volatile Store Long.  Must be atomic, so move it into
3116   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3117   // target address before the store (for null-ptr checks)
3118   // so the memory operand is used twice in the encoding.
3119   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3120     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3121     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3122     emit_opcode(cbuf,0xDF);
3123     int rm_byte_opcode = 0x07;
3124     int base     = $mem$$base;
3125     int index    = $mem$$index;
3126     int scale    = $mem$$scale;
3127     int displace = $mem$$disp;
3128     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3129     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3130   %}
3131 
3132   // Safepoint Poll.  This polls the safepoint page, and causes an
3133   // exception if it is not readable.  Unfortunately, it kills the condition
3134   // code in the process.
3135   // We currently use TESTL [spp],EDI
3136   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
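       // For reference, the encoding below assembles to the bytes 0x85 0x3D
       // followed by the 32-bit polling-page address, i.e. TEST [polling_page],EDI
       // (ModRM 0x3D = mod 00, reg EDI, r/m 101/disp32).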
3137 
3138   enc_class Safepoint_Poll() %{
3139     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3140     emit_opcode(cbuf,0x85);
3141     emit_rm (cbuf, 0x0, 0x7, 0x5);
3142     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3143   %}
3144 %}
3145 
3146 
3147 //----------FRAME--------------------------------------------------------------
3148 // Definition of frame structure and management information.
3149 //
3150 //  S T A C K   L A Y O U T    Allocators stack-slot number
3151 //                             |   (to get allocators register number
3152 //  G  Owned by    |        |  v    add OptoReg::stack0())
3153 //  r   CALLER     |        |
3154 //  o     |        +--------+      pad to even-align allocators stack-slot
3155 //  w     V        |  pad0  |        numbers; owned by CALLER
3156 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3157 //  h     ^        |   in   |  5
3158 //        |        |  args  |  4   Holes in incoming args owned by SELF
3159 //  |     |        |        |  3
3160 //  |     |        +--------+
3161 //  V     |        | old out|      Empty on Intel, window on Sparc
3162 //        |    old |preserve|      Must be even aligned.
3163 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3164 //        |        |   in   |  3   area for Intel ret address
3165 //     Owned by    |preserve|      Empty on Sparc.
3166 //       SELF      +--------+
3167 //        |        |  pad2  |  2   pad to align old SP
3168 //        |        +--------+  1
3169 //        |        | locks  |  0
3170 //        |        +--------+----> OptoReg::stack0(), even aligned
3171 //        |        |  pad1  | 11   pad to align new SP
3172 //        |        +--------+
3173 //        |        |        | 10
3174 //        |        | spills |  9   spills
3175 //        V        |        |  8   (pad0 slot for callee)
3176 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3177 //        ^        |  out   |  7
3178 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3179 //     Owned by    +--------+
3180 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3181 //        |    new |preserve|      Must be even-aligned.
3182 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3183 //        |        |        |
3184 //
3185 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3186 //         known from SELF's arguments and the Java calling convention.
3187 //         Region 6-7 is determined per call site.
3188 // Note 2: If the calling convention leaves holes in the incoming argument
3189 //         area, those holes are owned by SELF.  Holes in the outgoing area
3190 //         are owned by the CALLEE.  Holes should not be necessary in the
3191 //         incoming area, as the Java calling convention is completely under
3192 //         the control of the AD file.  Doubles can be sorted and packed to
3193 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3194 //         varargs C calling conventions.
3195 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3196 //         even aligned with pad0 as needed.
3197 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3198 //         region 6-11 is even aligned; it may be padded out more so that
3199 //         the region from SP to FP meets the minimum stack alignment.
3200 
3201 frame %{
3202   // What direction does stack grow in (assumed to be same for C & Java)
3203   stack_direction(TOWARDS_LOW);
3204 
3205   // These three registers define part of the calling convention
3206   // between compiled code and the interpreter.
3207   inline_cache_reg(EAX);                // Inline Cache Register
3208   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3209 
3210   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3211   cisc_spilling_operand_name(indOffset32);
3212 
3213   // Number of stack slots consumed by locking an object
3214   sync_stack_slots(1);
3215 
3216   // Compiled code's Frame Pointer
3217   frame_pointer(ESP);
3218   // Interpreter stores its frame pointer in a register which is
3219   // stored to the stack by I2CAdaptors.
3220   // I2CAdaptors convert from interpreted java to compiled java.
3221   interpreter_frame_pointer(EBP);
3222 
3223   // Stack alignment requirement
3224   // Alignment size in bytes (128-bit -> 16 bytes)
3225   stack_alignment(StackAlignmentInBytes);
3226 
3227   // Number of stack slots between incoming argument block and the start of
3228   // a new frame.  The PROLOG must add this many slots to the stack.  The
3229   // EPILOG must remove this many slots.  Intel needs one slot for
3230   // return address and one for rbp, (must save rbp)
3231   in_preserve_stack_slots(2+VerifyStackAtCalls);
3232 
3233   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3234   // for calls to C.  Supports the var-args backing area for register parms.
3235   varargs_C_out_slots_killed(0);
3236 
3237   // The after-PROLOG location of the return address.  Location of
3238   // return address specifies a type (REG or STACK) and a number
3239   // representing the register number (i.e. - use a register name) or
3240   // stack slot.
3241   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3242   // Otherwise, it is above the locks and verification slot and alignment word
3243   return_addr(STACK - 1 +
3244               round_to((Compile::current()->in_preserve_stack_slots() +
3245                         Compile::current()->fixed_slots()),
3246                        stack_alignment_in_slots()));
3247 
3248   // Body of function which returns an integer array locating
3249   // arguments either in registers or in stack slots.  Passed an array
3250   // of ideal registers called "sig" and a "length" count.  Stack-slot
3251   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3252   // arguments for a CALLEE.  Incoming stack arguments are
3253   // automatically biased by the preserve_stack_slots field above.
3254   calling_convention %{
3255     // No difference between incoming and outgoing, just pass false
3256     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3257   %}
3258 
3259 
3260   // Body of function which returns an integer array locating
3261   // arguments either in registers or in stack slots.  Passed an array
3262   // of ideal registers called "sig" and a "length" count.  Stack-slot
3263   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3264   // arguments for a CALLEE.  Incoming stack arguments are
3265   // automatically biased by the preserve_stack_slots field above.
3266   c_calling_convention %{
3267     // This is obviously always outgoing
3268     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3269   %}
3270 
3271   // Location of C & interpreter return values
3272   c_return_value %{
3273     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3274     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3275     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3276 
3277     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3278     // that C functions return float and double results in XMM0.
3279     if( ideal_reg == Op_RegD && UseSSE>=2 )
3280       return OptoRegPair(XMM0b_num,XMM0_num);
3281     if( ideal_reg == Op_RegF && UseSSE>=2 )
3282       return OptoRegPair(OptoReg::Bad,XMM0_num);
3283 
3284     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3285   %}
3286 
3287   // Location of return values
3288   return_value %{
3289     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3290     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3291     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3292     if( ideal_reg == Op_RegD && UseSSE>=2 )
3293       return OptoRegPair(XMM0b_num,XMM0_num);
3294     if( ideal_reg == Op_RegF && UseSSE>=1 )
3295       return OptoRegPair(OptoReg::Bad,XMM0_num);
3296     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3297   %}
3298 
3299 %}
3300 
3301 //----------ATTRIBUTES---------------------------------------------------------
3302 //----------Operand Attributes-------------------------------------------------
3303 op_attrib op_cost(0);        // Required cost attribute
3304 
3305 //----------Instruction Attributes---------------------------------------------
3306 ins_attrib ins_cost(100);       // Required cost attribute
3307 ins_attrib ins_size(8);         // Required size attribute (in bits)
3308 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3309                                 // non-matching short branch variant of some
3310                                 // long branch?
3311 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3312                                 // specifies the alignment that some part of the instruction (not
3313                                 // necessarily the start) requires.  If > 1, a compute_padding()
3314                                 // function must be provided for the instruction
3315 
3316 //----------OPERANDS-----------------------------------------------------------
3317 // Operand definitions must precede instruction definitions for correct parsing
3318 // in the ADLC because operands constitute user defined types which are used in
3319 // instruction definitions.
3320 
3321 //----------Simple Operands----------------------------------------------------
3322 // Immediate Operands
3323 // Integer Immediate
3324 operand immI() %{
3325   match(ConI);
3326 
3327   op_cost(10);
3328   format %{ %}
3329   interface(CONST_INTER);
3330 %}
3331 
3332 // Constant for test vs zero
3333 operand immI0() %{
3334   predicate(n->get_int() == 0);
3335   match(ConI);
3336 
3337   op_cost(0);
3338   format %{ %}
3339   interface(CONST_INTER);
3340 %}
3341 
3342 // Constant for increment
3343 operand immI1() %{
3344   predicate(n->get_int() == 1);
3345   match(ConI);
3346 
3347   op_cost(0);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for decrement
3353 operand immI_M1() %{
3354   predicate(n->get_int() == -1);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Valid scale values for addressing modes
3363 operand immI2() %{
3364   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3365   match(ConI);
3366 
3367   format %{ %}
3368   interface(CONST_INTER);
3369 %}
3370 
3371 operand immI8() %{
3372   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3373   match(ConI);
3374 
3375   op_cost(5);
3376   format %{ %}
3377   interface(CONST_INTER);
3378 %}
3379 
3380 operand immI16() %{
3381   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3382   match(ConI);
3383 
3384   op_cost(10);
3385   format %{ %}
3386   interface(CONST_INTER);
3387 %}
3388 
3389 // Int Immediate non-negative
3390 operand immU31()
3391 %{
3392   predicate(n->get_int() >= 0);
3393   match(ConI);
3394 
3395   op_cost(0);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 // Constant for long shifts
3401 operand immI_32() %{
3402   predicate( n->get_int() == 32 );
3403   match(ConI);
3404 
3405   op_cost(0);
3406   format %{ %}
3407   interface(CONST_INTER);
3408 %}
3409 
3410 operand immI_1_31() %{
3411   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3412   match(ConI);
3413 
3414   op_cost(0);
3415   format %{ %}
3416   interface(CONST_INTER);
3417 %}
3418 
3419 operand immI_32_63() %{
3420   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3421   match(ConI);
3422   op_cost(0);
3423 
3424   format %{ %}
3425   interface(CONST_INTER);
3426 %}
3427 
3428 operand immI_1() %{
3429   predicate( n->get_int() == 1 );
3430   match(ConI);
3431 
3432   op_cost(0);
3433   format %{ %}
3434   interface(CONST_INTER);
3435 %}
3436 
3437 operand immI_2() %{
3438   predicate( n->get_int() == 2 );
3439   match(ConI);
3440 
3441   op_cost(0);
3442   format %{ %}
3443   interface(CONST_INTER);
3444 %}
3445 
3446 operand immI_3() %{
3447   predicate( n->get_int() == 3 );
3448   match(ConI);
3449 
3450   op_cost(0);
3451   format %{ %}
3452   interface(CONST_INTER);
3453 %}
3454 
3455 // Pointer Immediate
3456 operand immP() %{
3457   match(ConP);
3458 
3459   op_cost(10);
3460   format %{ %}
3461   interface(CONST_INTER);
3462 %}
3463 
3464 // NULL Pointer Immediate
3465 operand immP0() %{
3466   predicate( n->get_ptr() == 0 );
3467   match(ConP);
3468   op_cost(0);
3469 
3470   format %{ %}
3471   interface(CONST_INTER);
3472 %}
3473 
3474 // Long Immediate
3475 operand immL() %{
3476   match(ConL);
3477 
3478   op_cost(20);
3479   format %{ %}
3480   interface(CONST_INTER);
3481 %}
3482 
3483 // Long Immediate zero
3484 operand immL0() %{
3485   predicate( n->get_long() == 0L );
3486   match(ConL);
3487   op_cost(0);
3488 
3489   format %{ %}
3490   interface(CONST_INTER);
3491 %}
3492 
3493 // Long Immediate minus one
3494 operand immL_M1() %{
3495   predicate( n->get_long() == -1L );
3496   match(ConL);
3497   op_cost(0);
3498 
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long immediate from 0 to 127.
3504 // Used for a shorter form of long mul by 10.
3505 operand immL_127() %{
3506   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3507   match(ConL);
3508   op_cost(0);
3509 
3510   format %{ %}
3511   interface(CONST_INTER);
3512 %}
3513 
3514 // Long Immediate: low 32-bit mask
3515 operand immL_32bits() %{
3516   predicate(n->get_long() == 0xFFFFFFFFL);
3517   match(ConL);
3518   op_cost(0);
3519 
3520   format %{ %}
3521   interface(CONST_INTER);
3522 %}
3523 
3524 // Long Immediate: 32-bit signed value
3525 operand immL32() %{
3526   predicate(n->get_long() == (int)(n->get_long()));
3527   match(ConL);
3528   op_cost(20);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Double Immediate zero
3535 operand immDPR0() %{
3536   // Do additional (and counter-intuitive) test against NaN to work around VC++
3537   // bug that generates code such that NaNs compare equal to 0.0
3538   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3539   match(ConD);
3540 
3541   op_cost(5);
3542   format %{ %}
3543   interface(CONST_INTER);
3544 %}
3545 
3546 // Double Immediate one
3547 operand immDPR1() %{
3548   predicate( UseSSE<=1 && n->getd() == 1.0 );
3549   match(ConD);
3550 
3551   op_cost(5);
3552   format %{ %}
3553   interface(CONST_INTER);
3554 %}
3555 
3556 // Double Immediate
3557 operand immDPR() %{
3558   predicate(UseSSE<=1);
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 operand immD() %{
3567   predicate(UseSSE>=2);
3568   match(ConD);
3569 
3570   op_cost(5);
3571   format %{ %}
3572   interface(CONST_INTER);
3573 %}
3574 
3575 // Double Immediate zero
3576 operand immD0() %{
3577   // Do additional (and counter-intuitive) test against NaN to work around VC++
3578   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3579   // compare equal to -0.0.
3580   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3581   match(ConD);
3582 
3583   format %{ %}
3584   interface(CONST_INTER);
3585 %}
3586 
3587 // Float Immediate zero
3588 operand immFPR0() %{
3589   predicate(UseSSE == 0 && n->getf() == 0.0F);
3590   match(ConF);
3591 
3592   op_cost(5);
3593   format %{ %}
3594   interface(CONST_INTER);
3595 %}
3596 
3597 // Float Immediate one
3598 operand immFPR1() %{
3599   predicate(UseSSE == 0 && n->getf() == 1.0F);
3600   match(ConF);
3601 
3602   op_cost(5);
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate
3608 operand immFPR() %{
3609   predicate( UseSSE == 0 );
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate
3618 operand immF() %{
3619   predicate(UseSSE >= 1);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate zero.  Zero and not -0.0
3628 operand immF0() %{
3629   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Immediates for special shifts (sign extend)
3638 
3639 // Constants for increment
3640 operand immI_16() %{
3641   predicate( n->get_int() == 16 );
3642   match(ConI);
3643 
3644   format %{ %}
3645   interface(CONST_INTER);
3646 %}
3647 
3648 operand immI_24() %{
3649   predicate( n->get_int() == 24 );
3650   match(ConI);
3651 
3652   format %{ %}
3653   interface(CONST_INTER);
3654 %}
3655 
3656 // Constant for byte-wide masking
3657 operand immI_255() %{
3658   predicate( n->get_int() == 255 );
3659   match(ConI);
3660 
3661   format %{ %}
3662   interface(CONST_INTER);
3663 %}
3664 
3665 // Constant for short-wide masking
3666 operand immI_65535() %{
3667   predicate(n->get_int() == 65535);
3668   match(ConI);
3669 
3670   format %{ %}
3671   interface(CONST_INTER);
3672 %}
3673 
3674 // Register Operands
3675 // Integer Register
3676 operand rRegI() %{
3677   constraint(ALLOC_IN_RC(int_reg));
3678   match(RegI);
3679   match(xRegI);
3680   match(eAXRegI);
3681   match(eBXRegI);
3682   match(eCXRegI);
3683   match(eDXRegI);
3684   match(eDIRegI);
3685   match(eSIRegI);
3686 
3687   format %{ %}
3688   interface(REG_INTER);
3689 %}
3690 
3691 // Subset of Integer Register
3692 operand xRegI(rRegI reg) %{
3693   constraint(ALLOC_IN_RC(int_x_reg));
3694   match(reg);
3695   match(eAXRegI);
3696   match(eBXRegI);
3697   match(eCXRegI);
3698   match(eDXRegI);
3699 
3700   format %{ %}
3701   interface(REG_INTER);
3702 %}
3703 
3704 // Special Registers
3705 operand eAXRegI(xRegI reg) %{
3706   constraint(ALLOC_IN_RC(eax_reg));
3707   match(reg);
3708   match(rRegI);
3709 
3710   format %{ "EAX" %}
3711   interface(REG_INTER);
3712 %}
3713 
3714 // Special Registers
3715 operand eBXRegI(xRegI reg) %{
3716   constraint(ALLOC_IN_RC(ebx_reg));
3717   match(reg);
3718   match(rRegI);
3719 
3720   format %{ "EBX" %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 operand eCXRegI(xRegI reg) %{
3725   constraint(ALLOC_IN_RC(ecx_reg));
3726   match(reg);
3727   match(rRegI);
3728 
3729   format %{ "ECX" %}
3730   interface(REG_INTER);
3731 %}
3732 
3733 operand eDXRegI(xRegI reg) %{
3734   constraint(ALLOC_IN_RC(edx_reg));
3735   match(reg);
3736   match(rRegI);
3737 
3738   format %{ "EDX" %}
3739   interface(REG_INTER);
3740 %}
3741 
3742 operand eDIRegI(xRegI reg) %{
3743   constraint(ALLOC_IN_RC(edi_reg));
3744   match(reg);
3745   match(rRegI);
3746 
3747   format %{ "EDI" %}
3748   interface(REG_INTER);
3749 %}
3750 
3751 operand naxRegI() %{
3752   constraint(ALLOC_IN_RC(nax_reg));
3753   match(RegI);
3754   match(eCXRegI);
3755   match(eDXRegI);
3756   match(eSIRegI);
3757   match(eDIRegI);
3758 
3759   format %{ %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 operand nadxRegI() %{
3764   constraint(ALLOC_IN_RC(nadx_reg));
3765   match(RegI);
3766   match(eBXRegI);
3767   match(eCXRegI);
3768   match(eSIRegI);
3769   match(eDIRegI);
3770 
3771   format %{ %}
3772   interface(REG_INTER);
3773 %}
3774 
3775 operand ncxRegI() %{
3776   constraint(ALLOC_IN_RC(ncx_reg));
3777   match(RegI);
3778   match(eAXRegI);
3779   match(eDXRegI);
3780   match(eSIRegI);
3781   match(eDIRegI);
3782 
3783   format %{ %}
3784   interface(REG_INTER);
3785 %}
3786 
3787 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3788 // //
3789 operand eSIRegI(xRegI reg) %{
3790    constraint(ALLOC_IN_RC(esi_reg));
3791    match(reg);
3792    match(rRegI);
3793 
3794    format %{ "ESI" %}
3795    interface(REG_INTER);
3796 %}
3797 
3798 // Pointer Register
3799 operand anyRegP() %{
3800   constraint(ALLOC_IN_RC(any_reg));
3801   match(RegP);
3802   match(eAXRegP);
3803   match(eBXRegP);
3804   match(eCXRegP);
3805   match(eDIRegP);
3806   match(eRegP);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
3812 operand eRegP() %{
3813   constraint(ALLOC_IN_RC(int_reg));
3814   match(RegP);
3815   match(eAXRegP);
3816   match(eBXRegP);
3817   match(eCXRegP);
3818   match(eDIRegP);
3819 
3820   format %{ %}
3821   interface(REG_INTER);
3822 %}
3823 
3824 // On Windows 95, EBP is not safe to use for implicit null tests.
3825 operand eRegP_no_EBP() %{
3826   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3827   match(RegP);
3828   match(eAXRegP);
3829   match(eBXRegP);
3830   match(eCXRegP);
3831   match(eDIRegP);
3832 
3833   op_cost(100);
3834   format %{ %}
3835   interface(REG_INTER);
3836 %}
3837 
3838 operand naxRegP() %{
3839   constraint(ALLOC_IN_RC(nax_reg));
3840   match(RegP);
3841   match(eBXRegP);
3842   match(eDXRegP);
3843   match(eCXRegP);
3844   match(eSIRegP);
3845   match(eDIRegP);
3846 
3847   format %{ %}
3848   interface(REG_INTER);
3849 %}
3850 
3851 operand nabxRegP() %{
3852   constraint(ALLOC_IN_RC(nabx_reg));
3853   match(RegP);
3854   match(eCXRegP);
3855   match(eDXRegP);
3856   match(eSIRegP);
3857   match(eDIRegP);
3858 
3859   format %{ %}
3860   interface(REG_INTER);
3861 %}
3862 
3863 operand pRegP() %{
3864   constraint(ALLOC_IN_RC(p_reg));
3865   match(RegP);
3866   match(eBXRegP);
3867   match(eDXRegP);
3868   match(eSIRegP);
3869   match(eDIRegP);
3870 
3871   format %{ %}
3872   interface(REG_INTER);
3873 %}
3874 
3875 // Special Registers
3876 // Return a pointer value
3877 operand eAXRegP(eRegP reg) %{
3878   constraint(ALLOC_IN_RC(eax_reg));
3879   match(reg);
3880   format %{ "EAX" %}
3881   interface(REG_INTER);
3882 %}
3883 
3884 // Used in AtomicAdd
3885 operand eBXRegP(eRegP reg) %{
3886   constraint(ALLOC_IN_RC(ebx_reg));
3887   match(reg);
3888   format %{ "EBX" %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 // Tail-call (interprocedural jump) to interpreter
3893 operand eCXRegP(eRegP reg) %{
3894   constraint(ALLOC_IN_RC(ecx_reg));
3895   match(reg);
3896   format %{ "ECX" %}
3897   interface(REG_INTER);
3898 %}
3899 
3900 operand eSIRegP(eRegP reg) %{
3901   constraint(ALLOC_IN_RC(esi_reg));
3902   match(reg);
3903   format %{ "ESI" %}
3904   interface(REG_INTER);
3905 %}
3906 
3907 // Used in rep stosw
3908 operand eDIRegP(eRegP reg) %{
3909   constraint(ALLOC_IN_RC(edi_reg));
3910   match(reg);
3911   format %{ "EDI" %}
3912   interface(REG_INTER);
3913 %}
3914 
3915 operand eRegL() %{
3916   constraint(ALLOC_IN_RC(long_reg));
3917   match(RegL);
3918   match(eADXRegL);
3919 
3920   format %{ %}
3921   interface(REG_INTER);
3922 %}
3923 
3924 operand eADXRegL( eRegL reg ) %{
3925   constraint(ALLOC_IN_RC(eadx_reg));
3926   match(reg);
3927 
3928   format %{ "EDX:EAX" %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 operand eBCXRegL( eRegL reg ) %{
3933   constraint(ALLOC_IN_RC(ebcx_reg));
3934   match(reg);
3935 
3936   format %{ "EBX:ECX" %}
3937   interface(REG_INTER);
3938 %}
3939 
3940 // Special case for integer high multiply
3941 operand eADXRegL_low_only() %{
3942   constraint(ALLOC_IN_RC(eadx_reg));
3943   match(RegL);
3944 
3945   format %{ "EAX" %}
3946   interface(REG_INTER);
3947 %}
3948 
3949 // Flags register, used as output of compare instructions
3950 operand eFlagsReg() %{
3951   constraint(ALLOC_IN_RC(int_flags));
3952   match(RegFlags);
3953 
3954   format %{ "EFLAGS" %}
3955   interface(REG_INTER);
3956 %}
3957 
3958 // Flags register, used as output of FLOATING POINT compare instructions
3959 operand eFlagsRegU() %{
3960   constraint(ALLOC_IN_RC(int_flags));
3961   match(RegFlags);
3962 
3963   format %{ "EFLAGS_U" %}
3964   interface(REG_INTER);
3965 %}
3966 
3967 operand eFlagsRegUCF() %{
3968   constraint(ALLOC_IN_RC(int_flags));
3969   match(RegFlags);
3970   predicate(false);
3971 
3972   format %{ "EFLAGS_U_CF" %}
3973   interface(REG_INTER);
3974 %}
3975 
3976 // Condition Code Register used by long compare
3977 operand flagsReg_long_LTGE() %{
3978   constraint(ALLOC_IN_RC(int_flags));
3979   match(RegFlags);
3980   format %{ "FLAGS_LTGE" %}
3981   interface(REG_INTER);
3982 %}
3983 operand flagsReg_long_EQNE() %{
3984   constraint(ALLOC_IN_RC(int_flags));
3985   match(RegFlags);
3986   format %{ "FLAGS_EQNE" %}
3987   interface(REG_INTER);
3988 %}
3989 operand flagsReg_long_LEGT() %{
3990   constraint(ALLOC_IN_RC(int_flags));
3991   match(RegFlags);
3992   format %{ "FLAGS_LEGT" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Float register operands
3997 operand regDPR() %{
3998   predicate( UseSSE < 2 );
3999   constraint(ALLOC_IN_RC(fp_dbl_reg));
4000   match(RegD);
4001   match(regDPR1);
4002   match(regDPR2);
4003   format %{ %}
4004   interface(REG_INTER);
4005 %}
4006 
4007 operand regDPR1(regDPR reg) %{
4008   predicate( UseSSE < 2 );
4009   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4010   match(reg);
4011   format %{ "FPR1" %}
4012   interface(REG_INTER);
4013 %}
4014 
4015 operand regDPR2(regDPR reg) %{
4016   predicate( UseSSE < 2 );
4017   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4018   match(reg);
4019   format %{ "FPR2" %}
4020   interface(REG_INTER);
4021 %}
4022 
4023 operand regnotDPR1(regDPR reg) %{
4024   predicate( UseSSE < 2 );
4025   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4026   match(reg);
4027   format %{ %}
4028   interface(REG_INTER);
4029 %}
4030 
4031 // Float register operands
4032 operand regFPR() %{
4033   predicate( UseSSE < 2 );
4034   constraint(ALLOC_IN_RC(fp_flt_reg));
4035   match(RegF);
4036   match(regFPR1);
4037   format %{ %}
4038   interface(REG_INTER);
4039 %}
4040 
4041 // Float register operands
4042 operand regFPR1(regFPR reg) %{
4043   predicate( UseSSE < 2 );
4044   constraint(ALLOC_IN_RC(fp_flt_reg0));
4045   match(reg);
4046   format %{ "FPR1" %}
4047   interface(REG_INTER);
4048 %}
4049 
4050 // XMM Float register operands
4051 operand regF() %{
4052   predicate( UseSSE>=1 );
4053   constraint(ALLOC_IN_RC(float_reg_legacy));
4054   match(RegF);
4055   format %{ %}
4056   interface(REG_INTER);
4057 %}
4058 
4059 // XMM Double register operands
4060 operand regD() %{
4061   predicate( UseSSE>=2 );
4062   constraint(ALLOC_IN_RC(double_reg_legacy));
4063   match(RegD);
4064   format %{ %}
4065   interface(REG_INTER);
4066 %}
4067 
4068 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4069 // runtime code generation via reg_class_dynamic.
4070 operand vecS() %{
4071   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4072   match(VecS);
4073 
4074   format %{ %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 operand vecD() %{
4079   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4080   match(VecD);
4081 
4082   format %{ %}
4083   interface(REG_INTER);
4084 %}
4085 
4086 operand vecX() %{
4087   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4088   match(VecX);
4089 
4090   format %{ %}
4091   interface(REG_INTER);
4092 %}
4093 
4094 operand vecY() %{
4095   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4096   match(VecY);
4097 
4098   format %{ %}
4099   interface(REG_INTER);
4100 %}
4101 
4102 //----------Memory Operands----------------------------------------------------
4103 // Direct Memory Operand
4104 operand direct(immP addr) %{
4105   match(addr);
4106 
4107   format %{ "[$addr]" %}
4108   interface(MEMORY_INTER) %{
4109     base(0xFFFFFFFF);
4110     index(0x4);
4111     scale(0x0);
4112     disp($addr);
4113   %}
4114 %}
4115 
4116 // Indirect Memory Operand
4117 operand indirect(eRegP reg) %{
4118   constraint(ALLOC_IN_RC(int_reg));
4119   match(reg);
4120 
4121   format %{ "[$reg]" %}
4122   interface(MEMORY_INTER) %{
4123     base($reg);
4124     index(0x4);
4125     scale(0x0);
4126     disp(0x0);
4127   %}
4128 %}
4129 
4130 // Indirect Memory Plus Short Offset Operand
4131 operand indOffset8(eRegP reg, immI8 off) %{
4132   match(AddP reg off);
4133 
4134   format %{ "[$reg + $off]" %}
4135   interface(MEMORY_INTER) %{
4136     base($reg);
4137     index(0x4);
4138     scale(0x0);
4139     disp($off);
4140   %}
4141 %}
4142 
4143 // Indirect Memory Plus Long Offset Operand
4144 operand indOffset32(eRegP reg, immI off) %{
4145   match(AddP reg off);
4146 
4147   format %{ "[$reg + $off]" %}
4148   interface(MEMORY_INTER) %{
4149     base($reg);
4150     index(0x4);
4151     scale(0x0);
4152     disp($off);
4153   %}
4154 %}
4155 
4156 // Indirect Memory Plus Long Offset Operand
4157 operand indOffset32X(rRegI reg, immP off) %{
4158   match(AddP off reg);
4159 
4160   format %{ "[$reg + $off]" %}
4161   interface(MEMORY_INTER) %{
4162     base($reg);
4163     index(0x4);
4164     scale(0x0);
4165     disp($off);
4166   %}
4167 %}
4168 
4169 // Indirect Memory Plus Index Register Plus Offset Operand
4170 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4171   match(AddP (AddP reg ireg) off);
4172 
4173   op_cost(10);
4174   format %{"[$reg + $off + $ireg]" %}
4175   interface(MEMORY_INTER) %{
4176     base($reg);
4177     index($ireg);
4178     scale(0x0);
4179     disp($off);
4180   %}
4181 %}
4182 
4183 // Indirect Memory Plus Index Register Plus Offset Operand
4184 operand indIndex(eRegP reg, rRegI ireg) %{
4185   match(AddP reg ireg);
4186 
4187   op_cost(10);
4188   format %{"[$reg + $ireg]" %}
4189   interface(MEMORY_INTER) %{
4190     base($reg);
4191     index($ireg);
4192     scale(0x0);
4193     disp(0x0);
4194   %}
4195 %}
4196 
4197 // // -------------------------------------------------------------------------
4198 // // 486 architecture doesn't support "scale * index + offset" without a base
4199 // // -------------------------------------------------------------------------
4200 // // Scaled Memory Operands
4201 // // Indirect Memory Times Scale Plus Offset Operand
4202 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4203 //   match(AddP off (LShiftI ireg scale));
4204 //
4205 //   op_cost(10);
4206 //   format %{"[$off + $ireg << $scale]" %}
4207 //   interface(MEMORY_INTER) %{
4208 //     base(0x4);
4209 //     index($ireg);
4210 //     scale($scale);
4211 //     disp($off);
4212 //   %}
4213 // %}
4214 
4215 // Indirect Memory Times Scale Plus Index Register
4216 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4217   match(AddP reg (LShiftI ireg scale));
4218 
4219   op_cost(10);
4220   format %{"[$reg + $ireg << $scale]" %}
4221   interface(MEMORY_INTER) %{
4222     base($reg);
4223     index($ireg);
4224     scale($scale);
4225     disp(0x0);
4226   %}
4227 %}
4228 
4229 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4230 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4231   match(AddP (AddP reg (LShiftI ireg scale)) off);
4232 
4233   op_cost(10);
4234   format %{"[$reg + $off + $ireg << $scale]" %}
4235   interface(MEMORY_INTER) %{
4236     base($reg);
4237     index($ireg);
4238     scale($scale);
4239     disp($off);
4240   %}
4241 %}
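
     // Illustrative example (not part of the grammar): a typical int-array
     // element access matches this operand as [$reg + #hdr + $ireg << 2], with
     // $reg holding the array oop, $ireg the index, and #hdr standing in for
     // the (platform-dependent) array header offset.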
4242 
4243 //----------Load Long Memory Operands------------------------------------------
4244 // The load-long idiom will use its address expression again after loading
4245 // the first word of the long.  If the load-long destination overlaps with
4246 // registers used in the addressing expression, the 2nd half will be loaded
4247 // from a clobbered address.  Fix this by requiring that load-long use
4248 // address registers that do not overlap with the load-long target.
4249 
4250 // load-long support
4251 operand load_long_RegP() %{
4252   constraint(ALLOC_IN_RC(esi_reg));
4253   match(RegP);
4254   match(eSIRegP);
4255   op_cost(100);
4256   format %{  %}
4257   interface(REG_INTER);
4258 %}
4259 
4260 // Indirect Memory Operand Long
4261 operand load_long_indirect(load_long_RegP reg) %{
4262   constraint(ALLOC_IN_RC(esi_reg));
4263   match(reg);
4264 
4265   format %{ "[$reg]" %}
4266   interface(MEMORY_INTER) %{
4267     base($reg);
4268     index(0x4);
4269     scale(0x0);
4270     disp(0x0);
4271   %}
4272 %}
4273 
4274 // Indirect Memory Plus Long Offset Operand
4275 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4276   match(AddP reg off);
4277 
4278   format %{ "[$reg + $off]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index(0x4);
4282     scale(0x0);
4283     disp($off);
4284   %}
4285 %}
4286 
4287 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4288 
4289 
4290 //----------Special Memory Operands--------------------------------------------
4291 // Stack Slot Operand - This operand is used for loading and storing temporary
4292 //                      values on the stack where a match requires a value to
4293 //                      flow through memory.
4294 operand stackSlotP(sRegP reg) %{
4295   constraint(ALLOC_IN_RC(stack_slots));
4296   // No match rule because this operand is only generated in matching
4297   format %{ "[$reg]" %}
4298   interface(MEMORY_INTER) %{
4299     base(0x4);   // ESP
4300     index(0x4);  // No Index
4301     scale(0x0);  // No Scale
4302     disp($reg);  // Stack Offset
4303   %}
4304 %}
4305 
4306 operand stackSlotI(sRegI reg) %{
4307   constraint(ALLOC_IN_RC(stack_slots));
4308   // No match rule because this operand is only generated in matching
4309   format %{ "[$reg]" %}
4310   interface(MEMORY_INTER) %{
4311     base(0x4);   // ESP
4312     index(0x4);  // No Index
4313     scale(0x0);  // No Scale
4314     disp($reg);  // Stack Offset
4315   %}
4316 %}
4317 
4318 operand stackSlotF(sRegF reg) %{
4319   constraint(ALLOC_IN_RC(stack_slots));
4320   // No match rule because this operand is only generated in matching
4321   format %{ "[$reg]" %}
4322   interface(MEMORY_INTER) %{
4323     base(0x4);   // ESP
4324     index(0x4);  // No Index
4325     scale(0x0);  // No Scale
4326     disp($reg);  // Stack Offset
4327   %}
4328 %}
4329 
4330 operand stackSlotD(sRegD reg) %{
4331   constraint(ALLOC_IN_RC(stack_slots));
4332   // No match rule because this operand is only generated in matching
4333   format %{ "[$reg]" %}
4334   interface(MEMORY_INTER) %{
4335     base(0x4);   // ESP
4336     index(0x4);  // No Index
4337     scale(0x0);  // No Scale
4338     disp($reg);  // Stack Offset
4339   %}
4340 %}
4341 
4342 operand stackSlotL(sRegL reg) %{
4343   constraint(ALLOC_IN_RC(stack_slots));
4344   // No match rule because this operand is only generated in matching
4345   format %{ "[$reg]" %}
4346   interface(MEMORY_INTER) %{
4347     base(0x4);   // ESP
4348     index(0x4);  // No Index
4349     scale(0x0);  // No Scale
4350     disp($reg);  // Stack Offset
4351   %}
4352 %}
4353 
4354 //----------Memory Operands - Win95 Implicit Null Variants----------------
4355 // Indirect Memory Operand
4356 operand indirect_win95_safe(eRegP_no_EBP reg)
4357 %{
4358   constraint(ALLOC_IN_RC(int_reg));
4359   match(reg);
4360 
4361   op_cost(100);
4362   format %{ "[$reg]" %}
4363   interface(MEMORY_INTER) %{
4364     base($reg);
4365     index(0x4);
4366     scale(0x0);
4367     disp(0x0);
4368   %}
4369 %}
4370 
4371 // Indirect Memory Plus Short Offset Operand
4372 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4373 %{
4374   match(AddP reg off);
4375 
4376   op_cost(100);
4377   format %{ "[$reg + $off]" %}
4378   interface(MEMORY_INTER) %{
4379     base($reg);
4380     index(0x4);
4381     scale(0x0);
4382     disp($off);
4383   %}
4384 %}
4385 
4386 // Indirect Memory Plus Long Offset Operand
4387 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4388 %{
4389   match(AddP reg off);
4390 
4391   op_cost(100);
4392   format %{ "[$reg + $off]" %}
4393   interface(MEMORY_INTER) %{
4394     base($reg);
4395     index(0x4);
4396     scale(0x0);
4397     disp($off);
4398   %}
4399 %}
4400 
4401 // Indirect Memory Plus Index Register Plus Offset Operand
4402 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4403 %{
4404   match(AddP (AddP reg ireg) off);
4405 
4406   op_cost(100);
4407   format %{"[$reg + $off + $ireg]" %}
4408   interface(MEMORY_INTER) %{
4409     base($reg);
4410     index($ireg);
4411     scale(0x0);
4412     disp($off);
4413   %}
4414 %}
4415 
4416 // Indirect Memory Times Scale Plus Index Register
4417 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4418 %{
4419   match(AddP reg (LShiftI ireg scale));
4420 
4421   op_cost(100);
4422   format %{"[$reg + $ireg << $scale]" %}
4423   interface(MEMORY_INTER) %{
4424     base($reg);
4425     index($ireg);
4426     scale($scale);
4427     disp(0x0);
4428   %}
4429 %}
4430 
4431 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4432 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4433 %{
4434   match(AddP (AddP reg (LShiftI ireg scale)) off);
4435 
4436   op_cost(100);
4437   format %{"[$reg + $off + $ireg << $scale]" %}
4438   interface(MEMORY_INTER) %{
4439     base($reg);
4440     index($ireg);
4441     scale($scale);
4442     disp($off);
4443   %}
4444 %}
4445 
4446 //----------Conditional Branch Operands----------------------------------------
4447 // Comparison Op  - This is the operation of the comparison, and is limited to
4448 //                  the following set of codes:
4449 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4450 //
4451 // Other attributes of the comparison, such as unsignedness, are specified
4452 // by the comparison instruction that sets a condition code flags register.
4453 // That result is represented by a flags operand whose subtype is appropriate
4454 // to the unsignedness (etc.) of the comparison.
4455 //
4456 // Later, the instruction which matches both the Comparison Op (a Bool) and
4457 // the flags (produced by the Cmp) specifies the coding of the comparison op
4458 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4459 
4460 // Comparison Code
4461 operand cmpOp() %{
4462   match(Bool);
4463 
4464   format %{ "" %}
4465   interface(COND_INTER) %{
4466     equal(0x4, "e");
4467     not_equal(0x5, "ne");
4468     less(0xC, "l");
4469     greater_equal(0xD, "ge");
4470     less_equal(0xE, "le");
4471     greater(0xF, "g");
4472     overflow(0x0, "o");
4473     no_overflow(0x1, "no");
4474   %}
4475 %}
4476 
4477 // Comparison Code, unsigned compare.  Used by FP also, with
4478 // C2 (unordered) turned into GT or LT already.  The other bits
4479 // C0 and C3 are turned into Carry & Zero flags.
4480 operand cmpOpU() %{
4481   match(Bool);
4482 
4483   format %{ "" %}
4484   interface(COND_INTER) %{
4485     equal(0x4, "e");
4486     not_equal(0x5, "ne");
4487     less(0x2, "b");
4488     greater_equal(0x3, "nb");
4489     less_equal(0x6, "be");
4490     greater(0x7, "nbe");
4491     overflow(0x0, "o");
4492     no_overflow(0x1, "no");
4493   %}
4494 %}
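
     // Example (informational): once an FP compare result has been moved into
     // EFLAGS (C0 -> CF, C3 -> ZF as noted above), a "less" test uses condition
     // code 0x2 ("b", jump-if-below), since the carry flag stands in for
     // "less than" in these unsigned-style encodings.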
4495 
4496 // Floating comparisons that don't require any fixup for the unordered case
4497 operand cmpOpUCF() %{
4498   match(Bool);
4499   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4500             n->as_Bool()->_test._test == BoolTest::ge ||
4501             n->as_Bool()->_test._test == BoolTest::le ||
4502             n->as_Bool()->_test._test == BoolTest::gt);
4503   format %{ "" %}
4504   interface(COND_INTER) %{
4505     equal(0x4, "e");
4506     not_equal(0x5, "ne");
4507     less(0x2, "b");
4508     greater_equal(0x3, "nb");
4509     less_equal(0x6, "be");
4510     greater(0x7, "nbe");
4511     overflow(0x0, "o");
4512     no_overflow(0x1, "no");
4513   %}
4514 %}
4515 
4516 
4517 // Floating comparisons that can be fixed up with extra conditional jumps
4518 operand cmpOpUCF2() %{
4519   match(Bool);
4520   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4521             n->as_Bool()->_test._test == BoolTest::eq);
4522   format %{ "" %}
4523   interface(COND_INTER) %{
4524     equal(0x4, "e");
4525     not_equal(0x5, "ne");
4526     less(0x2, "b");
4527     greater_equal(0x3, "nb");
4528     less_equal(0x6, "be");
4529     greater(0x7, "nbe");
4530     overflow(0x0, "o");
4531     no_overflow(0x1, "no");
4532   %}
4533 %}
4534 
4535 // Comparison Code for FP conditional move
4536 operand cmpOp_fcmov() %{
4537   match(Bool);
4538 
4539   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4540             n->as_Bool()->_test._test != BoolTest::no_overflow);
4541   format %{ "" %}
4542   interface(COND_INTER) %{
4543     equal        (0x0C8);
4544     not_equal    (0x1C8);
4545     less         (0x0C0);
4546     greater_equal(0x1C0);
4547     less_equal   (0x0D0);
4548     greater      (0x1D0);
4549     overflow(0x0, "o"); // not really supported by the instruction
4550     no_overflow(0x1, "no"); // not really supported by the instruction
4551   %}
4552 %}
4553 
4554 // Comparison Code used in long compares; conditions are mirrored because the flags are produced with the compare operands swapped.
4555 operand cmpOp_commute() %{
4556   match(Bool);
4557 
4558   format %{ "" %}
4559   interface(COND_INTER) %{
4560     equal(0x4, "e");
4561     not_equal(0x5, "ne");
4562     less(0xF, "g");
4563     greater_equal(0xE, "le");
4564     less_equal(0xD, "ge");
4565     greater(0xC, "l");
4566     overflow(0x0, "o");
4567     no_overflow(0x1, "no");
4568   %}
4569 %}
4570 
4571 //----------OPERAND CLASSES----------------------------------------------------
4572 // Operand Classes are groups of operands that are used to simplify
4573 // instruction definitions by not requiring the AD writer to specify separate
4574 // instructions for every form of operand when the instruction accepts
4575 // multiple operand types with the same basic encoding and format.  The classic
4576 // case of this is memory operands.
4577 
4578 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4579                indIndex, indIndexScale, indIndexScaleOffset);
4580 
4581 // Long memory operations are encoded in 2 instructions and a +4 offset.
4582 // This means some kind of offset is always required and you cannot use
4583 // an oop as the offset (as is done when working on static globals).
4584 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4585                     indIndex, indIndexScale, indIndexScaleOffset);
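
// For example, the loadI instruction below declares a single "memory mem"
// operand and therefore matches integer loads through any of the addressing
// forms listed in the memory opclass above.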
4586 
4587 
4588 //----------PIPELINE-----------------------------------------------------------
4589 // Rules which define the behavior of the target architecture's pipeline.
4590 pipeline %{
4591 
4592 //----------ATTRIBUTES---------------------------------------------------------
4593 attributes %{
4594   variable_size_instructions;        // Instructions are of variable size
4595   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4596   instruction_unit_size = 1;         // An instruction is 1 byte long
4597   instruction_fetch_unit_size = 16;  // The processor fetches one line
4598   instruction_fetch_units = 1;       // of 16 bytes
4599 
4600   // List of nop instructions
4601   nops( MachNop );
4602 %}
4603 
4604 //----------RESOURCES----------------------------------------------------------
4605 // Resources are the functional units available to the machine
4606 
4607 // Generic P2/P3 pipeline
4608 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4609 // 3 instructions decoded per cycle.
4610 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4611 // 2 ALU ops, only ALU0 handles mul/div instructions.
4612 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4613            MS0, MS1, MEM = MS0 | MS1,
4614            BR, FPU,
4615            ALU0, ALU1, ALU = ALU0 | ALU1 );
4616 
4617 //----------PIPELINE DESCRIPTION-----------------------------------------------
4618 // Pipeline Description specifies the stages in the machine's pipeline
4619 
4620 // Generic P2/P3 pipeline
4621 pipe_desc(S0, S1, S2, S3, S4, S5);
4622 
4623 //----------PIPELINE CLASSES---------------------------------------------------
4624 // Pipeline Classes describe the stages in which input and output are
4625 // referenced by the hardware pipeline.
4626 
4627 // Naming convention: ialu or fpu
4628 // Then: _reg
4629 // Then: _reg if there is a 2nd register
4630 // Then: _long if it's a pair of instructions implementing a long
4631 // Then: _fat if it requires the big decoder
4632 //   Or: _mem if it requires the big decoder and a memory unit.
4633 
4634 // Integer ALU reg operation
4635 pipe_class ialu_reg(rRegI dst) %{
4636     single_instruction;
4637     dst    : S4(write);
4638     dst    : S3(read);
4639     DECODE : S0;        // any decoder
4640     ALU    : S3;        // any alu
4641 %}
4642 
4643 // Long ALU reg operation
4644 pipe_class ialu_reg_long(eRegL dst) %{
4645     instruction_count(2);
4646     dst    : S4(write);
4647     dst    : S3(read);
4648     DECODE : S0(2);     // any 2 decoders
4649     ALU    : S3(2);     // both alus
4650 %}
4651 
4652 // Integer ALU reg operation using big decoder
4653 pipe_class ialu_reg_fat(rRegI dst) %{
4654     single_instruction;
4655     dst    : S4(write);
4656     dst    : S3(read);
4657     D0     : S0;        // big decoder only
4658     ALU    : S3;        // any alu
4659 %}
4660 
4661 // Long ALU reg operation using big decoder
4662 pipe_class ialu_reg_long_fat(eRegL dst) %{
4663     instruction_count(2);
4664     dst    : S4(write);
4665     dst    : S3(read);
4666     D0     : S0(2);     // big decoder only; twice
4667     ALU    : S3(2);     // any 2 alus
4668 %}
4669 
4670 // Integer ALU reg-reg operation
4671 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4672     single_instruction;
4673     dst    : S4(write);
4674     src    : S3(read);
4675     DECODE : S0;        // any decoder
4676     ALU    : S3;        // any alu
4677 %}
4678 
4679 // Long ALU reg-reg operation
4680 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4681     instruction_count(2);
4682     dst    : S4(write);
4683     src    : S3(read);
4684     DECODE : S0(2);     // any 2 decoders
4685     ALU    : S3(2);     // both alus
4686 %}
4687 
4688 // Integer ALU reg-reg operation using big decoder
4689 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4690     single_instruction;
4691     dst    : S4(write);
4692     src    : S3(read);
4693     D0     : S0;        // big decoder only
4694     ALU    : S3;        // any alu
4695 %}
4696 
4697 // Long ALU reg-reg operation using big decoder
4698 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4699     instruction_count(2);
4700     dst    : S4(write);
4701     src    : S3(read);
4702     D0     : S0(2);     // big decoder only; twice
4703     ALU    : S3(2);     // both alus
4704 %}
4705 
4706 // Integer ALU reg-mem operation
4707 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4708     single_instruction;
4709     dst    : S5(write);
4710     mem    : S3(read);
4711     D0     : S0;        // big decoder only
4712     ALU    : S4;        // any alu
4713     MEM    : S3;        // any mem
4714 %}
4715 
4716 // Long ALU reg-mem operation
4717 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4718     instruction_count(2);
4719     dst    : S5(write);
4720     mem    : S3(read);
4721     D0     : S0(2);     // big decoder only; twice
4722     ALU    : S4(2);     // any 2 alus
4723     MEM    : S3(2);     // both mems
4724 %}
4725 
4726 // Integer mem operation (prefetch)
4727 pipe_class ialu_mem(memory mem)
4728 %{
4729     single_instruction;
4730     mem    : S3(read);
4731     D0     : S0;        // big decoder only
4732     MEM    : S3;        // any mem
4733 %}
4734 
4735 // Integer Store to Memory
4736 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4737     single_instruction;
4738     mem    : S3(read);
4739     src    : S5(read);
4740     D0     : S0;        // big decoder only
4741     ALU    : S4;        // any alu
4742     MEM    : S3;
4743 %}
4744 
4745 // Long Store to Memory
4746 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4747     instruction_count(2);
4748     mem    : S3(read);
4749     src    : S5(read);
4750     D0     : S0(2);     // big decoder only; twice
4751     ALU    : S4(2);     // any 2 alus
4752     MEM    : S3(2);     // Both mems
4753 %}
4754 
4755 // Integer Store to Memory
4756 pipe_class ialu_mem_imm(memory mem) %{
4757     single_instruction;
4758     mem    : S3(read);
4759     D0     : S0;        // big decoder only
4760     ALU    : S4;        // any alu
4761     MEM    : S3;
4762 %}
4763 
4764 // Integer ALU0 reg-reg operation
4765 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4766     single_instruction;
4767     dst    : S4(write);
4768     src    : S3(read);
4769     D0     : S0;        // Big decoder only
4770     ALU0   : S3;        // only alu0
4771 %}
4772 
4773 // Integer ALU0 reg-mem operation
4774 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4775     single_instruction;
4776     dst    : S5(write);
4777     mem    : S3(read);
4778     D0     : S0;        // big decoder only
4779     ALU0   : S4;        // ALU0 only
4780     MEM    : S3;        // any mem
4781 %}
4782 
4783 // Integer ALU reg-reg operation
4784 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4785     single_instruction;
4786     cr     : S4(write);
4787     src1   : S3(read);
4788     src2   : S3(read);
4789     DECODE : S0;        // any decoder
4790     ALU    : S3;        // any alu
4791 %}
4792 
4793 // Integer ALU reg-imm operation
4794 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4795     single_instruction;
4796     cr     : S4(write);
4797     src1   : S3(read);
4798     DECODE : S0;        // any decoder
4799     ALU    : S3;        // any alu
4800 %}
4801 
4802 // Integer ALU reg-mem operation
4803 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4804     single_instruction;
4805     cr     : S4(write);
4806     src1   : S3(read);
4807     src2   : S3(read);
4808     D0     : S0;        // big decoder only
4809     ALU    : S4;        // any alu
4810     MEM    : S3;
4811 %}
4812 
4813 // Conditional move reg-reg
4814 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4815     instruction_count(4);
4816     y      : S4(read);
4817     q      : S3(read);
4818     p      : S3(read);
4819     DECODE : S0(4);     // any decoder
4820 %}
4821 
4822 // Conditional move reg-reg
4823 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4824     single_instruction;
4825     dst    : S4(write);
4826     src    : S3(read);
4827     cr     : S3(read);
4828     DECODE : S0;        // any decoder
4829 %}
4830 
4831 // Conditional move reg-mem
4832 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4833     single_instruction;
4834     dst    : S4(write);
4835     src    : S3(read);
4836     cr     : S3(read);
4837     DECODE : S0;        // any decoder
4838     MEM    : S3;
4839 %}
4840 
4841 // Conditional move reg-reg long
4842 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4843     single_instruction;
4844     dst    : S4(write);
4845     src    : S3(read);
4846     cr     : S3(read);
4847     DECODE : S0(2);     // any 2 decoders
4848 %}
4849 
4850 // Conditional move double reg-reg
4851 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4852     single_instruction;
4853     dst    : S4(write);
4854     src    : S3(read);
4855     cr     : S3(read);
4856     DECODE : S0;        // any decoder
4857 %}
4858 
4859 // Float reg-reg operation
4860 pipe_class fpu_reg(regDPR dst) %{
4861     instruction_count(2);
4862     dst    : S3(read);
4863     DECODE : S0(2);     // any 2 decoders
4864     FPU    : S3;
4865 %}
4866 
4867 // Float reg-reg operation
4868 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4869     instruction_count(2);
4870     dst    : S4(write);
4871     src    : S3(read);
4872     DECODE : S0(2);     // any 2 decoders
4873     FPU    : S3;
4874 %}
4875 
4876 // Float reg-reg operation
4877 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4878     instruction_count(3);
4879     dst    : S4(write);
4880     src1   : S3(read);
4881     src2   : S3(read);
4882     DECODE : S0(3);     // any 3 decoders
4883     FPU    : S3(2);
4884 %}
4885 
4886 // Float reg-reg operation
4887 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4888     instruction_count(4);
4889     dst    : S4(write);
4890     src1   : S3(read);
4891     src2   : S3(read);
4892     src3   : S3(read);
4893     DECODE : S0(4);     // any decoder
4894     FPU    : S3(2);
4895 %}
4896 
4897 // Float reg-reg operation
4898 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4899     instruction_count(4);
4900     dst    : S4(write);
4901     src1   : S3(read);
4902     src2   : S3(read);
4903     src3   : S3(read);
4904     DECODE : S1(3);     // any 3 decoders
4905     D0     : S0;        // Big decoder only
4906     FPU    : S3(2);
4907     MEM    : S3;
4908 %}
4909 
4910 // Float reg-mem operation
4911 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4912     instruction_count(2);
4913     dst    : S5(write);
4914     mem    : S3(read);
4915     D0     : S0;        // big decoder only
4916     DECODE : S1;        // any decoder for FPU POP
4917     FPU    : S4;
4918     MEM    : S3;        // any mem
4919 %}
4920 
4921 // Float reg-mem operation
4922 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4923     instruction_count(3);
4924     dst    : S5(write);
4925     src1   : S3(read);
4926     mem    : S3(read);
4927     D0     : S0;        // big decoder only
4928     DECODE : S1(2);     // any decoder for FPU POP
4929     FPU    : S4;
4930     MEM    : S3;        // any mem
4931 %}
4932 
4933 // Float mem-reg operation
4934 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4935     instruction_count(2);
4936     src    : S5(read);
4937     mem    : S3(read);
4938     DECODE : S0;        // any decoder for FPU PUSH
4939     D0     : S1;        // big decoder only
4940     FPU    : S4;
4941     MEM    : S3;        // any mem
4942 %}
4943 
4944 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4945     instruction_count(3);
4946     src1   : S3(read);
4947     src2   : S3(read);
4948     mem    : S3(read);
4949     DECODE : S0(2);     // any decoder for FPU PUSH
4950     D0     : S1;        // big decoder only
4951     FPU    : S4;
4952     MEM    : S3;        // any mem
4953 %}
4954 
4955 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4956     instruction_count(3);
4957     src1   : S3(read);
4958     src2   : S3(read);
4959     mem    : S4(read);
4960     DECODE : S0;        // any decoder for FPU PUSH
4961     D0     : S0(2);     // big decoder only
4962     FPU    : S4;
4963     MEM    : S3(2);     // any mem
4964 %}
4965 
4966 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4967     instruction_count(2);
4968     src1   : S3(read);
4969     dst    : S4(read);
4970     D0     : S0(2);     // big decoder only
4971     MEM    : S3(2);     // any mem
4972 %}
4973 
4974 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4975     instruction_count(3);
4976     src1   : S3(read);
4977     src2   : S3(read);
4978     dst    : S4(read);
4979     D0     : S0(3);     // big decoder only
4980     FPU    : S4;
4981     MEM    : S3(3);     // any mem
4982 %}
4983 
4984 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4985     instruction_count(3);
4986     src1   : S4(read);
4987     mem    : S4(read);
4988     DECODE : S0;        // any decoder for FPU PUSH
4989     D0     : S0(2);     // big decoder only
4990     FPU    : S4;
4991     MEM    : S3(2);     // any mem
4992 %}
4993 
4994 // Float load constant
4995 pipe_class fpu_reg_con(regDPR dst) %{
4996     instruction_count(2);
4997     dst    : S5(write);
4998     D0     : S0;        // big decoder only for the load
4999     DECODE : S1;        // any decoder for FPU POP
5000     FPU    : S4;
5001     MEM    : S3;        // any mem
5002 %}
5003 
5004 // Float load constant
5005 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5006     instruction_count(3);
5007     dst    : S5(write);
5008     src    : S3(read);
5009     D0     : S0;        // big decoder only for the load
5010     DECODE : S1(2);     // any decoder for FPU POP
5011     FPU    : S4;
5012     MEM    : S3;        // any mem
5013 %}
5014 
5015 // Unconditional branch
5016 pipe_class pipe_jmp( label labl ) %{
5017     single_instruction;
5018     BR   : S3;
5019 %}
5020 
5021 // Conditional branch
5022 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5023     single_instruction;
5024     cr    : S1(read);
5025     BR    : S3;
5026 %}
5027 
5028 // Allocation idiom
5029 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5030     instruction_count(1); force_serialization;
5031     fixed_latency(6);
5032     heap_ptr : S3(read);
5033     DECODE   : S0(3);
5034     D0       : S2;
5035     MEM      : S3;
5036     ALU      : S3(2);
5037     dst      : S5(write);
5038     BR       : S5;
5039 %}
5040 
5041 // Generic big/slow expanded idiom
5042 pipe_class pipe_slow(  ) %{
5043     instruction_count(10); multiple_bundles; force_serialization;
5044     fixed_latency(100);
5045     D0  : S0(2);
5046     MEM : S3(2);
5047 %}
5048 
5049 // The real do-nothing guy
5050 pipe_class empty( ) %{
5051     instruction_count(0);
5052 %}
5053 
5054 // Define the class for the Nop node
5055 define %{
5056    MachNop = empty;
5057 %}
5058 
5059 %}
5060 
5061 //----------INSTRUCTIONS-------------------------------------------------------
5062 //
5063 // match      -- States which machine-independent subtree may be replaced
5064 //               by this instruction.
5065 // ins_cost   -- The estimated cost of this instruction is used by instruction
5066 //               selection to identify a minimum cost tree of machine
5067 //               instructions that matches a tree of machine-independent
5068 //               instructions.
5069 // format     -- A string providing the disassembly for this instruction.
5070 //               The value of an instruction's operand may be inserted
5071 //               by referring to it with a '$' prefix.
5072 // opcode     -- Three instruction opcodes may be provided.  These are referred
5073 //               to within an encode class as $primary, $secondary, and $tertiary
5074 //               respectively.  The primary opcode is commonly used to
5075 //               indicate the type of machine instruction, while secondary
5076 //               and tertiary are often used for prefix options or addressing
5077 //               modes.
5078 // ins_encode -- A list of encode classes with parameters. The encode class
5079 //               name must have been defined in an 'enc_class' specification
5080 //               in the encode section of the architecture description.
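//
// Two encoding styles appear below: some instructions use opcode() together
// with named encode classes (e.g. OpcP, RegMem), while others emit code
// directly through the MacroAssembler inside an ins_encode %{ ... %} block.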
5081 
5082 //----------BSWAP-Instruction--------------------------------------------------
5083 instruct bytes_reverse_int(rRegI dst) %{
5084   match(Set dst (ReverseBytesI dst));
5085 
5086   format %{ "BSWAP  $dst" %}
5087   opcode(0x0F, 0xC8);
5088   ins_encode( OpcP, OpcSReg(dst) );
5089   ins_pipe( ialu_reg );
5090 %}
5091 
5092 instruct bytes_reverse_long(eRegL dst) %{
5093   match(Set dst (ReverseBytesL dst));
5094 
5095   format %{ "BSWAP  $dst.lo\n\t"
5096             "BSWAP  $dst.hi\n\t"
5097             "XCHG   $dst.lo $dst.hi" %}
5098 
5099   ins_cost(125);
5100   ins_encode( bswap_long_bytes(dst) );
5101   ins_pipe( ialu_reg_reg);
5102 %}
5103 
5104 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5105   match(Set dst (ReverseBytesUS dst));
5106   effect(KILL cr);
5107 
5108   format %{ "BSWAP  $dst\n\t"
5109             "SHR    $dst,16\n\t" %}
5110   ins_encode %{
5111     __ bswapl($dst$$Register);
5112     __ shrl($dst$$Register, 16);
5113   %}
5114   ins_pipe( ialu_reg );
5115 %}
5116 
5117 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5118   match(Set dst (ReverseBytesS dst));
5119   effect(KILL cr);
5120 
5121   format %{ "BSWAP  $dst\n\t"
5122             "SAR    $dst,16\n\t" %}
5123   ins_encode %{
5124     __ bswapl($dst$$Register);
5125     __ sarl($dst$$Register, 16);
5126   %}
5127   ins_pipe( ialu_reg );
5128 %}
5129 
5130 
5131 //---------- Zeros Count Instructions ------------------------------------------
5132 
5133 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5134   predicate(UseCountLeadingZerosInstruction);
5135   match(Set dst (CountLeadingZerosI src));
5136   effect(KILL cr);
5137 
5138   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5139   ins_encode %{
5140     __ lzcntl($dst$$Register, $src$$Register);
5141   %}
5142   ins_pipe(ialu_reg);
5143 %}
5144 
5145 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5146   predicate(!UseCountLeadingZerosInstruction);
5147   match(Set dst (CountLeadingZerosI src));
5148   effect(KILL cr);
5149 
5150   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5151             "JNZ    skip\n\t"
5152             "MOV    $dst, -1\n"
5153       "skip:\n\t"
5154             "NEG    $dst\n\t"
5155             "ADD    $dst, 31" %}
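  // BSR leaves $dst undefined and sets ZF when $src is zero, so the zero case
  // is patched to -1; NEG/ADD then turn a bit index i into 31 - i, which also
  // maps the -1 of a zero input to the required 32.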
5156   ins_encode %{
5157     Register Rdst = $dst$$Register;
5158     Register Rsrc = $src$$Register;
5159     Label skip;
5160     __ bsrl(Rdst, Rsrc);
5161     __ jccb(Assembler::notZero, skip);
5162     __ movl(Rdst, -1);
5163     __ bind(skip);
5164     __ negl(Rdst);
5165     __ addl(Rdst, BitsPerInt - 1);
5166   %}
5167   ins_pipe(ialu_reg);
5168 %}
5169 
5170 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5171   predicate(UseCountLeadingZerosInstruction);
5172   match(Set dst (CountLeadingZerosL src));
5173   effect(TEMP dst, KILL cr);
5174 
5175   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5176             "JNC    done\n\t"
5177             "LZCNT  $dst, $src.lo\n\t"
5178             "ADD    $dst, 32\n"
5179       "done:" %}
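  // LZCNT sets the carry flag when its source is zero, so JNC exits early
  // once $src.hi has supplied the count; otherwise the low word is counted
  // and 32 is added.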
5180   ins_encode %{
5181     Register Rdst = $dst$$Register;
5182     Register Rsrc = $src$$Register;
5183     Label done;
5184     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5185     __ jccb(Assembler::carryClear, done);
5186     __ lzcntl(Rdst, Rsrc);
5187     __ addl(Rdst, BitsPerInt);
5188     __ bind(done);
5189   %}
5190   ins_pipe(ialu_reg);
5191 %}
5192 
5193 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5194   predicate(!UseCountLeadingZerosInstruction);
5195   match(Set dst (CountLeadingZerosL src));
5196   effect(TEMP dst, KILL cr);
5197 
5198   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5199             "JZ     msw_is_zero\n\t"
5200             "ADD    $dst, 32\n\t"
5201             "JMP    not_zero\n"
5202       "msw_is_zero:\n\t"
5203             "BSR    $dst, $src.lo\n\t"
5204             "JNZ    not_zero\n\t"
5205             "MOV    $dst, -1\n"
5206       "not_zero:\n\t"
5207             "NEG    $dst\n\t"
5208             "ADD    $dst, 63\n" %}
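  // NEG/ADD turn the accumulated bit index (32 is added first when the high
  // word is non-zero) into 63 - index; an all-zero input reaches NEG as -1
  // and therefore yields 64.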
5209  ins_encode %{
5210     Register Rdst = $dst$$Register;
5211     Register Rsrc = $src$$Register;
5212     Label msw_is_zero;
5213     Label not_zero;
5214     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5215     __ jccb(Assembler::zero, msw_is_zero);
5216     __ addl(Rdst, BitsPerInt);
5217     __ jmpb(not_zero);
5218     __ bind(msw_is_zero);
5219     __ bsrl(Rdst, Rsrc);
5220     __ jccb(Assembler::notZero, not_zero);
5221     __ movl(Rdst, -1);
5222     __ bind(not_zero);
5223     __ negl(Rdst);
5224     __ addl(Rdst, BitsPerLong - 1);
5225   %}
5226   ins_pipe(ialu_reg);
5227 %}
5228 
5229 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5230   predicate(UseCountTrailingZerosInstruction);
5231   match(Set dst (CountTrailingZerosI src));
5232   effect(KILL cr);
5233 
5234   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5235   ins_encode %{
5236     __ tzcntl($dst$$Register, $src$$Register);
5237   %}
5238   ins_pipe(ialu_reg);
5239 %}
5240 
5241 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5242   predicate(!UseCountTrailingZerosInstruction);
5243   match(Set dst (CountTrailingZerosI src));
5244   effect(KILL cr);
5245 
5246   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5247             "JNZ    done\n\t"
5248             "MOV    $dst, 32\n"
5249       "done:" %}
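  // BSF leaves $dst undefined and sets ZF when $src is zero, so the zero case
  // is patched to 32 after the scan.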
5250   ins_encode %{
5251     Register Rdst = $dst$$Register;
5252     Label done;
5253     __ bsfl(Rdst, $src$$Register);
5254     __ jccb(Assembler::notZero, done);
5255     __ movl(Rdst, BitsPerInt);
5256     __ bind(done);
5257   %}
5258   ins_pipe(ialu_reg);
5259 %}
5260 
5261 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5262   predicate(UseCountTrailingZerosInstruction);
5263   match(Set dst (CountTrailingZerosL src));
5264   effect(TEMP dst, KILL cr);
5265 
5266   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5267             "JNC    done\n\t"
5268             "TZCNT  $dst, $src.hi\n\t"
5269             "ADD    $dst, 32\n"
5270             "done:" %}
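  // TZCNT sets the carry flag when its source is zero, so JNC exits as soon
  // as the low word produces a count; otherwise the high word is counted and
  // 32 is added.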
5271   ins_encode %{
5272     Register Rdst = $dst$$Register;
5273     Register Rsrc = $src$$Register;
5274     Label done;
5275     __ tzcntl(Rdst, Rsrc);
5276     __ jccb(Assembler::carryClear, done);
5277     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5278     __ addl(Rdst, BitsPerInt);
5279     __ bind(done);
5280   %}
5281   ins_pipe(ialu_reg);
5282 %}
5283 
5284 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5285   predicate(!UseCountTrailingZerosInstruction);
5286   match(Set dst (CountTrailingZerosL src));
5287   effect(TEMP dst, KILL cr);
5288 
5289   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5290             "JNZ    done\n\t"
5291             "BSF    $dst, $src.hi\n\t"
5292             "JNZ    msw_not_zero\n\t"
5293             "MOV    $dst, 32\n"
5294       "msw_not_zero:\n\t"
5295             "ADD    $dst, 32\n"
5296       "done:" %}
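  // If the low word is zero the high word is scanned and 32 is added; an
  // all-zero input takes the "MOV $dst, 32" path and ends up with 64.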
5297   ins_encode %{
5298     Register Rdst = $dst$$Register;
5299     Register Rsrc = $src$$Register;
5300     Label msw_not_zero;
5301     Label done;
5302     __ bsfl(Rdst, Rsrc);
5303     __ jccb(Assembler::notZero, done);
5304     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5305     __ jccb(Assembler::notZero, msw_not_zero);
5306     __ movl(Rdst, BitsPerInt);
5307     __ bind(msw_not_zero);
5308     __ addl(Rdst, BitsPerInt);
5309     __ bind(done);
5310   %}
5311   ins_pipe(ialu_reg);
5312 %}
5313 
5314 
5315 //---------- Population Count Instructions -------------------------------------
5316 
5317 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5318   predicate(UsePopCountInstruction);
5319   match(Set dst (PopCountI src));
5320   effect(KILL cr);
5321 
5322   format %{ "POPCNT $dst, $src" %}
5323   ins_encode %{
5324     __ popcntl($dst$$Register, $src$$Register);
5325   %}
5326   ins_pipe(ialu_reg);
5327 %}
5328 
5329 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5330   predicate(UsePopCountInstruction);
5331   match(Set dst (PopCountI (LoadI mem)));
5332   effect(KILL cr);
5333 
5334   format %{ "POPCNT $dst, $mem" %}
5335   ins_encode %{
5336     __ popcntl($dst$$Register, $mem$$Address);
5337   %}
5338   ins_pipe(ialu_reg);
5339 %}
5340 
5341 // Note: Long.bitCount(long) returns an int.
5342 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5343   predicate(UsePopCountInstruction);
5344   match(Set dst (PopCountL src));
5345   effect(KILL cr, TEMP tmp, TEMP dst);
5346 
5347   format %{ "POPCNT $dst, $src.lo\n\t"
5348             "POPCNT $tmp, $src.hi\n\t"
5349             "ADD    $dst, $tmp" %}
5350   ins_encode %{
5351     __ popcntl($dst$$Register, $src$$Register);
5352     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5353     __ addl($dst$$Register, $tmp$$Register);
5354   %}
5355   ins_pipe(ialu_reg);
5356 %}
5357 
5358 // Note: Long.bitCount(long) returns an int.
5359 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5360   predicate(UsePopCountInstruction);
5361   match(Set dst (PopCountL (LoadL mem)));
5362   effect(KILL cr, TEMP tmp, TEMP dst);
5363 
5364   format %{ "POPCNT $dst, $mem\n\t"
5365             "POPCNT $tmp, $mem+4\n\t"
5366             "ADD    $dst, $tmp" %}
5367   ins_encode %{
5368     //__ popcntl($dst$$Register, $mem$$Address$$first);
5369     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5370     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5371     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5372     __ addl($dst$$Register, $tmp$$Register);
5373   %}
5374   ins_pipe(ialu_reg);
5375 %}
5376 
5377 
5378 //----------Load/Store/Move Instructions---------------------------------------
5379 //----------Load Instructions--------------------------------------------------
5380 // Load Byte (8bit signed)
5381 instruct loadB(xRegI dst, memory mem) %{
5382   match(Set dst (LoadB mem));
5383 
5384   ins_cost(125);
5385   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5386 
5387   ins_encode %{
5388     __ movsbl($dst$$Register, $mem$$Address);
5389   %}
5390 
5391   ins_pipe(ialu_reg_mem);
5392 %}
5393 
5394 // Load Byte (8bit signed) into Long Register
5395 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5396   match(Set dst (ConvI2L (LoadB mem)));
5397   effect(KILL cr);
5398 
5399   ins_cost(375);
5400   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5401             "MOV    $dst.hi,$dst.lo\n\t"
5402             "SAR    $dst.hi,7" %}
5403 
5404   ins_encode %{
5405     __ movsbl($dst$$Register, $mem$$Address);
5406     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5407     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
5408   %}
5409 
5410   ins_pipe(ialu_reg_mem);
5411 %}
5412 
5413 // Load Unsigned Byte (8bit UNsigned)
5414 instruct loadUB(xRegI dst, memory mem) %{
5415   match(Set dst (LoadUB mem));
5416 
5417   ins_cost(125);
5418   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5419 
5420   ins_encode %{
5421     __ movzbl($dst$$Register, $mem$$Address);
5422   %}
5423 
5424   ins_pipe(ialu_reg_mem);
5425 %}
5426 
5427 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5428 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5429   match(Set dst (ConvI2L (LoadUB mem)));
5430   effect(KILL cr);
5431 
5432   ins_cost(250);
5433   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5434             "XOR    $dst.hi,$dst.hi" %}
5435 
5436   ins_encode %{
5437     Register Rdst = $dst$$Register;
5438     __ movzbl(Rdst, $mem$$Address);
5439     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5440   %}
5441 
5442   ins_pipe(ialu_reg_mem);
5443 %}
5444 
5445 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5446 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5447   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5448   effect(KILL cr);
5449 
5450   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5451             "XOR    $dst.hi,$dst.hi\n\t"
5452             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5453   ins_encode %{
5454     Register Rdst = $dst$$Register;
5455     __ movzbl(Rdst, $mem$$Address);
5456     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5457     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5458   %}
5459   ins_pipe(ialu_reg_mem);
5460 %}
5461 
5462 // Load Short (16bit signed)
5463 instruct loadS(rRegI dst, memory mem) %{
5464   match(Set dst (LoadS mem));
5465 
5466   ins_cost(125);
5467   format %{ "MOVSX  $dst,$mem\t# short" %}
5468 
5469   ins_encode %{
5470     __ movswl($dst$$Register, $mem$$Address);
5471   %}
5472 
5473   ins_pipe(ialu_reg_mem);
5474 %}
5475 
5476 // Load Short (16 bit signed) to Byte (8 bit signed)
5477 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5478   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5479 
5480   ins_cost(125);
5481   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5482   ins_encode %{
5483     __ movsbl($dst$$Register, $mem$$Address);
5484   %}
5485   ins_pipe(ialu_reg_mem);
5486 %}
5487 
5488 // Load Short (16bit signed) into Long Register
5489 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5490   match(Set dst (ConvI2L (LoadS mem)));
5491   effect(KILL cr);
5492 
5493   ins_cost(375);
5494   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5495             "MOV    $dst.hi,$dst.lo\n\t"
5496             "SAR    $dst.hi,15" %}
5497 
5498   ins_encode %{
5499     __ movswl($dst$$Register, $mem$$Address);
5500     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5501     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
5502   %}
5503 
5504   ins_pipe(ialu_reg_mem);
5505 %}
5506 
5507 // Load Unsigned Short/Char (16bit unsigned)
5508 instruct loadUS(rRegI dst, memory mem) %{
5509   match(Set dst (LoadUS mem));
5510 
5511   ins_cost(125);
5512   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5513 
5514   ins_encode %{
5515     __ movzwl($dst$$Register, $mem$$Address);
5516   %}
5517 
5518   ins_pipe(ialu_reg_mem);
5519 %}
5520 
5521 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5522 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5523   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5524 
5525   ins_cost(125);
5526   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5527   ins_encode %{
5528     __ movsbl($dst$$Register, $mem$$Address);
5529   %}
5530   ins_pipe(ialu_reg_mem);
5531 %}
5532 
5533 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5534 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5535   match(Set dst (ConvI2L (LoadUS mem)));
5536   effect(KILL cr);
5537 
5538   ins_cost(250);
5539   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5540             "XOR    $dst.hi,$dst.hi" %}
5541 
5542   ins_encode %{
5543     __ movzwl($dst$$Register, $mem$$Address);
5544     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5545   %}
5546 
5547   ins_pipe(ialu_reg_mem);
5548 %}
5549 
5550 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5551 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5552   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5553   effect(KILL cr);
5554 
5555   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5556             "XOR    $dst.hi,$dst.hi" %}
5557   ins_encode %{
5558     Register Rdst = $dst$$Register;
5559     __ movzbl(Rdst, $mem$$Address);
5560     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5561   %}
5562   ins_pipe(ialu_reg_mem);
5563 %}
5564 
5565 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5566 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5567   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5568   effect(KILL cr);
5569 
5570   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5571             "XOR    $dst.hi,$dst.hi\n\t"
5572             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5573   ins_encode %{
5574     Register Rdst = $dst$$Register;
5575     __ movzwl(Rdst, $mem$$Address);
5576     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5577     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5578   %}
5579   ins_pipe(ialu_reg_mem);
5580 %}
5581 
5582 // Load Integer
5583 instruct loadI(rRegI dst, memory mem) %{
5584   match(Set dst (LoadI mem));
5585 
5586   ins_cost(125);
5587   format %{ "MOV    $dst,$mem\t# int" %}
5588 
5589   ins_encode %{
5590     __ movl($dst$$Register, $mem$$Address);
5591   %}
5592 
5593   ins_pipe(ialu_reg_mem);
5594 %}
5595 
5596 // Load Integer (32 bit signed) to Byte (8 bit signed)
5597 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5598   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5599 
5600   ins_cost(125);
5601   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5602   ins_encode %{
5603     __ movsbl($dst$$Register, $mem$$Address);
5604   %}
5605   ins_pipe(ialu_reg_mem);
5606 %}
5607 
5608 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5609 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5610   match(Set dst (AndI (LoadI mem) mask));
5611 
5612   ins_cost(125);
5613   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5614   ins_encode %{
5615     __ movzbl($dst$$Register, $mem$$Address);
5616   %}
5617   ins_pipe(ialu_reg_mem);
5618 %}
5619 
5620 // Load Integer (32 bit signed) to Short (16 bit signed)
5621 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5622   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5623 
5624   ins_cost(125);
5625   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5626   ins_encode %{
5627     __ movswl($dst$$Register, $mem$$Address);
5628   %}
5629   ins_pipe(ialu_reg_mem);
5630 %}
5631 
5632 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5633 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5634   match(Set dst (AndI (LoadI mem) mask));
5635 
5636   ins_cost(125);
5637   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5638   ins_encode %{
5639     __ movzwl($dst$$Register, $mem$$Address);
5640   %}
5641   ins_pipe(ialu_reg_mem);
5642 %}
5643 
5644 // Load Integer into Long Register
5645 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5646   match(Set dst (ConvI2L (LoadI mem)));
5647   effect(KILL cr);
5648 
5649   ins_cost(375);
5650   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5651             "MOV    $dst.hi,$dst.lo\n\t"
5652             "SAR    $dst.hi,31" %}
5653 
5654   ins_encode %{
5655     __ movl($dst$$Register, $mem$$Address);
5656     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5657     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5658   %}
5659 
5660   ins_pipe(ialu_reg_mem);
5661 %}
5662 
5663 // Load Integer with mask 0xFF into Long Register
5664 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5665   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5666   effect(KILL cr);
5667 
5668   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5669             "XOR    $dst.hi,$dst.hi" %}
5670   ins_encode %{
5671     Register Rdst = $dst$$Register;
5672     __ movzbl(Rdst, $mem$$Address);
5673     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5674   %}
5675   ins_pipe(ialu_reg_mem);
5676 %}
5677 
5678 // Load Integer with mask 0xFFFF into Long Register
5679 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5680   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5681   effect(KILL cr);
5682 
5683   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5684             "XOR    $dst.hi,$dst.hi" %}
5685   ins_encode %{
5686     Register Rdst = $dst$$Register;
5687     __ movzwl(Rdst, $mem$$Address);
5688     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5689   %}
5690   ins_pipe(ialu_reg_mem);
5691 %}
5692 
5693 // Load Integer with 31-bit mask into Long Register
5694 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5695   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5696   effect(KILL cr);
5697 
5698   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5699             "XOR    $dst.hi,$dst.hi\n\t"
5700             "AND    $dst.lo,$mask" %}
5701   ins_encode %{
5702     Register Rdst = $dst$$Register;
5703     __ movl(Rdst, $mem$$Address);
5704     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5705     __ andl(Rdst, $mask$$constant);
5706   %}
5707   ins_pipe(ialu_reg_mem);
5708 %}
5709 
5710 // Load Unsigned Integer into Long Register
5711 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5712   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5713   effect(KILL cr);
5714 
5715   ins_cost(250);
5716   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5717             "XOR    $dst.hi,$dst.hi" %}
5718 
5719   ins_encode %{
5720     __ movl($dst$$Register, $mem$$Address);
5721     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5722   %}
5723 
5724   ins_pipe(ialu_reg_mem);
5725 %}
5726 
5727 // Load Long.  Cannot clobber address while loading, so restrict address
5728 // register to ESI
5729 instruct loadL(eRegL dst, load_long_memory mem) %{
5730   predicate(!((LoadLNode*)n)->require_atomic_access());
5731   match(Set dst (LoadL mem));
5732 
5733   ins_cost(250);
5734   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5735             "MOV    $dst.hi,$mem+4" %}
5736 
5737   ins_encode %{
5738     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5739     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5740     __ movl($dst$$Register, Amemlo);
5741     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5742   %}
5743 
5744   ins_pipe(ialu_reg_long_mem);
5745 %}
5746 
5747 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5748 // then store it down to the stack and reload on the int
5749 // side.
5750 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5751   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5752   match(Set dst (LoadL mem));
5753 
5754   ins_cost(200);
5755   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5756             "FISTp  $dst" %}
5757   ins_encode(enc_loadL_volatile(mem,dst));
5758   ins_pipe( fpu_reg_mem );
5759 %}
5760 
5761 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5762   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5763   match(Set dst (LoadL mem));
5764   effect(TEMP tmp);
5765   ins_cost(180);
5766   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5767             "MOVSD  $dst,$tmp" %}
5768   ins_encode %{
5769     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5770     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5771   %}
5772   ins_pipe( pipe_slow );
5773 %}
5774 
5775 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5776   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5777   match(Set dst (LoadL mem));
5778   effect(TEMP tmp);
5779   ins_cost(160);
5780   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5781             "MOVD   $dst.lo,$tmp\n\t"
5782             "PSRLQ  $tmp,32\n\t"
5783             "MOVD   $dst.hi,$tmp" %}
5784   ins_encode %{
5785     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5786     __ movdl($dst$$Register, $tmp$$XMMRegister);
5787     __ psrlq($tmp$$XMMRegister, 32);
5788     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5789   %}
5790   ins_pipe( pipe_slow );
5791 %}
5792 
5793 // Load Range
5794 instruct loadRange(rRegI dst, memory mem) %{
5795   match(Set dst (LoadRange mem));
5796 
5797   ins_cost(125);
5798   format %{ "MOV    $dst,$mem" %}
5799   opcode(0x8B);
5800   ins_encode( OpcP, RegMem(dst,mem));
5801   ins_pipe( ialu_reg_mem );
5802 %}
5803 
5804 
5805 // Load Pointer
5806 instruct loadP(eRegP dst, memory mem) %{
5807   match(Set dst (LoadP mem));
5808 
5809   ins_cost(125);
5810   format %{ "MOV    $dst,$mem" %}
5811   opcode(0x8B);
5812   ins_encode( OpcP, RegMem(dst,mem));
5813   ins_pipe( ialu_reg_mem );
5814 %}
5815 
5816 // Load Klass Pointer
5817 instruct loadKlass(eRegP dst, memory mem) %{
5818   match(Set dst (LoadKlass mem));
5819 
5820   ins_cost(125);
5821   format %{ "MOV    $dst,$mem" %}
5822   opcode(0x8B);
5823   ins_encode( OpcP, RegMem(dst,mem));
5824   ins_pipe( ialu_reg_mem );
5825 %}
5826 
5827 // Load Double
5828 instruct loadDPR(regDPR dst, memory mem) %{
5829   predicate(UseSSE<=1);
5830   match(Set dst (LoadD mem));
5831 
5832   ins_cost(150);
5833   format %{ "FLD_D  ST,$mem\n\t"
5834             "FSTP   $dst" %}
5835   opcode(0xDD);               /* DD /0 */
5836   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5837               Pop_Reg_DPR(dst) );
5838   ins_pipe( fpu_reg_mem );
5839 %}
5840 
5841 // Load Double to XMM
5842 instruct loadD(regD dst, memory mem) %{
5843   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5844   match(Set dst (LoadD mem));
5845   ins_cost(145);
5846   format %{ "MOVSD  $dst,$mem" %}
5847   ins_encode %{
5848     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5849   %}
5850   ins_pipe( pipe_slow );
5851 %}
5852 
5853 instruct loadD_partial(regD dst, memory mem) %{
5854   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5855   match(Set dst (LoadD mem));
5856   ins_cost(145);
5857   format %{ "MOVLPD $dst,$mem" %}
5858   ins_encode %{
5859     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5860   %}
5861   ins_pipe( pipe_slow );
5862 %}
5863 
5864 // Load to XMM register (single-precision floating point)
5865 // MOVSS instruction
5866 instruct loadF(regF dst, memory mem) %{
5867   predicate(UseSSE>=1);
5868   match(Set dst (LoadF mem));
5869   ins_cost(145);
5870   format %{ "MOVSS  $dst,$mem" %}
5871   ins_encode %{
5872     __ movflt ($dst$$XMMRegister, $mem$$Address);
5873   %}
5874   ins_pipe( pipe_slow );
5875 %}
5876 
5877 // Load Float
5878 instruct loadFPR(regFPR dst, memory mem) %{
5879   predicate(UseSSE==0);
5880   match(Set dst (LoadF mem));
5881 
5882   ins_cost(150);
5883   format %{ "FLD_S  ST,$mem\n\t"
5884             "FSTP   $dst" %}
5885   opcode(0xD9);               /* D9 /0 */
5886   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5887               Pop_Reg_FPR(dst) );
5888   ins_pipe( fpu_reg_mem );
5889 %}
5890 
5891 // Load Effective Address
5892 instruct leaP8(eRegP dst, indOffset8 mem) %{
5893   match(Set dst mem);
5894 
5895   ins_cost(110);
5896   format %{ "LEA    $dst,$mem" %}
5897   opcode(0x8D);
5898   ins_encode( OpcP, RegMem(dst,mem));
5899   ins_pipe( ialu_reg_reg_fat );
5900 %}
5901 
5902 instruct leaP32(eRegP dst, indOffset32 mem) %{
5903   match(Set dst mem);
5904 
5905   ins_cost(110);
5906   format %{ "LEA    $dst,$mem" %}
5907   opcode(0x8D);
5908   ins_encode( OpcP, RegMem(dst,mem));
5909   ins_pipe( ialu_reg_reg_fat );
5910 %}
5911 
5912 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5913   match(Set dst mem);
5914 
5915   ins_cost(110);
5916   format %{ "LEA    $dst,$mem" %}
5917   opcode(0x8D);
5918   ins_encode( OpcP, RegMem(dst,mem));
5919   ins_pipe( ialu_reg_reg_fat );
5920 %}
5921 
5922 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5923   match(Set dst mem);
5924 
5925   ins_cost(110);
5926   format %{ "LEA    $dst,$mem" %}
5927   opcode(0x8D);
5928   ins_encode( OpcP, RegMem(dst,mem));
5929   ins_pipe( ialu_reg_reg_fat );
5930 %}
5931 
5932 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5933   match(Set dst mem);
5934 
5935   ins_cost(110);
5936   format %{ "LEA    $dst,$mem" %}
5937   opcode(0x8D);
5938   ins_encode( OpcP, RegMem(dst,mem));
5939   ins_pipe( ialu_reg_reg_fat );
5940 %}
5941 
5942 // Load Constant
5943 instruct loadConI(rRegI dst, immI src) %{
5944   match(Set dst src);
5945 
5946   format %{ "MOV    $dst,$src" %}
5947   ins_encode( LdImmI(dst, src) );
5948   ins_pipe( ialu_reg_fat );
5949 %}
5950 
5951 // Load Constant zero
5952 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5953   match(Set dst src);
5954   effect(KILL cr);
5955 
5956   ins_cost(50);
5957   format %{ "XOR    $dst,$dst" %}
5958   opcode(0x33);  /* + rd */
5959   ins_encode( OpcP, RegReg( dst, dst ) );
5960   ins_pipe( ialu_reg );
5961 %}
5962 
5963 instruct loadConP(eRegP dst, immP src) %{
5964   match(Set dst src);
5965 
5966   format %{ "MOV    $dst,$src" %}
5967   opcode(0xB8);  /* + rd */
5968   ins_encode( LdImmP(dst, src) );
5969   ins_pipe( ialu_reg_fat );
5970 %}
5971 
5972 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5973   match(Set dst src);
5974   effect(KILL cr);
5975   ins_cost(200);
5976   format %{ "MOV    $dst.lo,$src.lo\n\t"
5977             "MOV    $dst.hi,$src.hi" %}
5978   opcode(0xB8);
5979   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5980   ins_pipe( ialu_reg_long_fat );
5981 %}
5982 
5983 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5984   match(Set dst src);
5985   effect(KILL cr);
5986   ins_cost(150);
5987   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5988             "XOR    $dst.hi,$dst.hi" %}
5989   opcode(0x33,0x33);
5990   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5991   ins_pipe( ialu_reg_long );
5992 %}
5993 
5994 // The instruction usage is guarded by predicate in operand immFPR().
5995 instruct loadConFPR(regFPR dst, immFPR con) %{
5996   match(Set dst con);
5997   ins_cost(125);
5998   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5999             "FSTP   $dst" %}
6000   ins_encode %{
6001     __ fld_s($constantaddress($con));
6002     __ fstp_d($dst$$reg);
6003   %}
6004   ins_pipe(fpu_reg_con);
6005 %}
6006 
6007 // The instruction usage is guarded by predicate in operand immFPR0().
6008 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6009   match(Set dst con);
6010   ins_cost(125);
6011   format %{ "FLDZ   ST\n\t"
6012             "FSTP   $dst" %}
6013   ins_encode %{
6014     __ fldz();
6015     __ fstp_d($dst$$reg);
6016   %}
6017   ins_pipe(fpu_reg_con);
6018 %}
6019 
6020 // The instruction usage is guarded by predicate in operand immFPR1().
6021 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6022   match(Set dst con);
6023   ins_cost(125);
6024   format %{ "FLD1   ST\n\t"
6025             "FSTP   $dst" %}
6026   ins_encode %{
6027     __ fld1();
6028     __ fstp_d($dst$$reg);
6029   %}
6030   ins_pipe(fpu_reg_con);
6031 %}
6032 
6033 // The instruction usage is guarded by predicate in operand immF().
6034 instruct loadConF(regF dst, immF con) %{
6035   match(Set dst con);
6036   ins_cost(125);
6037   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6038   ins_encode %{
6039     __ movflt($dst$$XMMRegister, $constantaddress($con));
6040   %}
6041   ins_pipe(pipe_slow);
6042 %}
6043 
6044 // The instruction usage is guarded by predicate in operand immF0().
6045 instruct loadConF0(regF dst, immF0 src) %{
6046   match(Set dst src);
6047   ins_cost(100);
6048   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6049   ins_encode %{
6050     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6051   %}
6052   ins_pipe(pipe_slow);
6053 %}
6054 
6055 // The instruction usage is guarded by predicate in operand immDPR().
6056 instruct loadConDPR(regDPR dst, immDPR con) %{
6057   match(Set dst con);
6058   ins_cost(125);
6059 
6060   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6061             "FSTP   $dst" %}
6062   ins_encode %{
6063     __ fld_d($constantaddress($con));
6064     __ fstp_d($dst$$reg);
6065   %}
6066   ins_pipe(fpu_reg_con);
6067 %}
6068 
6069 // The instruction usage is guarded by predicate in operand immDPR0().
6070 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6071   match(Set dst con);
6072   ins_cost(125);
6073 
6074   format %{ "FLDZ   ST\n\t"
6075             "FSTP   $dst" %}
6076   ins_encode %{
6077     __ fldz();
6078     __ fstp_d($dst$$reg);
6079   %}
6080   ins_pipe(fpu_reg_con);
6081 %}
6082 
6083 // The instruction usage is guarded by predicate in operand immDPR1().
6084 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6085   match(Set dst con);
6086   ins_cost(125);
6087 
6088   format %{ "FLD1   ST\n\t"
6089             "FSTP   $dst" %}
6090   ins_encode %{
6091     __ fld1();
6092     __ fstp_d($dst$$reg);
6093   %}
6094   ins_pipe(fpu_reg_con);
6095 %}
6096 
6097 // The instruction usage is guarded by predicate in operand immD().
6098 instruct loadConD(regD dst, immD con) %{
6099   match(Set dst con);
6100   ins_cost(125);
6101   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6102   ins_encode %{
6103     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6104   %}
6105   ins_pipe(pipe_slow);
6106 %}
6107 
6108 // The instruction usage is guarded by predicate in operand immD0().
6109 instruct loadConD0(regD dst, immD0 src) %{
6110   match(Set dst src);
6111   ins_cost(100);
6112   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6113   ins_encode %{
6114     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6115   %}
6116   ins_pipe( pipe_slow );
6117 %}
6118 
6119 // Load Stack Slot
6120 instruct loadSSI(rRegI dst, stackSlotI src) %{
6121   match(Set dst src);
6122   ins_cost(125);
6123 
6124   format %{ "MOV    $dst,$src" %}
6125   opcode(0x8B);
6126   ins_encode( OpcP, RegMem(dst,src));
6127   ins_pipe( ialu_reg_mem );
6128 %}
6129 
6130 instruct loadSSL(eRegL dst, stackSlotL src) %{
6131   match(Set dst src);
6132 
6133   ins_cost(200);
6134   format %{ "MOV    $dst.lo,$src.lo\n\t"
6135             "MOV    $dst.hi,$src.hi" %}
6136   opcode(0x8B, 0x8B);
6137   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6138   ins_pipe( ialu_mem_long_reg );
6139 %}
6140 
6141 // Load Stack Slot
6142 instruct loadSSP(eRegP dst, stackSlotP src) %{
6143   match(Set dst src);
6144   ins_cost(125);
6145 
6146   format %{ "MOV    $dst,$src" %}
6147   opcode(0x8B);
6148   ins_encode( OpcP, RegMem(dst,src));
6149   ins_pipe( ialu_reg_mem );
6150 %}
6151 
6152 // Load Stack Slot
6153 instruct loadSSF(regFPR dst, stackSlotF src) %{
6154   match(Set dst src);
6155   ins_cost(125);
6156 
6157   format %{ "FLD_S  $src\n\t"
6158             "FSTP   $dst" %}
6159   opcode(0xD9);               /* D9 /0, FLD m32real */
6160   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6161               Pop_Reg_FPR(dst) );
6162   ins_pipe( fpu_reg_mem );
6163 %}
6164 
6165 // Load Stack Slot
6166 instruct loadSSD(regDPR dst, stackSlotD src) %{
6167   match(Set dst src);
6168   ins_cost(125);
6169 
6170   format %{ "FLD_D  $src\n\t"
6171             "FSTP   $dst" %}
6172   opcode(0xDD);               /* DD /0, FLD m64real */
6173   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6174               Pop_Reg_DPR(dst) );
6175   ins_pipe( fpu_reg_mem );
6176 %}
6177 
6178 // Prefetch instructions for allocation.
6179 // Must be safe to execute with invalid address (cannot fault).
6180 
6181 instruct prefetchAlloc0( memory mem ) %{
6182   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6183   match(PrefetchAllocation mem);
6184   ins_cost(0);
6185   size(0);
6186   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6187   ins_encode();
6188   ins_pipe(empty);
6189 %}
6190 
6191 instruct prefetchAlloc( memory mem ) %{
6192   predicate(AllocatePrefetchInstr==3);
6193   match( PrefetchAllocation mem );
6194   ins_cost(100);
6195 
6196   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6197   ins_encode %{
6198     __ prefetchw($mem$$Address);
6199   %}
6200   ins_pipe(ialu_mem);
6201 %}
6202 
6203 instruct prefetchAllocNTA( memory mem ) %{
6204   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6205   match(PrefetchAllocation mem);
6206   ins_cost(100);
6207 
6208   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6209   ins_encode %{
6210     __ prefetchnta($mem$$Address);
6211   %}
6212   ins_pipe(ialu_mem);
6213 %}
6214 
6215 instruct prefetchAllocT0( memory mem ) %{
6216   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6217   match(PrefetchAllocation mem);
6218   ins_cost(100);
6219 
6220   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6221   ins_encode %{
6222     __ prefetcht0($mem$$Address);
6223   %}
6224   ins_pipe(ialu_mem);
6225 %}
6226 
6227 instruct prefetchAllocT2( memory mem ) %{
6228   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6229   match(PrefetchAllocation mem);
6230   ins_cost(100);
6231 
6232   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6233   ins_encode %{
6234     __ prefetcht2($mem$$Address);
6235   %}
6236   ins_pipe(ialu_mem);
6237 %}
6238 
6239 //----------Store Instructions-------------------------------------------------
6240 
6241 // Store Byte
6242 instruct storeB(memory mem, xRegI src) %{
6243   match(Set mem (StoreB mem src));
6244 
6245   ins_cost(125);
6246   format %{ "MOV8   $mem,$src" %}
6247   opcode(0x88);
6248   ins_encode( OpcP, RegMem( src, mem ) );
6249   ins_pipe( ialu_mem_reg );
6250 %}
6251 
6252 // Store Char/Short
6253 instruct storeC(memory mem, rRegI src) %{
6254   match(Set mem (StoreC mem src));
6255 
6256   ins_cost(125);
6257   format %{ "MOV16  $mem,$src" %}
6258   opcode(0x89, 0x66);
6259   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6260   ins_pipe( ialu_mem_reg );
6261 %}
6262 
6263 // Store Integer
6264 instruct storeI(memory mem, rRegI src) %{
6265   match(Set mem (StoreI mem src));
6266 
6267   ins_cost(125);
6268   format %{ "MOV    $mem,$src" %}
6269   opcode(0x89);
6270   ins_encode( OpcP, RegMem( src, mem ) );
6271   ins_pipe( ialu_mem_reg );
6272 %}
6273 
6274 // Store Long
6275 instruct storeL(long_memory mem, eRegL src) %{
6276   predicate(!((StoreLNode*)n)->require_atomic_access());
6277   match(Set mem (StoreL mem src));
6278 
6279   ins_cost(200);
6280   format %{ "MOV    $mem,$src.lo\n\t"
6281             "MOV    $mem+4,$src.hi" %}
6282   opcode(0x89, 0x89);
6283   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6284   ins_pipe( ialu_mem_long_reg );
6285 %}
6286 
6287 // Store Long to Integer
6288 instruct storeL2I(memory mem, eRegL src) %{
6289   match(Set mem (StoreI mem (ConvL2I src)));
6290 
6291   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6292   ins_encode %{
6293     __ movl($mem$$Address, $src$$Register);
6294   %}
6295   ins_pipe(ialu_mem_reg);
6296 %}
6297 
6298 // Volatile Store Long.  Must be atomic, so move it into
6299 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6300 // target address before the store (for null-ptr checks)
6301 // so the memory operand is used twice in the encoding.
6302 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6303   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6304   match(Set mem (StoreL mem src));
6305   effect( KILL cr );
6306   ins_cost(400);
6307   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6308             "FILD   $src\n\t"
6309             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6310   opcode(0x3B);
6311   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6312   ins_pipe( fpu_reg_mem );
6313 %}
6314 
6315 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6316   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6317   match(Set mem (StoreL mem src));
6318   effect( TEMP tmp, KILL cr );
6319   ins_cost(380);
6320   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6321             "MOVSD  $tmp,$src\n\t"
6322             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6323   ins_encode %{
6324     __ cmpl(rax, $mem$$Address);
6325     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6326     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6327   %}
6328   ins_pipe( pipe_slow );
6329 %}
6330 
6331 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6332   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6333   match(Set mem (StoreL mem src));
  effect( TEMP tmp2, TEMP tmp, KILL cr );
6335   ins_cost(360);
6336   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6337             "MOVD   $tmp,$src.lo\n\t"
6338             "MOVD   $tmp2,$src.hi\n\t"
6339             "PUNPCKLDQ $tmp,$tmp2\n\t"
6340             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6341   ins_encode %{
6342     __ cmpl(rax, $mem$$Address);
6343     __ movdl($tmp$$XMMRegister, $src$$Register);
6344     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6345     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6346     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6347   %}
6348   ins_pipe( pipe_slow );
6349 %}
6350 
6351 // Store Pointer; for storing unknown oops and raw pointers
6352 instruct storeP(memory mem, anyRegP src) %{
6353   match(Set mem (StoreP mem src));
6354 
6355   ins_cost(125);
6356   format %{ "MOV    $mem,$src" %}
6357   opcode(0x89);
6358   ins_encode( OpcP, RegMem( src, mem ) );
6359   ins_pipe( ialu_mem_reg );
6360 %}
6361 
6362 // Store Integer Immediate
6363 instruct storeImmI(memory mem, immI src) %{
6364   match(Set mem (StoreI mem src));
6365 
6366   ins_cost(150);
6367   format %{ "MOV    $mem,$src" %}
6368   opcode(0xC7);               /* C7 /0 */
6369   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6370   ins_pipe( ialu_mem_imm );
6371 %}
6372 
6373 // Store Short/Char Immediate
6374 instruct storeImmI16(memory mem, immI16 src) %{
6375   predicate(UseStoreImmI16);
6376   match(Set mem (StoreC mem src));
6377 
6378   ins_cost(150);
6379   format %{ "MOV16  $mem,$src" %}
6380   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6381   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6382   ins_pipe( ialu_mem_imm );
6383 %}
6384 
6385 // Store Pointer Immediate; null pointers or constant oops that do not
6386 // need card-mark barriers.
6387 instruct storeImmP(memory mem, immP src) %{
6388   match(Set mem (StoreP mem src));
6389 
6390   ins_cost(150);
6391   format %{ "MOV    $mem,$src" %}
6392   opcode(0xC7);               /* C7 /0 */
6393   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6394   ins_pipe( ialu_mem_imm );
6395 %}
6396 
6397 // Store Byte Immediate
6398 instruct storeImmB(memory mem, immI8 src) %{
6399   match(Set mem (StoreB mem src));
6400 
6401   ins_cost(150);
6402   format %{ "MOV8   $mem,$src" %}
6403   opcode(0xC6);               /* C6 /0 */
6404   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6405   ins_pipe( ialu_mem_imm );
6406 %}
6407 
6408 // Store CMS card-mark Immediate
6409 instruct storeImmCM(memory mem, immI8 src) %{
6410   match(Set mem (StoreCM mem src));
6411 
6412   ins_cost(150);
6413   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6414   opcode(0xC6);               /* C6 /0 */
6415   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6416   ins_pipe( ialu_mem_imm );
6417 %}
6418 
6419 // Store Double
6420 instruct storeDPR( memory mem, regDPR1 src) %{
6421   predicate(UseSSE<=1);
6422   match(Set mem (StoreD mem src));
6423 
6424   ins_cost(100);
6425   format %{ "FST_D  $mem,$src" %}
6426   opcode(0xDD);       /* DD /2 */
6427   ins_encode( enc_FPR_store(mem,src) );
6428   ins_pipe( fpu_mem_reg );
6429 %}
6430 
6431 // Store double does rounding on x86
6432 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6433   predicate(UseSSE<=1);
6434   match(Set mem (StoreD mem (RoundDouble src)));
6435 
6436   ins_cost(100);
6437   format %{ "FST_D  $mem,$src\t# round" %}
6438   opcode(0xDD);       /* DD /2 */
6439   ins_encode( enc_FPR_store(mem,src) );
6440   ins_pipe( fpu_mem_reg );
6441 %}
6442 
// Store XMM register to memory (double-precision floating point)
6444 // MOVSD instruction
6445 instruct storeD(memory mem, regD src) %{
6446   predicate(UseSSE>=2);
6447   match(Set mem (StoreD mem src));
6448   ins_cost(95);
6449   format %{ "MOVSD  $mem,$src" %}
6450   ins_encode %{
6451     __ movdbl($mem$$Address, $src$$XMMRegister);
6452   %}
6453   ins_pipe( pipe_slow );
6454 %}
6455 
6456 // Store XMM register to memory (single-precision floating point)
6457 // MOVSS instruction
6458 instruct storeF(memory mem, regF src) %{
6459   predicate(UseSSE>=1);
6460   match(Set mem (StoreF mem src));
6461   ins_cost(95);
6462   format %{ "MOVSS  $mem,$src" %}
6463   ins_encode %{
6464     __ movflt($mem$$Address, $src$$XMMRegister);
6465   %}
6466   ins_pipe( pipe_slow );
6467 %}
6468 
6469 // Store Float
6470 instruct storeFPR( memory mem, regFPR1 src) %{
6471   predicate(UseSSE==0);
6472   match(Set mem (StoreF mem src));
6473 
6474   ins_cost(100);
6475   format %{ "FST_S  $mem,$src" %}
6476   opcode(0xD9);       /* D9 /2 */
6477   ins_encode( enc_FPR_store(mem,src) );
6478   ins_pipe( fpu_mem_reg );
6479 %}
6480 
6481 // Store Float does rounding on x86
6482 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6483   predicate(UseSSE==0);
6484   match(Set mem (StoreF mem (RoundFloat src)));
6485 
6486   ins_cost(100);
6487   format %{ "FST_S  $mem,$src\t# round" %}
6488   opcode(0xD9);       /* D9 /2 */
6489   ins_encode( enc_FPR_store(mem,src) );
6490   ins_pipe( fpu_mem_reg );
6491 %}
6492 
// Store Float from Double, rounding on x86
6494 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6495   predicate(UseSSE<=1);
6496   match(Set mem (StoreF mem (ConvD2F src)));
6497 
6498   ins_cost(100);
6499   format %{ "FST_S  $mem,$src\t# D-round" %}
6500   opcode(0xD9);       /* D9 /2 */
6501   ins_encode( enc_FPR_store(mem,src) );
6502   ins_pipe( fpu_mem_reg );
6503 %}
6504 
// Store immediate Float value (faster than storing from an FPU register)
6506 // The instruction usage is guarded by predicate in operand immFPR().
6507 instruct storeFPR_imm( memory mem, immFPR src) %{
6508   match(Set mem (StoreF mem src));
6509 
6510   ins_cost(50);
6511   format %{ "MOV    $mem,$src\t# store float" %}
6512   opcode(0xC7);               /* C7 /0 */
6513   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6514   ins_pipe( ialu_mem_imm );
6515 %}
6516 
// Store immediate Float value (faster than storing from an XMM register)
6518 // The instruction usage is guarded by predicate in operand immF().
6519 instruct storeF_imm( memory mem, immF src) %{
6520   match(Set mem (StoreF mem src));
6521 
6522   ins_cost(50);
6523   format %{ "MOV    $mem,$src\t# store float" %}
6524   opcode(0xC7);               /* C7 /0 */
6525   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6526   ins_pipe( ialu_mem_imm );
6527 %}
6528 
6529 // Store Integer to stack slot
6530 instruct storeSSI(stackSlotI dst, rRegI src) %{
6531   match(Set dst src);
6532 
6533   ins_cost(100);
6534   format %{ "MOV    $dst,$src" %}
6535   opcode(0x89);
6536   ins_encode( OpcPRegSS( dst, src ) );
6537   ins_pipe( ialu_mem_reg );
6538 %}
6539 
// Store Pointer to stack slot
6541 instruct storeSSP(stackSlotP dst, eRegP src) %{
6542   match(Set dst src);
6543 
6544   ins_cost(100);
6545   format %{ "MOV    $dst,$src" %}
6546   opcode(0x89);
6547   ins_encode( OpcPRegSS( dst, src ) );
6548   ins_pipe( ialu_mem_reg );
6549 %}
6550 
6551 // Store Long to stack slot
6552 instruct storeSSL(stackSlotL dst, eRegL src) %{
6553   match(Set dst src);
6554 
6555   ins_cost(200);
6556   format %{ "MOV    $dst,$src.lo\n\t"
6557             "MOV    $dst+4,$src.hi" %}
6558   opcode(0x89, 0x89);
6559   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6560   ins_pipe( ialu_mem_long_reg );
6561 %}
6562 
6563 //----------MemBar Instructions-----------------------------------------------
6564 // Memory barrier flavors
6565 
6566 instruct membar_acquire() %{
6567   match(MemBarAcquire);
6568   match(LoadFence);
6569   ins_cost(400);
6570 
6571   size(0);
6572   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6573   ins_encode();
6574   ins_pipe(empty);
6575 %}
6576 
6577 instruct membar_acquire_lock() %{
6578   match(MemBarAcquireLock);
6579   ins_cost(0);
6580 
6581   size(0);
6582   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6583   ins_encode( );
6584   ins_pipe(empty);
6585 %}
6586 
6587 instruct membar_release() %{
6588   match(MemBarRelease);
6589   match(StoreFence);
6590   ins_cost(400);
6591 
6592   size(0);
6593   format %{ "MEMBAR-release ! (empty encoding)" %}
6594   ins_encode( );
6595   ins_pipe(empty);
6596 %}
6597 
6598 instruct membar_release_lock() %{
6599   match(MemBarReleaseLock);
6600   ins_cost(0);
6601 
6602   size(0);
6603   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6604   ins_encode( );
6605   ins_pipe(empty);
6606 %}
6607 
6608 instruct membar_volatile(eFlagsReg cr) %{
6609   match(MemBarVolatile);
6610   effect(KILL cr);
6611   ins_cost(400);
6612 
6613   format %{
6614     $$template
6615     if (os::is_MP()) {
6616       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6617     } else {
6618       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6619     }
6620   %}
6621   ins_encode %{
6622     __ membar(Assembler::StoreLoad);
6623   %}
6624   ins_pipe(pipe_slow);
6625 %}
6626 
6627 instruct unnecessary_membar_volatile() %{
6628   match(MemBarVolatile);
6629   predicate(Matcher::post_store_load_barrier(n));
6630   ins_cost(0);
6631 
6632   size(0);
6633   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6634   ins_encode( );
6635   ins_pipe(empty);
6636 %}
6637 
6638 instruct membar_storestore() %{
6639   match(MemBarStoreStore);
6640   ins_cost(0);
6641 
6642   size(0);
6643   format %{ "MEMBAR-storestore (empty encoding)" %}
6644   ins_encode( );
6645   ins_pipe(empty);
6646 %}
6647 
6648 //----------Move Instructions--------------------------------------------------
6649 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6650   match(Set dst (CastX2P src));
6651   format %{ "# X2P  $dst, $src" %}
6652   ins_encode( /*empty encoding*/ );
6653   ins_cost(0);
6654   ins_pipe(empty);
6655 %}
6656 
6657 instruct castP2X(rRegI dst, eRegP src ) %{
6658   match(Set dst (CastP2X src));
6659   ins_cost(50);
6660   format %{ "MOV    $dst, $src\t# CastP2X" %}
6661   ins_encode( enc_Copy( dst, src) );
6662   ins_pipe( ialu_reg_reg );
6663 %}
6664 
6665 //----------Conditional Move---------------------------------------------------
6666 // Conditional move
6667 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6668   predicate(!VM_Version::supports_cmov() );
6669   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6670   ins_cost(200);
6671   format %{ "J$cop,us skip\t# signed cmove\n\t"
6672             "MOV    $dst,$src\n"
6673       "skip:" %}
6674   ins_encode %{
6675     Label Lskip;
6676     // Invert sense of branch from sense of CMOV
6677     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6678     __ movl($dst$$Register, $src$$Register);
6679     __ bind(Lskip);
6680   %}
6681   ins_pipe( pipe_cmov_reg );
6682 %}
6683 
6684 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6685   predicate(!VM_Version::supports_cmov() );
6686   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6687   ins_cost(200);
6688   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6689             "MOV    $dst,$src\n"
6690       "skip:" %}
6691   ins_encode %{
6692     Label Lskip;
6693     // Invert sense of branch from sense of CMOV
6694     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6695     __ movl($dst$$Register, $src$$Register);
6696     __ bind(Lskip);
6697   %}
6698   ins_pipe( pipe_cmov_reg );
6699 %}
6700 
6701 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6702   predicate(VM_Version::supports_cmov() );
6703   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6704   ins_cost(200);
6705   format %{ "CMOV$cop $dst,$src" %}
6706   opcode(0x0F,0x40);
6707   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6708   ins_pipe( pipe_cmov_reg );
6709 %}
6710 
6711 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6712   predicate(VM_Version::supports_cmov() );
6713   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6714   ins_cost(200);
6715   format %{ "CMOV$cop $dst,$src" %}
6716   opcode(0x0F,0x40);
6717   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6718   ins_pipe( pipe_cmov_reg );
6719 %}
6720 
6721 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6722   predicate(VM_Version::supports_cmov() );
6723   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6724   ins_cost(200);
6725   expand %{
6726     cmovI_regU(cop, cr, dst, src);
6727   %}
6728 %}
6729 
6730 // Conditional move
6731 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6732   predicate(VM_Version::supports_cmov() );
6733   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6734   ins_cost(250);
6735   format %{ "CMOV$cop $dst,$src" %}
6736   opcode(0x0F,0x40);
6737   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6738   ins_pipe( pipe_cmov_mem );
6739 %}
6740 
6741 // Conditional move
6742 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6743   predicate(VM_Version::supports_cmov() );
6744   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6745   ins_cost(250);
6746   format %{ "CMOV$cop $dst,$src" %}
6747   opcode(0x0F,0x40);
6748   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6749   ins_pipe( pipe_cmov_mem );
6750 %}
6751 
6752 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6753   predicate(VM_Version::supports_cmov() );
6754   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6755   ins_cost(250);
6756   expand %{
6757     cmovI_memU(cop, cr, dst, src);
6758   %}
6759 %}
6760 
6761 // Conditional move
6762 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6763   predicate(VM_Version::supports_cmov() );
6764   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6765   ins_cost(200);
6766   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6767   opcode(0x0F,0x40);
6768   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6769   ins_pipe( pipe_cmov_reg );
6770 %}
6771 
6772 // Conditional move (non-P6 version)
6773 // Note:  a CMoveP is generated for  stubs and native wrappers
6774 //        regardless of whether we are on a P6, so we
6775 //        emulate a cmov here
6776 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6777   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6778   ins_cost(300);
6779   format %{ "Jn$cop   skip\n\t"
6780           "MOV    $dst,$src\t# pointer\n"
6781       "skip:" %}
6782   opcode(0x8b);
6783   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6784   ins_pipe( pipe_cmov_reg );
6785 %}
6786 
6787 // Conditional move
6788 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6789   predicate(VM_Version::supports_cmov() );
6790   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6791   ins_cost(200);
6792   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6793   opcode(0x0F,0x40);
6794   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6795   ins_pipe( pipe_cmov_reg );
6796 %}
6797 
6798 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6799   predicate(VM_Version::supports_cmov() );
6800   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6801   ins_cost(200);
6802   expand %{
6803     cmovP_regU(cop, cr, dst, src);
6804   %}
6805 %}
6806 
6807 // DISABLED: Requires the ADLC to emit a bottom_type call that
6808 // correctly meets the two pointer arguments; one is an incoming
6809 // register but the other is a memory operand.  ALSO appears to
6810 // be buggy with implicit null checks.
6811 //
6812 //// Conditional move
6813 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6814 //  predicate(VM_Version::supports_cmov() );
6815 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6816 //  ins_cost(250);
6817 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6818 //  opcode(0x0F,0x40);
6819 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6820 //  ins_pipe( pipe_cmov_mem );
6821 //%}
6822 //
6823 //// Conditional move
6824 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6825 //  predicate(VM_Version::supports_cmov() );
6826 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6827 //  ins_cost(250);
6828 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6829 //  opcode(0x0F,0x40);
6830 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6831 //  ins_pipe( pipe_cmov_mem );
6832 //%}
6833 
6834 // Conditional move
6835 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6836   predicate(UseSSE<=1);
6837   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6838   ins_cost(200);
6839   format %{ "FCMOV$cop $dst,$src\t# double" %}
6840   opcode(0xDA);
6841   ins_encode( enc_cmov_dpr(cop,src) );
6842   ins_pipe( pipe_cmovDPR_reg );
6843 %}
6844 
6845 // Conditional move
6846 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6847   predicate(UseSSE==0);
6848   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6849   ins_cost(200);
6850   format %{ "FCMOV$cop $dst,$src\t# float" %}
6851   opcode(0xDA);
6852   ins_encode( enc_cmov_dpr(cop,src) );
6853   ins_pipe( pipe_cmovDPR_reg );
6854 %}
6855 
6856 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6857 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6858   predicate(UseSSE<=1);
6859   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6860   ins_cost(200);
6861   format %{ "Jn$cop   skip\n\t"
6862             "MOV    $dst,$src\t# double\n"
6863       "skip:" %}
6864   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6865   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6866   ins_pipe( pipe_cmovDPR_reg );
6867 %}
6868 
6869 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6870 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6871   predicate(UseSSE==0);
6872   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6873   ins_cost(200);
6874   format %{ "Jn$cop    skip\n\t"
6875             "MOV    $dst,$src\t# float\n"
6876       "skip:" %}
6877   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6878   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6879   ins_pipe( pipe_cmovDPR_reg );
6880 %}
6881 
// No CMOV exists for XMM registers with SSE/SSE2; emulate with a short branch
6883 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6884   predicate (UseSSE>=1);
6885   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6886   ins_cost(200);
6887   format %{ "Jn$cop   skip\n\t"
6888             "MOVSS  $dst,$src\t# float\n"
6889       "skip:" %}
6890   ins_encode %{
6891     Label skip;
6892     // Invert sense of branch from sense of CMOV
6893     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6894     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6895     __ bind(skip);
6896   %}
6897   ins_pipe( pipe_slow );
6898 %}
6899 
// No CMOV exists for XMM registers with SSE/SSE2; emulate with a short branch
6901 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6902   predicate (UseSSE>=2);
6903   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6904   ins_cost(200);
6905   format %{ "Jn$cop   skip\n\t"
6906             "MOVSD  $dst,$src\t# float\n"
6907       "skip:" %}
6908   ins_encode %{
6909     Label skip;
6910     // Invert sense of branch from sense of CMOV
6911     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6912     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6913     __ bind(skip);
6914   %}
6915   ins_pipe( pipe_slow );
6916 %}
6917 
6918 // unsigned version
6919 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6920   predicate (UseSSE>=1);
6921   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6922   ins_cost(200);
6923   format %{ "Jn$cop   skip\n\t"
6924             "MOVSS  $dst,$src\t# float\n"
6925       "skip:" %}
6926   ins_encode %{
6927     Label skip;
6928     // Invert sense of branch from sense of CMOV
6929     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6930     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6931     __ bind(skip);
6932   %}
6933   ins_pipe( pipe_slow );
6934 %}
6935 
6936 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6937   predicate (UseSSE>=1);
6938   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6939   ins_cost(200);
6940   expand %{
6941     fcmovF_regU(cop, cr, dst, src);
6942   %}
6943 %}
6944 
6945 // unsigned version
6946 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6947   predicate (UseSSE>=2);
6948   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6949   ins_cost(200);
6950   format %{ "Jn$cop   skip\n\t"
6951             "MOVSD  $dst,$src\t# float\n"
6952       "skip:" %}
6953   ins_encode %{
6954     Label skip;
6955     // Invert sense of branch from sense of CMOV
6956     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6957     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6958     __ bind(skip);
6959   %}
6960   ins_pipe( pipe_slow );
6961 %}
6962 
6963 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6964   predicate (UseSSE>=2);
6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6966   ins_cost(200);
6967   expand %{
6968     fcmovD_regU(cop, cr, dst, src);
6969   %}
6970 %}
6971 
6972 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6973   predicate(VM_Version::supports_cmov() );
6974   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6975   ins_cost(200);
6976   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6977             "CMOV$cop $dst.hi,$src.hi" %}
6978   opcode(0x0F,0x40);
6979   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6980   ins_pipe( pipe_cmov_reg_long );
6981 %}
6982 
6983 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6984   predicate(VM_Version::supports_cmov() );
6985   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6986   ins_cost(200);
6987   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6988             "CMOV$cop $dst.hi,$src.hi" %}
6989   opcode(0x0F,0x40);
6990   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6991   ins_pipe( pipe_cmov_reg_long );
6992 %}
6993 
6994 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6995   predicate(VM_Version::supports_cmov() );
6996   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6997   ins_cost(200);
6998   expand %{
6999     cmovL_regU(cop, cr, dst, src);
7000   %}
7001 %}
7002 
7003 //----------Arithmetic Instructions--------------------------------------------
7004 //----------Addition Instructions----------------------------------------------
7005 
7006 // Integer Addition Instructions
7007 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7008   match(Set dst (AddI dst src));
7009   effect(KILL cr);
7010 
7011   size(2);
7012   format %{ "ADD    $dst,$src" %}
7013   opcode(0x03);
7014   ins_encode( OpcP, RegReg( dst, src) );
7015   ins_pipe( ialu_reg_reg );
7016 %}
7017 
7018 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7019   match(Set dst (AddI dst src));
7020   effect(KILL cr);
7021 
7022   format %{ "ADD    $dst,$src" %}
7023   opcode(0x81, 0x00); /* /0 id */
7024   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7025   ins_pipe( ialu_reg );
7026 %}
7027 
7028 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7029   predicate(UseIncDec);
7030   match(Set dst (AddI dst src));
7031   effect(KILL cr);
7032 
7033   size(1);
7034   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd => INC r32 */
7036   ins_encode( Opc_plus( primary, dst ) );
7037   ins_pipe( ialu_reg );
7038 %}
7039 
7040 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7041   match(Set dst (AddI src0 src1));
7042   ins_cost(110);
7043 
7044   format %{ "LEA    $dst,[$src0 + $src1]" %}
7045   opcode(0x8D); /* 0x8D /r */
7046   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7047   ins_pipe( ialu_reg_reg );
7048 %}
7049 
7050 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7051   match(Set dst (AddP src0 src1));
7052   ins_cost(110);
7053 
7054   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7055   opcode(0x8D); /* 0x8D /r */
7056   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7057   ins_pipe( ialu_reg_reg );
7058 %}
7059 
7060 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7061   predicate(UseIncDec);
7062   match(Set dst (AddI dst src));
7063   effect(KILL cr);
7064 
7065   size(1);
7066   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd => DEC r32 */
7068   ins_encode( Opc_plus( primary, dst ) );
7069   ins_pipe( ialu_reg );
7070 %}
7071 
7072 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7073   match(Set dst (AddP dst src));
7074   effect(KILL cr);
7075 
7076   size(2);
7077   format %{ "ADD    $dst,$src" %}
7078   opcode(0x03);
7079   ins_encode( OpcP, RegReg( dst, src) );
7080   ins_pipe( ialu_reg_reg );
7081 %}
7082 
7083 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7084   match(Set dst (AddP dst src));
7085   effect(KILL cr);
7086 
7087   format %{ "ADD    $dst,$src" %}
7088   opcode(0x81,0x00); /* Opcode 81 /0 id */
7089   // ins_encode( RegImm( dst, src) );
7090   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7091   ins_pipe( ialu_reg );
7092 %}
7093 
7094 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7095   match(Set dst (AddI dst (LoadI src)));
7096   effect(KILL cr);
7097 
7098   ins_cost(125);
7099   format %{ "ADD    $dst,$src" %}
7100   opcode(0x03);
7101   ins_encode( OpcP, RegMem( dst, src) );
7102   ins_pipe( ialu_reg_mem );
7103 %}
7104 
7105 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7106   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7107   effect(KILL cr);
7108 
7109   ins_cost(150);
7110   format %{ "ADD    $dst,$src" %}
7111   opcode(0x01);  /* Opcode 01 /r */
7112   ins_encode( OpcP, RegMem( src, dst ) );
7113   ins_pipe( ialu_mem_reg );
7114 %}
7115 
7116 // Add Memory with Immediate
7117 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7118   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7119   effect(KILL cr);
7120 
7121   ins_cost(125);
7122   format %{ "ADD    $dst,$src" %}
7123   opcode(0x81);               /* Opcode 81 /0 id */
7124   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7125   ins_pipe( ialu_mem_imm );
7126 %}
7127 
7128 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7129   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7130   effect(KILL cr);
7131 
7132   ins_cost(125);
7133   format %{ "INC    $dst" %}
7134   opcode(0xFF);               /* Opcode FF /0 */
7135   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7136   ins_pipe( ialu_mem_imm );
7137 %}
7138 
7139 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7140   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7141   effect(KILL cr);
7142 
7143   ins_cost(125);
7144   format %{ "DEC    $dst" %}
7145   opcode(0xFF);               /* Opcode FF /1 */
7146   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7147   ins_pipe( ialu_mem_imm );
7148 %}
7149 
7150 
7151 instruct checkCastPP( eRegP dst ) %{
7152   match(Set dst (CheckCastPP dst));
7153 
7154   size(0);
7155   format %{ "#checkcastPP of $dst" %}
7156   ins_encode( /*empty encoding*/ );
7157   ins_pipe( empty );
7158 %}
7159 
7160 instruct castPP( eRegP dst ) %{
7161   match(Set dst (CastPP dst));
7162   format %{ "#castPP of $dst" %}
7163   ins_encode( /*empty encoding*/ );
7164   ins_pipe( empty );
7165 %}
7166 
7167 instruct castII( rRegI dst ) %{
7168   match(Set dst (CastII dst));
7169   format %{ "#castII of $dst" %}
7170   ins_encode( /*empty encoding*/ );
7171   ins_cost(0);
7172   ins_pipe( empty );
7173 %}
7174 
7175 
7176 // Load-locked - same as a regular pointer load when used with compare-swap
7177 instruct loadPLocked(eRegP dst, memory mem) %{
7178   match(Set dst (LoadPLocked mem));
7179 
7180   ins_cost(125);
7181   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7182   opcode(0x8B);
7183   ins_encode( OpcP, RegMem(dst,mem));
7184   ins_pipe( ialu_reg_mem );
7185 %}
7186 
7187 // Conditional-store of the updated heap-top.
7188 // Used during allocation of the shared heap.
7189 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7190 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7191   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7192   // EAX is killed if there is contention, but then it's also unused.
7193   // In the common case of no contention, EAX holds the new oop address.
7194   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7195   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7196   ins_pipe( pipe_cmpxchg );
7197 %}
7198 
7199 // Conditional-store of an int value.
7200 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7201 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7202   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7203   effect(KILL oldval);
7204   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7205   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7206   ins_pipe( pipe_cmpxchg );
7207 %}
7208 
7209 // Conditional-store of a long value.
7210 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7211 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7212   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7213   effect(KILL oldval);
7214   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7215             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7216             "XCHG   EBX,ECX"
7217   %}
7218   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high-order word of the new value to store but
    //       our register encoding uses rbx.
7223     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7224     if( os::is_MP() )
7225       __ lock();
7226     __ cmpxchg8($mem$$Address);
7227     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7228   %}
7229   ins_pipe( pipe_cmpxchg );
7230 %}
7231 
7232 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7233 
7234 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7235   predicate(VM_Version::supports_cx8());
7236   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7237   effect(KILL cr, KILL oldval);
7238   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7239             "MOV    $res,0\n\t"
7240             "JNE,s  fail\n\t"
7241             "MOV    $res,1\n"
7242           "fail:" %}
7243   ins_encode( enc_cmpxchg8(mem_ptr),
7244               enc_flags_ne_to_boolean(res) );
7245   ins_pipe( pipe_cmpxchg );
7246 %}
7247 
7248 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7249   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7250   effect(KILL cr, KILL oldval);
7251   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7252             "MOV    $res,0\n\t"
7253             "JNE,s  fail\n\t"
7254             "MOV    $res,1\n"
7255           "fail:" %}
7256   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7257   ins_pipe( pipe_cmpxchg );
7258 %}
7259 
7260 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7261   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7262   effect(KILL cr, KILL oldval);
7263   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7264             "MOV    $res,0\n\t"
7265             "JNE,s  fail\n\t"
7266             "MOV    $res,1\n"
7267           "fail:" %}
7268   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7269   ins_pipe( pipe_cmpxchg );
7270 %}
7271 
7272 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7273   predicate(n->as_LoadStore()->result_not_used());
7274   match(Set dummy (GetAndAddI mem add));
7275   effect(KILL cr);
7276   format %{ "ADDL  [$mem],$add" %}
7277   ins_encode %{
7278     if (os::is_MP()) { __ lock(); }
7279     __ addl($mem$$Address, $add$$constant);
7280   %}
7281   ins_pipe( pipe_cmpxchg );
7282 %}
7283 
7284 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7285   match(Set newval (GetAndAddI mem newval));
7286   effect(KILL cr);
7287   format %{ "XADDL  [$mem],$newval" %}
7288   ins_encode %{
7289     if (os::is_MP()) { __ lock(); }
7290     __ xaddl($mem$$Address, $newval$$Register);
7291   %}
7292   ins_pipe( pipe_cmpxchg );
7293 %}
7294 
7295 instruct xchgI( memory mem, rRegI newval) %{
7296   match(Set newval (GetAndSetI mem newval));
7297   format %{ "XCHGL  $newval,[$mem]" %}
7298   ins_encode %{
7299     __ xchgl($newval$$Register, $mem$$Address);
7300   %}
7301   ins_pipe( pipe_cmpxchg );
7302 %}
7303 
7304 instruct xchgP( memory mem, pRegP newval) %{
7305   match(Set newval (GetAndSetP mem newval));
7306   format %{ "XCHGL  $newval,[$mem]" %}
7307   ins_encode %{
7308     __ xchgl($newval$$Register, $mem$$Address);
7309   %}
7310   ins_pipe( pipe_cmpxchg );
7311 %}
7312 
7313 //----------Subtraction Instructions-------------------------------------------
7314 
7315 // Integer Subtraction Instructions
7316 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7317   match(Set dst (SubI dst src));
7318   effect(KILL cr);
7319 
7320   size(2);
7321   format %{ "SUB    $dst,$src" %}
7322   opcode(0x2B);
7323   ins_encode( OpcP, RegReg( dst, src) );
7324   ins_pipe( ialu_reg_reg );
7325 %}
7326 
7327 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7328   match(Set dst (SubI dst src));
7329   effect(KILL cr);
7330 
7331   format %{ "SUB    $dst,$src" %}
7332   opcode(0x81,0x05);  /* Opcode 81 /5 */
7333   // ins_encode( RegImm( dst, src) );
7334   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7335   ins_pipe( ialu_reg );
7336 %}
7337 
7338 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7339   match(Set dst (SubI dst (LoadI src)));
7340   effect(KILL cr);
7341 
7342   ins_cost(125);
7343   format %{ "SUB    $dst,$src" %}
7344   opcode(0x2B);
7345   ins_encode( OpcP, RegMem( dst, src) );
7346   ins_pipe( ialu_reg_mem );
7347 %}
7348 
7349 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7350   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7351   effect(KILL cr);
7352 
7353   ins_cost(150);
7354   format %{ "SUB    $dst,$src" %}
7355   opcode(0x29);  /* Opcode 29 /r */
7356   ins_encode( OpcP, RegMem( src, dst ) );
7357   ins_pipe( ialu_mem_reg );
7358 %}
7359 
7360 // Subtract from a pointer
7361 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7362   match(Set dst (AddP dst (SubI zero src)));
7363   effect(KILL cr);
7364 
7365   size(2);
7366   format %{ "SUB    $dst,$src" %}
7367   opcode(0x2B);
7368   ins_encode( OpcP, RegReg( dst, src) );
7369   ins_pipe( ialu_reg_reg );
7370 %}
7371 
7372 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7373   match(Set dst (SubI zero dst));
7374   effect(KILL cr);
7375 
7376   size(2);
7377   format %{ "NEG    $dst" %}
7378   opcode(0xF7,0x03);  // Opcode F7 /3
7379   ins_encode( OpcP, RegOpc( dst ) );
7380   ins_pipe( ialu_reg );
7381 %}
7382 
7383 //----------Multiplication/Division Instructions-------------------------------
7384 // Integer Multiplication Instructions
7385 // Multiply Register
7386 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7387   match(Set dst (MulI dst src));
7388   effect(KILL cr);
7389 
7390   size(3);
7391   ins_cost(300);
7392   format %{ "IMUL   $dst,$src" %}
7393   opcode(0xAF, 0x0F);
7394   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7395   ins_pipe( ialu_reg_reg_alu0 );
7396 %}
7397 
7398 // Multiply 32-bit Immediate
7399 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7400   match(Set dst (MulI src imm));
7401   effect(KILL cr);
7402 
7403   ins_cost(300);
7404   format %{ "IMUL   $dst,$src,$imm" %}
7405   opcode(0x69);  /* 69 /r id */
7406   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7407   ins_pipe( ialu_reg_reg_alu0 );
7408 %}
7409 
7410 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7411   match(Set dst src);
7412   effect(KILL cr);
7413 
7414   // Note that this is artificially increased to make it more expensive than loadConL
7415   ins_cost(250);
7416   format %{ "MOV    EAX,$src\t// low word only" %}
7417   opcode(0xB8);
7418   ins_encode( LdImmL_Lo(dst, src) );
7419   ins_pipe( ialu_reg_fat );
7420 %}
7421 
7422 // Multiply by 32-bit Immediate, taking the shifted high order results
7423 //  (special case for shift by 32)
7424 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7425   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7426   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7427              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7428              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7429   effect(USE src1, KILL cr);
7430 
7431   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7432   ins_cost(0*100 + 1*400 - 150);
7433   format %{ "IMUL   EDX:EAX,$src1" %}
7434   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7435   ins_pipe( pipe_slow );
7436 %}
7437 
7438 // Multiply by 32-bit Immediate, taking the shifted high order results
7439 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7440   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7441   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7442              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7443              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7444   effect(USE src1, KILL cr);
7445 
7446   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7447   ins_cost(1*100 + 1*400 - 150);
7448   format %{ "IMUL   EDX:EAX,$src1\n\t"
7449             "SAR    EDX,$cnt-32" %}
7450   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7451   ins_pipe( pipe_slow );
7452 %}
7453 
7454 // Multiply Memory 32-bit Immediate
7455 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7456   match(Set dst (MulI (LoadI src) imm));
7457   effect(KILL cr);
7458 
7459   ins_cost(300);
7460   format %{ "IMUL   $dst,$src,$imm" %}
7461   opcode(0x69);  /* 69 /r id */
7462   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7463   ins_pipe( ialu_reg_mem_alu0 );
7464 %}
7465 
7466 // Multiply Memory
7467 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7468   match(Set dst (MulI dst (LoadI src)));
7469   effect(KILL cr);
7470 
7471   ins_cost(350);
7472   format %{ "IMUL   $dst,$src" %}
7473   opcode(0xAF, 0x0F);
7474   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7475   ins_pipe( ialu_reg_mem_alu0 );
7476 %}
7477 
7478 // Multiply Register Int to Long
7479 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7480   // Basic Idea: long = (long)int * (long)int
7481   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7482   effect(DEF dst, USE src, USE src1, KILL flags);
7483 
7484   ins_cost(300);
7485   format %{ "IMUL   $dst,$src1" %}
7486 
7487   ins_encode( long_int_multiply( dst, src1 ) );
7488   ins_pipe( ialu_reg_reg_alu0 );
7489 %}
7490 
7491 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7492   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7493   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7494   effect(KILL flags);
7495 
7496   ins_cost(300);
7497   format %{ "MUL    $dst,$src1" %}
7498 
7499   ins_encode( long_uint_multiply(dst, src1) );
7500   ins_pipe( ialu_reg_reg_alu0 );
7501 %}
7502 
7503 // Multiply Register Long
7504 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7505   match(Set dst (MulL dst src));
7506   effect(KILL cr, TEMP tmp);
7507   ins_cost(4*100+3*400);
7508 // Basic idea: lo(result) = lo(x_lo * y_lo)
7509 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7510   format %{ "MOV    $tmp,$src.lo\n\t"
7511             "IMUL   $tmp,EDX\n\t"
7512             "MOV    EDX,$src.hi\n\t"
7513             "IMUL   EDX,EAX\n\t"
7514             "ADD    $tmp,EDX\n\t"
7515             "MUL    EDX:EAX,$src.lo\n\t"
7516             "ADD    EDX,$tmp" %}
7517   ins_encode( long_multiply( dst, src, tmp ) );
7518   ins_pipe( pipe_slow );
7519 %}
7520 
7521 // Multiply Register Long where the left operand's high 32 bits are zero
7522 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7523   predicate(is_operand_hi32_zero(n->in(1)));
7524   match(Set dst (MulL dst src));
7525   effect(KILL cr, TEMP tmp);
7526   ins_cost(2*100+2*400);
7527 // Basic idea: lo(result) = lo(x_lo * y_lo)
7528 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7529   format %{ "MOV    $tmp,$src.hi\n\t"
7530             "IMUL   $tmp,EAX\n\t"
7531             "MUL    EDX:EAX,$src.lo\n\t"
7532             "ADD    EDX,$tmp" %}
7533   ins_encode %{
7534     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7535     __ imull($tmp$$Register, rax);
7536     __ mull($src$$Register);
7537     __ addl(rdx, $tmp$$Register);
7538   %}
7539   ins_pipe( pipe_slow );
7540 %}
7541 
7542 // Multiply Register Long where the right operand's high 32 bits are zero
7543 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7544   predicate(is_operand_hi32_zero(n->in(2)));
7545   match(Set dst (MulL dst src));
7546   effect(KILL cr, TEMP tmp);
7547   ins_cost(2*100+2*400);
7548 // Basic idea: lo(result) = lo(x_lo * y_lo)
7549 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7550   format %{ "MOV    $tmp,$src.lo\n\t"
7551             "IMUL   $tmp,EDX\n\t"
7552             "MUL    EDX:EAX,$src.lo\n\t"
7553             "ADD    EDX,$tmp" %}
7554   ins_encode %{
7555     __ movl($tmp$$Register, $src$$Register);
7556     __ imull($tmp$$Register, rdx);
7557     __ mull($src$$Register);
7558     __ addl(rdx, $tmp$$Register);
7559   %}
7560   ins_pipe( pipe_slow );
7561 %}
7562 
7563 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7564 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7565   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7566   match(Set dst (MulL dst src));
7567   effect(KILL cr);
7568   ins_cost(1*400);
7569 // Basic idea: lo(result) = lo(x_lo * y_lo)
7570 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7571   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7572   ins_encode %{
7573     __ mull($src$$Register);
7574   %}
7575   ins_pipe( pipe_slow );
7576 %}
7577 
7578 // Multiply Register Long by small constant
7579 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7580   match(Set dst (MulL dst src));
7581   effect(KILL cr, TEMP tmp);
7582   ins_cost(2*100+2*400);
7583   size(12);
7584 // Basic idea: lo(result) = lo(src * EAX)
7585 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7586   format %{ "IMUL   $tmp,EDX,$src\n\t"
7587             "MOV    EDX,$src\n\t"
7588             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7589             "ADD    EDX,$tmp" %}
7590   ins_encode( long_multiply_con( dst, src, tmp ) );
7591   ins_pipe( pipe_slow );
7592 %}
7593 
7594 // Integer DIV with Register
7595 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7596   match(Set rax (DivI rax div));
7597   effect(KILL rdx, KILL cr);
7598   size(26);
7599   ins_cost(30*100+10*100);
7600   format %{ "CMP    EAX,0x80000000\n\t"
7601             "JNE,s  normal\n\t"
7602             "XOR    EDX,EDX\n\t"
7603             "CMP    ECX,-1\n\t"
7604             "JE,s   done\n"
7605     "normal: CDQ\n\t"
7606             "IDIV   $div\n\t"
7607     "done:"        %}
7608   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7609   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7610   ins_pipe( ialu_reg_reg_alu0 );
7611 %}
7612 
7613 // Divide Register Long
7614 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7615   match(Set dst (DivL src1 src2));
7616   effect( KILL cr, KILL cx, KILL bx );
7617   ins_cost(10000);
7618   format %{ "PUSH   $src1.hi\n\t"
7619             "PUSH   $src1.lo\n\t"
7620             "PUSH   $src2.hi\n\t"
7621             "PUSH   $src2.lo\n\t"
7622             "CALL   SharedRuntime::ldiv\n\t"
7623             "ADD    ESP,16" %}
7624   ins_encode( long_div(src1,src2) );
7625   ins_pipe( pipe_slow );
7626 %}
7627 
7628 // Integer DIVMOD with Register, both quotient and mod results
7629 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7630   match(DivModI rax div);
7631   effect(KILL cr);
7632   size(26);
7633   ins_cost(30*100+10*100);
7634   format %{ "CMP    EAX,0x80000000\n\t"
7635             "JNE,s  normal\n\t"
7636             "XOR    EDX,EDX\n\t"
7637             "CMP    ECX,-1\n\t"
7638             "JE,s   done\n"
7639     "normal: CDQ\n\t"
7640             "IDIV   $div\n\t"
7641     "done:"        %}
7642   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7643   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7644   ins_pipe( pipe_slow );
7645 %}
7646 
7647 // Integer MOD with Register
7648 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7649   match(Set rdx (ModI rax div));
7650   effect(KILL rax, KILL cr);
7651 
7652   size(26);
7653   ins_cost(300);
7654   format %{ "CDQ\n\t"
7655             "IDIV   $div" %}
7656   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7657   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7658   ins_pipe( ialu_reg_reg_alu0 );
7659 %}
7660 
7661 // Remainder Register Long
7662 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7663   match(Set dst (ModL src1 src2));
7664   effect( KILL cr, KILL cx, KILL bx );
7665   ins_cost(10000);
7666   format %{ "PUSH   $src1.hi\n\t"
7667             "PUSH   $src1.lo\n\t"
7668             "PUSH   $src2.hi\n\t"
7669             "PUSH   $src2.lo\n\t"
7670             "CALL   SharedRuntime::lrem\n\t"
7671             "ADD    ESP,16" %}
7672   ins_encode( long_mod(src1,src2) );
7673   ins_pipe( pipe_slow );
7674 %}
7675 
7676 // Divide Register Long (no special case since divisor != -1)
7677 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7678   match(Set dst (DivL dst imm));
7679   effect( TEMP tmp, TEMP tmp2, KILL cr );
7680   ins_cost(1000);
7681   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7682             "XOR    $tmp2,$tmp2\n\t"
7683             "CMP    $tmp,EDX\n\t"
7684             "JA,s   fast\n\t"
7685             "MOV    $tmp2,EAX\n\t"
7686             "MOV    EAX,EDX\n\t"
7687             "MOV    EDX,0\n\t"
7688             "JLE,s  pos\n\t"
7689             "LNEG   EAX : $tmp2\n\t"
7690             "DIV    $tmp # unsigned division\n\t"
7691             "XCHG   EAX,$tmp2\n\t"
7692             "DIV    $tmp\n\t"
7693             "LNEG   $tmp2 : EAX\n\t"
7694             "JMP,s  done\n"
7695     "pos:\n\t"
7696             "DIV    $tmp\n\t"
7697             "XCHG   EAX,$tmp2\n"
7698     "fast:\n\t"
7699             "DIV    $tmp\n"
7700     "done:\n\t"
7701             "MOV    EDX,$tmp2\n\t"
7702             "NEG    EDX:EAX # if $imm < 0" %}
7703   ins_encode %{
7704     int con = (int)$imm$$constant;
7705     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7706     int pcon = (con > 0) ? con : -con;
7707     Label Lfast, Lpos, Ldone;
7708 
7709     __ movl($tmp$$Register, pcon);
7710     __ xorl($tmp2$$Register,$tmp2$$Register);
7711     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7712     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7713 
7714     __ movl($tmp2$$Register, $dst$$Register); // save
7715     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7716     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7717     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7718 
7719     // Negative dividend.
7720     // convert value to positive to use unsigned division
7721     __ lneg($dst$$Register, $tmp2$$Register);
7722     __ divl($tmp$$Register);
7723     __ xchgl($dst$$Register, $tmp2$$Register);
7724     __ divl($tmp$$Register);
7725     // revert result back to negative
7726     __ lneg($tmp2$$Register, $dst$$Register);
7727     __ jmpb(Ldone);
7728 
7729     __ bind(Lpos);
7730     __ divl($tmp$$Register); // Use unsigned division
7731     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
7733 
7734     __ bind(Lfast);
7735     // fast path: src is positive
7736     __ divl($tmp$$Register); // Use unsigned division
7737 
7738     __ bind(Ldone);
7739     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7740     if (con < 0) {
7741       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7742     }
7743   %}
7744   ins_pipe( pipe_slow );
7745 %}
7746 
// Remainder Register Long (remainder fits into 32 bits)
7748 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7749   match(Set dst (ModL dst imm));
7750   effect( TEMP tmp, TEMP tmp2, KILL cr );
7751   ins_cost(1000);
7752   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7753             "CMP    $tmp,EDX\n\t"
7754             "JA,s   fast\n\t"
7755             "MOV    $tmp2,EAX\n\t"
7756             "MOV    EAX,EDX\n\t"
7757             "MOV    EDX,0\n\t"
7758             "JLE,s  pos\n\t"
7759             "LNEG   EAX : $tmp2\n\t"
7760             "DIV    $tmp # unsigned division\n\t"
7761             "MOV    EAX,$tmp2\n\t"
7762             "DIV    $tmp\n\t"
7763             "NEG    EDX\n\t"
7764             "JMP,s  done\n"
7765     "pos:\n\t"
7766             "DIV    $tmp\n\t"
7767             "MOV    EAX,$tmp2\n"
7768     "fast:\n\t"
7769             "DIV    $tmp\n"
7770     "done:\n\t"
7771             "MOV    EAX,EDX\n\t"
7772             "SAR    EDX,31\n\t" %}
7773   ins_encode %{
7774     int con = (int)$imm$$constant;
7775     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7776     int pcon = (con > 0) ? con : -con;
7777     Label  Lfast, Lpos, Ldone;
7778 
7779     __ movl($tmp$$Register, pcon);
7780     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7781     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7782 
7783     __ movl($tmp2$$Register, $dst$$Register); // save
7784     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7785     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7786     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7787 
7788     // Negative dividend.
7789     // convert value to positive to use unsigned division
7790     __ lneg($dst$$Register, $tmp2$$Register);
7791     __ divl($tmp$$Register);
7792     __ movl($dst$$Register, $tmp2$$Register);
7793     __ divl($tmp$$Register);
7794     // revert remainder back to negative
7795     __ negl(HIGH_FROM_LOW($dst$$Register));
7796     __ jmpb(Ldone);
7797 
7798     __ bind(Lpos);
7799     __ divl($tmp$$Register);
7800     __ movl($dst$$Register, $tmp2$$Register);
7801 
7802     __ bind(Lfast);
7803     // fast path: src is positive
7804     __ divl($tmp$$Register);
7805 
7806     __ bind(Ldone);
7807     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7808     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7809 
7810   %}
7811   ins_pipe( pipe_slow );
7812 %}
7813 
7814 // Integer Shift Instructions
7815 // Shift Left by one
7816 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7817   match(Set dst (LShiftI dst shift));
7818   effect(KILL cr);
7819 
7820   size(2);
7821   format %{ "SHL    $dst,$shift" %}
7822   opcode(0xD1, 0x4);  /* D1 /4 */
7823   ins_encode( OpcP, RegOpc( dst ) );
7824   ins_pipe( ialu_reg );
7825 %}
7826 
7827 // Shift Left by 8-bit immediate
7828 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7829   match(Set dst (LShiftI dst shift));
7830   effect(KILL cr);
7831 
7832   size(3);
7833   format %{ "SHL    $dst,$shift" %}
7834   opcode(0xC1, 0x4);  /* C1 /4 ib */
7835   ins_encode( RegOpcImm( dst, shift) );
7836   ins_pipe( ialu_reg );
7837 %}
7838 
7839 // Shift Left by variable
7840 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7841   match(Set dst (LShiftI dst shift));
7842   effect(KILL cr);
7843 
7844   size(2);
7845   format %{ "SHL    $dst,$shift" %}
7846   opcode(0xD3, 0x4);  /* D3 /4 */
7847   ins_encode( OpcP, RegOpc( dst ) );
7848   ins_pipe( ialu_reg_reg );
7849 %}
7850 
7851 // Arithmetic shift right by one
7852 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7853   match(Set dst (RShiftI dst shift));
7854   effect(KILL cr);
7855 
7856   size(2);
7857   format %{ "SAR    $dst,$shift" %}
7858   opcode(0xD1, 0x7);  /* D1 /7 */
7859   ins_encode( OpcP, RegOpc( dst ) );
7860   ins_pipe( ialu_reg );
7861 %}
7862 
7863 // Arithmetic shift right by one
7864 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7865   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7866   effect(KILL cr);
7867   format %{ "SAR    $dst,$shift" %}
7868   opcode(0xD1, 0x7);  /* D1 /7 */
7869   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7870   ins_pipe( ialu_mem_imm );
7871 %}
7872 
7873 // Arithmetic Shift Right by 8-bit immediate
7874 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7875   match(Set dst (RShiftI dst shift));
7876   effect(KILL cr);
7877 
7878   size(3);
7879   format %{ "SAR    $dst,$shift" %}
7880   opcode(0xC1, 0x7);  /* C1 /7 ib */
7881   ins_encode( RegOpcImm( dst, shift ) );
7882   ins_pipe( ialu_mem_imm );
7883 %}
7884 
7885 // Arithmetic Shift Right by 8-bit immediate
7886 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7887   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7888   effect(KILL cr);
7889 
7890   format %{ "SAR    $dst,$shift" %}
7891   opcode(0xC1, 0x7);  /* C1 /7 ib */
7892   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7893   ins_pipe( ialu_mem_imm );
7894 %}
7895 
7896 // Arithmetic Shift Right by variable
7897 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7898   match(Set dst (RShiftI dst shift));
7899   effect(KILL cr);
7900 
7901   size(2);
7902   format %{ "SAR    $dst,$shift" %}
7903   opcode(0xD3, 0x7);  /* D3 /7 */
7904   ins_encode( OpcP, RegOpc( dst ) );
7905   ins_pipe( ialu_reg_reg );
7906 %}
7907 
7908 // Logical shift right by one
7909 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7910   match(Set dst (URShiftI dst shift));
7911   effect(KILL cr);
7912 
7913   size(2);
7914   format %{ "SHR    $dst,$shift" %}
7915   opcode(0xD1, 0x5);  /* D1 /5 */
7916   ins_encode( OpcP, RegOpc( dst ) );
7917   ins_pipe( ialu_reg );
7918 %}
7919 
7920 // Logical Shift Right by 8-bit immediate
7921 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7922   match(Set dst (URShiftI dst shift));
7923   effect(KILL cr);
7924 
7925   size(3);
7926   format %{ "SHR    $dst,$shift" %}
7927   opcode(0xC1, 0x5);  /* C1 /5 ib */
7928   ins_encode( RegOpcImm( dst, shift) );
7929   ins_pipe( ialu_reg );
7930 %}
7931 
7932 
7933 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7934 // This idiom is used by the compiler for the i2b bytecode.
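// For example, Java source along the lines of (illustrative only)
//   byte b = (byte) i;    // javac emits i2b; C2 parses it as (i << 24) >> 24
// matches this rule and collapses to a single MOVSX (movsbl).  The i2s rule
// below plays the same role for (short) casts via the 16-bit shift pair.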
7935 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7936   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7937 
7938   size(3);
7939   format %{ "MOVSX  $dst,$src :8" %}
7940   ins_encode %{
7941     __ movsbl($dst$$Register, $src$$Register);
7942   %}
7943   ins_pipe(ialu_reg_reg);
7944 %}
7945 
7946 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7947 // This idiom is used by the compiler for the i2s bytecode.
7948 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7949   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7950 
7951   size(3);
7952   format %{ "MOVSX  $dst,$src :16" %}
7953   ins_encode %{
7954     __ movswl($dst$$Register, $src$$Register);
7955   %}
7956   ins_pipe(ialu_reg_reg);
7957 %}
7958 
7959 
7960 // Logical Shift Right by variable
7961 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7962   match(Set dst (URShiftI dst shift));
7963   effect(KILL cr);
7964 
7965   size(2);
7966   format %{ "SHR    $dst,$shift" %}
7967   opcode(0xD3, 0x5);  /* D3 /5 */
7968   ins_encode( OpcP, RegOpc( dst ) );
7969   ins_pipe( ialu_reg_reg );
7970 %}
7971 
7972 
7973 //----------Logical Instructions-----------------------------------------------
7974 //----------Integer Logical Instructions---------------------------------------
7975 // And Instructions
7976 // And Register with Register
7977 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7978   match(Set dst (AndI dst src));
7979   effect(KILL cr);
7980 
7981   size(2);
7982   format %{ "AND    $dst,$src" %}
7983   opcode(0x23);
7984   ins_encode( OpcP, RegReg( dst, src) );
7985   ins_pipe( ialu_reg_reg );
7986 %}
7987 
7988 // And Register with Immediate
7989 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7990   match(Set dst (AndI dst src));
7991   effect(KILL cr);
7992 
7993   format %{ "AND    $dst,$src" %}
7994   opcode(0x81,0x04);  /* Opcode 81 /4 */
7995   // ins_encode( RegImm( dst, src) );
7996   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7997   ins_pipe( ialu_reg );
7998 %}
7999 
8000 // And Register with Memory
8001 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8002   match(Set dst (AndI dst (LoadI src)));
8003   effect(KILL cr);
8004 
8005   ins_cost(125);
8006   format %{ "AND    $dst,$src" %}
8007   opcode(0x23);
8008   ins_encode( OpcP, RegMem( dst, src) );
8009   ins_pipe( ialu_reg_mem );
8010 %}
8011 
8012 // And Memory with Register
8013 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8014   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8015   effect(KILL cr);
8016 
8017   ins_cost(150);
8018   format %{ "AND    $dst,$src" %}
8019   opcode(0x21);  /* Opcode 21 /r */
8020   ins_encode( OpcP, RegMem( src, dst ) );
8021   ins_pipe( ialu_mem_reg );
8022 %}
8023 
8024 // And Memory with Immediate
8025 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8026   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8027   effect(KILL cr);
8028 
8029   ins_cost(125);
8030   format %{ "AND    $dst,$src" %}
8031   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8032   // ins_encode( MemImm( dst, src) );
8033   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8034   ins_pipe( ialu_mem_imm );
8035 %}
8036 
8037 // BMI1 instructions
8038 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8039   match(Set dst (AndI (XorI src1 minus_1) src2));
8040   predicate(UseBMI1Instructions);
8041   effect(KILL cr);
8042 
8043   format %{ "ANDNL  $dst, $src1, $src2" %}
8044 
8045   ins_encode %{
8046     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8047   %}
8048   ins_pipe(ialu_reg);
8049 %}
8050 
8051 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8052   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8053   predicate(UseBMI1Instructions);
8054   effect(KILL cr);
8055 
8056   ins_cost(125);
8057   format %{ "ANDNL  $dst, $src1, $src2" %}
8058 
8059   ins_encode %{
8060     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8061   %}
8062   ins_pipe(ialu_reg_mem);
8063 %}
8064 
8065 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8066   match(Set dst (AndI (SubI imm_zero src) src));
8067   predicate(UseBMI1Instructions);
8068   effect(KILL cr);
8069 
8070   format %{ "BLSIL  $dst, $src" %}
8071 
8072   ins_encode %{
8073     __ blsil($dst$$Register, $src$$Register);
8074   %}
8075   ins_pipe(ialu_reg);
8076 %}
8077 
8078 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8079   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8080   predicate(UseBMI1Instructions);
8081   effect(KILL cr);
8082 
8083   ins_cost(125);
8084   format %{ "BLSIL  $dst, $src" %}
8085 
8086   ins_encode %{
8087     __ blsil($dst$$Register, $src$$Address);
8088   %}
8089   ins_pipe(ialu_reg_mem);
8090 %}
8091 
8092 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8093 %{
8094   match(Set dst (XorI (AddI src minus_1) src));
8095   predicate(UseBMI1Instructions);
8096   effect(KILL cr);
8097 
8098   format %{ "BLSMSKL $dst, $src" %}
8099 
8100   ins_encode %{
8101     __ blsmskl($dst$$Register, $src$$Register);
8102   %}
8103 
8104   ins_pipe(ialu_reg);
8105 %}
8106 
8107 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8108 %{
8109   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8110   predicate(UseBMI1Instructions);
8111   effect(KILL cr);
8112 
8113   ins_cost(125);
8114   format %{ "BLSMSKL $dst, $src" %}
8115 
8116   ins_encode %{
8117     __ blsmskl($dst$$Register, $src$$Address);
8118   %}
8119 
8120   ins_pipe(ialu_reg_mem);
8121 %}
8122 
8123 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8124 %{
8125   match(Set dst (AndI (AddI src minus_1) src) );
8126   predicate(UseBMI1Instructions);
8127   effect(KILL cr);
8128 
8129   format %{ "BLSRL  $dst, $src" %}
8130 
8131   ins_encode %{
8132     __ blsrl($dst$$Register, $src$$Register);
8133   %}
8134 
8135   ins_pipe(ialu_reg);
8136 %}
8137 
8138 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8139 %{
8140   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8141   predicate(UseBMI1Instructions);
8142   effect(KILL cr);
8143 
8144   ins_cost(125);
8145   format %{ "BLSRL  $dst, $src" %}
8146 
8147   ins_encode %{
8148     __ blsrl($dst$$Register, $src$$Address);
8149   %}
8150 
8151   ins_pipe(ialu_reg_mem);
8152 %}
8153 
8154 // Or Instructions
8155 // Or Register with Register
8156 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8157   match(Set dst (OrI dst src));
8158   effect(KILL cr);
8159 
8160   size(2);
8161   format %{ "OR     $dst,$src" %}
8162   opcode(0x0B);
8163   ins_encode( OpcP, RegReg( dst, src) );
8164   ins_pipe( ialu_reg_reg );
8165 %}
8166 
8167 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8168   match(Set dst (OrI dst (CastP2X src)));
8169   effect(KILL cr);
8170 
8171   size(2);
8172   format %{ "OR     $dst,$src" %}
8173   opcode(0x0B);
8174   ins_encode( OpcP, RegReg( dst, src) );
8175   ins_pipe( ialu_reg_reg );
8176 %}
8177 
8178 
8179 // Or Register with Immediate
8180 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8181   match(Set dst (OrI dst src));
8182   effect(KILL cr);
8183 
8184   format %{ "OR     $dst,$src" %}
8185   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8186   // ins_encode( RegImm( dst, src) );
8187   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8188   ins_pipe( ialu_reg );
8189 %}
8190 
8191 // Or Register with Memory
8192 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8193   match(Set dst (OrI dst (LoadI src)));
8194   effect(KILL cr);
8195 
8196   ins_cost(125);
8197   format %{ "OR     $dst,$src" %}
8198   opcode(0x0B);
8199   ins_encode( OpcP, RegMem( dst, src) );
8200   ins_pipe( ialu_reg_mem );
8201 %}
8202 
8203 // Or Memory with Register
8204 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8205   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8206   effect(KILL cr);
8207 
8208   ins_cost(150);
8209   format %{ "OR     $dst,$src" %}
8210   opcode(0x09);  /* Opcode 09 /r */
8211   ins_encode( OpcP, RegMem( src, dst ) );
8212   ins_pipe( ialu_mem_reg );
8213 %}
8214 
8215 // Or Memory with Immediate
8216 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8217   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8218   effect(KILL cr);
8219 
8220   ins_cost(125);
8221   format %{ "OR     $dst,$src" %}
8222   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8223   // ins_encode( MemImm( dst, src) );
8224   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8225   ins_pipe( ialu_mem_imm );
8226 %}
8227 
8228 // ROL/ROR
8229 // ROL expand
8230 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8231   effect(USE_DEF dst, USE shift, KILL cr);
8232 
8233   format %{ "ROL    $dst, $shift" %}
8234   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8235   ins_encode( OpcP, RegOpc( dst ));
8236   ins_pipe( ialu_reg );
8237 %}
8238 
8239 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8240   effect(USE_DEF dst, USE shift, KILL cr);
8241 
8242   format %{ "ROL    $dst, $shift" %}
8243   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8244   ins_encode( RegOpcImm(dst, shift) );
8245   ins_pipe(ialu_reg);
8246 %}
8247 
8248 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8249   effect(USE_DEF dst, USE shift, KILL cr);
8250 
8251   format %{ "ROL    $dst, $shift" %}
8252   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8253   ins_encode(OpcP, RegOpc(dst));
8254   ins_pipe( ialu_reg_reg );
8255 %}
8256 // end of ROL expand
8257 
8258 // ROL 32bit by one once
8259 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8260   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8261 
8262   expand %{
8263     rolI_eReg_imm1(dst, lshift, cr);
8264   %}
8265 %}
8266 
8267 // ROL 32bit var by imm8 once
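// The predicate accepts the pair only when the two shift counts sum to 0 mod
// 32, e.g. the illustrative shape  (x << 5) | (x >>> 27)  is a rotate-left by
// 5 and can be emitted as a single ROL.  The ROR-by-imm8 rule further down is
// guarded by the same check.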
8268 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8269   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8270   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8271 
8272   expand %{
8273     rolI_eReg_imm8(dst, lshift, cr);
8274   %}
8275 %}
8276 
8277 // ROL 32bit var by var once
8278 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8279   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8280 
8281   expand %{
8282     rolI_eReg_CL(dst, shift, cr);
8283   %}
8284 %}
8285 
8286 // ROL 32bit var by var once
8287 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8288   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8289 
8290   expand %{
8291     rolI_eReg_CL(dst, shift, cr);
8292   %}
8293 %}
8294 
8295 // ROR expand
8296 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8297   effect(USE_DEF dst, USE shift, KILL cr);
8298 
8299   format %{ "ROR    $dst, $shift" %}
8300   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8301   ins_encode( OpcP, RegOpc( dst ) );
8302   ins_pipe( ialu_reg );
8303 %}
8304 
8305 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8306   effect (USE_DEF dst, USE shift, KILL cr);
8307 
8308   format %{ "ROR    $dst, $shift" %}
8309   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8310   ins_encode( RegOpcImm(dst, shift) );
8311   ins_pipe( ialu_reg );
8312 %}
8313 
8314 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8315   effect(USE_DEF dst, USE shift, KILL cr);
8316 
8317   format %{ "ROR    $dst, $shift" %}
8318   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8319   ins_encode(OpcP, RegOpc(dst));
8320   ins_pipe( ialu_reg_reg );
8321 %}
8322 // end of ROR expand
8323 
8324 // ROR right once
8325 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8326   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8327 
8328   expand %{
8329     rorI_eReg_imm1(dst, rshift, cr);
8330   %}
8331 %}
8332 
8333 // ROR 32bit by immI8 once
8334 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8335   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8336   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8337 
8338   expand %{
8339     rorI_eReg_imm8(dst, rshift, cr);
8340   %}
8341 %}
8342 
8343 // ROR 32bit var by var once
8344 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8345   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8346 
8347   expand %{
8348     rorI_eReg_CL(dst, shift, cr);
8349   %}
8350 %}
8351 
8352 // ROR 32bit var by var once
8353 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8354   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8355 
8356   expand %{
8357     rorI_eReg_CL(dst, shift, cr);
8358   %}
8359 %}
8360 
8361 // Xor Instructions
8362 // Xor Register with Register
8363 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8364   match(Set dst (XorI dst src));
8365   effect(KILL cr);
8366 
8367   size(2);
8368   format %{ "XOR    $dst,$src" %}
8369   opcode(0x33);
8370   ins_encode( OpcP, RegReg( dst, src) );
8371   ins_pipe( ialu_reg_reg );
8372 %}
8373 
8374 // Xor Register with Immediate -1
8375 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8376   match(Set dst (XorI dst imm));
8377 
8378   size(2);
8379   format %{ "NOT    $dst" %}
8380   ins_encode %{
8381      __ notl($dst$$Register);
8382   %}
8383   ins_pipe( ialu_reg );
8384 %}
8385 
8386 // Xor Register with Immediate
8387 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8388   match(Set dst (XorI dst src));
8389   effect(KILL cr);
8390 
8391   format %{ "XOR    $dst,$src" %}
8392   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8393   // ins_encode( RegImm( dst, src) );
8394   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8395   ins_pipe( ialu_reg );
8396 %}
8397 
8398 // Xor Register with Memory
8399 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8400   match(Set dst (XorI dst (LoadI src)));
8401   effect(KILL cr);
8402 
8403   ins_cost(125);
8404   format %{ "XOR    $dst,$src" %}
8405   opcode(0x33);
8406   ins_encode( OpcP, RegMem(dst, src) );
8407   ins_pipe( ialu_reg_mem );
8408 %}
8409 
8410 // Xor Memory with Register
8411 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8412   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8413   effect(KILL cr);
8414 
8415   ins_cost(150);
8416   format %{ "XOR    $dst,$src" %}
8417   opcode(0x31);  /* Opcode 31 /r */
8418   ins_encode( OpcP, RegMem( src, dst ) );
8419   ins_pipe( ialu_mem_reg );
8420 %}
8421 
8422 // Xor Memory with Immediate
8423 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8424   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8425   effect(KILL cr);
8426 
8427   ins_cost(125);
8428   format %{ "XOR    $dst,$src" %}
8429   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8430   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8431   ins_pipe( ialu_mem_imm );
8432 %}
8433 
8434 //----------Convert Int to Boolean---------------------------------------------
8435 
8436 instruct movI_nocopy(rRegI dst, rRegI src) %{
8437   effect( DEF dst, USE src );
8438   format %{ "MOV    $dst,$src" %}
8439   ins_encode( enc_Copy( dst, src) );
8440   ins_pipe( ialu_reg_reg );
8441 %}
8442 
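// NEG sets CF exactly when $dst (a copy of $src) is non-zero, so the following
// ADC computes -src + src + CF, i.e. 1 for any non-zero input and 0 otherwise.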
8443 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8444   effect( USE_DEF dst, USE src, KILL cr );
8445 
8446   size(4);
8447   format %{ "NEG    $dst\n\t"
8448             "ADC    $dst,$src" %}
8449   ins_encode( neg_reg(dst),
8450               OpcRegReg(0x13,dst,src) );
8451   ins_pipe( ialu_reg_reg_long );
8452 %}
8453 
8454 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8455   match(Set dst (Conv2B src));
8456 
8457   expand %{
8458     movI_nocopy(dst,src);
8459     ci2b(dst,src,cr);
8460   %}
8461 %}
8462 
8463 instruct movP_nocopy(rRegI dst, eRegP src) %{
8464   effect( DEF dst, USE src );
8465   format %{ "MOV    $dst,$src" %}
8466   ins_encode( enc_Copy( dst, src) );
8467   ins_pipe( ialu_reg_reg );
8468 %}
8469 
8470 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8471   effect( USE_DEF dst, USE src, KILL cr );
8472   format %{ "NEG    $dst\n\t"
8473             "ADC    $dst,$src" %}
8474   ins_encode( neg_reg(dst),
8475               OpcRegReg(0x13,dst,src) );
8476   ins_pipe( ialu_reg_reg_long );
8477 %}
8478 
8479 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8480   match(Set dst (Conv2B src));
8481 
8482   expand %{
8483     movP_nocopy(dst,src);
8484     cp2b(dst,src,cr);
8485   %}
8486 %}
8487 
8488 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8489   match(Set dst (CmpLTMask p q));
8490   effect(KILL cr);
8491   ins_cost(400);
8492 
8493   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8494   format %{ "XOR    $dst,$dst\n\t"
8495             "CMP    $p,$q\n\t"
8496             "SETlt  $dst\n\t"
8497             "NEG    $dst" %}
8498   ins_encode %{
8499     Register Rp = $p$$Register;
8500     Register Rq = $q$$Register;
8501     Register Rd = $dst$$Register;
8502     Label done;
8503     __ xorl(Rd, Rd);
8504     __ cmpl(Rp, Rq);
8505     __ setb(Assembler::less, Rd);
8506     __ negl(Rd);
8507   %}
8508 
8509   ins_pipe(pipe_slow);
8510 %}
8511 
8512 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8513   match(Set dst (CmpLTMask dst zero));
8514   effect(DEF dst, KILL cr);
8515   ins_cost(100);
8516 
8517   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8518   ins_encode %{
8519   __ sarl($dst$$Register, 31);
8520   %}
8521   ins_pipe(ialu_reg);
8522 %}
8523 
8524 /* better to save a register than avoid a branch */
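// Roughly, this matches source shaped like (illustrative)
//   r = p - q;  if (r < 0)  r += y;
// i.e. p = (p - q) + ((p < q) ? y : 0), trading the CmpLTMask/AndI mask
// arithmetic for a subtract and one short forward branch.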
8525 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8526   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8527   effect(KILL cr);
8528   ins_cost(400);
8529   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8530             "JGE    done\n\t"
8531             "ADD    $p,$y\n"
8532             "done:  " %}
8533   ins_encode %{
8534     Register Rp = $p$$Register;
8535     Register Rq = $q$$Register;
8536     Register Ry = $y$$Register;
8537     Label done;
8538     __ subl(Rp, Rq);
8539     __ jccb(Assembler::greaterEqual, done);
8540     __ addl(Rp, Ry);
8541     __ bind(done);
8542   %}
8543 
8544   ins_pipe(pipe_cmplt);
8545 %}
8546 
8547 /* better to save a register than avoid a branch */
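// In effect:  if (p >= q) y = 0;  the mask-and is folded into a compare and a
// conditional clear of $y.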
8548 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8549   match(Set y (AndI (CmpLTMask p q) y));
8550   effect(KILL cr);
8551 
8552   ins_cost(300);
8553 
8554   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8555             "JLT      done\n\t"
8556             "XORL     $y, $y\n"
8557             "done:  " %}
8558   ins_encode %{
8559     Register Rp = $p$$Register;
8560     Register Rq = $q$$Register;
8561     Register Ry = $y$$Register;
8562     Label done;
8563     __ cmpl(Rp, Rq);
8564     __ jccb(Assembler::less, done);
8565     __ xorl(Ry, Ry);
8566     __ bind(done);
8567   %}
8568 
8569   ins_pipe(pipe_cmplt);
8570 %}
8571 
8572 /* If I enable this, I encourage spilling in the inner loop of compress.
8573 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8574   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8575 */
8576 //----------Overflow Math Instructions-----------------------------------------
8577 
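// Each rule below only sets the condition codes; the matcher pairs it with a
// branch or conditional move on the overflow flag, as generated for
// overflow-checked arithmetic such as the Math.addExact/subtractExact/
// negateExact/multiplyExact intrinsics.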
8578 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8579 %{
8580   match(Set cr (OverflowAddI op1 op2));
8581   effect(DEF cr, USE_KILL op1, USE op2);
8582 
8583   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8584 
8585   ins_encode %{
8586     __ addl($op1$$Register, $op2$$Register);
8587   %}
8588   ins_pipe(ialu_reg_reg);
8589 %}
8590 
8591 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8592 %{
8593   match(Set cr (OverflowAddI op1 op2));
8594   effect(DEF cr, USE_KILL op1, USE op2);
8595 
8596   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8597 
8598   ins_encode %{
8599     __ addl($op1$$Register, $op2$$constant);
8600   %}
8601   ins_pipe(ialu_reg_reg);
8602 %}
8603 
8604 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8605 %{
8606   match(Set cr (OverflowSubI op1 op2));
8607 
8608   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8609   ins_encode %{
8610     __ cmpl($op1$$Register, $op2$$Register);
8611   %}
8612   ins_pipe(ialu_reg_reg);
8613 %}
8614 
8615 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8616 %{
8617   match(Set cr (OverflowSubI op1 op2));
8618 
8619   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8620   ins_encode %{
8621     __ cmpl($op1$$Register, $op2$$constant);
8622   %}
8623   ins_pipe(ialu_reg_reg);
8624 %}
8625 
8626 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8627 %{
8628   match(Set cr (OverflowSubI zero op2));
8629   effect(DEF cr, USE_KILL op2);
8630 
8631   format %{ "NEG    $op2\t# overflow check int" %}
8632   ins_encode %{
8633     __ negl($op2$$Register);
8634   %}
8635   ins_pipe(ialu_reg_reg);
8636 %}
8637 
8638 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8639 %{
8640   match(Set cr (OverflowMulI op1 op2));
8641   effect(DEF cr, USE_KILL op1, USE op2);
8642 
8643   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8644   ins_encode %{
8645     __ imull($op1$$Register, $op2$$Register);
8646   %}
8647   ins_pipe(ialu_reg_reg_alu0);
8648 %}
8649 
8650 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8651 %{
8652   match(Set cr (OverflowMulI op1 op2));
8653   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8654 
8655   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8656   ins_encode %{
8657     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8658   %}
8659   ins_pipe(ialu_reg_reg_alu0);
8660 %}
8661 
8662 //----------Long Instructions------------------------------------------------
8663 // Add Long Register with Register
8664 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8665   match(Set dst (AddL dst src));
8666   effect(KILL cr);
8667   ins_cost(200);
8668   format %{ "ADD    $dst.lo,$src.lo\n\t"
8669             "ADC    $dst.hi,$src.hi" %}
8670   opcode(0x03, 0x13);
8671   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8672   ins_pipe( ialu_reg_reg_long );
8673 %}
8674 
8675 // Add Long Register with Immediate
8676 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8677   match(Set dst (AddL dst src));
8678   effect(KILL cr);
8679   format %{ "ADD    $dst.lo,$src.lo\n\t"
8680             "ADC    $dst.hi,$src.hi" %}
8681   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8682   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8683   ins_pipe( ialu_reg_long );
8684 %}
8685 
8686 // Add Long Register with Memory
8687 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8688   match(Set dst (AddL dst (LoadL mem)));
8689   effect(KILL cr);
8690   ins_cost(125);
8691   format %{ "ADD    $dst.lo,$mem\n\t"
8692             "ADC    $dst.hi,$mem+4" %}
8693   opcode(0x03, 0x13);
8694   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8695   ins_pipe( ialu_reg_long_mem );
8696 %}
8697 
8698 // Subtract Long Register with Register.
8699 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8700   match(Set dst (SubL dst src));
8701   effect(KILL cr);
8702   ins_cost(200);
8703   format %{ "SUB    $dst.lo,$src.lo\n\t"
8704             "SBB    $dst.hi,$src.hi" %}
8705   opcode(0x2B, 0x1B);
8706   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8707   ins_pipe( ialu_reg_reg_long );
8708 %}
8709 
8710 // Subtract Long Register with Immediate
8711 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8712   match(Set dst (SubL dst src));
8713   effect(KILL cr);
8714   format %{ "SUB    $dst.lo,$src.lo\n\t"
8715             "SBB    $dst.hi,$src.hi" %}
8716   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8717   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8718   ins_pipe( ialu_reg_long );
8719 %}
8720 
8721 // Subtract Long Register with Memory
8722 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8723   match(Set dst (SubL dst (LoadL mem)));
8724   effect(KILL cr);
8725   ins_cost(125);
8726   format %{ "SUB    $dst.lo,$mem\n\t"
8727             "SBB    $dst.hi,$mem+4" %}
8728   opcode(0x2B, 0x1B);
8729   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8730   ins_pipe( ialu_reg_long_mem );
8731 %}
8732 
8733 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8734   match(Set dst (SubL zero dst));
8735   effect(KILL cr);
8736   ins_cost(300);
8737   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8738   ins_encode( neg_long(dst) );
8739   ins_pipe( ialu_reg_reg_long );
8740 %}
8741 
8742 // And Long Register with Register
8743 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8744   match(Set dst (AndL dst src));
8745   effect(KILL cr);
8746   format %{ "AND    $dst.lo,$src.lo\n\t"
8747             "AND    $dst.hi,$src.hi" %}
8748   opcode(0x23,0x23);
8749   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8750   ins_pipe( ialu_reg_reg_long );
8751 %}
8752 
8753 // And Long Register with Immediate
8754 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8755   match(Set dst (AndL dst src));
8756   effect(KILL cr);
8757   format %{ "AND    $dst.lo,$src.lo\n\t"
8758             "AND    $dst.hi,$src.hi" %}
8759   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8760   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8761   ins_pipe( ialu_reg_long );
8762 %}
8763 
8764 // And Long Register with Memory
8765 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8766   match(Set dst (AndL dst (LoadL mem)));
8767   effect(KILL cr);
8768   ins_cost(125);
8769   format %{ "AND    $dst.lo,$mem\n\t"
8770             "AND    $dst.hi,$mem+4" %}
8771   opcode(0x23, 0x23);
8772   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8773   ins_pipe( ialu_reg_long_mem );
8774 %}
8775 
8776 // BMI1 instructions
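// The 64-bit forms below are decomposed into two 32-bit BMI operations on the
// lo/hi register or memory halves.  ANDN is simply applied to both halves;
// BLSI, BLSMSK and BLSR use the flag set by the low-half operation (ZF for
// BLSI, CF for BLSMSK/BLSR, as tested below) to decide whether the low word
// already supplied the interesting bit or the high word must be processed too.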
8777 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8778   match(Set dst (AndL (XorL src1 minus_1) src2));
8779   predicate(UseBMI1Instructions);
8780   effect(KILL cr, TEMP dst);
8781 
8782   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8783             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8784          %}
8785 
8786   ins_encode %{
8787     Register Rdst = $dst$$Register;
8788     Register Rsrc1 = $src1$$Register;
8789     Register Rsrc2 = $src2$$Register;
8790     __ andnl(Rdst, Rsrc1, Rsrc2);
8791     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8792   %}
8793   ins_pipe(ialu_reg_reg_long);
8794 %}
8795 
8796 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8797   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8798   predicate(UseBMI1Instructions);
8799   effect(KILL cr, TEMP dst);
8800 
8801   ins_cost(125);
8802   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8803             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8804          %}
8805 
8806   ins_encode %{
8807     Register Rdst = $dst$$Register;
8808     Register Rsrc1 = $src1$$Register;
8809     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8810 
8811     __ andnl(Rdst, Rsrc1, $src2$$Address);
8812     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8813   %}
8814   ins_pipe(ialu_reg_mem);
8815 %}
8816 
8817 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8818   match(Set dst (AndL (SubL imm_zero src) src));
8819   predicate(UseBMI1Instructions);
8820   effect(KILL cr, TEMP dst);
8821 
8822   format %{ "MOVL   $dst.hi, 0\n\t"
8823             "BLSIL  $dst.lo, $src.lo\n\t"
8824             "JNZ    done\n\t"
8825             "BLSIL  $dst.hi, $src.hi\n"
8826             "done:"
8827          %}
8828 
8829   ins_encode %{
8830     Label done;
8831     Register Rdst = $dst$$Register;
8832     Register Rsrc = $src$$Register;
8833     __ movl(HIGH_FROM_LOW(Rdst), 0);
8834     __ blsil(Rdst, Rsrc);
8835     __ jccb(Assembler::notZero, done);
8836     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8837     __ bind(done);
8838   %}
8839   ins_pipe(ialu_reg);
8840 %}
8841 
8842 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8843   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8844   predicate(UseBMI1Instructions);
8845   effect(KILL cr, TEMP dst);
8846 
8847   ins_cost(125);
8848   format %{ "MOVL   $dst.hi, 0\n\t"
8849             "BLSIL  $dst.lo, $src\n\t"
8850             "JNZ    done\n\t"
8851             "BLSIL  $dst.hi, $src+4\n"
8852             "done:"
8853          %}
8854 
8855   ins_encode %{
8856     Label done;
8857     Register Rdst = $dst$$Register;
8858     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8859 
8860     __ movl(HIGH_FROM_LOW(Rdst), 0);
8861     __ blsil(Rdst, $src$$Address);
8862     __ jccb(Assembler::notZero, done);
8863     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8864     __ bind(done);
8865   %}
8866   ins_pipe(ialu_reg_mem);
8867 %}
8868 
8869 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8870 %{
8871   match(Set dst (XorL (AddL src minus_1) src));
8872   predicate(UseBMI1Instructions);
8873   effect(KILL cr, TEMP dst);
8874 
8875   format %{ "MOVL    $dst.hi, 0\n\t"
8876             "BLSMSKL $dst.lo, $src.lo\n\t"
8877             "JNC     done\n\t"
8878             "BLSMSKL $dst.hi, $src.hi\n"
8879             "done:"
8880          %}
8881 
8882   ins_encode %{
8883     Label done;
8884     Register Rdst = $dst$$Register;
8885     Register Rsrc = $src$$Register;
8886     __ movl(HIGH_FROM_LOW(Rdst), 0);
8887     __ blsmskl(Rdst, Rsrc);
8888     __ jccb(Assembler::carryClear, done);
8889     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8890     __ bind(done);
8891   %}
8892 
8893   ins_pipe(ialu_reg);
8894 %}
8895 
8896 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8897 %{
8898   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8899   predicate(UseBMI1Instructions);
8900   effect(KILL cr, TEMP dst);
8901 
8902   ins_cost(125);
8903   format %{ "MOVL    $dst.hi, 0\n\t"
8904             "BLSMSKL $dst.lo, $src\n\t"
8905             "JNC     done\n\t"
8906             "BLSMSKL $dst.hi, $src+4\n"
8907             "done:"
8908          %}
8909 
8910   ins_encode %{
8911     Label done;
8912     Register Rdst = $dst$$Register;
8913     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8914 
8915     __ movl(HIGH_FROM_LOW(Rdst), 0);
8916     __ blsmskl(Rdst, $src$$Address);
8917     __ jccb(Assembler::carryClear, done);
8918     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8919     __ bind(done);
8920   %}
8921 
8922   ins_pipe(ialu_reg_mem);
8923 %}
8924 
8925 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8926 %{
8927   match(Set dst (AndL (AddL src minus_1) src) );
8928   predicate(UseBMI1Instructions);
8929   effect(KILL cr, TEMP dst);
8930 
8931   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8932             "BLSRL  $dst.lo, $src.lo\n\t"
8933             "JNC    done\n\t"
8934             "BLSRL  $dst.hi, $src.hi\n"
8935             "done:"
8936   %}
8937 
8938   ins_encode %{
8939     Label done;
8940     Register Rdst = $dst$$Register;
8941     Register Rsrc = $src$$Register;
8942     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8943     __ blsrl(Rdst, Rsrc);
8944     __ jccb(Assembler::carryClear, done);
8945     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8946     __ bind(done);
8947   %}
8948 
8949   ins_pipe(ialu_reg);
8950 %}
8951 
8952 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8953 %{
8954   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8955   predicate(UseBMI1Instructions);
8956   effect(KILL cr, TEMP dst);
8957 
8958   ins_cost(125);
8959   format %{ "MOVL   $dst.hi, $src+4\n\t"
8960             "BLSRL  $dst.lo, $src\n\t"
8961             "JNC    done\n\t"
8962             "BLSRL  $dst.hi, $src+4\n"
8963             "done:"
8964   %}
8965 
8966   ins_encode %{
8967     Label done;
8968     Register Rdst = $dst$$Register;
8969     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8970     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8971     __ blsrl(Rdst, $src$$Address);
8972     __ jccb(Assembler::carryClear, done);
8973     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8974     __ bind(done);
8975   %}
8976 
8977   ins_pipe(ialu_reg_mem);
8978 %}
8979 
8980 // Or Long Register with Register
8981 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8982   match(Set dst (OrL dst src));
8983   effect(KILL cr);
8984   format %{ "OR     $dst.lo,$src.lo\n\t"
8985             "OR     $dst.hi,$src.hi" %}
8986   opcode(0x0B,0x0B);
8987   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8988   ins_pipe( ialu_reg_reg_long );
8989 %}
8990 
8991 // Or Long Register with Immediate
8992 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8993   match(Set dst (OrL dst src));
8994   effect(KILL cr);
8995   format %{ "OR     $dst.lo,$src.lo\n\t"
8996             "OR     $dst.hi,$src.hi" %}
8997   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8998   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8999   ins_pipe( ialu_reg_long );
9000 %}
9001 
9002 // Or Long Register with Memory
9003 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9004   match(Set dst (OrL dst (LoadL mem)));
9005   effect(KILL cr);
9006   ins_cost(125);
9007   format %{ "OR     $dst.lo,$mem\n\t"
9008             "OR     $dst.hi,$mem+4" %}
9009   opcode(0x0B,0x0B);
9010   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9011   ins_pipe( ialu_reg_long_mem );
9012 %}
9013 
9014 // Xor Long Register with Register
9015 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9016   match(Set dst (XorL dst src));
9017   effect(KILL cr);
9018   format %{ "XOR    $dst.lo,$src.lo\n\t"
9019             "XOR    $dst.hi,$src.hi" %}
9020   opcode(0x33,0x33);
9021   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9022   ins_pipe( ialu_reg_reg_long );
9023 %}
9024 
9025 // Xor Long Register with Immediate -1
9026 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9027   match(Set dst (XorL dst imm));
9028   format %{ "NOT    $dst.lo\n\t"
9029             "NOT    $dst.hi" %}
9030   ins_encode %{
9031      __ notl($dst$$Register);
9032      __ notl(HIGH_FROM_LOW($dst$$Register));
9033   %}
9034   ins_pipe( ialu_reg_long );
9035 %}
9036 
9037 // Xor Long Register with Immediate
9038 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9039   match(Set dst (XorL dst src));
9040   effect(KILL cr);
9041   format %{ "XOR    $dst.lo,$src.lo\n\t"
9042             "XOR    $dst.hi,$src.hi" %}
9043   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9044   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9045   ins_pipe( ialu_reg_long );
9046 %}
9047 
9048 // Xor Long Register with Memory
9049 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9050   match(Set dst (XorL dst (LoadL mem)));
9051   effect(KILL cr);
9052   ins_cost(125);
9053   format %{ "XOR    $dst.lo,$mem\n\t"
9054             "XOR    $dst.hi,$mem+4" %}
9055   opcode(0x33,0x33);
9056   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9057   ins_pipe( ialu_reg_long_mem );
9058 %}
9059 
9060 // Shift Left Long by 1
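// (ADD lo,lo / ADC hi,hi doubles the 64-bit value, i.e. shifts it left by one
// bit, with the carry out of the low word feeding the high word; the by-2 and
// by-3 rules below simply repeat the pair.)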
9061 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9062   predicate(UseNewLongLShift);
9063   match(Set dst (LShiftL dst cnt));
9064   effect(KILL cr);
9065   ins_cost(100);
9066   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9067             "ADC    $dst.hi,$dst.hi" %}
9068   ins_encode %{
9069     __ addl($dst$$Register,$dst$$Register);
9070     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9071   %}
9072   ins_pipe( ialu_reg_long );
9073 %}
9074 
9075 // Shift Left Long by 2
9076 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9077   predicate(UseNewLongLShift);
9078   match(Set dst (LShiftL dst cnt));
9079   effect(KILL cr);
9080   ins_cost(100);
9081   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9082             "ADC    $dst.hi,$dst.hi\n\t"
9083             "ADD    $dst.lo,$dst.lo\n\t"
9084             "ADC    $dst.hi,$dst.hi" %}
9085   ins_encode %{
9086     __ addl($dst$$Register,$dst$$Register);
9087     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9088     __ addl($dst$$Register,$dst$$Register);
9089     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9090   %}
9091   ins_pipe( ialu_reg_long );
9092 %}
9093 
9094 // Shift Left Long by 3
9095 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9096   predicate(UseNewLongLShift);
9097   match(Set dst (LShiftL dst cnt));
9098   effect(KILL cr);
9099   ins_cost(100);
9100   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9101             "ADC    $dst.hi,$dst.hi\n\t"
9102             "ADD    $dst.lo,$dst.lo\n\t"
9103             "ADC    $dst.hi,$dst.hi\n\t"
9104             "ADD    $dst.lo,$dst.lo\n\t"
9105             "ADC    $dst.hi,$dst.hi" %}
9106   ins_encode %{
9107     __ addl($dst$$Register,$dst$$Register);
9108     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9109     __ addl($dst$$Register,$dst$$Register);
9110     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9111     __ addl($dst$$Register,$dst$$Register);
9112     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9113   %}
9114   ins_pipe( ialu_reg_long );
9115 %}
9116 
9117 // Shift Left Long by 1-31
9118 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9119   match(Set dst (LShiftL dst cnt));
9120   effect(KILL cr);
9121   ins_cost(200);
9122   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9123             "SHL    $dst.lo,$cnt" %}
9124   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9125   ins_encode( move_long_small_shift(dst,cnt) );
9126   ins_pipe( ialu_reg_long );
9127 %}
9128 
9129 // Shift Left Long by 32-63
9130 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9131   match(Set dst (LShiftL dst cnt));
9132   effect(KILL cr);
9133   ins_cost(300);
9134   format %{ "MOV    $dst.hi,$dst.lo\n"
9135           "\tSHL    $dst.hi,$cnt-32\n"
9136           "\tXOR    $dst.lo,$dst.lo" %}
9137   opcode(0xC1, 0x4);  /* C1 /4 ib */
9138   ins_encode( move_long_big_shift_clr(dst,cnt) );
9139   ins_pipe( ialu_reg_long );
9140 %}
9141 
9142 // Shift Left Long by variable
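// SHL and SHLD mask a CL count to its low 5 bits, so counts of 32..63 must be
// handled explicitly: the low word is first moved into the high word and then
// cleared, after which the SHLD/SHL pair shifts by count & 31.  The variable
// SHR and SAR rules further down use the analogous sequences.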
9143 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9144   match(Set dst (LShiftL dst shift));
9145   effect(KILL cr);
9146   ins_cost(500+200);
9147   size(17);
9148   format %{ "TEST   $shift,32\n\t"
9149             "JEQ,s  small\n\t"
9150             "MOV    $dst.hi,$dst.lo\n\t"
9151             "XOR    $dst.lo,$dst.lo\n"
9152     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9153             "SHL    $dst.lo,$shift" %}
9154   ins_encode( shift_left_long( dst, shift ) );
9155   ins_pipe( pipe_slow );
9156 %}
9157 
9158 // Shift Right Long by 1-31
9159 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9160   match(Set dst (URShiftL dst cnt));
9161   effect(KILL cr);
9162   ins_cost(200);
9163   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9164             "SHR    $dst.hi,$cnt" %}
9165   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9166   ins_encode( move_long_small_shift(dst,cnt) );
9167   ins_pipe( ialu_reg_long );
9168 %}
9169 
9170 // Shift Right Long by 32-63
9171 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9172   match(Set dst (URShiftL dst cnt));
9173   effect(KILL cr);
9174   ins_cost(300);
9175   format %{ "MOV    $dst.lo,$dst.hi\n"
9176           "\tSHR    $dst.lo,$cnt-32\n"
9177           "\tXOR    $dst.hi,$dst.hi" %}
9178   opcode(0xC1, 0x5);  /* C1 /5 ib */
9179   ins_encode( move_long_big_shift_clr(dst,cnt) );
9180   ins_pipe( ialu_reg_long );
9181 %}
9182 
9183 // Shift Right Long by variable
9184 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9185   match(Set dst (URShiftL dst shift));
9186   effect(KILL cr);
9187   ins_cost(600);
9188   size(17);
9189   format %{ "TEST   $shift,32\n\t"
9190             "JEQ,s  small\n\t"
9191             "MOV    $dst.lo,$dst.hi\n\t"
9192             "XOR    $dst.hi,$dst.hi\n"
9193     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9194             "SHR    $dst.hi,$shift" %}
9195   ins_encode( shift_right_long( dst, shift ) );
9196   ins_pipe( pipe_slow );
9197 %}
9198 
9199 // Shift Right arithmetic Long by 1-31
9200 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9201   match(Set dst (RShiftL dst cnt));
9202   effect(KILL cr);
9203   ins_cost(200);
9204   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9205             "SAR    $dst.hi,$cnt" %}
9206   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9207   ins_encode( move_long_small_shift(dst,cnt) );
9208   ins_pipe( ialu_reg_long );
9209 %}
9210 
9211 // Shift Right arithmetic Long by 32-63
9212 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9213   match(Set dst (RShiftL dst cnt));
9214   effect(KILL cr);
9215   ins_cost(300);
9216   format %{ "MOV    $dst.lo,$dst.hi\n"
9217           "\tSAR    $dst.lo,$cnt-32\n"
9218           "\tSAR    $dst.hi,31" %}
9219   opcode(0xC1, 0x7);  /* C1 /7 ib */
9220   ins_encode( move_long_big_shift_sign(dst,cnt) );
9221   ins_pipe( ialu_reg_long );
9222 %}
9223 
9224 // Shift Right arithmetic Long by variable
9225 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9226   match(Set dst (RShiftL dst shift));
9227   effect(KILL cr);
9228   ins_cost(600);
9229   size(18);
9230   format %{ "TEST   $shift,32\n\t"
9231             "JEQ,s  small\n\t"
9232             "MOV    $dst.lo,$dst.hi\n\t"
9233             "SAR    $dst.hi,31\n"
9234     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9235             "SAR    $dst.hi,$shift" %}
9236   ins_encode( shift_right_arith_long( dst, shift ) );
9237   ins_pipe( pipe_slow );
9238 %}
9239 
9240 
9241 //----------Double Instructions------------------------------------------------
9242 // Double Math
9243 
9244 // Compare & branch
9245 
9246 // P6 version of double compare, sets condition codes in EFLAGS
9247 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9248   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9249   match(Set cr (CmpD src1 src2));
9250   effect(KILL rax);
9251   ins_cost(150);
9252   format %{ "FLD    $src1\n\t"
9253             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9254             "JNP    exit\n\t"
9255             "MOV    ah,1       // saw a NaN, set CF\n\t"
9256             "SAHF\n"
9257      "exit:\tNOP               // avoid branch to branch" %}
9258   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9259   ins_encode( Push_Reg_DPR(src1),
9260               OpcP, RegOpc(src2),
9261               cmpF_P6_fixup );
9262   ins_pipe( pipe_slow );
9263 %}
9264 
9265 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9266   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9267   match(Set cr (CmpD src1 src2));
9268   ins_cost(150);
9269   format %{ "FLD    $src1\n\t"
9270             "FUCOMIP ST,$src2  // P6 instruction" %}
9271   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9272   ins_encode( Push_Reg_DPR(src1),
9273               OpcP, RegOpc(src2));
9274   ins_pipe( pipe_slow );
9275 %}
9276 
9277 // Compare & branch
9278 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9279   predicate(UseSSE<=1);
9280   match(Set cr (CmpD src1 src2));
9281   effect(KILL rax);
9282   ins_cost(200);
9283   format %{ "FLD    $src1\n\t"
9284             "FCOMp  $src2\n\t"
9285             "FNSTSW AX\n\t"
9286             "TEST   AX,0x400\n\t"
9287             "JZ,s   flags\n\t"
9288             "MOV    AH,1\t# unordered treat as LT\n"
9289     "flags:\tSAHF" %}
9290   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9291   ins_encode( Push_Reg_DPR(src1),
9292               OpcP, RegOpc(src2),
9293               fpu_flags);
9294   ins_pipe( pipe_slow );
9295 %}
9296 
9297 // Compare vs zero into -1,0,1
9298 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9299   predicate(UseSSE<=1);
9300   match(Set dst (CmpD3 src1 zero));
9301   effect(KILL cr, KILL rax);
9302   ins_cost(280);
9303   format %{ "FTSTD  $dst,$src1" %}
9304   opcode(0xE4, 0xD9);
9305   ins_encode( Push_Reg_DPR(src1),
9306               OpcS, OpcP, PopFPU,
9307               CmpF_Result(dst));
9308   ins_pipe( pipe_slow );
9309 %}
9310 
9311 // Compare into -1,0,1
9312 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9313   predicate(UseSSE<=1);
9314   match(Set dst (CmpD3 src1 src2));
9315   effect(KILL cr, KILL rax);
9316   ins_cost(300);
9317   format %{ "FCMPD  $dst,$src1,$src2" %}
9318   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9319   ins_encode( Push_Reg_DPR(src1),
9320               OpcP, RegOpc(src2),
9321               CmpF_Result(dst));
9322   ins_pipe( pipe_slow );
9323 %}
9324 
9325 // Double compare, sets condition codes in EFLAGS by XMM regs
9326 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9327   predicate(UseSSE>=2);
9328   match(Set cr (CmpD src1 src2));
9329   ins_cost(145);
9330   format %{ "UCOMISD $src1,$src2\n\t"
9331             "JNP,s   exit\n\t"
9332             "PUSHF\t# saw NaN, set CF\n\t"
9333             "AND     [rsp], #0xffffff2b\n\t"
9334             "POPF\n"
9335     "exit:" %}
9336   ins_encode %{
9337     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9338     emit_cmpfp_fixup(_masm);
9339   %}
9340   ins_pipe( pipe_slow );
9341 %}
9342 
9343 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9344   predicate(UseSSE>=2);
9345   match(Set cr (CmpD src1 src2));
9346   ins_cost(100);
9347   format %{ "UCOMISD $src1,$src2" %}
9348   ins_encode %{
9349     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9350   %}
9351   ins_pipe( pipe_slow );
9352 %}
9353 
9354 // Double compare, sets condition codes in EFLAGS by XMM regs
9355 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9356   predicate(UseSSE>=2);
9357   match(Set cr (CmpD src1 (LoadD src2)));
9358   ins_cost(145);
9359   format %{ "UCOMISD $src1,$src2\n\t"
9360             "JNP,s   exit\n\t"
9361             "PUSHF\t# saw NaN, set CF\n\t"
9362             "AND     [rsp], #0xffffff2b\n\t"
9363             "POPF\n"
9364     "exit:" %}
9365   ins_encode %{
9366     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9367     emit_cmpfp_fixup(_masm);
9368   %}
9369   ins_pipe( pipe_slow );
9370 %}
9371 
9372 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9373   predicate(UseSSE>=2);
9374   match(Set cr (CmpD src1 (LoadD src2)));
9375   ins_cost(100);
9376   format %{ "UCOMISD $src1,$src2" %}
9377   ins_encode %{
9378     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9379   %}
9380   ins_pipe( pipe_slow );
9381 %}
9382 
9383 // Compare into -1,0,1 in XMM
9384 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9385   predicate(UseSSE>=2);
9386   match(Set dst (CmpD3 src1 src2));
9387   effect(KILL cr);
9388   ins_cost(255);
9389   format %{ "UCOMISD $src1, $src2\n\t"
9390             "MOV     $dst, #-1\n\t"
9391             "JP,s    done\n\t"
9392             "JB,s    done\n\t"
9393             "SETNE   $dst\n\t"
9394             "MOVZB   $dst, $dst\n"
9395     "done:" %}
9396   ins_encode %{
9397     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9398     emit_cmpfp3(_masm, $dst$$Register);
9399   %}
9400   ins_pipe( pipe_slow );
9401 %}
9402 
9403 // Compare into -1,0,1 in XMM and memory
9404 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9405   predicate(UseSSE>=2);
9406   match(Set dst (CmpD3 src1 (LoadD src2)));
9407   effect(KILL cr);
9408   ins_cost(275);
9409   format %{ "UCOMISD $src1, $src2\n\t"
9410             "MOV     $dst, #-1\n\t"
9411             "JP,s    done\n\t"
9412             "JB,s    done\n\t"
9413             "SETNE   $dst\n\t"
9414             "MOVZB   $dst, $dst\n"
9415     "done:" %}
9416   ins_encode %{
9417     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9418     emit_cmpfp3(_masm, $dst$$Register);
9419   %}
9420   ins_pipe( pipe_slow );
9421 %}
9422 
9423 
9424 instruct subDPR_reg(regDPR dst, regDPR src) %{
9425   predicate (UseSSE <=1);
9426   match(Set dst (SubD dst src));
9427 
9428   format %{ "FLD    $src\n\t"
9429             "DSUBp  $dst,ST" %}
9430   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9431   ins_cost(150);
9432   ins_encode( Push_Reg_DPR(src),
9433               OpcP, RegOpc(dst) );
9434   ins_pipe( fpu_reg_reg );
9435 %}
9436 
9437 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9438   predicate (UseSSE <=1);
9439   match(Set dst (RoundDouble (SubD src1 src2)));
9440   ins_cost(250);
9441 
9442   format %{ "FLD    $src2\n\t"
9443             "DSUB   ST,$src1\n\t"
9444             "FSTP_D $dst\t# D-round" %}
9445   opcode(0xD8, 0x5);
9446   ins_encode( Push_Reg_DPR(src2),
9447               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9448   ins_pipe( fpu_mem_reg_reg );
9449 %}
9450 
9451 
9452 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9453   predicate (UseSSE <=1);
9454   match(Set dst (SubD dst (LoadD src)));
9455   ins_cost(150);
9456 
9457   format %{ "FLD    $src\n\t"
9458             "DSUBp  $dst,ST" %}
9459   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9460   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9461               OpcP, RegOpc(dst) );
9462   ins_pipe( fpu_reg_mem );
9463 %}
9464 
9465 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9466   predicate (UseSSE<=1);
9467   match(Set dst (AbsD src));
9468   ins_cost(100);
9469   format %{ "FABS" %}
9470   opcode(0xE1, 0xD9);
9471   ins_encode( OpcS, OpcP );
9472   ins_pipe( fpu_reg_reg );
9473 %}
9474 
9475 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9476   predicate(UseSSE<=1);
9477   match(Set dst (NegD src));
9478   ins_cost(100);
9479   format %{ "FCHS" %}
9480   opcode(0xE0, 0xD9);
9481   ins_encode( OpcS, OpcP );
9482   ins_pipe( fpu_reg_reg );
9483 %}
9484 
9485 instruct addDPR_reg(regDPR dst, regDPR src) %{
9486   predicate(UseSSE<=1);
9487   match(Set dst (AddD dst src));
9488   format %{ "FLD    $src\n\t"
9489             "DADD   $dst,ST" %}
9490   size(4);
9491   ins_cost(150);
9492   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9493   ins_encode( Push_Reg_DPR(src),
9494               OpcP, RegOpc(dst) );
9495   ins_pipe( fpu_reg_reg );
9496 %}
9497 
9498 
9499 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9500   predicate(UseSSE<=1);
9501   match(Set dst (RoundDouble (AddD src1 src2)));
9502   ins_cost(250);
9503 
9504   format %{ "FLD    $src2\n\t"
9505             "DADD   ST,$src1\n\t"
9506             "FSTP_D $dst\t# D-round" %}
9507   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9508   ins_encode( Push_Reg_DPR(src2),
9509               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9510   ins_pipe( fpu_mem_reg_reg );
9511 %}
9512 
9513 
9514 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9515   predicate(UseSSE<=1);
9516   match(Set dst (AddD dst (LoadD src)));
9517   ins_cost(150);
9518 
9519   format %{ "FLD    $src\n\t"
9520             "DADDp  $dst,ST" %}
9521   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9522   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9523               OpcP, RegOpc(dst) );
9524   ins_pipe( fpu_reg_mem );
9525 %}
9526 
9527 // add-to-memory
9528 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9529   predicate(UseSSE<=1);
9530   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9531   ins_cost(150);
9532 
9533   format %{ "FLD_D  $dst\n\t"
9534             "DADD   ST,$src\n\t"
9535             "FST_D  $dst" %}
9536   opcode(0xDD, 0x0);
9537   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9538               Opcode(0xD8), RegOpc(src),
9539               set_instruction_start,
9540               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9541   ins_pipe( fpu_reg_mem );
9542 %}
9543 
9544 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9545   predicate(UseSSE<=1);
9546   match(Set dst (AddD dst con));
9547   ins_cost(125);
9548   format %{ "FLD1\n\t"
9549             "DADDp  $dst,ST" %}
9550   ins_encode %{
9551     __ fld1();
9552     __ faddp($dst$$reg);
9553   %}
9554   ins_pipe(fpu_reg);
9555 %}
9556 
9557 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9558   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9559   match(Set dst (AddD dst con));
9560   ins_cost(200);
9561   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9562             "DADDp  $dst,ST" %}
9563   ins_encode %{
9564     __ fld_d($constantaddress($con));
9565     __ faddp($dst$$reg);
9566   %}
9567   ins_pipe(fpu_reg_mem);
9568 %}
9569 
9570 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9571   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9572   match(Set dst (RoundDouble (AddD src con)));
9573   ins_cost(200);
9574   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9575             "DADD   ST,$src\n\t"
9576             "FSTP_D $dst\t# D-round" %}
9577   ins_encode %{
9578     __ fld_d($constantaddress($con));
9579     __ fadd($src$$reg);
9580     __ fstp_d(Address(rsp, $dst$$disp));
9581   %}
9582   ins_pipe(fpu_mem_reg_con);
9583 %}
9584 
9585 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9586   predicate(UseSSE<=1);
9587   match(Set dst (MulD dst src));
9588   format %{ "FLD    $src\n\t"
9589             "DMULp  $dst,ST" %}
9590   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9591   ins_cost(150);
9592   ins_encode( Push_Reg_DPR(src),
9593               OpcP, RegOpc(dst) );
9594   ins_pipe( fpu_reg_reg );
9595 %}
9596 
9597 // Strict FP instruction biases argument before multiply then
9598 // biases result to avoid double rounding of subnormals.
9599 //
9600 // scale arg1 by multiplying arg1 by 2^(-15360)
9601 // load arg2
9602 // multiply scaled arg1 by arg2
9603 // rescale product by 2^(15360)
9604 //
9605 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9606   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9607   match(Set dst (MulD dst src));
9608   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9609 
9610   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9611             "DMULp  $dst,ST\n\t"
9612             "FLD    $src\n\t"
9613             "DMULp  $dst,ST\n\t"
9614             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9615             "DMULp  $dst,ST\n\t" %}
9616   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9617   ins_encode( strictfp_bias1(dst),
9618               Push_Reg_DPR(src),
9619               OpcP, RegOpc(dst),
9620               strictfp_bias2(dst) );
9621   ins_pipe( fpu_reg_reg );
9622 %}
9623 
9624 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9625   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9626   match(Set dst (MulD dst con));
9627   ins_cost(200);
9628   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9629             "DMULp  $dst,ST" %}
9630   ins_encode %{
9631     __ fld_d($constantaddress($con));
9632     __ fmulp($dst$$reg);
9633   %}
9634   ins_pipe(fpu_reg_mem);
9635 %}
9636 
9637 
9638 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9639   predicate( UseSSE<=1 );
9640   match(Set dst (MulD dst (LoadD src)));
9641   ins_cost(200);
9642   format %{ "FLD_D  $src\n\t"
9643             "DMULp  $dst,ST" %}
9644   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9645   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9646               OpcP, RegOpc(dst) );
9647   ins_pipe( fpu_reg_mem );
9648 %}
9649 
9650 //
9651 // Cisc-alternate to reg-reg multiply
9652 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9653   predicate( UseSSE<=1 );
9654   match(Set dst (MulD src (LoadD mem)));
9655   ins_cost(250);
9656   format %{ "FLD_D  $mem\n\t"
9657             "DMUL   ST,$src\n\t"
9658             "FSTP_D $dst" %}
9659   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9660   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9661               OpcReg_FPR(src),
9662               Pop_Reg_DPR(dst) );
9663   ins_pipe( fpu_reg_reg_mem );
9664 %}
9665 
9666 
9667 // MACRO3 -- addDPR a mulDPR
9668 // This instruction is a '2-address' instruction in that the result goes
9669 // back to src2.  This eliminates a move from the macro; possibly the
9670 // register allocator will have to add it back (and maybe not).
9671 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9672   predicate( UseSSE<=1 );
9673   match(Set src2 (AddD (MulD src0 src1) src2));
9674   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9675             "DMUL   ST,$src1\n\t"
9676             "DADDp  $src2,ST" %}
9677   ins_cost(250);
9678   opcode(0xDD); /* LoadD DD /0 */
9679   ins_encode( Push_Reg_FPR(src0),
9680               FMul_ST_reg(src1),
9681               FAddP_reg_ST(src2) );
9682   ins_pipe( fpu_reg_reg_reg );
9683 %}
9684 
9685 
9686 // MACRO3 -- subDPR a mulDPR
9687 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9688   predicate( UseSSE<=1 );
9689   match(Set src2 (SubD (MulD src0 src1) src2));
9690   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9691             "DMUL   ST,$src1\n\t"
9692             "DSUBRp $src2,ST" %}
9693   ins_cost(250);
9694   ins_encode( Push_Reg_FPR(src0),
9695               FMul_ST_reg(src1),
9696               Opcode(0xDE), Opc_plus(0xE0,src2));
9697   ins_pipe( fpu_reg_reg_reg );
9698 %}
9699 
9700 
9701 instruct divDPR_reg(regDPR dst, regDPR src) %{
9702   predicate( UseSSE<=1 );
9703   match(Set dst (DivD dst src));
9704 
9705   format %{ "FLD    $src\n\t"
9706             "FDIVp  $dst,ST" %}
9707   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9708   ins_cost(150);
9709   ins_encode( Push_Reg_DPR(src),
9710               OpcP, RegOpc(dst) );
9711   ins_pipe( fpu_reg_reg );
9712 %}
9713 
9714 // Strict FP instruction biases argument before division then
9715 // biases result, to avoid double rounding of subnormals.
9716 //
9717 // scale dividend by multiplying dividend by 2^(-15360)
9718 // load divisor
9719 // divide scaled dividend by divisor
9720 // rescale quotient by 2^(15360)
9721 //
9722 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9727 
9728   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9729             "DMULp  $dst,ST\n\t"
9730             "FLD    $src\n\t"
9731             "FDIVp  $dst,ST\n\t"
9732             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9733             "DMULp  $dst,ST\n\t" %}
9734   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9735   ins_encode( strictfp_bias1(dst),
9736               Push_Reg_DPR(src),
9737               OpcP, RegOpc(dst),
9738               strictfp_bias2(dst) );
9739   ins_pipe( fpu_reg_reg );
9740 %}
9741 
9742 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9743   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9744   match(Set dst (RoundDouble (DivD src1 src2)));
9745 
9746   format %{ "FLD    $src1\n\t"
9747             "FDIV   ST,$src2\n\t"
9748             "FSTP_D $dst\t# D-round" %}
9749   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9750   ins_encode( Push_Reg_DPR(src1),
9751               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9752   ins_pipe( fpu_mem_reg_reg );
9753 %}
9754 
9755 
9756 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9757   predicate(UseSSE<=1);
9758   match(Set dst (ModD dst src));
9759   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9760 
9761   format %{ "DMOD   $dst,$src" %}
9762   ins_cost(250);
9763   ins_encode(Push_Reg_Mod_DPR(dst, src),
9764               emitModDPR(),
9765               Push_Result_Mod_DPR(src),
9766               Pop_Reg_DPR(dst));
9767   ins_pipe( pipe_slow );
9768 %}
9769 
9770 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9771   predicate(UseSSE>=2);
9772   match(Set dst (ModD src0 src1));
9773   effect(KILL rax, KILL cr);
9774 
9775   format %{ "SUB    ESP,8\t # DMOD\n"
9776           "\tMOVSD  [ESP+0],$src1\n"
9777           "\tFLD_D  [ESP+0]\n"
9778           "\tMOVSD  [ESP+0],$src0\n"
9779           "\tFLD_D  [ESP+0]\n"
9780      "loop:\tFPREM\n"
9781           "\tFWAIT\n"
9782           "\tFNSTSW AX\n"
9783           "\tSAHF\n"
9784           "\tJP     loop\n"
9785           "\tFSTP_D [ESP+0]\n"
9786           "\tMOVSD  $dst,[ESP+0]\n"
9787           "\tADD    ESP,8\n"
9788           "\tFSTP   ST0\t # Restore FPU Stack"
9789     %}
9790   ins_cost(250);
9791   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9792   ins_pipe( pipe_slow );
9793 %}
9794 
9795 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9796   predicate (UseSSE<=1);
9797   match(Set dst (SinD src));
9798   ins_cost(1800);
9799   format %{ "DSIN   $dst" %}
9800   opcode(0xD9, 0xFE);
9801   ins_encode( OpcP, OpcS );
9802   ins_pipe( pipe_slow );
9803 %}
9804 
9805 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9806   predicate (UseSSE>=2);
9807   match(Set dst (SinD dst));
9808   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9809   ins_cost(1800);
9810   format %{ "DSIN   $dst" %}
9811   opcode(0xD9, 0xFE);
9812   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9813   ins_pipe( pipe_slow );
9814 %}
9815 
9816 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9817   predicate (UseSSE<=1);
9818   match(Set dst (CosD src));
9819   ins_cost(1800);
9820   format %{ "DCOS   $dst" %}
9821   opcode(0xD9, 0xFF);
9822   ins_encode( OpcP, OpcS );
9823   ins_pipe( pipe_slow );
9824 %}
9825 
9826 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9827   predicate (UseSSE>=2);
9828   match(Set dst (CosD dst));
9829   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9830   ins_cost(1800);
9831   format %{ "DCOS   $dst" %}
9832   opcode(0xD9, 0xFF);
9833   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9834   ins_pipe( pipe_slow );
9835 %}
9836 
9837 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9838   predicate (UseSSE<=1);
9839   match(Set dst(TanD src));
9840   format %{ "DTAN   $dst" %}
9841   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9842               Opcode(0xDD), Opcode(0xD8));   // fstp st
9843   ins_pipe( pipe_slow );
9844 %}
9845 
9846 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9847   predicate (UseSSE>=2);
9848   match(Set dst(TanD dst));
9849   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9850   format %{ "DTAN   $dst" %}
9851   ins_encode( Push_SrcD(dst),
9852               Opcode(0xD9), Opcode(0xF2),    // fptan
9853               Opcode(0xDD), Opcode(0xD8),   // fstp st
9854               Push_ResultD(dst) );
9855   ins_pipe( pipe_slow );
9856 %}
9857 
9858 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9859   predicate (UseSSE<=1);
9860   match(Set dst(AtanD dst src));
9861   format %{ "DATA   $dst,$src" %}
9862   opcode(0xD9, 0xF3);
9863   ins_encode( Push_Reg_DPR(src),
9864               OpcP, OpcS, RegOpc(dst) );
9865   ins_pipe( pipe_slow );
9866 %}
9867 
9868 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9869   predicate (UseSSE>=2);
9870   match(Set dst(AtanD dst src));
9871   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9872   format %{ "DATA   $dst,$src" %}
9873   opcode(0xD9, 0xF3);
9874   ins_encode( Push_SrcD(src),
9875               OpcP, OpcS, Push_ResultD(dst) );
9876   ins_pipe( pipe_slow );
9877 %}
9878 
9879 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9880   predicate (UseSSE<=1);
9881   match(Set dst (SqrtD src));
9882   format %{ "DSQRT  $dst,$src" %}
9883   opcode(0xFA, 0xD9);
9884   ins_encode( Push_Reg_DPR(src),
9885               OpcS, OpcP, Pop_Reg_DPR(dst) );
9886   ins_pipe( pipe_slow );
9887 %}
9888 
9889 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9890   predicate (UseSSE<=1);
9891   match(Set Y (PowD X Y));  // Raise X to the Yth power
9892   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9893   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9894   ins_encode %{
9895     __ subptr(rsp, 8);
9896     __ fld_s($X$$reg - 1);
9897     __ fast_pow();
9898     __ addptr(rsp, 8);
9899   %}
9900   ins_pipe( pipe_slow );
9901 %}
9902 
9903 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9904   predicate (UseSSE>=2);
9905   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9906   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9907   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9908   ins_encode %{
9909     __ subptr(rsp, 8);
9910     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9911     __ fld_d(Address(rsp, 0));
9912     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9913     __ fld_d(Address(rsp, 0));
9914     __ fast_pow();
9915     __ fstp_d(Address(rsp, 0));
9916     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9917     __ addptr(rsp, 8);
9918   %}
9919   ins_pipe( pipe_slow );
9920 %}
9921 
9922 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9923   predicate (UseSSE<=1);
9924   // The source Double operand on FPU stack
9925   match(Set dst (Log10D src));
9926   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9927   // fxch         ; swap ST(0) with ST(1)
9928   // fyl2x        ; compute log_10(2) * log_2(x)
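  // (FYL2X computes ST(1) * log2(ST(0)) and pops, so with ST(1) = log10(2)
  //  and ST(0) = x this leaves log10(x) = log10(2) * log2(x) on the stack.)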
9929   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9930             "FXCH   \n\t"
9931             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9932          %}
9933   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9934               Opcode(0xD9), Opcode(0xC9),   // fxch
9935               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9936 
9937   ins_pipe( pipe_slow );
9938 %}
9939 
9940 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9941   predicate (UseSSE>=2);
9942   effect(KILL cr);
9943   match(Set dst (Log10D src));
9944   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9945   // fyl2x        ; compute log_10(2) * log_2(x)
9946   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9947             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9948          %}
9949   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9950               Push_SrcD(src),
9951               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9952               Push_ResultD(dst));
9953 
9954   ins_pipe( pipe_slow );
9955 %}
9956 
9957 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9958   predicate (UseSSE<=1);
9959   // The source Double operand on FPU stack
9960   match(Set dst (LogD src));
9961   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9962   // fxch         ; swap ST(0) with ST(1)
9963   // fyl2x        ; compute log_e(2) * log_2(x)
9964   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9965             "FXCH   \n\t"
9966             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9967          %}
9968   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9969               Opcode(0xD9), Opcode(0xC9),   // fxch
9970               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9971 
9972   ins_pipe( pipe_slow );
9973 %}
9974 
9975 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9976   predicate (UseSSE>=2);
9977   effect(KILL cr);
9978   // The source and result Double operands in XMM registers
9979   match(Set dst (LogD src));
9980   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9981   // fyl2x        ; compute log_e(2) * log_2(x)
9982   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9983             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9984          %}
9985   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9986               Push_SrcD(src),
9987               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9988               Push_ResultD(dst));
9989   ins_pipe( pipe_slow );
9990 %}
9991 
9992 //-------------Float Instructions-------------------------------
9993 // Float Math
9994 
9995 // Code for float compare:
9996 //     fcompp();
9997 //     fwait(); fnstsw_ax();
9998 //     sahf();
9999 //     movl(dst, unordered_result);
10000 //     jcc(Assembler::parity, exit);
10001 //     movl(dst, less_result);
10002 //     jcc(Assembler::below, exit);
10003 //     movl(dst, equal_result);
10004 //     jcc(Assembler::equal, exit);
10005 //     movl(dst, greater_result);
10006 //   exit:
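//
// After FNSTSW AX / SAHF, the FPU condition codes land in EFLAGS as C0->CF,
// C2->PF and C3->ZF, so "unordered" shows up as PF=1 (hence the parity test)
// and "less than" as CF=1.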
10007 
10008 // P6 version of float compare, sets condition codes in EFLAGS
10009 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10010   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10011   match(Set cr (CmpF src1 src2));
10012   effect(KILL rax);
10013   ins_cost(150);
10014   format %{ "FLD    $src1\n\t"
10015             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10016             "JNP    exit\n\t"
10017             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10018             "SAHF\n"
10019      "exit:\tNOP               // avoid branch to branch" %}
10020   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10021   ins_encode( Push_Reg_DPR(src1),
10022               OpcP, RegOpc(src2),
10023               cmpF_P6_fixup );
10024   ins_pipe( pipe_slow );
10025 %}
10026 
10027 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10028   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10029   match(Set cr (CmpF src1 src2));
10030   ins_cost(100);
10031   format %{ "FLD    $src1\n\t"
10032             "FUCOMIP ST,$src2  // P6 instruction" %}
10033   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10034   ins_encode( Push_Reg_DPR(src1),
10035               OpcP, RegOpc(src2));
10036   ins_pipe( pipe_slow );
10037 %}
10038 
10039 
10040 // Compare & branch
10041 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10042   predicate(UseSSE == 0);
10043   match(Set cr (CmpF src1 src2));
10044   effect(KILL rax);
10045   ins_cost(200);
10046   format %{ "FLD    $src1\n\t"
10047             "FCOMp  $src2\n\t"
10048             "FNSTSW AX\n\t"
10049             "TEST   AX,0x400\n\t"
10050             "JZ,s   flags\n\t"
10051             "MOV    AH,1\t# unordered treat as LT\n"
10052     "flags:\tSAHF" %}
10053   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10054   ins_encode( Push_Reg_DPR(src1),
10055               OpcP, RegOpc(src2),
10056               fpu_flags);
10057   ins_pipe( pipe_slow );
10058 %}
10059 
10060 // Compare vs zero into -1,0,1
10061 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10062   predicate(UseSSE == 0);
10063   match(Set dst (CmpF3 src1 zero));
10064   effect(KILL cr, KILL rax);
10065   ins_cost(280);
10066   format %{ "FTSTF  $dst,$src1" %}
10067   opcode(0xE4, 0xD9);
10068   ins_encode( Push_Reg_DPR(src1),
10069               OpcS, OpcP, PopFPU,
10070               CmpF_Result(dst));
10071   ins_pipe( pipe_slow );
10072 %}
10073 
10074 // Compare into -1,0,1
10075 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10076   predicate(UseSSE == 0);
10077   match(Set dst (CmpF3 src1 src2));
10078   effect(KILL cr, KILL rax);
10079   ins_cost(300);
10080   format %{ "FCMPF  $dst,$src1,$src2" %}
10081   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10082   ins_encode( Push_Reg_DPR(src1),
10083               OpcP, RegOpc(src2),
10084               CmpF_Result(dst));
10085   ins_pipe( pipe_slow );
10086 %}
10087 
10088 // float compare and set condition codes in EFLAGS by XMM regs
10089 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10090   predicate(UseSSE>=1);
10091   match(Set cr (CmpF src1 src2));
10092   ins_cost(145);
10093   format %{ "UCOMISS $src1,$src2\n\t"
10094             "JNP,s   exit\n\t"
10095             "PUSHF\t# saw NaN, set CF\n\t"
10096             "AND     [rsp], #0xffffff2b\n\t"
10097             "POPF\n"
10098     "exit:" %}
10099   ins_encode %{
10100     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10101     emit_cmpfp_fixup(_masm);
10102   %}
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10107   predicate(UseSSE>=1);
10108   match(Set cr (CmpF src1 src2));
10109   ins_cost(100);
10110   format %{ "UCOMISS $src1,$src2" %}
10111   ins_encode %{
10112     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10113   %}
10114   ins_pipe( pipe_slow );
10115 %}
10116 
10117 // float compare and set condition codes in EFLAGS by XMM regs
10118 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10119   predicate(UseSSE>=1);
10120   match(Set cr (CmpF src1 (LoadF src2)));
10121   ins_cost(165);
10122   format %{ "UCOMISS $src1,$src2\n\t"
10123             "JNP,s   exit\n\t"
10124             "PUSHF\t# saw NaN, set CF\n\t"
10125             "AND     [rsp], #0xffffff2b\n\t"
10126             "POPF\n"
10127     "exit:" %}
10128   ins_encode %{
10129     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10130     emit_cmpfp_fixup(_masm);
10131   %}
10132   ins_pipe( pipe_slow );
10133 %}
10134 
10135 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10136   predicate(UseSSE>=1);
10137   match(Set cr (CmpF src1 (LoadF src2)));
10138   ins_cost(100);
10139   format %{ "UCOMISS $src1,$src2" %}
10140   ins_encode %{
10141     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10142   %}
10143   ins_pipe( pipe_slow );
10144 %}
10145 
10146 // Compare into -1,0,1 in XMM
10147 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10148   predicate(UseSSE>=1);
10149   match(Set dst (CmpF3 src1 src2));
10150   effect(KILL cr);
10151   ins_cost(255);
10152   format %{ "UCOMISS $src1, $src2\n\t"
10153             "MOV     $dst, #-1\n\t"
10154             "JP,s    done\n\t"
10155             "JB,s    done\n\t"
10156             "SETNE   $dst\n\t"
10157             "MOVZB   $dst, $dst\n"
10158     "done:" %}
10159   ins_encode %{
10160     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10161     emit_cmpfp3(_masm, $dst$$Register);
10162   %}
10163   ins_pipe( pipe_slow );
10164 %}
10165 
10166 // Compare into -1,0,1 in XMM and memory
10167 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10168   predicate(UseSSE>=1);
10169   match(Set dst (CmpF3 src1 (LoadF src2)));
10170   effect(KILL cr);
10171   ins_cost(275);
10172   format %{ "UCOMISS $src1, $src2\n\t"
10173             "MOV     $dst, #-1\n\t"
10174             "JP,s    done\n\t"
10175             "JB,s    done\n\t"
10176             "SETNE   $dst\n\t"
10177             "MOVZB   $dst, $dst\n"
10178     "done:" %}
10179   ins_encode %{
10180     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10181     emit_cmpfp3(_masm, $dst$$Register);
10182   %}
10183   ins_pipe( pipe_slow );
10184 %}
10185 
10186 // Spill to obtain 24-bit precision
10187 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10188   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10189   match(Set dst (SubF src1 src2));
10190 
10191   format %{ "FSUB   $dst,$src1 - $src2" %}
10192   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10193   ins_encode( Push_Reg_FPR(src1),
10194               OpcReg_FPR(src2),
10195               Pop_Mem_FPR(dst) );
10196   ins_pipe( fpu_mem_reg_reg );
10197 %}
10198 //
10199 // This instruction does not round to 24-bits
10200 instruct subFPR_reg(regFPR dst, regFPR src) %{
10201   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10202   match(Set dst (SubF dst src));
10203 
10204   format %{ "FSUB   $dst,$src" %}
10205   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10206   ins_encode( Push_Reg_FPR(src),
10207               OpcP, RegOpc(dst) );
10208   ins_pipe( fpu_reg_reg );
10209 %}
10210 
10211 // Spill to obtain 24-bit precision
10212 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10213   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10214   match(Set dst (AddF src1 src2));
10215 
10216   format %{ "FADD   $dst,$src1,$src2" %}
10217   opcode(0xD8, 0x0); /* D8 C0+i */
10218   ins_encode( Push_Reg_FPR(src2),
10219               OpcReg_FPR(src1),
10220               Pop_Mem_FPR(dst) );
10221   ins_pipe( fpu_mem_reg_reg );
10222 %}
10223 //
10224 // This instruction does not round to 24-bits
10225 instruct addFPR_reg(regFPR dst, regFPR src) %{
10226   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10227   match(Set dst (AddF dst src));
10228 
10229   format %{ "FLD    $src\n\t"
10230             "FADDp  $dst,ST" %}
10231   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10232   ins_encode( Push_Reg_FPR(src),
10233               OpcP, RegOpc(dst) );
10234   ins_pipe( fpu_reg_reg );
10235 %}
10236 
10237 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10238   predicate(UseSSE==0);
10239   match(Set dst (AbsF src));
10240   ins_cost(100);
10241   format %{ "FABS" %}
10242   opcode(0xE1, 0xD9);
10243   ins_encode( OpcS, OpcP );
10244   ins_pipe( fpu_reg_reg );
10245 %}
10246 
10247 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10248   predicate(UseSSE==0);
10249   match(Set dst (NegF src));
10250   ins_cost(100);
10251   format %{ "FCHS" %}
10252   opcode(0xE0, 0xD9);
10253   ins_encode( OpcS, OpcP );
10254   ins_pipe( fpu_reg_reg );
10255 %}
10256 
10257 // Cisc-alternate to addFPR_reg
10258 // Spill to obtain 24-bit precision
10259 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10260   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10261   match(Set dst (AddF src1 (LoadF src2)));
10262 
10263   format %{ "FLD    $src2\n\t"
10264             "FADD   ST,$src1\n\t"
10265             "FSTP_S $dst" %}
10266   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10267   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10268               OpcReg_FPR(src1),
10269               Pop_Mem_FPR(dst) );
10270   ins_pipe( fpu_mem_reg_mem );
10271 %}
10272 //
10273 // Cisc-alternate to addFPR_reg
10274 // This instruction does not round to 24-bits
10275 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10276   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10277   match(Set dst (AddF dst (LoadF src)));
10278 
10279   format %{ "FADD   $dst,$src" %}
10280   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10281   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10282               OpcP, RegOpc(dst) );
10283   ins_pipe( fpu_reg_mem );
10284 %}
10285 
// Following two instructions for _222_mpegaudio
10287 // Spill to obtain 24-bit precision
10288 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10289   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10290   match(Set dst (AddF src1 src2));
10291 
10292   format %{ "FADD   $dst,$src1,$src2" %}
10293   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10294   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10295               OpcReg_FPR(src2),
10296               Pop_Mem_FPR(dst) );
10297   ins_pipe( fpu_mem_reg_mem );
10298 %}
10299 
10300 // Cisc-spill variant
10301 // Spill to obtain 24-bit precision
10302 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10303   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10304   match(Set dst (AddF src1 (LoadF src2)));
10305 
10306   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10307   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10308   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10309               set_instruction_start,
10310               OpcP, RMopc_Mem(secondary,src1),
10311               Pop_Mem_FPR(dst) );
10312   ins_pipe( fpu_mem_mem_mem );
10313 %}
10314 
10315 // Spill to obtain 24-bit precision
10316 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10317   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10318   match(Set dst (AddF src1 src2));
10319 
10320   format %{ "FADD   $dst,$src1,$src2" %}
10321   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10322   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10323               set_instruction_start,
10324               OpcP, RMopc_Mem(secondary,src1),
10325               Pop_Mem_FPR(dst) );
10326   ins_pipe( fpu_mem_mem_mem );
10327 %}
10328 
10329 
10330 // Spill to obtain 24-bit precision
10331 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10332   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10333   match(Set dst (AddF src con));
10334   format %{ "FLD    $src\n\t"
10335             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10336             "FSTP_S $dst"  %}
10337   ins_encode %{
10338     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10339     __ fadd_s($constantaddress($con));
10340     __ fstp_s(Address(rsp, $dst$$disp));
10341   %}
10342   ins_pipe(fpu_mem_reg_con);
10343 %}
10344 //
10345 // This instruction does not round to 24-bits
10346 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10347   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10348   match(Set dst (AddF src con));
10349   format %{ "FLD    $src\n\t"
10350             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10351             "FSTP   $dst"  %}
10352   ins_encode %{
10353     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10354     __ fadd_s($constantaddress($con));
10355     __ fstp_d($dst$$reg);
10356   %}
10357   ins_pipe(fpu_reg_reg_con);
10358 %}
10359 
10360 // Spill to obtain 24-bit precision
10361 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10362   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10363   match(Set dst (MulF src1 src2));
10364 
10365   format %{ "FLD    $src1\n\t"
10366             "FMUL   $src2\n\t"
10367             "FSTP_S $dst"  %}
10368   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10369   ins_encode( Push_Reg_FPR(src1),
10370               OpcReg_FPR(src2),
10371               Pop_Mem_FPR(dst) );
10372   ins_pipe( fpu_mem_reg_reg );
10373 %}
10374 //
10375 // This instruction does not round to 24-bits
10376 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10377   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10378   match(Set dst (MulF src1 src2));
10379 
10380   format %{ "FLD    $src1\n\t"
10381             "FMUL   $src2\n\t"
10382             "FSTP_S $dst"  %}
10383   opcode(0xD8, 0x1); /* D8 C8+i */
10384   ins_encode( Push_Reg_FPR(src2),
10385               OpcReg_FPR(src1),
10386               Pop_Reg_FPR(dst) );
10387   ins_pipe( fpu_reg_reg_reg );
10388 %}
10389 
10390 
10391 // Spill to obtain 24-bit precision
10392 // Cisc-alternate to reg-reg multiply
10393 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10394   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10395   match(Set dst (MulF src1 (LoadF src2)));
10396 
10397   format %{ "FLD_S  $src2\n\t"
10398             "FMUL   $src1\n\t"
10399             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10401   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10402               OpcReg_FPR(src1),
10403               Pop_Mem_FPR(dst) );
10404   ins_pipe( fpu_mem_reg_mem );
10405 %}
10406 //
10407 // This instruction does not round to 24-bits
10408 // Cisc-alternate to reg-reg multiply
10409 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10410   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10411   match(Set dst (MulF src1 (LoadF src2)));
10412 
10413   format %{ "FMUL   $dst,$src1,$src2" %}
10414   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10415   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10416               OpcReg_FPR(src1),
10417               Pop_Reg_FPR(dst) );
10418   ins_pipe( fpu_reg_reg_mem );
10419 %}
10420 
10421 // Spill to obtain 24-bit precision
10422 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10423   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10424   match(Set dst (MulF src1 src2));
10425 
10426   format %{ "FMUL   $dst,$src1,$src2" %}
10427   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10428   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10429               set_instruction_start,
10430               OpcP, RMopc_Mem(secondary,src1),
10431               Pop_Mem_FPR(dst) );
10432   ins_pipe( fpu_mem_mem_mem );
10433 %}
10434 
10435 // Spill to obtain 24-bit precision
10436 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10437   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10438   match(Set dst (MulF src con));
10439 
10440   format %{ "FLD    $src\n\t"
10441             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10442             "FSTP_S $dst"  %}
10443   ins_encode %{
10444     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10445     __ fmul_s($constantaddress($con));
10446     __ fstp_s(Address(rsp, $dst$$disp));
10447   %}
10448   ins_pipe(fpu_mem_reg_con);
10449 %}
10450 //
10451 // This instruction does not round to 24-bits
10452 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10453   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10454   match(Set dst (MulF src con));
10455 
10456   format %{ "FLD    $src\n\t"
10457             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10458             "FSTP   $dst"  %}
10459   ins_encode %{
10460     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10461     __ fmul_s($constantaddress($con));
10462     __ fstp_d($dst$$reg);
10463   %}
10464   ins_pipe(fpu_reg_reg_con);
10465 %}
10466 
10467 
10468 //
10469 // MACRO1 -- subsume unshared load into mulFPR
10470 // This instruction does not round to 24-bits
10471 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10472   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10473   match(Set dst (MulF (LoadF mem1) src));
10474 
10475   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10476             "FMUL   ST,$src\n\t"
10477             "FSTP   $dst" %}
10478   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10479   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10480               OpcReg_FPR(src),
10481               Pop_Reg_FPR(dst) );
10482   ins_pipe( fpu_reg_reg_mem );
10483 %}
10484 //
10485 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10486 // This instruction does not round to 24-bits
10487 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10488   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10489   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10490   ins_cost(95);
10491 
10492   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10493             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10494             "FADD   ST,$src2\n\t"
10495             "FSTP   $dst" %}
10496   opcode(0xD9); /* LoadF D9 /0 */
10497   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10498               FMul_ST_reg(src1),
10499               FAdd_ST_reg(src2),
10500               Pop_Reg_FPR(dst) );
10501   ins_pipe( fpu_reg_mem_reg_reg );
10502 %}
10503 
10504 // MACRO3 -- addFPR a mulFPR
10505 // This instruction does not round to 24-bits.  It is a '2-address'
10506 // instruction in that the result goes back to src2.  This eliminates
10507 // a move from the macro; possibly the register allocator will have
10508 // to add it back (and maybe not).
10509 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10510   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10511   match(Set src2 (AddF (MulF src0 src1) src2));
10512 
10513   format %{ "FLD    $src0     ===MACRO3===\n\t"
10514             "FMUL   ST,$src1\n\t"
10515             "FADDP  $src2,ST" %}
10516   opcode(0xD9); /* LoadF D9 /0 */
10517   ins_encode( Push_Reg_FPR(src0),
10518               FMul_ST_reg(src1),
10519               FAddP_reg_ST(src2) );
10520   ins_pipe( fpu_reg_reg_reg );
10521 %}
10522 
10523 // MACRO4 -- divFPR subFPR
10524 // This instruction does not round to 24-bits
10525 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10526   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10527   match(Set dst (DivF (SubF src2 src1) src3));
10528 
10529   format %{ "FLD    $src2   ===MACRO4===\n\t"
10530             "FSUB   ST,$src1\n\t"
10531             "FDIV   ST,$src3\n\t"
10532             "FSTP  $dst" %}
10533   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10534   ins_encode( Push_Reg_FPR(src2),
10535               subFPR_divFPR_encode(src1,src3),
10536               Pop_Reg_FPR(dst) );
10537   ins_pipe( fpu_reg_reg_reg_reg );
10538 %}
10539 
10540 // Spill to obtain 24-bit precision
10541 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10542   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10543   match(Set dst (DivF src1 src2));
10544 
10545   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10547   ins_encode( Push_Reg_FPR(src1),
10548               OpcReg_FPR(src2),
10549               Pop_Mem_FPR(dst) );
10550   ins_pipe( fpu_mem_reg_reg );
10551 %}
10552 //
10553 // This instruction does not round to 24-bits
10554 instruct divFPR_reg(regFPR dst, regFPR src) %{
10555   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556   match(Set dst (DivF dst src));
10557 
10558   format %{ "FDIV   $dst,$src" %}
10559   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10560   ins_encode( Push_Reg_FPR(src),
10561               OpcP, RegOpc(dst) );
10562   ins_pipe( fpu_reg_reg );
10563 %}
10564 
10565 
10566 // Spill to obtain 24-bit precision
10567 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10568   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10569   match(Set dst (ModF src1 src2));
10570   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10571 
10572   format %{ "FMOD   $dst,$src1,$src2" %}
10573   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10574               emitModDPR(),
10575               Push_Result_Mod_DPR(src2),
10576               Pop_Mem_FPR(dst));
10577   ins_pipe( pipe_slow );
10578 %}
10579 //
10580 // This instruction does not round to 24-bits
10581 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10582   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10583   match(Set dst (ModF dst src));
10584   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10585 
10586   format %{ "FMOD   $dst,$src" %}
10587   ins_encode(Push_Reg_Mod_DPR(dst, src),
10588               emitModDPR(),
10589               Push_Result_Mod_DPR(src),
10590               Pop_Reg_FPR(dst));
10591   ins_pipe( pipe_slow );
10592 %}
10593 
10594 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10595   predicate(UseSSE>=1);
10596   match(Set dst (ModF src0 src1));
10597   effect(KILL rax, KILL cr);
10598   format %{ "SUB    ESP,4\t # FMOD\n"
10599           "\tMOVSS  [ESP+0],$src1\n"
10600           "\tFLD_S  [ESP+0]\n"
10601           "\tMOVSS  [ESP+0],$src0\n"
10602           "\tFLD_S  [ESP+0]\n"
10603      "loop:\tFPREM\n"
10604           "\tFWAIT\n"
10605           "\tFNSTSW AX\n"
10606           "\tSAHF\n"
10607           "\tJP     loop\n"
10608           "\tFSTP_S [ESP+0]\n"
10609           "\tMOVSS  $dst,[ESP+0]\n"
10610           "\tADD    ESP,4\n"
10611           "\tFSTP   ST0\t # Restore FPU Stack"
10612     %}
10613   ins_cost(250);
10614   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10615   ins_pipe( pipe_slow );
10616 %}
10617 
10618 
10619 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10621 
10622 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10623   predicate(UseSSE==0);
10624   match(Set dst (RoundFloat src));
10625   ins_cost(125);
10626   format %{ "FST_S  $dst,$src\t# F-round" %}
10627   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10628   ins_pipe( fpu_mem_reg );
10629 %}
10630 
10631 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10632   predicate(UseSSE<=1);
10633   match(Set dst (RoundDouble src));
10634   ins_cost(125);
10635   format %{ "FST_D  $dst,$src\t# D-round" %}
10636   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10637   ins_pipe( fpu_mem_reg );
10638 %}
10639 
// Force rounding to 24-bit precision and 8-bit exponent
10641 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10642   predicate(UseSSE==0);
10643   match(Set dst (ConvD2F src));
10644   format %{ "FST_S  $dst,$src\t# F-round" %}
10645   expand %{
10646     roundFloat_mem_reg(dst,src);
10647   %}
10648 %}
10649 
// Force rounding to 24-bit precision and 8-bit exponent
10651 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10652   predicate(UseSSE==1);
10653   match(Set dst (ConvD2F src));
10654   effect( KILL cr );
10655   format %{ "SUB    ESP,4\n\t"
10656             "FST_S  [ESP],$src\t# F-round\n\t"
10657             "MOVSS  $dst,[ESP]\n\t"
10658             "ADD ESP,4" %}
10659   ins_encode %{
10660     __ subptr(rsp, 4);
10661     if ($src$$reg != FPR1L_enc) {
10662       __ fld_s($src$$reg-1);
10663       __ fstp_s(Address(rsp, 0));
10664     } else {
10665       __ fst_s(Address(rsp, 0));
10666     }
10667     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10668     __ addptr(rsp, 4);
10669   %}
10670   ins_pipe( pipe_slow );
10671 %}
10672 
10673 // Force rounding double precision to single precision
10674 instruct convD2F_reg(regF dst, regD src) %{
10675   predicate(UseSSE>=2);
10676   match(Set dst (ConvD2F src));
10677   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10678   ins_encode %{
10679     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10680   %}
10681   ins_pipe( pipe_slow );
10682 %}
10683 
10684 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10685   predicate(UseSSE==0);
10686   match(Set dst (ConvF2D src));
10687   format %{ "FST_S  $dst,$src\t# D-round" %}
10688   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10689   ins_pipe( fpu_reg_reg );
10690 %}
10691 
10692 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10693   predicate(UseSSE==1);
10694   match(Set dst (ConvF2D src));
10695   format %{ "FST_D  $dst,$src\t# D-round" %}
10696   expand %{
10697     roundDouble_mem_reg(dst,src);
10698   %}
10699 %}
10700 
10701 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10702   predicate(UseSSE==1);
10703   match(Set dst (ConvF2D src));
10704   effect( KILL cr );
10705   format %{ "SUB    ESP,4\n\t"
10706             "MOVSS  [ESP] $src\n\t"
10707             "FLD_S  [ESP]\n\t"
10708             "ADD    ESP,4\n\t"
10709             "FSTP   $dst\t# D-round" %}
10710   ins_encode %{
10711     __ subptr(rsp, 4);
10712     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10713     __ fld_s(Address(rsp, 0));
10714     __ addptr(rsp, 4);
10715     __ fstp_d($dst$$reg);
10716   %}
10717   ins_pipe( pipe_slow );
10718 %}
10719 
10720 instruct convF2D_reg(regD dst, regF src) %{
10721   predicate(UseSSE>=2);
10722   match(Set dst (ConvF2D src));
10723   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10724   ins_encode %{
10725     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10726   %}
10727   ins_pipe( pipe_slow );
10728 %}
10729 
10730 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
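// Both FIST/FISTP and CVTTSD2SI produce the 32-bit "integer indefinite" value
// 0x80000000 when the source is a NaN or out of int range, so the sequences
// below compare against it and only then take the d2i_wrapper slow path.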
10731 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10732   predicate(UseSSE<=1);
10733   match(Set dst (ConvD2I src));
10734   effect( KILL tmp, KILL cr );
10735   format %{ "FLD    $src\t# Convert double to int \n\t"
10736             "FLDCW  trunc mode\n\t"
10737             "SUB    ESP,4\n\t"
10738             "FISTp  [ESP + #0]\n\t"
10739             "FLDCW  std/24-bit mode\n\t"
10740             "POP    EAX\n\t"
10741             "CMP    EAX,0x80000000\n\t"
10742             "JNE,s  fast\n\t"
10743             "FLD_D  $src\n\t"
10744             "CALL   d2i_wrapper\n"
10745       "fast:" %}
10746   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10747   ins_pipe( pipe_slow );
10748 %}
10749 
10750 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10751 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10752   predicate(UseSSE>=2);
10753   match(Set dst (ConvD2I src));
10754   effect( KILL tmp, KILL cr );
10755   format %{ "CVTTSD2SI $dst, $src\n\t"
10756             "CMP    $dst,0x80000000\n\t"
10757             "JNE,s  fast\n\t"
10758             "SUB    ESP, 8\n\t"
10759             "MOVSD  [ESP], $src\n\t"
10760             "FLD_D  [ESP]\n\t"
10761             "ADD    ESP, 8\n\t"
10762             "CALL   d2i_wrapper\n"
10763       "fast:" %}
10764   ins_encode %{
10765     Label fast;
10766     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10767     __ cmpl($dst$$Register, 0x80000000);
10768     __ jccb(Assembler::notEqual, fast);
10769     __ subptr(rsp, 8);
10770     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10771     __ fld_d(Address(rsp, 0));
10772     __ addptr(rsp, 8);
10773     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10774     __ bind(fast);
10775   %}
10776   ins_pipe( pipe_slow );
10777 %}
10778 
10779 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10780   predicate(UseSSE<=1);
10781   match(Set dst (ConvD2L src));
10782   effect( KILL cr );
10783   format %{ "FLD    $src\t# Convert double to long\n\t"
10784             "FLDCW  trunc mode\n\t"
10785             "SUB    ESP,8\n\t"
10786             "FISTp  [ESP + #0]\n\t"
10787             "FLDCW  std/24-bit mode\n\t"
10788             "POP    EAX\n\t"
10789             "POP    EDX\n\t"
10790             "CMP    EDX,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "TEST   EAX,EAX\n\t"
10793             "JNE,s  fast\n\t"
10794             "FLD    $src\n\t"
10795             "CALL   d2l_wrapper\n"
10796       "fast:" %}
10797   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 // XMM lacks a float/double->long conversion, so use the old FPU stack.
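// For a 64-bit store the integer-indefinite value is 0x8000000000000000, which
// is why the sequences below check EDX==0x80000000 and EAX==0 before falling
// back to d2l_wrapper.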
10802 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10803   predicate (UseSSE>=2);
10804   match(Set dst (ConvD2L src));
10805   effect( KILL cr );
10806   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10807             "MOVSD  [ESP],$src\n\t"
10808             "FLD_D  [ESP]\n\t"
10809             "FLDCW  trunc mode\n\t"
10810             "FISTp  [ESP + #0]\n\t"
10811             "FLDCW  std/24-bit mode\n\t"
10812             "POP    EAX\n\t"
10813             "POP    EDX\n\t"
10814             "CMP    EDX,0x80000000\n\t"
10815             "JNE,s  fast\n\t"
10816             "TEST   EAX,EAX\n\t"
10817             "JNE,s  fast\n\t"
10818             "SUB    ESP,8\n\t"
10819             "MOVSD  [ESP],$src\n\t"
10820             "FLD_D  [ESP]\n\t"
10821             "ADD    ESP,8\n\t"
10822             "CALL   d2l_wrapper\n"
10823       "fast:" %}
10824   ins_encode %{
10825     Label fast;
10826     __ subptr(rsp, 8);
10827     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10828     __ fld_d(Address(rsp, 0));
10829     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10830     __ fistp_d(Address(rsp, 0));
10831     // Restore the rounding mode, mask the exception
10832     if (Compile::current()->in_24_bit_fp_mode()) {
10833       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10834     } else {
10835       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10836     }
10837     // Load the converted long, adjust CPU stack
10838     __ pop(rax);
10839     __ pop(rdx);
10840     __ cmpl(rdx, 0x80000000);
10841     __ jccb(Assembler::notEqual, fast);
10842     __ testl(rax, rax);
10843     __ jccb(Assembler::notEqual, fast);
10844     __ subptr(rsp, 8);
10845     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10846     __ fld_d(Address(rsp, 0));
10847     __ addptr(rsp, 8);
10848     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10849     __ bind(fast);
10850   %}
10851   ins_pipe( pipe_slow );
10852 %}
10853 
// Convert a double to an int.  Java semantics require we do complex
// mangling in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and go
// the slow path if needed.
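//
// A sketch of the int-narrowing rule the slow path must supply (this is the
// JLS semantics, not the actual code of StubRoutines::d2i_wrapper):
//
//   if (x != x)              return 0;                   // NaN -> 0
//   if (x >=  2147483648.0)  return Integer.MAX_VALUE;
//   if (x <= -2147483649.0)  return Integer.MIN_VALUE;
//   return (int)x;                                       // truncate toward zero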
10860 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10861   predicate(UseSSE==0);
10862   match(Set dst (ConvF2I src));
10863   effect( KILL tmp, KILL cr );
10864   format %{ "FLD    $src\t# Convert float to int \n\t"
10865             "FLDCW  trunc mode\n\t"
10866             "SUB    ESP,4\n\t"
10867             "FISTp  [ESP + #0]\n\t"
10868             "FLDCW  std/24-bit mode\n\t"
10869             "POP    EAX\n\t"
10870             "CMP    EAX,0x80000000\n\t"
10871             "JNE,s  fast\n\t"
10872             "FLD    $src\n\t"
10873             "CALL   d2i_wrapper\n"
10874       "fast:" %}
10875   // DPR2I_encoding works for FPR2I
10876   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10877   ins_pipe( pipe_slow );
10878 %}
10879 
10880 // Convert a float in xmm to an int reg.
10881 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10882   predicate(UseSSE>=1);
10883   match(Set dst (ConvF2I src));
10884   effect( KILL tmp, KILL cr );
10885   format %{ "CVTTSS2SI $dst, $src\n\t"
10886             "CMP    $dst,0x80000000\n\t"
10887             "JNE,s  fast\n\t"
10888             "SUB    ESP, 4\n\t"
10889             "MOVSS  [ESP], $src\n\t"
10890             "FLD    [ESP]\n\t"
10891             "ADD    ESP, 4\n\t"
10892             "CALL   d2i_wrapper\n"
10893       "fast:" %}
10894   ins_encode %{
10895     Label fast;
10896     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10897     __ cmpl($dst$$Register, 0x80000000);
10898     __ jccb(Assembler::notEqual, fast);
10899     __ subptr(rsp, 4);
10900     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10901     __ fld_s(Address(rsp, 0));
10902     __ addptr(rsp, 4);
10903     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10904     __ bind(fast);
10905   %}
10906   ins_pipe( pipe_slow );
10907 %}
10908 
10909 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10910   predicate(UseSSE==0);
10911   match(Set dst (ConvF2L src));
10912   effect( KILL cr );
10913   format %{ "FLD    $src\t# Convert float to long\n\t"
10914             "FLDCW  trunc mode\n\t"
10915             "SUB    ESP,8\n\t"
10916             "FISTp  [ESP + #0]\n\t"
10917             "FLDCW  std/24-bit mode\n\t"
10918             "POP    EAX\n\t"
10919             "POP    EDX\n\t"
10920             "CMP    EDX,0x80000000\n\t"
10921             "JNE,s  fast\n\t"
10922             "TEST   EAX,EAX\n\t"
10923             "JNE,s  fast\n\t"
10924             "FLD    $src\n\t"
10925             "CALL   d2l_wrapper\n"
10926       "fast:" %}
10927   // DPR2L_encoding works for FPR2L
10928   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10929   ins_pipe( pipe_slow );
10930 %}
10931 
10932 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10933 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10934   predicate (UseSSE>=1);
10935   match(Set dst (ConvF2L src));
10936   effect( KILL cr );
10937   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10938             "MOVSS  [ESP],$src\n\t"
10939             "FLD_S  [ESP]\n\t"
10940             "FLDCW  trunc mode\n\t"
10941             "FISTp  [ESP + #0]\n\t"
10942             "FLDCW  std/24-bit mode\n\t"
10943             "POP    EAX\n\t"
10944             "POP    EDX\n\t"
10945             "CMP    EDX,0x80000000\n\t"
10946             "JNE,s  fast\n\t"
10947             "TEST   EAX,EAX\n\t"
10948             "JNE,s  fast\n\t"
10949             "SUB    ESP,4\t# Convert float to long\n\t"
10950             "MOVSS  [ESP],$src\n\t"
10951             "FLD_S  [ESP]\n\t"
10952             "ADD    ESP,4\n\t"
10953             "CALL   d2l_wrapper\n"
10954       "fast:" %}
10955   ins_encode %{
10956     Label fast;
10957     __ subptr(rsp, 8);
10958     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10959     __ fld_s(Address(rsp, 0));
10960     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10961     __ fistp_d(Address(rsp, 0));
10962     // Restore the rounding mode, mask the exception
10963     if (Compile::current()->in_24_bit_fp_mode()) {
10964       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10965     } else {
10966       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10967     }
10968     // Load the converted long, adjust CPU stack
10969     __ pop(rax);
10970     __ pop(rdx);
10971     __ cmpl(rdx, 0x80000000);
10972     __ jccb(Assembler::notEqual, fast);
10973     __ testl(rax, rax);
10974     __ jccb(Assembler::notEqual, fast);
10975     __ subptr(rsp, 4);
10976     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10977     __ fld_s(Address(rsp, 0));
10978     __ addptr(rsp, 4);
10979     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10980     __ bind(fast);
10981   %}
10982   ins_pipe( pipe_slow );
10983 %}
10984 
10985 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10986   predicate( UseSSE<=1 );
10987   match(Set dst (ConvI2D src));
10988   format %{ "FILD   $src\n\t"
10989             "FSTP   $dst" %}
10990   opcode(0xDB, 0x0);  /* DB /0 */
10991   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10992   ins_pipe( fpu_reg_mem );
10993 %}
10994 
10995 instruct convI2D_reg(regD dst, rRegI src) %{
10996   predicate( UseSSE>=2 && !UseXmmI2D );
10997   match(Set dst (ConvI2D src));
10998   format %{ "CVTSI2SD $dst,$src" %}
10999   ins_encode %{
11000     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11001   %}
11002   ins_pipe( pipe_slow );
11003 %}
11004 
11005 instruct convI2D_mem(regD dst, memory mem) %{
11006   predicate( UseSSE>=2 );
11007   match(Set dst (ConvI2D (LoadI mem)));
11008   format %{ "CVTSI2SD $dst,$mem" %}
11009   ins_encode %{
11010     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11011   %}
11012   ins_pipe( pipe_slow );
11013 %}
11014 
11015 instruct convXI2D_reg(regD dst, rRegI src)
11016 %{
11017   predicate( UseSSE>=2 && UseXmmI2D );
11018   match(Set dst (ConvI2D src));
11019 
11020   format %{ "MOVD  $dst,$src\n\t"
11021             "CVTDQ2PD $dst,$dst\t# i2d" %}
11022   ins_encode %{
11023     __ movdl($dst$$XMMRegister, $src$$Register);
11024     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11025   %}
11026   ins_pipe(pipe_slow); // XXX
11027 %}
11028 
11029 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11030   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11031   match(Set dst (ConvI2D (LoadI mem)));
11032   format %{ "FILD   $mem\n\t"
11033             "FSTP   $dst" %}
11034   opcode(0xDB);      /* DB /0 */
11035   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11036               Pop_Reg_DPR(dst));
11037   ins_pipe( fpu_reg_mem );
11038 %}
11039 
11040 // Convert a byte to a float; no rounding step needed.
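// (Any value in 0..255 is exactly representable in a float's 24-bit
//  significand, so no explicit rounding step is needed.)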
11041 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11042   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11043   match(Set dst (ConvI2F src));
11044   format %{ "FILD   $src\n\t"
11045             "FSTP   $dst" %}
11046 
11047   opcode(0xDB, 0x0);  /* DB /0 */
11048   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11049   ins_pipe( fpu_reg_mem );
11050 %}
11051 
11052 // In 24-bit mode, force exponent rounding by storing back out
11053 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11054   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11055   match(Set dst (ConvI2F src));
11056   ins_cost(200);
11057   format %{ "FILD   $src\n\t"
11058             "FSTP_S $dst" %}
11059   opcode(0xDB, 0x0);  /* DB /0 */
11060   ins_encode( Push_Mem_I(src),
11061               Pop_Mem_FPR(dst));
11062   ins_pipe( fpu_mem_mem );
11063 %}
11064 
11065 // In 24-bit mode, force exponent rounding by storing back out
11066 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11067   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11068   match(Set dst (ConvI2F (LoadI mem)));
11069   ins_cost(200);
11070   format %{ "FILD   $mem\n\t"
11071             "FSTP_S $dst" %}
11072   opcode(0xDB);  /* DB /0 */
11073   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11074               Pop_Mem_FPR(dst));
11075   ins_pipe( fpu_mem_mem );
11076 %}
11077 
11078 // This instruction does not round to 24-bits
11079 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11080   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11081   match(Set dst (ConvI2F src));
11082   format %{ "FILD   $src\n\t"
11083             "FSTP   $dst" %}
11084   opcode(0xDB, 0x0);  /* DB /0 */
11085   ins_encode( Push_Mem_I(src),
11086               Pop_Reg_FPR(dst));
11087   ins_pipe( fpu_reg_mem );
11088 %}
11089 
11090 // This instruction does not round to 24-bits
11091 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11092   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11093   match(Set dst (ConvI2F (LoadI mem)));
11094   format %{ "FILD   $mem\n\t"
11095             "FSTP   $dst" %}
11096   opcode(0xDB);      /* DB /0 */
11097   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11098               Pop_Reg_FPR(dst));
11099   ins_pipe( fpu_reg_mem );
11100 %}
11101 
11102 // Convert an int to a float in xmm; no rounding step needed.
11103 instruct convI2F_reg(regF dst, rRegI src) %{
11104   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11105   match(Set dst (ConvI2F src));
11106   format %{ "CVTSI2SS $dst, $src" %}
11107   ins_encode %{
11108     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11109   %}
11110   ins_pipe( pipe_slow );
11111 %}
11112 
instruct convXI2F_reg(regF dst, rRegI src)
11114 %{
11115   predicate( UseSSE>=2 && UseXmmI2F );
11116   match(Set dst (ConvI2F src));
11117 
11118   format %{ "MOVD  $dst,$src\n\t"
11119             "CVTDQ2PS $dst,$dst\t# i2f" %}
11120   ins_encode %{
11121     __ movdl($dst$$XMMRegister, $src$$Register);
11122     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11123   %}
11124   ins_pipe(pipe_slow); // XXX
11125 %}
11126 
11127 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11128   match(Set dst (ConvI2L src));
11129   effect(KILL cr);
11130   ins_cost(375);
11131   format %{ "MOV    $dst.lo,$src\n\t"
11132             "MOV    $dst.hi,$src\n\t"
11133             "SAR    $dst.hi,31" %}
11134   ins_encode(convert_int_long(dst,src));
11135   ins_pipe( ialu_reg_reg_long );
11136 %}
11137 
11138 // Zero-extend convert int to long
11139 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11140   match(Set dst (AndL (ConvI2L src) mask) );
11141   effect( KILL flags );
11142   ins_cost(250);
11143   format %{ "MOV    $dst.lo,$src\n\t"
11144             "XOR    $dst.hi,$dst.hi" %}
11145   opcode(0x33); // XOR
11146   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11147   ins_pipe( ialu_reg_reg_long );
11148 %}
11149 
11150 // Zero-extend long
11151 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11152   match(Set dst (AndL src mask) );
11153   effect( KILL flags );
11154   ins_cost(250);
11155   format %{ "MOV    $dst.lo,$src.lo\n\t"
11156             "XOR    $dst.hi,$dst.hi\n\t" %}
11157   opcode(0x33); // XOR
11158   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11159   ins_pipe( ialu_reg_reg_long );
11160 %}
11161 
11162 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11163   predicate (UseSSE<=1);
11164   match(Set dst (ConvL2D src));
11165   effect( KILL cr );
11166   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11167             "PUSH   $src.lo\n\t"
11168             "FILD   ST,[ESP + #0]\n\t"
11169             "ADD    ESP,8\n\t"
11170             "FSTP_D $dst\t# D-round" %}
11171   opcode(0xDF, 0x5);  /* DF /5 */
11172   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11173   ins_pipe( pipe_slow );
11174 %}
11175 
11176 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11177   predicate (UseSSE>=2);
11178   match(Set dst (ConvL2D src));
11179   effect( KILL cr );
11180   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11181             "PUSH   $src.lo\n\t"
11182             "FILD_D [ESP]\n\t"
11183             "FSTP_D [ESP]\n\t"
11184             "MOVSD  $dst,[ESP]\n\t"
11185             "ADD    ESP,8" %}
11186   opcode(0xDF, 0x5);  /* DF /5 */
11187   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11188   ins_pipe( pipe_slow );
11189 %}
11190 
11191 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11192   predicate (UseSSE>=1);
11193   match(Set dst (ConvL2F src));
11194   effect( KILL cr );
11195   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11196             "PUSH   $src.lo\n\t"
11197             "FILD_D [ESP]\n\t"
11198             "FSTP_S [ESP]\n\t"
11199             "MOVSS  $dst,[ESP]\n\t"
11200             "ADD    ESP,8" %}
11201   opcode(0xDF, 0x5);  /* DF /5 */
11202   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11203   ins_pipe( pipe_slow );
11204 %}
11205 
11206 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11207   match(Set dst (ConvL2F src));
11208   effect( KILL cr );
11209   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11210             "PUSH   $src.lo\n\t"
11211             "FILD   ST,[ESP + #0]\n\t"
11212             "ADD    ESP,8\n\t"
11213             "FSTP_S $dst\t# F-round" %}
11214   opcode(0xDF, 0x5);  /* DF /5 */
11215   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11216   ins_pipe( pipe_slow );
11217 %}
11218 
11219 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11220   match(Set dst (ConvL2I src));
11221   effect( DEF dst, USE src );
11222   format %{ "MOV    $dst,$src.lo" %}
11223   ins_encode(enc_CopyL_Lo(dst,src));
11224   ins_pipe( ialu_reg_reg );
11225 %}
11226 
11227 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11228   match(Set dst (MoveF2I src));
11229   effect( DEF dst, USE src );
11230   ins_cost(100);
11231   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11232   ins_encode %{
11233     __ movl($dst$$Register, Address(rsp, $src$$disp));
11234   %}
11235   ins_pipe( ialu_reg_mem );
11236 %}
11237 
11238 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11239   predicate(UseSSE==0);
11240   match(Set dst (MoveF2I src));
11241   effect( DEF dst, USE src );
11242 
11243   ins_cost(125);
11244   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11245   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11246   ins_pipe( fpu_mem_reg );
11247 %}
11248 
11249 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11250   predicate(UseSSE>=1);
11251   match(Set dst (MoveF2I src));
11252   effect( DEF dst, USE src );
11253 
11254   ins_cost(95);
11255   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11256   ins_encode %{
11257     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11258   %}
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11263   predicate(UseSSE>=2);
11264   match(Set dst (MoveF2I src));
11265   effect( DEF dst, USE src );
11266   ins_cost(85);
11267   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11268   ins_encode %{
11269     __ movdl($dst$$Register, $src$$XMMRegister);
11270   %}
11271   ins_pipe( pipe_slow );
11272 %}
11273 
11274 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11275   match(Set dst (MoveI2F src));
11276   effect( DEF dst, USE src );
11277 
11278   ins_cost(100);
11279   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11280   ins_encode %{
11281     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11282   %}
11283   ins_pipe( ialu_mem_reg );
11284 %}
11285 
11286 
11287 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11288   predicate(UseSSE==0);
11289   match(Set dst (MoveI2F src));
11290   effect(DEF dst, USE src);
11291 
11292   ins_cost(125);
11293   format %{ "FLD_S  $src\n\t"
11294             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11295   opcode(0xD9);               /* D9 /0, FLD m32real */
11296   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11297               Pop_Reg_FPR(dst) );
11298   ins_pipe( fpu_reg_mem );
11299 %}
11300 
11301 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11302   predicate(UseSSE>=1);
11303   match(Set dst (MoveI2F src));
11304   effect( DEF dst, USE src );
11305 
11306   ins_cost(95);
11307   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11308   ins_encode %{
11309     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
11314 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11315   predicate(UseSSE>=2);
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(85);
11320   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11321   ins_encode %{
11322     __ movdl($dst$$XMMRegister, $src$$Register);
11323   %}
11324   ins_pipe( pipe_slow );
11325 %}
11326 
11327 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11328   match(Set dst (MoveD2L src));
11329   effect(DEF dst, USE src);
11330 
11331   ins_cost(250);
11332   format %{ "MOV    $dst.lo,$src\n\t"
11333             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11334   opcode(0x8B, 0x8B);
11335   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11336   ins_pipe( ialu_mem_long_reg );
11337 %}
11338 
11339 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11340   predicate(UseSSE<=1);
11341   match(Set dst (MoveD2L src));
11342   effect(DEF dst, USE src);
11343 
11344   ins_cost(125);
11345   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11346   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11347   ins_pipe( fpu_mem_reg );
11348 %}
11349 
11350 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11351   predicate(UseSSE>=2);
11352   match(Set dst (MoveD2L src));
11353   effect(DEF dst, USE src);
11354   ins_cost(95);
11355   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11356   ins_encode %{
11357     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11358   %}
11359   ins_pipe( pipe_slow );
11360 %}
11361 
11362 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11363   predicate(UseSSE>=2);
11364   match(Set dst (MoveD2L src));
11365   effect(DEF dst, USE src, TEMP tmp);
11366   ins_cost(85);
11367   format %{ "MOVD   $dst.lo,$src\n\t"
11368             "PSHUFLW $tmp,$src,0x4E\n\t"
11369             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11370   ins_encode %{
11371     __ movdl($dst$$Register, $src$$XMMRegister);
11372     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
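    // Shuffle control 0x4E selects words 2,3,0,1, moving the upper 32 bits of
    // the double into the low dword of $tmp for the MOVD that follows.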
11373     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11374   %}
11375   ins_pipe( pipe_slow );
11376 %}
11377 
11378 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11379   match(Set dst (MoveL2D src));
11380   effect(DEF dst, USE src);
11381 
11382   ins_cost(200);
11383   format %{ "MOV    $dst,$src.lo\n\t"
11384             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11385   opcode(0x89, 0x89);
11386   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11387   ins_pipe( ialu_mem_long_reg );
11388 %}
11389 
11390 
11391 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11392   predicate(UseSSE<=1);
11393   match(Set dst (MoveL2D src));
11394   effect(DEF dst, USE src);
11395   ins_cost(125);
11396 
11397   format %{ "FLD_D  $src\n\t"
11398             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11399   opcode(0xDD);               /* DD /0, FLD m64real */
11400   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11401               Pop_Reg_DPR(dst) );
11402   ins_pipe( fpu_reg_mem );
11403 %}
11404 
11405 
11406 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11407   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(95);
11412   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11413   ins_encode %{
11414     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11420   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11421   match(Set dst (MoveL2D src));
11422   effect(DEF dst, USE src);
11423 
11424   ins_cost(95);
11425   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11426   ins_encode %{
11427     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11428   %}
11429   ins_pipe( pipe_slow );
11430 %}
11431 
11432 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11433   predicate(UseSSE>=2);
11434   match(Set dst (MoveL2D src));
11435   effect(TEMP dst, USE src, TEMP tmp);
11436   ins_cost(85);
11437   format %{ "MOVD   $dst,$src.lo\n\t"
11438             "MOVD   $tmp,$src.hi\n\t"
11439             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11440   ins_encode %{
11441     __ movdl($dst$$XMMRegister, $src$$Register);
11442     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11443     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11444   %}
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 
11449 // =======================================================================
11450 // fast clearing of an array
11451 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11452   predicate(!UseFastStosb);
11453   match(Set dummy (ClearArray cnt base));
11454   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11455   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11456             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11457             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11458   ins_encode %{
11459     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11460   %}
11461   ins_pipe( pipe_slow );
11462 %}
11463 
11464 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11465   predicate(UseFastStosb);
11466   match(Set dummy (ClearArray cnt base));
11467   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11468   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11469             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11470             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11471   ins_encode %{
11472     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11473   %}
11474   ins_pipe( pipe_slow );
11475 %}
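
// Illustrative only: a minimal C sketch of what both ClearArray forms above
// accomplish, assuming (as the SHL scale factors suggest) that $cnt counts
// 8-byte units.  The helper name is hypothetical; the generated code uses
// REP STOS / REP STOSB in place and clobbers ECX/EDI/EAX as listed in the
// effects.
//
//   #include <string.h>
//   static void clear_array_sketch(void* base, size_t cnt /* 8-byte units */) {
//     memset(base, 0, cnt * 8);   // EAX is zeroed and stored repeatedly
//   }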
11476 
11477 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11478                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11479   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11480   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11481 
11482   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11483   ins_encode %{
11484     __ string_compare($str1$$Register, $str2$$Register,
11485                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11486                       $tmp1$$XMMRegister);
11487   %}
11488   ins_pipe( pipe_slow );
11489 %}
11490 
11491 // fast string equals
11492 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11493                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11494   match(Set result (StrEquals (Binary str1 str2) cnt));
11495   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11496 
11497   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11498   ins_encode %{
11499     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11500                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11501                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11502   %}
11503   ins_pipe( pipe_slow );
11504 %}
11505 
11506 // fast search of substring with known size.
11507 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11508                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11509   predicate(UseSSE42Intrinsics);
11510   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11511   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11512 
11513   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11514   ins_encode %{
11515     int icnt2 = (int)$int_cnt2$$constant;
11516     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11519       __ string_indexofC8($str1$$Register, $str2$$Register,
11520                           $cnt1$$Register, $cnt2$$Register,
11521                           icnt2, $result$$Register,
11522                           $vec$$XMMRegister, $tmp$$Register);
11523     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11525       __ string_indexof($str1$$Register, $str2$$Register,
11526                         $cnt1$$Register, $cnt2$$Register,
11527                         icnt2, $result$$Register,
11528                         $vec$$XMMRegister, $tmp$$Register);
11529     }
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11535                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11536   predicate(UseSSE42Intrinsics);
11537   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11538   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11539 
11540   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11541   ins_encode %{
11542     __ string_indexof($str1$$Register, $str2$$Register,
11543                       $cnt1$$Register, $cnt2$$Register,
11544                       (-1), $result$$Register,
11545                       $vec$$XMMRegister, $tmp$$Register);
11546   %}
11547   ins_pipe( pipe_slow );
11548 %}
11549 
11550 // fast array equals
11551 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11552                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11553 %{
11554   match(Set result (AryEq ary1 ary2));
11555   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11556   //ins_cost(300);
11557 
11558   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11559   ins_encode %{
11560     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11561                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11562                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11563   %}
11564   ins_pipe( pipe_slow );
11565 %}
11566 
11567 // encode char[] to byte[] in ISO_8859_1
11568 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11569                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11570                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11571   match(Set result (EncodeISOArray src (Binary dst len)));
11572   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11573 
11574   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11575   ins_encode %{
11576     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11577                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11578                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11579   %}
11580   ins_pipe( pipe_slow );
11581 %}
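
// Illustrative only: a scalar C sketch of the contract the intrinsic above is
// expected to implement (the real code vectorizes with the XMM temporaries,
// and the helper name below is hypothetical).  Characters are narrowed to
// bytes until one falls outside ISO-8859-1, and the number of characters
// successfully encoded is returned in $result.
//
//   static int encode_iso_scalar(const unsigned short* src, unsigned char* dst, int len) {
//     int i;
//     for (i = 0; i < len; i++) {
//       if (src[i] > 0xFF) break;           // not representable in ISO-8859-1
//       dst[i] = (unsigned char) src[i];
//     }
//     return i;                             // count of encoded characters
//   }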
11582 
11583 
11584 //----------Control Flow Instructions------------------------------------------
11585 // Signed compare Instructions
11586 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11587   match(Set cr (CmpI op1 op2));
11588   effect( DEF cr, USE op1, USE op2 );
11589   format %{ "CMP    $op1,$op2" %}
11590   opcode(0x3B);  /* Opcode 3B /r */
11591   ins_encode( OpcP, RegReg( op1, op2) );
11592   ins_pipe( ialu_cr_reg_reg );
11593 %}
11594 
11595 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11596   match(Set cr (CmpI op1 op2));
11597   effect( DEF cr, USE op1 );
11598   format %{ "CMP    $op1,$op2" %}
11599   opcode(0x81,0x07);  /* Opcode 81 /7 */
11600   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11601   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11602   ins_pipe( ialu_cr_reg_imm );
11603 %}
11604 
11605 // Cisc-spilled version of cmpI_eReg
11606 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11607   match(Set cr (CmpI op1 (LoadI op2)));
11608 
11609   format %{ "CMP    $op1,$op2" %}
11610   ins_cost(500);
11611   opcode(0x3B);  /* Opcode 3B /r */
11612   ins_encode( OpcP, RegMem( op1, op2) );
11613   ins_pipe( ialu_cr_reg_mem );
11614 %}
11615 
11616 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11617   match(Set cr (CmpI src zero));
11618   effect( DEF cr, USE src );
11619 
11620   format %{ "TEST   $src,$src" %}
11621   opcode(0x85);
11622   ins_encode( OpcP, RegReg( src, src ) );
11623   ins_pipe( ialu_cr_reg_imm );
11624 %}
11625 
11626 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11627   match(Set cr (CmpI (AndI src con) zero));
11628 
11629   format %{ "TEST   $src,$con" %}
11630   opcode(0xF7,0x00);
11631   ins_encode( OpcP, RegOpc(src), Con32(con) );
11632   ins_pipe( ialu_cr_reg_imm );
11633 %}
11634 
11635 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11636   match(Set cr (CmpI (AndI src mem) zero));
11637 
11638   format %{ "TEST   $src,$mem" %}
11639   opcode(0x85);
11640   ins_encode( OpcP, RegMem( src, mem ) );
11641   ins_pipe( ialu_cr_reg_mem );
11642 %}
11643 
11644 // Unsigned compare Instructions; really, same as signed except they
11645 // produce an eFlagsRegU instead of eFlagsReg.
11646 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11647   match(Set cr (CmpU op1 op2));
11648 
11649   format %{ "CMPu   $op1,$op2" %}
11650   opcode(0x3B);  /* Opcode 3B /r */
11651   ins_encode( OpcP, RegReg( op1, op2) );
11652   ins_pipe( ialu_cr_reg_reg );
11653 %}
11654 
11655 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11656   match(Set cr (CmpU op1 op2));
11657 
11658   format %{ "CMPu   $op1,$op2" %}
11659   opcode(0x81,0x07);  /* Opcode 81 /7 */
11660   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11661   ins_pipe( ialu_cr_reg_imm );
11662 %}
11663 
// Cisc-spilled version of cmpU_eReg
11665 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11666   match(Set cr (CmpU op1 (LoadI op2)));
11667 
11668   format %{ "CMPu   $op1,$op2" %}
11669   ins_cost(500);
11670   opcode(0x3B);  /* Opcode 3B /r */
11671   ins_encode( OpcP, RegMem( op1, op2) );
11672   ins_pipe( ialu_cr_reg_mem );
11673 %}
11674 
11675 // // Cisc-spilled version of cmpU_eReg
11676 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11677 //  match(Set cr (CmpU (LoadI op1) op2));
11678 //
11679 //  format %{ "CMPu   $op1,$op2" %}
11680 //  ins_cost(500);
11681 //  opcode(0x39);  /* Opcode 39 /r */
11682 //  ins_encode( OpcP, RegMem( op1, op2) );
11683 //%}
11684 
11685 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11686   match(Set cr (CmpU src zero));
11687 
11688   format %{ "TESTu  $src,$src" %}
11689   opcode(0x85);
11690   ins_encode( OpcP, RegReg( src, src ) );
11691   ins_pipe( ialu_cr_reg_imm );
11692 %}
11693 
11694 // Unsigned pointer compare Instructions
11695 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11696   match(Set cr (CmpP op1 op2));
11697 
11698   format %{ "CMPu   $op1,$op2" %}
11699   opcode(0x3B);  /* Opcode 3B /r */
11700   ins_encode( OpcP, RegReg( op1, op2) );
11701   ins_pipe( ialu_cr_reg_reg );
11702 %}
11703 
11704 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11705   match(Set cr (CmpP op1 op2));
11706 
11707   format %{ "CMPu   $op1,$op2" %}
11708   opcode(0x81,0x07);  /* Opcode 81 /7 */
11709   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11710   ins_pipe( ialu_cr_reg_imm );
11711 %}
11712 
// Cisc-spilled version of cmpP_eReg
11714 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11715   match(Set cr (CmpP op1 (LoadP op2)));
11716 
11717   format %{ "CMPu   $op1,$op2" %}
11718   ins_cost(500);
11719   opcode(0x3B);  /* Opcode 3B /r */
11720   ins_encode( OpcP, RegMem( op1, op2) );
11721   ins_pipe( ialu_cr_reg_mem );
11722 %}
11723 
11724 // // Cisc-spilled version of cmpP_eReg
11725 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11726 //  match(Set cr (CmpP (LoadP op1) op2));
11727 //
11728 //  format %{ "CMPu   $op1,$op2" %}
11729 //  ins_cost(500);
11730 //  opcode(0x39);  /* Opcode 39 /r */
11731 //  ins_encode( OpcP, RegMem( op1, op2) );
11732 //%}
11733 
11734 // Compare raw pointer (used in out-of-heap check).
11735 // Only works because non-oop pointers must be raw pointers
11736 // and raw pointers have no anti-dependencies.
11737 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11738   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11739   match(Set cr (CmpP op1 (LoadP op2)));
11740 
11741   format %{ "CMPu   $op1,$op2" %}
11742   opcode(0x3B);  /* Opcode 3B /r */
11743   ins_encode( OpcP, RegMem( op1, op2) );
11744   ins_pipe( ialu_cr_reg_mem );
11745 %}
11746 
11747 //
11748 // This will generate a signed flags result. This should be ok
11749 // since any compare to a zero should be eq/neq.
11750 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11751   match(Set cr (CmpP src zero));
11752 
11753   format %{ "TEST   $src,$src" %}
11754   opcode(0x85);
11755   ins_encode( OpcP, RegReg( src, src ) );
11756   ins_pipe( ialu_cr_reg_imm );
11757 %}
11758 
11759 // Cisc-spilled version of testP_reg
11760 // This will generate a signed flags result. This should be ok
11761 // since any compare to a zero should be eq/neq.
11762 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11763   match(Set cr (CmpP (LoadP op) zero));
11764 
11765   format %{ "TEST   $op,0xFFFFFFFF" %}
11766   ins_cost(500);
11767   opcode(0xF7);               /* Opcode F7 /0 */
11768   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11769   ins_pipe( ialu_cr_reg_imm );
11770 %}
11771 
11772 // Yanked all unsigned pointer compare operations.
11773 // Pointer compares are done with CmpP which is already unsigned.
11774 
11775 //----------Max and Min--------------------------------------------------------
11776 // Min Instructions
11777 ////
11778 //   *** Min and Max using the conditional move are slower than the
11779 //   *** branch version on a Pentium III.
11780 // // Conditional move for min
11781 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11782 //  effect( USE_DEF op2, USE op1, USE cr );
11783 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11784 //  opcode(0x4C,0x0F);
11785 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11786 //  ins_pipe( pipe_cmov_reg );
11787 //%}
11788 //
11789 //// Min Register with Register (P6 version)
11790 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11791 //  predicate(VM_Version::supports_cmov() );
11792 //  match(Set op2 (MinI op1 op2));
11793 //  ins_cost(200);
11794 //  expand %{
11795 //    eFlagsReg cr;
11796 //    compI_eReg(cr,op1,op2);
11797 //    cmovI_reg_lt(op2,op1,cr);
11798 //  %}
11799 //%}
11800 
11801 // Min Register with Register (generic version)
11802 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11803   match(Set dst (MinI dst src));
11804   effect(KILL flags);
11805   ins_cost(300);
11806 
11807   format %{ "MIN    $dst,$src" %}
11808   opcode(0xCC);
11809   ins_encode( min_enc(dst,src) );
11810   ins_pipe( pipe_slow );
11811 %}
11812 
11813 // Max Register with Register
11814 //   *** Min and Max using the conditional move are slower than the
11815 //   *** branch version on a Pentium III.
11816 // // Conditional move for max
11817 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11818 //  effect( USE_DEF op2, USE op1, USE cr );
11819 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11820 //  opcode(0x4F,0x0F);
11821 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11822 //  ins_pipe( pipe_cmov_reg );
11823 //%}
11824 //
11825 // // Max Register with Register (P6 version)
11826 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11827 //  predicate(VM_Version::supports_cmov() );
11828 //  match(Set op2 (MaxI op1 op2));
11829 //  ins_cost(200);
11830 //  expand %{
11831 //    eFlagsReg cr;
11832 //    compI_eReg(cr,op1,op2);
11833 //    cmovI_reg_gt(op2,op1,cr);
11834 //  %}
11835 //%}
11836 
11837 // Max Register with Register (generic version)
11838 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11839   match(Set dst (MaxI dst src));
11840   effect(KILL flags);
11841   ins_cost(300);
11842 
11843   format %{ "MAX    $dst,$src" %}
11844   opcode(0xCC);
11845   ins_encode( max_enc(dst,src) );
11846   ins_pipe( pipe_slow );
11847 %}
11848 
11849 // ============================================================================
11850 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
11853 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11854   match(Set limit (LoopLimit (Binary init limit) stride));
11855   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11856   ins_cost(300);
11857 
11858   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11859   ins_encode %{
11860     int strd = (int)$stride$$constant;
11861     assert(strd != 1 && strd != -1, "sanity");
11862     int m1 = (strd > 0) ? 1 : -1;
11863     // Convert limit to long (EAX:EDX)
11864     __ cdql();
11865     // Convert init to long (init:tmp)
11866     __ movl($tmp$$Register, $init$$Register);
11867     __ sarl($tmp$$Register, 31);
11868     // $limit - $init
11869     __ subl($limit$$Register, $init$$Register);
11870     __ sbbl($limit_hi$$Register, $tmp$$Register);
11871     // + ($stride - 1)
11872     if (strd > 0) {
11873       __ addl($limit$$Register, (strd - 1));
11874       __ adcl($limit_hi$$Register, 0);
11875       __ movl($tmp$$Register, strd);
11876     } else {
11877       __ addl($limit$$Register, (strd + 1));
11878       __ adcl($limit_hi$$Register, -1);
11879       __ lneg($limit_hi$$Register, $limit$$Register);
11880       __ movl($tmp$$Register, -strd);
11881     }
    // signed division: (EAX:EDX) / pos_stride
11883     __ idivl($tmp$$Register);
11884     if (strd < 0) {
11885       // restore sign
11886       __ negl($tmp$$Register);
11887     }
11888     // (EAX) * stride
11889     __ mull($tmp$$Register);
11890     // + init (ignore upper bits)
11891     __ addl($limit$$Register, $init$$Register);
11892   %}
11893   ins_pipe( pipe_slow );
11894 %}
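
// Illustrative only: a C sketch of the exact-limit arithmetic performed above,
// assuming stride != 0 and stride != +/-1 (as the assert in the encoding
// requires).  The helper name is hypothetical; the generated code keeps the
// difference in EDX:EAX so the 64-bit intermediate cannot overflow, and only
// the low 32 bits of the final sum are kept.
//
//   static int loop_limit_sketch(int init, int limit, int stride) {
//     long long span  = (long long) limit - init;
//     long long round = (stride > 0) ? (stride - 1) : (stride + 1);
//     long long trips = (span + round) / stride;    // == ceil(span / stride) when the loop makes progress
//     return (int) (init + stride * trips);         // exact final iterator value
//   }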
11895 
11896 // ============================================================================
11897 // Branch Instructions
11898 // Jump Table
11899 instruct jumpXtnd(rRegI switch_val) %{
11900   match(Jump switch_val);
11901   ins_cost(350);
11902   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11903   ins_encode %{
11904     // Jump to Address(table_base + switch_reg)
11905     Address index(noreg, $switch_val$$Register, Address::times_1);
11906     __ jump(ArrayAddress($constantaddress, index));
11907   %}
11908   ins_pipe(pipe_jmp);
11909 %}
11910 
11911 // Jump Direct - Label defines a relative address from JMP+1
11912 instruct jmpDir(label labl) %{
11913   match(Goto);
11914   effect(USE labl);
11915 
11916   ins_cost(300);
11917   format %{ "JMP    $labl" %}
11918   size(5);
11919   ins_encode %{
11920     Label* L = $labl$$label;
11921     __ jmp(*L, false); // Always long jump
11922   %}
11923   ins_pipe( pipe_jmp );
11924 %}
11925 
11926 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11927 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11928   match(If cop cr);
11929   effect(USE labl);
11930 
11931   ins_cost(300);
11932   format %{ "J$cop    $labl" %}
11933   size(6);
11934   ins_encode %{
11935     Label* L = $labl$$label;
11936     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11937   %}
11938   ins_pipe( pipe_jcc );
11939 %}
11940 
11941 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11942 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11943   match(CountedLoopEnd cop cr);
11944   effect(USE labl);
11945 
11946   ins_cost(300);
11947   format %{ "J$cop    $labl\t# Loop end" %}
11948   size(6);
11949   ins_encode %{
11950     Label* L = $labl$$label;
11951     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11952   %}
11953   ins_pipe( pipe_jcc );
11954 %}
11955 
11956 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11957 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11958   match(CountedLoopEnd cop cmp);
11959   effect(USE labl);
11960 
11961   ins_cost(300);
11962   format %{ "J$cop,u  $labl\t# Loop end" %}
11963   size(6);
11964   ins_encode %{
11965     Label* L = $labl$$label;
11966     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11967   %}
11968   ins_pipe( pipe_jcc );
11969 %}
11970 
11971 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11972   match(CountedLoopEnd cop cmp);
11973   effect(USE labl);
11974 
11975   ins_cost(200);
11976   format %{ "J$cop,u  $labl\t# Loop end" %}
11977   size(6);
11978   ins_encode %{
11979     Label* L = $labl$$label;
11980     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11981   %}
11982   ins_pipe( pipe_jcc );
11983 %}
11984 
11985 // Jump Direct Conditional - using unsigned comparison
11986 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11987   match(If cop cmp);
11988   effect(USE labl);
11989 
11990   ins_cost(300);
11991   format %{ "J$cop,u  $labl" %}
11992   size(6);
11993   ins_encode %{
11994     Label* L = $labl$$label;
11995     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11996   %}
11997   ins_pipe(pipe_jcc);
11998 %}
11999 
12000 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12001   match(If cop cmp);
12002   effect(USE labl);
12003 
12004   ins_cost(200);
12005   format %{ "J$cop,u  $labl" %}
12006   size(6);
12007   ins_encode %{
12008     Label* L = $labl$$label;
12009     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12010   %}
12011   ins_pipe(pipe_jcc);
12012 %}
12013 
12014 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12015   match(If cop cmp);
12016   effect(USE labl);
12017 
12018   ins_cost(200);
12019   format %{ $$template
12020     if ($cop$$cmpcode == Assembler::notEqual) {
12021       $$emit$$"JP,u   $labl\n\t"
12022       $$emit$$"J$cop,u   $labl"
12023     } else {
12024       $$emit$$"JP,u   done\n\t"
12025       $$emit$$"J$cop,u   $labl\n\t"
12026       $$emit$$"done:"
12027     }
12028   %}
12029   ins_encode %{
12030     Label* l = $labl$$label;
12031     if ($cop$$cmpcode == Assembler::notEqual) {
12032       __ jcc(Assembler::parity, *l, false);
12033       __ jcc(Assembler::notEqual, *l, false);
12034     } else if ($cop$$cmpcode == Assembler::equal) {
12035       Label done;
12036       __ jccb(Assembler::parity, done);
12037       __ jcc(Assembler::equal, *l, false);
12038       __ bind(done);
12039     } else {
12040        ShouldNotReachHere();
12041     }
12042   %}
12043   ins_pipe(pipe_jcc);
12044 %}
12045 
12046 // ============================================================================
12047 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12048 // array for an instance of the superklass.  Set a hidden internal cache on a
12049 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12050 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12051 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12052   match(Set result (PartialSubtypeCheck sub super));
12053   effect( KILL rcx, KILL cr );
12054 
12055   ins_cost(1100);  // slightly larger than the next version
12056   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12057             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12058             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12059             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12060             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12061             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12062             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12063      "miss:\t" %}
12064 
12065   opcode(0x1); // Force a XOR of EDI
12066   ins_encode( enc_PartialSubtypeCheck() );
12067   ins_pipe( pipe_slow );
12068 %}
12069 
12070 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12071   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12072   effect( KILL rcx, KILL result );
12073 
12074   ins_cost(1000);
12075   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12076             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12077             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12078             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12079             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12080             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12081      "miss:\t" %}
12082 
12083   opcode(0x0);  // No need to XOR EDI
12084   ins_encode( enc_PartialSubtypeCheck() );
12085   ins_pipe( pipe_slow );
12086 %}
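
// Illustrative only: a rough C sketch of the scan both forms above encode with
// REPNE SCASD.  The helper name and the field accessors are simplified
// stand-ins for the Klass layout the assembly actually walks.
//
//   static int scan_secondary_supers(Klass* sub, Klass* super) {
//     Array<Klass*>* supers = sub->secondary_supers();
//     for (int i = 0; i < supers->length(); i++) {
//       if (supers->at(i) == super) {
//         sub->set_secondary_super_cache(super);   // remember the hit
//         return 0;                                // zero result/flags => hit
//       }
//     }
//     return 1;                                    // non-zero => miss
//   }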
12087 
12088 // ============================================================================
12089 // Branch Instructions -- short offset versions
12090 //
12091 // These instructions are used to replace jumps of a long offset (the default
12092 // match) with jumps of a shorter offset.  These instructions are all tagged
12093 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12094 // match rules in general matching.  Instead, the ADLC generates a conversion
12095 // method in the MachNode which can be used to do in-place replacement of the
12096 // long variant with the shorter variant.  The compiler will determine if a
12097 // branch can be taken by the is_short_branch_offset() predicate in the machine
12098 // specific code section of the file.
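
// Illustrative only: the replacement is legal exactly when the branch
// displacement fits in the signed 8-bit field of a short JMP/Jcc.
// Conceptually (hypothetical helper, ignoring padding and safety margins):
//
//   static bool fits_short_branch(int offset) {
//     return -128 <= offset && offset <= 127;   // rel8 range
//   }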
12099 
12100 // Jump Direct - Label defines a relative address from JMP+1
12101 instruct jmpDir_short(label labl) %{
12102   match(Goto);
12103   effect(USE labl);
12104 
12105   ins_cost(300);
12106   format %{ "JMP,s  $labl" %}
12107   size(2);
12108   ins_encode %{
12109     Label* L = $labl$$label;
12110     __ jmpb(*L);
12111   %}
12112   ins_pipe( pipe_jmp );
12113   ins_short_branch(1);
12114 %}
12115 
12116 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12117 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12118   match(If cop cr);
12119   effect(USE labl);
12120 
12121   ins_cost(300);
12122   format %{ "J$cop,s  $labl" %}
12123   size(2);
12124   ins_encode %{
12125     Label* L = $labl$$label;
12126     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12127   %}
12128   ins_pipe( pipe_jcc );
12129   ins_short_branch(1);
12130 %}
12131 
12132 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12133 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12134   match(CountedLoopEnd cop cr);
12135   effect(USE labl);
12136 
12137   ins_cost(300);
12138   format %{ "J$cop,s  $labl\t# Loop end" %}
12139   size(2);
12140   ins_encode %{
12141     Label* L = $labl$$label;
12142     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12143   %}
12144   ins_pipe( pipe_jcc );
12145   ins_short_branch(1);
12146 %}
12147 
12148 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12149 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12150   match(CountedLoopEnd cop cmp);
12151   effect(USE labl);
12152 
12153   ins_cost(300);
12154   format %{ "J$cop,us $labl\t# Loop end" %}
12155   size(2);
12156   ins_encode %{
12157     Label* L = $labl$$label;
12158     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12159   %}
12160   ins_pipe( pipe_jcc );
12161   ins_short_branch(1);
12162 %}
12163 
12164 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12165   match(CountedLoopEnd cop cmp);
12166   effect(USE labl);
12167 
12168   ins_cost(300);
12169   format %{ "J$cop,us $labl\t# Loop end" %}
12170   size(2);
12171   ins_encode %{
12172     Label* L = $labl$$label;
12173     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12174   %}
12175   ins_pipe( pipe_jcc );
12176   ins_short_branch(1);
12177 %}
12178 
12179 // Jump Direct Conditional - using unsigned comparison
12180 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12181   match(If cop cmp);
12182   effect(USE labl);
12183 
12184   ins_cost(300);
12185   format %{ "J$cop,us $labl" %}
12186   size(2);
12187   ins_encode %{
12188     Label* L = $labl$$label;
12189     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12190   %}
12191   ins_pipe( pipe_jcc );
12192   ins_short_branch(1);
12193 %}
12194 
12195 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12196   match(If cop cmp);
12197   effect(USE labl);
12198 
12199   ins_cost(300);
12200   format %{ "J$cop,us $labl" %}
12201   size(2);
12202   ins_encode %{
12203     Label* L = $labl$$label;
12204     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12205   %}
12206   ins_pipe( pipe_jcc );
12207   ins_short_branch(1);
12208 %}
12209 
12210 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12211   match(If cop cmp);
12212   effect(USE labl);
12213 
12214   ins_cost(300);
12215   format %{ $$template
12216     if ($cop$$cmpcode == Assembler::notEqual) {
12217       $$emit$$"JP,u,s   $labl\n\t"
12218       $$emit$$"J$cop,u,s   $labl"
12219     } else {
12220       $$emit$$"JP,u,s   done\n\t"
12221       $$emit$$"J$cop,u,s  $labl\n\t"
12222       $$emit$$"done:"
12223     }
12224   %}
12225   size(4);
12226   ins_encode %{
12227     Label* l = $labl$$label;
12228     if ($cop$$cmpcode == Assembler::notEqual) {
12229       __ jccb(Assembler::parity, *l);
12230       __ jccb(Assembler::notEqual, *l);
12231     } else if ($cop$$cmpcode == Assembler::equal) {
12232       Label done;
12233       __ jccb(Assembler::parity, done);
12234       __ jccb(Assembler::equal, *l);
12235       __ bind(done);
12236     } else {
12237        ShouldNotReachHere();
12238     }
12239   %}
12240   ins_pipe(pipe_jcc);
12241   ins_short_branch(1);
12242 %}
12243 
12244 // ============================================================================
12245 // Long Compare
12246 //
12247 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12248 // is tricky.  The flavor of compare used depends on whether we are testing
12249 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12250 // The GE test is the negated LT test.  The LE test can be had by commuting
12251 // the operands (yielding a GE test) and then negating; negate again for the
12252 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12253 // NE test is negated from that.
12254 
12255 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12256 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12257 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12258 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12259 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12260 // foo match ends up with the wrong leaf.  One fix is to not match both
12261 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12262 // both forms beat the trinary form of long-compare and both are very useful
12263 // on Intel which has so few registers.
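
// Illustrative only: a C sketch of the decompositions described above (the
// instructs below implement them with flags rather than explicit booleans;
// the helper names are hypothetical).
//
//   // LT test against zero: only the sign bit of the high half matters.
//   static int long_lt0(int hi, unsigned lo) { return hi < 0; }
//   // EQ test against zero: OR the halves and test for zero.
//   static int long_eq0(int hi, unsigned lo) { return (hi | lo) == 0; }
//   // General LT: compare high halves, falling back to an unsigned compare
//   // of the low halves (CMP lo / SBB hi in the flag-setting instructs).
//   static int long_lt(int a_hi, unsigned a_lo, int b_hi, unsigned b_lo) {
//     return (a_hi < b_hi) || (a_hi == b_hi && a_lo < b_lo);
//   }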
12264 
12265 // Manifest a CmpL result in an integer register.  Very painful.
12266 // This is the test to avoid.
12267 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12268   match(Set dst (CmpL3 src1 src2));
12269   effect( KILL flags );
12270   ins_cost(1000);
12271   format %{ "XOR    $dst,$dst\n\t"
12272             "CMP    $src1.hi,$src2.hi\n\t"
12273             "JLT,s  m_one\n\t"
12274             "JGT,s  p_one\n\t"
12275             "CMP    $src1.lo,$src2.lo\n\t"
12276             "JB,s   m_one\n\t"
12277             "JEQ,s  done\n"
12278     "p_one:\tINC    $dst\n\t"
12279             "JMP,s  done\n"
12280     "m_one:\tDEC    $dst\n"
12281      "done:" %}
12282   ins_encode %{
12283     Label p_one, m_one, done;
12284     __ xorptr($dst$$Register, $dst$$Register);
12285     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12286     __ jccb(Assembler::less,    m_one);
12287     __ jccb(Assembler::greater, p_one);
12288     __ cmpl($src1$$Register, $src2$$Register);
12289     __ jccb(Assembler::below,   m_one);
12290     __ jccb(Assembler::equal,   done);
12291     __ bind(p_one);
12292     __ incrementl($dst$$Register);
12293     __ jmpb(done);
12294     __ bind(m_one);
12295     __ decrementl($dst$$Register);
12296     __ bind(done);
12297   %}
12298   ins_pipe( pipe_slow );
12299 %}
12300 
12301 //======
12302 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12303 // compares.  Can be used for LE or GT compares by reversing arguments.
12304 // NOT GOOD FOR EQ/NE tests.
12305 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12306   match( Set flags (CmpL src zero ));
12307   ins_cost(100);
12308   format %{ "TEST   $src.hi,$src.hi" %}
12309   opcode(0x85);
12310   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12311   ins_pipe( ialu_cr_reg_reg );
12312 %}
12313 
12314 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12315 // compares.  Can be used for LE or GT compares by reversing arguments.
12316 // NOT GOOD FOR EQ/NE tests.
12317 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12318   match( Set flags (CmpL src1 src2 ));
12319   effect( TEMP tmp );
12320   ins_cost(300);
12321   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12322             "MOV    $tmp,$src1.hi\n\t"
12323             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12324   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12325   ins_pipe( ialu_cr_reg_reg );
12326 %}
12327 
12328 // Long compares reg < zero/req OR reg >= zero/req.
12329 // Just a wrapper for a normal branch, plus the predicate test.
12330 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12331   match(If cmp flags);
12332   effect(USE labl);
12333   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12334   expand %{
12335     jmpCon(cmp,flags,labl);    // JLT or JGE...
12336   %}
12337 %}
12338 
12339 // Compare 2 longs and CMOVE longs.
12340 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12341   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12342   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12343   ins_cost(400);
12344   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12345             "CMOV$cmp $dst.hi,$src.hi" %}
12346   opcode(0x0F,0x40);
12347   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12348   ins_pipe( pipe_cmov_reg_long );
12349 %}
12350 
12351 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12352   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12353   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12354   ins_cost(500);
12355   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12356             "CMOV$cmp $dst.hi,$src.hi" %}
12357   opcode(0x0F,0x40);
12358   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12359   ins_pipe( pipe_cmov_reg_long );
12360 %}
12361 
12362 // Compare 2 longs and CMOVE ints.
12363 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12364   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12365   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12366   ins_cost(200);
12367   format %{ "CMOV$cmp $dst,$src" %}
12368   opcode(0x0F,0x40);
12369   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12370   ins_pipe( pipe_cmov_reg );
12371 %}
12372 
12373 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12374   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12375   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12376   ins_cost(250);
12377   format %{ "CMOV$cmp $dst,$src" %}
12378   opcode(0x0F,0x40);
12379   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12380   ins_pipe( pipe_cmov_mem );
12381 %}
12382 
12383 // Compare 2 longs and CMOVE ints.
12384 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12385   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12386   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12387   ins_cost(200);
12388   format %{ "CMOV$cmp $dst,$src" %}
12389   opcode(0x0F,0x40);
12390   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12391   ins_pipe( pipe_cmov_reg );
12392 %}
12393 
12394 // Compare 2 longs and CMOVE doubles
12395 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12397   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12398   ins_cost(200);
12399   expand %{
12400     fcmovDPR_regS(cmp,flags,dst,src);
12401   %}
12402 %}
12403 
12404 // Compare 2 longs and CMOVE doubles
12405 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12407   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12408   ins_cost(200);
12409   expand %{
12410     fcmovD_regS(cmp,flags,dst,src);
12411   %}
12412 %}
12413 
12414 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12416   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12417   ins_cost(200);
12418   expand %{
12419     fcmovFPR_regS(cmp,flags,dst,src);
12420   %}
12421 %}
12422 
12423 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12425   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12426   ins_cost(200);
12427   expand %{
12428     fcmovF_regS(cmp,flags,dst,src);
12429   %}
12430 %}
12431 
12432 //======
12433 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12434 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12435   match( Set flags (CmpL src zero ));
12436   effect(TEMP tmp);
12437   ins_cost(200);
12438   format %{ "MOV    $tmp,$src.lo\n\t"
12439             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12440   ins_encode( long_cmp_flags0( src, tmp ) );
12441   ins_pipe( ialu_reg_reg_long );
12442 %}
12443 
12444 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12445 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12446   match( Set flags (CmpL src1 src2 ));
12447   ins_cost(200+300);
12448   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12449             "JNE,s  skip\n\t"
12450             "CMP    $src1.hi,$src2.hi\n\t"
12451      "skip:\t" %}
12452   ins_encode( long_cmp_flags1( src1, src2 ) );
12453   ins_pipe( ialu_cr_reg_reg );
12454 %}
12455 
12456 // Long compare reg == zero/reg OR reg != zero/reg
12457 // Just a wrapper for a normal branch, plus the predicate test.
12458 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12459   match(If cmp flags);
12460   effect(USE labl);
12461   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12462   expand %{
12463     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12464   %}
12465 %}
12466 
12467 // Compare 2 longs and CMOVE longs.
12468 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12469   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12470   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12471   ins_cost(400);
12472   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12473             "CMOV$cmp $dst.hi,$src.hi" %}
12474   opcode(0x0F,0x40);
12475   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12476   ins_pipe( pipe_cmov_reg_long );
12477 %}
12478 
12479 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12480   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12481   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12482   ins_cost(500);
12483   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12484             "CMOV$cmp $dst.hi,$src.hi" %}
12485   opcode(0x0F,0x40);
12486   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12487   ins_pipe( pipe_cmov_reg_long );
12488 %}
12489 
12490 // Compare 2 longs and CMOVE ints.
12491 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12492   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12493   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12494   ins_cost(200);
12495   format %{ "CMOV$cmp $dst,$src" %}
12496   opcode(0x0F,0x40);
12497   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12498   ins_pipe( pipe_cmov_reg );
12499 %}
12500 
12501 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12503   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12504   ins_cost(250);
12505   format %{ "CMOV$cmp $dst,$src" %}
12506   opcode(0x0F,0x40);
12507   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12508   ins_pipe( pipe_cmov_mem );
12509 %}
12510 
12511 // Compare 2 longs and CMOVE ptrs.
12512 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12513   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12514   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12515   ins_cost(200);
12516   format %{ "CMOV$cmp $dst,$src" %}
12517   opcode(0x0F,0x40);
12518   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12519   ins_pipe( pipe_cmov_reg );
12520 %}
12521 
12522 // Compare 2 longs and CMOVE doubles
12523 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12524   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12525   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12526   ins_cost(200);
12527   expand %{
12528     fcmovDPR_regS(cmp,flags,dst,src);
12529   %}
12530 %}
12531 
12532 // Compare 2 longs and CMOVE doubles
12533 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12534   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12535   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12536   ins_cost(200);
12537   expand %{
12538     fcmovD_regS(cmp,flags,dst,src);
12539   %}
12540 %}
12541 
12542 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12543   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12544   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12545   ins_cost(200);
12546   expand %{
12547     fcmovFPR_regS(cmp,flags,dst,src);
12548   %}
12549 %}
12550 
12551 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12552   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12553   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12554   ins_cost(200);
12555   expand %{
12556     fcmovF_regS(cmp,flags,dst,src);
12557   %}
12558 %}
12559 
12560 //======
12561 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12562 // Same as cmpL_reg_flags_LEGT except must negate src
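// In outline: the encoding computes 0 - src as a 64-bit subtract
// (XOR tmp,tmp; CMP tmp,src.lo; SBB tmp,src.hi), so SF/OF describe
// "0 compared with src" rather than "src compared with 0".  The consumer
// therefore uses the commuted condition: "src > 0" is tested as "0 < src" (LT)
// and "src <= 0" as "0 >= src" (GE), which depend only on SF/OF and do not
// require a zero flag for the full 64-bit difference.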
12563 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12564   match( Set flags (CmpL src zero ));
12565   effect( TEMP tmp );
12566   ins_cost(300);
12567   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12568             "CMP    $tmp,$src.lo\n\t"
12569             "SBB    $tmp,$src.hi\n\t" %}
12570   ins_encode( long_cmp_flags3(src, tmp) );
12571   ins_pipe( ialu_reg_reg_long );
12572 %}
12573 
12574 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12575 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12576 // requires a commuted test to get the same result.
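// Note on the swap: exchanging the operands turns the ZF-dependent conditions
// LE and GT into GE and LT on (src2 - src1), e.g. "src1 > src2" becomes
// "src2 < src1".  GE/LT are decided by SF and OF alone, which the
// CMP-low / SBB-high pair below produces correctly for the full 64-bit
// difference; a full 64-bit ZF (which LE/GT would need) is never required.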
12577 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12578   match( Set flags (CmpL src1 src2 ));
12579   effect( TEMP tmp );
12580   ins_cost(300);
12581   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12582             "MOV    $tmp,$src2.hi\n\t"
12583             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12584   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12585   ins_pipe( ialu_cr_reg_reg );
12586 %}
12587 
12588 // Long compare reg <= zero/reg OR reg > zero/reg.
12589 // Just a wrapper for a normal branch, plus the predicate test
12590 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12591   match(If cmp flags);
12592   effect(USE labl);
12593   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12594   ins_cost(300);
12595   expand %{
12596     jmpCon(cmp,flags,labl);    // JGT or JLE...
12597   %}
12598 %}
12599 
12600 // Compare 2 longs and CMOVE longs.
12601 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12602   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12603   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12604   ins_cost(400);
12605   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12606             "CMOV$cmp $dst.hi,$src.hi" %}
12607   opcode(0x0F,0x40);
12608   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12609   ins_pipe( pipe_cmov_reg_long );
12610 %}
12611 
12612 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12613   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12614   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12615   ins_cost(500);
12616   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12617             "CMOV$cmp $dst.hi,$src.hi" %}
12618   opcode(0x0F,0x40);
12619   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12620   ins_pipe( pipe_cmov_reg_long );
12621 %}
12622 
12623 // Compare 2 longs and CMOVE ints.
12624 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12625   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12626   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12627   ins_cost(200);
12628   format %{ "CMOV$cmp $dst,$src" %}
12629   opcode(0x0F,0x40);
12630   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12631   ins_pipe( pipe_cmov_reg );
12632 %}
12633 
12634 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12635   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12636   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12637   ins_cost(250);
12638   format %{ "CMOV$cmp $dst,$src" %}
12639   opcode(0x0F,0x40);
12640   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12641   ins_pipe( pipe_cmov_mem );
12642 %}
12643 
12644 // Compare 2 longs and CMOVE ptrs.
12645 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12646   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12647   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12648   ins_cost(200);
12649   format %{ "CMOV$cmp $dst,$src" %}
12650   opcode(0x0F,0x40);
12651   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12652   ins_pipe( pipe_cmov_reg );
12653 %}
12654 
12655 // Compare 2 longs and CMOVE doubles
12656 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12657   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12658   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12659   ins_cost(200);
12660   expand %{
12661     fcmovDPR_regS(cmp,flags,dst,src);
12662   %}
12663 %}
12664 
12665 // Compare 2 longs and CMOVE doubles
12666 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12667   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12668   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12669   ins_cost(200);
12670   expand %{
12671     fcmovD_regS(cmp,flags,dst,src);
12672   %}
12673 %}
12674 
12675 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12676   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12677   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12678   ins_cost(200);
12679   expand %{
12680     fcmovFPR_regS(cmp,flags,dst,src);
12681   %}
12682 %}
12683 
12684 
12685 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12686   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12687   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12688   ins_cost(200);
12689   expand %{
12690     fcmovF_regS(cmp,flags,dst,src);
12691   %}
12692 %}
12693 
12694 
12695 // ============================================================================
12696 // Procedure Call/Return Instructions
12697 // Call Java Static Instruction
12698 // Note: If this code changes, the corresponding ret_addr_offset() and
12699 //       compute_padding() functions will have to be adjusted.
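// Note (general background, assumed rather than spelled out here): the 0xE8
// call emitted below may be re-bound while other threads are executing it, so
// the alignment/padding requested for this rule is assumed to keep the 4-byte
// displacement patchable atomically, and ret_addr_offset() must report exactly
// where the return address falls within the emitted bytes.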
12700 instruct CallStaticJavaDirect(method meth) %{
12701   match(CallStaticJava);
12702   effect(USE meth);
12703 
12704   ins_cost(300);
12705   format %{ "CALL,static " %}
12706   opcode(0xE8); /* E8 cd */
12707   ins_encode( pre_call_resets,
12708               Java_Static_Call( meth ),
12709               call_epilog,
12710               post_call_FPU );
12711   ins_pipe( pipe_slow );
12712   ins_alignment(4);
12713 %}
12714 
12715 // Call Java Dynamic Instruction
12716 // Note: If this code changes, the corresponding ret_addr_offset() and
12717 //       compute_padding() functions will have to be adjusted.
12718 instruct CallDynamicJavaDirect(method meth) %{
12719   match(CallDynamicJava);
12720   effect(USE meth);
12721 
12722   ins_cost(300);
12723   format %{ "MOV    EAX,(oop)-1\n\t"
12724             "CALL,dynamic" %}
12725   opcode(0xE8); /* E8 cd */
12726   ins_encode( pre_call_resets,
12727               Java_Dynamic_Call( meth ),
12728               call_epilog,
12729               post_call_FPU );
12730   ins_pipe( pipe_slow );
12731   ins_alignment(4);
12732 %}
12733 
12734 // Call Runtime Instruction
12735 instruct CallRuntimeDirect(method meth) %{
12736   match(CallRuntime );
12737   effect(USE meth);
12738 
12739   ins_cost(300);
12740   format %{ "CALL,runtime " %}
12741   opcode(0xE8); /* E8 cd */
12742   // Use FFREEs to clear entries in float stack
12743   ins_encode( pre_call_resets,
12744               FFree_Float_Stack_All,
12745               Java_To_Runtime( meth ),
12746               post_call_FPU );
12747   ins_pipe( pipe_slow );
12748 %}
12749 
12750 // Call runtime without safepoint
12751 instruct CallLeafDirect(method meth) %{
12752   match(CallLeaf);
12753   effect(USE meth);
12754 
12755   ins_cost(300);
12756   format %{ "CALL_LEAF,runtime " %}
12757   opcode(0xE8); /* E8 cd */
12758   ins_encode( pre_call_resets,
12759               FFree_Float_Stack_All,
12760               Java_To_Runtime( meth ),
12761               Verify_FPU_For_Leaf, post_call_FPU );
12762   ins_pipe( pipe_slow );
12763 %}
12764 
12765 instruct CallLeafNoFPDirect(method meth) %{
12766   match(CallLeafNoFP);
12767   effect(USE meth);
12768 
12769   ins_cost(300);
12770   format %{ "CALL_LEAF_NOFP,runtime " %}
12771   opcode(0xE8); /* E8 cd */
12772   ins_encode(Java_To_Runtime(meth));
12773   ins_pipe( pipe_slow );
12774 %}
12775 
12776 
12777 // Return Instruction
12778 // Remove the return address & jump to it.
12779 instruct Ret() %{
12780   match(Return);
12781   format %{ "RET" %}
12782   opcode(0xC3);
12783   ins_encode(OpcP);
12784   ins_pipe( pipe_jmp );
12785 %}
12786 
12787 // Tail Call; Jump from runtime stub to Java code.
12788 // Also known as an 'interprocedural jump'.
12789 // Target of jump will eventually return to caller.
12790 // TailJump below removes the return address.
12791 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12792   match(TailCall jump_target method_oop );
12793   ins_cost(300);
12794   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12795   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12796   ins_encode( OpcP, RegOpc(jump_target) );
12797   ins_pipe( pipe_jmp );
12798 %}
12799 
12800 
12801 // Tail Jump; remove the return address; jump to target.
12802 // TailCall above leaves the return address around.
12803 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12804   match( TailJump jump_target ex_oop );
12805   ins_cost(300);
12806   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12807             "JMP    $jump_target " %}
12808   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12809   ins_encode( enc_pop_rdx,
12810               OpcP, RegOpc(jump_target) );
12811   ins_pipe( pipe_jmp );
12812 %}
12813 
12814 // Create exception oop: created by stack-crawling runtime code.
12815 // Created exception is now available to this handler, and is set up
12816 // just prior to jumping to this handler.  No code emitted.
12817 instruct CreateException( eAXRegP ex_oop )
12818 %{
12819   match(Set ex_oop (CreateEx));
12820 
12821   size(0);
12822   // use the following format syntax
12823   format %{ "# exception oop is in EAX; no code emitted" %}
12824   ins_encode();
12825   ins_pipe( empty );
12826 %}
12827 
12828 
12829 // Rethrow exception:
12830 // The exception oop will come in the first argument position.
12831 // Then JUMP (not call) to the rethrow stub code.
12832 instruct RethrowException()
12833 %{
12834   match(Rethrow);
12835 
12836   // use the following format syntax
12837   format %{ "JMP    rethrow_stub" %}
12838   ins_encode(enc_rethrow);
12839   ins_pipe( pipe_jmp );
12840 %}
12841 
12842 // inlined locking and unlocking
12843 
12844 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12845   predicate(Compile::current()->use_rtm());
12846   match(Set cr (FastLock object box));
12847   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12848   ins_cost(300);
12849   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12850   ins_encode %{
12851     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12852                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12853                  _counters, _rtm_counters, _stack_rtm_counters,
12854                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12855                  true, ra_->C->profile_rtm());
12856   %}
12857   ins_pipe(pipe_slow);
12858 %}
12859 
12860 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12861   predicate(!Compile::current()->use_rtm());
12862   match(Set cr (FastLock object box));
12863   effect(TEMP tmp, TEMP scr, USE_KILL box);
12864   ins_cost(300);
12865   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12866   ins_encode %{
12867     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12868                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12869   %}
12870   ins_pipe(pipe_slow);
12871 %}
12872 
12873 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12874   match(Set cr (FastUnlock object box));
12875   effect(TEMP tmp, USE_KILL box);
12876   ins_cost(300);
12877   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12878   ins_encode %{
12879     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12880   %}
12881   ins_pipe(pipe_slow);
12882 %}
12883 
12884 
12885 
12886 // ============================================================================
12887 // Safepoint Instruction
12888 instruct safePoint_poll(eFlagsReg cr) %{
12889   match(SafePoint);
12890   effect(KILL cr);
12891 
12892   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12893   // On SPARC that might be acceptable as we can generate the address with
12894   // just a sethi, saving an or.  By polling at offset 0 we can end up
12895   // putting additional pressure on index 0 of the D$.  Because of
12896   // alignment (just like the situation at hand) the lower indices tend
12897   // to see more traffic.  It'd be better to change the polling address
12898   // to offset 0 of the last $line in the polling page.
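  // In outline (the usual HotSpot polling scheme, assumed here): the TEST
  // below simply reads the dedicated polling page.  When the VM requests a
  // safepoint it protects that page, the read faults, and the signal handler
  // brings the thread to the safepoint; no explicit branch is emitted.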
12899 
12900   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12901   ins_cost(125);
12902   size(6);
12903   ins_encode( Safepoint_Poll() );
12904   ins_pipe( ialu_reg_mem );
12905 %}
12906 
12907 
12908 // ============================================================================
12909 // This name is KNOWN by the ADLC and cannot be changed.
12910 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12911 // for this guy.
12912 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12913   match(Set dst (ThreadLocal));
12914   effect(DEF dst, KILL cr);
12915 
12916   format %{ "MOV    $dst, Thread::current()" %}
12917   ins_encode %{
12918     Register dstReg = as_Register($dst$$reg);
12919     __ get_thread(dstReg);
12920   %}
12921   ins_pipe( ialu_reg_fat );
12922 %}
12923 
12924 
12925 
12926 //----------PEEPHOLE RULES-----------------------------------------------------
12927 // These must follow all instruction definitions as they use the names
12928 // defined in the instruction definitions.
12929 //
12930 // peepmatch ( root_instr_name [preceding_instruction]* );
12931 //
12932 // peepconstraint %{
12933 // (instruction_number.operand_name relational_op instruction_number.operand_name
12934 //  [, ...] );
12935 // // instruction numbers are zero-based using left to right order in peepmatch
12936 //
12937 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12938 // // provide an instruction_number.operand_name for each operand that appears
12939 // // in the replacement instruction's match rule
12940 //
12941 // ---------VM FLAGS---------------------------------------------------------
12942 //
12943 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12944 //
12945 // Each peephole rule is given an identifying number starting with zero and
12946 // increasing by one in the order seen by the parser.  An individual peephole
12947 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12948 // on the command-line.
12949 //
12950 // ---------CURRENT LIMITATIONS----------------------------------------------
12951 //
12952 // Only match adjacent instructions in same basic block
12953 // Only equality constraints
12954 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12955 // Only one replacement instruction
12956 //
12957 // ---------EXAMPLE----------------------------------------------------------
12958 //
12959 // // pertinent parts of existing instructions in architecture description
12960 // instruct movI(rRegI dst, rRegI src) %{
12961 //   match(Set dst (CopyI src));
12962 // %}
12963 //
12964 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12965 //   match(Set dst (AddI dst src));
12966 //   effect(KILL cr);
12967 // %}
12968 //
12969 // // Change (inc mov) to lea
12970 // peephole %{
12971 //   // increment preceded by register-register move
12972 //   peepmatch ( incI_eReg movI );
12973 //   // require that the destination register of the increment
12974 //   // match the destination register of the move
12975 //   peepconstraint ( 0.dst == 1.dst );
12976 //   // construct a replacement instruction that sets
12977 //   // the destination to ( move's source register + one )
12978 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12979 // %}
12980 //
12981 // Implementation no longer uses movX instructions since
12982 // machine-independent system no longer uses CopyX nodes.
12983 //
12984 // peephole %{
12985 //   peepmatch ( incI_eReg movI );
12986 //   peepconstraint ( 0.dst == 1.dst );
12987 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12988 // %}
12989 //
12990 // peephole %{
12991 //   peepmatch ( decI_eReg movI );
12992 //   peepconstraint ( 0.dst == 1.dst );
12993 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12994 // %}
12995 //
12996 // peephole %{
12997 //   peepmatch ( addI_eReg_imm movI );
12998 //   peepconstraint ( 0.dst == 1.dst );
12999 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13000 // %}
13001 //
13002 // peephole %{
13003 //   peepmatch ( addP_eReg_imm movP );
13004 //   peepconstraint ( 0.dst == 1.dst );
13005 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13006 // %}
13007 
13008 // // Change load of spilled value to only a spill
13009 // instruct storeI(memory mem, rRegI src) %{
13010 //   match(Set mem (StoreI mem src));
13011 // %}
13012 //
13013 // instruct loadI(rRegI dst, memory mem) %{
13014 //   match(Set dst (LoadI mem));
13015 // %}
13016 //
13017 peephole %{
13018   peepmatch ( loadI storeI );
13019   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13020   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13021 %}
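
// The rule above matches a loadI whose immediately preceding instruction is a
// storeI to the same memory slot from the same register (constraints
// 1.src == 0.dst and 1.mem == 0.mem).  Since the stored value is still live in
// that register, the reload is redundant and the pair collapses to the store
// alone -- "load of spilled value" becomes "only a spill".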
13022 
13023 //----------SMARTSPILL RULES---------------------------------------------------
13024 // These must follow all instruction definitions as they use the names
13025 // defined in the instruction definitions.