1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
  64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
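// For example, the EBX entry above reads: SOC (save-on-call) as its register
// save type, SOE (save-on-entry) under the C calling convention, spilled via
// LoadI/StoreI (Op_RegI), hardware encoding 3, and backed by rbx->as_VMReg().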
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0), except in the midst of
// emission of assembly for a machnode.  During emission the FPU stack is
// pushed, making FPR1 == st(1) temporarily.  However, at any safepoint the
// stack will not have this element, so FPR1 == st(0) from the oopMap
// viewpoint.  This same numbering weirdness forces the instruction encoding
// to play games with the register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
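// E.g. the low half of a long in EAX (encoding 0) has its high half in EDX
// (encoding 2); likewise ECX/EBX (1/3) and EBP/EDI (5/7), matching the long
// pairs named in the alloc_class comment above.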
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
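// For instance, ANDPS/ANDPD with float_signmask_pool/double_signmask_pool
// clears each lane's sign bit (AbsF/AbsD), while XORPS/XORPD with
// float_signflip_pool/double_signflip_pool flips it (NegF/NegD).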
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
    if (UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
// !!!!! Special hack to get all types of calls to specify the byte offset
 302 //       from the start of the call to the point where the return address
 303 //       will point.
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
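
// For example, with the method in 24-bit FP mode and wide (>16 byte) vectors
// in use with UseAVX <= 2, pre_call_resets_size() is 6 (fldcw) + 3
// (vzeroupper) = 9, so a static Java call's return address sits 9 + 5 = 14
// bytes from the start of the pre-call sequence, and a dynamic one 9 + 10 = 19.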
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
 345 
 346 // EMIT_RM()
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
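// For example, emit_rm(cbuf, 0x3, 0x0, ESP_enc) emits the ModRM byte 0xC4
// (mod=11, reg=000, rm=100).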
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 414   }
 415 }
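// For example, store_to_stackslot(cbuf, 0xDB, 0x0, 8) encodes FILD DWORD PTR
// [ESP+8] as the bytes DB 44 24 08 (opcode, ModRM, SIB, disp8).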
 416 
 417    // rRegI ereg, memory mem) %{    // emit_reg_mem
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // There is no index & no scale, use form without SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
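// In short: with no index and a non-ESP base this emits the plain ModRM forms
// (mod=00 for a zero displacement unless the base is EBP, mod=01 + disp8 for
// small non-relocated displacements, mod=10 + disp32 otherwise, plus the
// mod=00/rm=101 absolute [disp32] form when base == -1); indexed or ESP-based
// addresses take the corresponding SIB forms instead.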
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
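// Reading of the sequence below: from the flags of a preceding
// ucomiss/ucomisd-style compare, dst ends up -1 (below or unordered),
// 0 (equal), or +1 (above).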
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
 584     }
 585   }
 586 
 587   if (VerifyStackAtCalls) {
 588     st->print("\n\t");
 589     framesize -= wordSize;
 590     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 591   }
 592 
 593   if( C->in_24_bit_fp_mode() ) {
 594     st->print("\n\t");
 595     st->print("FLDCW  \t# load 24 bit fpu control word");
 596   }
 597   if (UseSSE >= 2 && VerifyFPU) {
 598     st->print("\n\t");
 599     st->print("# verify FPU stack (must be clean on entry)");
 600   }
 601 
 602 #ifdef ASSERT
 603   if (VerifyStackAtCalls) {
 604     st->print("\n\t");
 605     st->print("# stack alignment check");
 606   }
 607 #endif
 608   st->cr();
 609 }
 610 #endif
 611 
 612 
 613 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 614   Compile* C = ra_->C;
 615   MacroAssembler _masm(&cbuf);
 616 
 617   int framesize = C->frame_size_in_bytes();
 618   int bangsize = C->bang_size_in_bytes();
 619 
 620   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 621 
 622   C->set_frame_complete(cbuf.insts_size());
 623 
 624   if (C->has_mach_constant_base_node()) {
 625     // NOTE: We set the table base offset here because users might be
 626     // emitted before MachConstantBaseNode.
 627     Compile::ConstantTable& constant_table = C->constant_table();
 628     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 629   }
 630 }
 631 
 632 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 633   return MachNode::size(ra_); // too many variables; just compute it the hard way
 634 }
 635 
 636 int MachPrologNode::reloc() const {
 637   return 0; // a large enough number
 638 }
 639 
 640 //=============================================================================
 641 #ifndef PRODUCT
 642 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 643   Compile *C = ra_->C;
 644   int framesize = C->frame_size_in_bytes();
 645   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 647   framesize -= 2*wordSize;
 648 
 649   if (C->max_vector_size() > 16) {
 650     st->print("VZEROUPPER");
 651     st->cr(); st->print("\t");
 652   }
 653   if (C->in_24_bit_fp_mode()) {
 654     st->print("FLDCW  standard control word");
 655     st->cr(); st->print("\t");
 656   }
 657   if (framesize) {
 658     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 659     st->cr(); st->print("\t");
 660   }
 661   st->print_cr("POPL   EBP"); st->print("\t");
 662   if (do_polling() && C->is_method_compilation()) {
 663     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 664     st->cr(); st->print("\t");
 665   }
 666 }
 667 #endif
 668 
 669 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 670   Compile *C = ra_->C;
 671 
 672   if (C->max_vector_size() > 16) {
 673     // Clear upper bits of YMM registers when current compiled code uses
 674     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 675     MacroAssembler masm(&cbuf);
 676     masm.vzeroupper();
 677   }
 678   // If method set FPU control word, restore to standard control word
 679   if (C->in_24_bit_fp_mode()) {
 680     MacroAssembler masm(&cbuf);
 681     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 682   }
 683 
 684   int framesize = C->frame_size_in_bytes();
 685   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 687   framesize -= 2*wordSize;
 688 
 689   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 690 
 691   if (framesize >= 128) {
 692     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 693     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 694     emit_d32(cbuf, framesize);
 695   } else if (framesize) {
 696     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 698     emit_d8(cbuf, framesize);
 699   }
 700 
 701   emit_opcode(cbuf, 0x58 | EBP_enc);
 702 
 703   if (do_polling() && C->is_method_compilation()) {
 704     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 705     emit_opcode(cbuf,0x85);
 706     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 707     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 708   }
 709 }
 710 
 711 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 712   Compile *C = ra_->C;
 713   // If method set FPU control word, restore to standard control word
 714   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 715   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 716   if (do_polling() && C->is_method_compilation()) size += 6;
 717 
 718   int framesize = C->frame_size_in_bytes();
 719   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 721   framesize -= 2*wordSize;
 722 
  size++; // popl rbp
 724 
 725   if (framesize >= 128) {
 726     size += 6;
 727   } else {
 728     size += framesize ? 3 : 0;
 729   }
 730   return size;
 731 }
 732 
 733 int MachEpilogNode::reloc() const {
 734   return 0; // a large enough number
 735 }
 736 
 737 const Pipeline * MachEpilogNode::pipeline() const {
 738   return MachNode::pipeline_class();
 739 }
 740 
 741 int MachEpilogNode::safepoint_offset() const { return 0; }
 742 
 743 //=============================================================================
 744 
 745 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 746 static enum RC rc_class( OptoReg::Name reg ) {
 747 
 748   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 749   if (OptoReg::is_stack(reg)) return rc_stack;
 750 
 751   VMReg r = OptoReg::as_VMReg(reg);
 752   if (r->is_Register()) return rc_int;
 753   if (r->is_FloatRegister()) {
 754     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 755     return rc_float;
 756   }
 757   assert(r->is_XMMRegister(), "must be");
 758   return rc_xmm;
 759 }
 760 
 761 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 762                         int opcode, const char *op_str, int size, outputStream* st ) {
 763   if( cbuf ) {
 764     emit_opcode  (*cbuf, opcode );
 765     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 766 #ifndef PRODUCT
 767   } else if( !do_size ) {
 768     if( size != 0 ) st->print("\n\t");
 769     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 770       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 771       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 772     } else { // FLD, FST, PUSH, POP
 773       st->print("%s [ESP + #%d]",op_str,offset);
 774     }
 775 #endif
 776   }
 777   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
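  // 3 bytes = opcode + ModRM + SIB (an ESP-based address always needs a SIB
  // byte), plus 0, 1, or 4 displacement bytes as computed above.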
 778   return size+3+offset_size;
 779 }
 780 
 781 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 782 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 783                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 784   int in_size_in_bits = Assembler::EVEX_32bit;
 785   int evex_encoding = 0;
 786   if (reg_lo+1 == reg_hi) {
 787     in_size_in_bits = Assembler::EVEX_64bit;
 788     evex_encoding = Assembler::VEX_W;
 789   }
 790   if (cbuf) {
 791     MacroAssembler _masm(cbuf);
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 843       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 844                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 845     } else {
 846       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 847                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 848     }
 849 #ifndef PRODUCT
 850   } else if (!do_size) {
 851     if (size != 0) st->print("\n\t");
 852     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 853       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 855       } else {
 856         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       }
 858     } else {
 859       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 860         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     }
 865 #endif
 866   }
 867   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 868   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 869   int sz = (UseAVX > 2) ? 6 : 4;
 870   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 871       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 872   return size + sz;
 873 }
 874 
 875 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 876                             int src_hi, int dst_hi, int size, outputStream* st ) {
 877   // 32-bit
 878   if (cbuf) {
 879     MacroAssembler _masm(cbuf);
 880     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 881              as_Register(Matcher::_regEncode[src_lo]));
 882 #ifndef PRODUCT
 883   } else if (!do_size) {
 884     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 885 #endif
 886   }
  return (UseAVX > 2) ? 6 : 4;
 888 }
 889 
 890 
 891 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 892                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 893   // 32-bit
 894   if (cbuf) {
 895     MacroAssembler _masm(cbuf);
 896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 897              as_XMMRegister(Matcher::_regEncode[src_lo]));
 898 #ifndef PRODUCT
 899   } else if (!do_size) {
 900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 901 #endif
 902   }
  return (UseAVX > 2) ? 6 : 4;
 904 }
 905 
 906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 907   if( cbuf ) {
 908     emit_opcode(*cbuf, 0x8B );
 909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 910 #ifndef PRODUCT
 911   } else if( !do_size ) {
 912     if( size != 0 ) st->print("\n\t");
 913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 914 #endif
 915   }
 916   return size+2;
 917 }
 918 
 919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 920                                  int offset, int size, outputStream* st ) {
 921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 922     if( cbuf ) {
 923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 925 #ifndef PRODUCT
 926     } else if( !do_size ) {
 927       if( size != 0 ) st->print("\n\t");
 928       st->print("FLD    %s",Matcher::regName[src_lo]);
 929 #endif
 930     }
 931     size += 2;
 932   }
 933 
 934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
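  // The "register" handed to impl_helper below only supplies the ModRM /digit
  // field: EBX's encoding (3) selects FSTP (store and pop), EDX's encoding (2)
  // selects FST (store without popping).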
 935   const char *op_str;
 936   int op;
 937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 939     op = 0xDD;
 940   } else {                   // 32-bit store
 941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 942     op = 0xD9;
 943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 944   }
 945 
 946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 947 }
 948 
 949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 950 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 952 
 953 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 954                             int stack_offset, int reg, uint ireg, outputStream* st);
 955 
 956 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 957                                      int dst_offset, uint ireg, outputStream* st) {
 958   int calc_size = 0;
 959   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 960   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 961   switch (ireg) {
 962   case Op_VecS:
 963     calc_size = 3+src_offset_size + 3+dst_offset_size;
 964     break;
 965   case Op_VecD:
 966     calc_size = 3+src_offset_size + 3+dst_offset_size;
 967     src_offset += 4;
 968     dst_offset += 4;
 969     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 970     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 971     calc_size += 3+src_offset_size + 3+dst_offset_size;
 972     break;
 973   case Op_VecX:
 974   case Op_VecY:
 975   case Op_VecZ:
 976     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 977     break;
 978   default:
 979     ShouldNotReachHere();
 980   }
 981   if (cbuf) {
 982     MacroAssembler _masm(cbuf);
 983     int offset = __ offset();
 984     switch (ireg) {
 985     case Op_VecS:
 986       __ pushl(Address(rsp, src_offset));
 987       __ popl (Address(rsp, dst_offset));
 988       break;
 989     case Op_VecD:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       __ pushl(Address(rsp, src_offset+4));
 993       __ popl (Address(rsp, dst_offset+4));
 994       break;
 995     case Op_VecX:
 996       __ movdqu(Address(rsp, -16), xmm0);
 997       __ movdqu(xmm0, Address(rsp, src_offset));
 998       __ movdqu(Address(rsp, dst_offset), xmm0);
 999       __ movdqu(xmm0, Address(rsp, -16));
1000       break;
1001     case Op_VecY:
1002       __ vmovdqu(Address(rsp, -32), xmm0);
1003       __ vmovdqu(xmm0, Address(rsp, src_offset));
1004       __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
1007       __ evmovdqu(Address(rsp, -64), xmm0, 2);
1008       __ evmovdqu(xmm0, Address(rsp, src_offset), 2);
1009       __ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
1010       __ evmovdqu(xmm0, Address(rsp, -64), 2);
1011       break;
1012     default:
1013       ShouldNotReachHere();
1014     }
1015     int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
1017     return size;
1018 #ifndef PRODUCT
1019   } else if (!do_size) {
1020     switch (ireg) {
1021     case Op_VecS:
1022       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1023                 "popl    [rsp + #%d]",
1024                 src_offset, dst_offset);
1025       break;
1026     case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1031                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1032       break;
1033      case Op_VecX:
1034       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1035                 "movdqu  xmm0, [rsp + #%d]\n\t"
1036                 "movdqu  [rsp + #%d], xmm0\n\t"
1037                 "movdqu  xmm0, [rsp - #16]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecY:
1041       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
1047       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1048                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1049                 "vmovdqu [rsp + #%d], xmm0\n\t"
1050                 "vmovdqu xmm0, [rsp - #64]",
1051                 src_offset, dst_offset);
1052       break;
1053     default:
1054       ShouldNotReachHere();
1055     }
1056 #endif
1057   }
1058   return calc_size;
1059 }
1060 
1061 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1062   // Get registers to move
1063   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1064   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1065   OptoReg::Name dst_second = ra_->get_reg_second(this );
1066   OptoReg::Name dst_first = ra_->get_reg_first(this );
1067 
1068   enum RC src_second_rc = rc_class(src_second);
1069   enum RC src_first_rc = rc_class(src_first);
1070   enum RC dst_second_rc = rc_class(dst_second);
1071   enum RC dst_first_rc = rc_class(dst_first);
1072 
1073   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1074 
1075   // Generate spill code!
1076   int size = 0;
1077 
1078   if( src_first == dst_first && src_second == dst_second )
1079     return size;            // Self copy, no move
1080 
1081   if (bottom_type()->isa_vect() != NULL) {
1082     uint ireg = ideal_reg();
1083     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1084     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1085     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1086     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1087       // mem -> mem
1088       int src_offset = ra_->reg2offset(src_first);
1089       int dst_offset = ra_->reg2offset(dst_first);
1090       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1091     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1092       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1093     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1094       int stack_offset = ra_->reg2offset(dst_first);
1095       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1096     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1097       int stack_offset = ra_->reg2offset(src_first);
1098       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1099     } else {
1100       ShouldNotReachHere();
1101     }
1102   }
1103 
1104   // --------------------------------------
1105   // Check for mem-mem move.  push/pop to move.
1106   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1107     if( src_second == dst_first ) { // overlapping stack copy ranges
1108       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1109       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1110       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1111       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1112     }
1113     // move low bits
1114     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1115     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1116     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1117       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1118       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1119     }
1120     return size;
1121   }
1122 
1123   // --------------------------------------
1124   // Check for integer reg-reg copy
1125   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1126     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1127 
1128   // Check for integer store
1129   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1130     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1131 
1132   // Check for integer load
1133   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1134     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1135 
1136   // Check for integer reg-xmm reg copy
1137   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1138     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1139             "no 64 bit integer-float reg moves" );
1140     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1141   }
1142   // --------------------------------------
1143   // Check for float reg-reg copy
1144   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1145     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1146             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1147     if( cbuf ) {
1148 
1149       // Note the mucking with the register encode to compensate for the 0/1
1150       // indexing issue mentioned in a comment in the reg_def sections
1151       // for FPR registers many lines above here.
1152 
1153       if( src_first != FPR1L_num ) {
1154         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1155         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1156         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1157         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1158      } else {
1159         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1160         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1161      }
1162 #ifndef PRODUCT
1163     } else if( !do_size ) {
1164       if( size != 0 ) st->print("\n\t");
1165       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1166       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1167 #endif
1168     }
1169     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1170   }
1171 
1172   // Check for float store
1173   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1174     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1175   }
1176 
1177   // Check for float load
1178   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1179     int offset = ra_->reg2offset(src_first);
1180     const char *op_str;
1181     int op;
1182     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1183       op_str = "FLD_D";
1184       op = 0xDD;
1185     } else {                   // 32-bit load
1186       op_str = "FLD_S";
1187       op = 0xD9;
1188       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1189     }
1190     if( cbuf ) {
1191       emit_opcode  (*cbuf, op );
1192       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1193       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1194       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1195 #ifndef PRODUCT
1196     } else if( !do_size ) {
1197       if( size != 0 ) st->print("\n\t");
1198       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1199 #endif
1200     }
1201     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1202     return size + 3+offset_size+2;
1203   }
1204 
1205   // Check for xmm reg-reg copy
1206   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1207     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1208             (src_first+1 == src_second && dst_first+1 == dst_second),
1209             "no non-adjacent float-moves" );
1210     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1211   }
1212 
1213   // Check for xmm reg-integer reg copy
1214   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1215     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1216             "no 64 bit float-integer reg moves" );
1217     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1218   }
1219 
1220   // Check for xmm store
1221   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1222     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1223   }
1224 
1225   // Check for float xmm load
1226   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1227     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1228   }
1229 
1230   // Copy from float reg to xmm reg
1231   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1232     // copy to the top of stack from floating point reg
1233     // and use LEA to preserve flags
1234     if( cbuf ) {
1235       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1236       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1237       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1238       emit_d8(*cbuf,0xF8);
1239 #ifndef PRODUCT
1240     } else if( !do_size ) {
1241       if( size != 0 ) st->print("\n\t");
1242       st->print("LEA    ESP,[ESP-8]");
1243 #endif
1244     }
1245     size += 4;
1246 
1247     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1248 
1249     // Copy from the temp memory to the xmm reg.
1250     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1251 
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0x08);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP+8]");
1261 #endif
1262     }
1263     size += 4;
1264     return size;
1265   }
1266 
1267   assert( size > 0, "missed a case" );
1268 
1269   // --------------------------------------------------------------------
1270   // Check for second bits still needing moving.
1271   if( src_second == dst_second )
1272     return size;               // Self copy; no move
1273   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1274 
1275   // Check for second word int-int move
1276   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1277     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1278 
1279   // Check for second word integer store
1280   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1281     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1282 
1283   // Check for second word integer load
1284   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1285     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1286 
1287 
1288   Unimplemented();
1289   return 0; // Mute compiler
1290 }
1291 
1292 #ifndef PRODUCT
1293 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1294   implementation( NULL, ra_, false, st );
1295 }
1296 #endif
1297 
1298 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1299   implementation( &cbuf, ra_, false, NULL );
1300 }
1301 
1302 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1303   return implementation( NULL, ra_, true, NULL );
1304 }
1305 
1306 
1307 //=============================================================================
1308 #ifndef PRODUCT
1309 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1310   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1311   int reg = ra_->get_reg_first(this);
1312   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1313 }
1314 #endif
1315 
1316 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1318   int reg = ra_->get_encode(this);
1319   if( offset >= 128 ) {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x2, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d32(cbuf, offset);
1324   }
1325   else {
1326     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327     emit_rm(cbuf, 0x1, reg, 0x04);
1328     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329     emit_d8(cbuf, offset);
1330   }
1331 }
1332 
1333 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1335   if( offset >= 128 ) {
1336     return 7;
1337   }
1338   else {
1339     return 4;
1340   }
1341 }
1342 
1343 //=============================================================================
1344 #ifndef PRODUCT
1345 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1346   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1347   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1348   st->print_cr("\tNOP");
1349   st->print_cr("\tNOP");
1350   if( !OptoBreakpoint )
1351     st->print_cr("\tNOP");
1352 }
1353 #endif
1354 
1355 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1356   MacroAssembler masm(&cbuf);
1357 #ifdef ASSERT
1358   uint insts_size = cbuf.insts_size();
1359 #endif
1360   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1361   masm.jump_cc(Assembler::notEqual,
1362                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1363   /* WARNING these NOPs are critical so that verified entry point is properly
1364      aligned for patching by NativeJump::patch_verified_entry() */
1365   int nops_cnt = 2;
1366   if( !OptoBreakpoint ) // Leave space for int3
1367      nops_cnt += 1;
1368   masm.nop(nops_cnt);
1369 
1370   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1371 }
1372 
1373 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1374   return OptoBreakpoint ? 11 : 12;
1375 }
1376 
1377 
1378 //=============================================================================
1379 
1380 int Matcher::regnum_to_fpu_offset(int regnum) {
1381   return regnum - 32; // The FP registers are in the second chunk
1382 }
1383 
// This hook is only interesting on UltraSparc; returning true just means we have fast l2f conversion
1385 const bool Matcher::convL2FSupported(void) {
1386   return true;
1387 }
1388 
1389 // Is this branch offset short enough that a short branch can be used?
1390 //
1391 // NOTE: If the platform does not provide any short branch variants, then
1392 //       this method should return false for offset 0.
1393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1397   offset -= br_size;
1398 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1401   if (rule == jmpConUCF2_rule)
1402     return (-126 <= offset && offset <= 125);
1403   return (-128 <= offset && offset <= 127);
1404 }
1405 
1406 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1408   return false;
1409 }
1410 
1411 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1412 const bool Matcher::init_array_count_is_in_bytes = false;
1413 
1414 // Threshold size for cleararray.
1415 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1416 
1417 // Needs 2 CMOV's for longs.
1418 const int Matcher::long_cmove_cost() { return 1; }
1419 
1420 // No CMOVF/CMOVD with SSE/SSE2
1421 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1422 
1423 // Does the CPU require late expand (see block.cpp for description of late expand)?
1424 const bool Matcher::require_postalloc_expand = false;
1425 
1426 // Should the Matcher clone shifts on addressing modes, expecting them to
1427 // be subsumed into complex addressing expressions or compute them into
1428 // registers?  True for Intel but false for most RISCs
1429 const bool Matcher::clone_shift_expressions = true;
1430 
1431 // Do we need to mask the count passed to shift instructions or does
1432 // the cpu only look at the lower 5/6 bits anyway?
1433 const bool Matcher::need_masked_shift_count = false;
1434 
1435 bool Matcher::narrow_oop_use_complex_address() {
1436   ShouldNotCallThis();
1437   return true;
1438 }
1439 
1440 bool Matcher::narrow_klass_use_complex_address() {
1441   ShouldNotCallThis();
1442   return true;
1443 }
1444 
1445 
1446 // Is it better to copy float constants, or load them directly from memory?
1447 // Intel can load a float constant from a direct address, requiring no
1448 // extra registers.  Most RISCs will have to materialize an address into a
1449 // register first, so they would do better to copy the constant from stack.
1450 const bool Matcher::rematerialize_float_constants = true;
1451 
1452 // If CPU can load and store mis-aligned doubles directly then no fixup is
1453 // needed.  Else we split the double into 2 integer pieces and move it
1454 // piece-by-piece.  Only happens when passing doubles into C code as the
1455 // Java calling convention forces doubles to be aligned.
1456 const bool Matcher::misaligned_doubles_ok = true;
1457 
1458 
1459 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1460   // Get the memory operand from the node
1461   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1462   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1463   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1464   uint opcnt     = 1;                 // First operand
1465   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1466   while( idx >= skipped+num_edges ) {
1467     skipped += num_edges;
1468     opcnt++;                          // Bump operand count
1469     assert( opcnt < numopnds, "Accessing non-existent operand" );
1470     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1471   }
1472 
1473   MachOper *memory = node->_opnds[opcnt];
1474   MachOper *new_memory = NULL;
1475   switch (memory->opcode()) {
1476   case DIRECT:
1477   case INDOFFSET32X:
1478     // No transformation necessary.
1479     return;
1480   case INDIRECT:
1481     new_memory = new indirect_win95_safeOper( );
1482     break;
1483   case INDOFFSET8:
1484     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1485     break;
1486   case INDOFFSET32:
1487     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1488     break;
1489   case INDINDEXOFFSET:
1490     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1491     break;
1492   case INDINDEXSCALE:
1493     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1494     break;
1495   case INDINDEXSCALEOFFSET:
1496     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1497     break;
1498   case LOAD_LONG_INDIRECT:
1499   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1501     return;
1502   default:
1503     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1504     return;
1505   }
1506   node->_opnds[opcnt] = new_memory;
1507 }
1508 
1509 // Advertise here if the CPU requires explicit rounding operations
1510 // to implement the UseStrictFP mode.
1511 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1512 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1515 bool Matcher::float_in_double() { return (UseSSE == 0); }
1516 
1517 // Do ints take an entire long register or just half?
1518 const bool Matcher::int_in_long = false;
1519 
1520 // Return whether or not this register is ever used as an argument.  This
1521 // function is used on startup to build the trampoline stubs in generateOptoStub.
1522 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1524 bool Matcher::can_be_java_arg( int reg ) {
1525   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1526   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1527   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1528   return false;
1529 }
1530 
1531 bool Matcher::is_spillable_arg( int reg ) {
1532   return can_be_java_arg(reg);
1533 }
1534 
1535 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than the code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because it cannot be negated
  // into a correct positive 32-bit value).
1541   return VM_Version::has_fast_idiv() &&
1542          (divisor == (int)divisor && divisor != min_jint);
1543 }
1544 
1545 // Register for DIVI projection of divmodI
1546 RegMask Matcher::divI_proj_mask() {
1547   return EAX_REG_mask();
1548 }
1549 
1550 // Register for MODI projection of divmodI
1551 RegMask Matcher::modI_proj_mask() {
1552   return EDX_REG_mask();
1553 }
1554 
1555 // Register for DIVL projection of divmodL
1556 RegMask Matcher::divL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 // Register for MODL projection of divmodL
1562 RegMask Matcher::modL_proj_mask() {
1563   ShouldNotReachHere();
1564   return RegMask();
1565 }
1566 
1567 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1568   return NO_REG_mask();
1569 }
1570 
1571 // Returns true if the high 32 bits of the value is known to be zero.
1572 bool is_operand_hi32_zero(Node* n) {
1573   int opc = n->Opcode();
1574   if (opc == Op_AndL) {
1575     Node* o2 = n->in(2);
1576     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1577       return true;
1578     }
1579   }
1580   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1581     return true;
1582   }
1583   return false;
1584 }
1585 
1586 %}
1587 
1588 //----------ENCODING BLOCK-----------------------------------------------------
1589 // This block specifies the encoding classes used by the compiler to output
1590 // byte streams.  Encoding classes generate functions which are called by
1591 // Machine Instruction Nodes in order to generate the bit encoding of the
1592 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
1595 // operand to generate a function which returns its register number when
1596 // queried.   CONST_INTER causes an operand to generate a function which
1597 // returns the value of the constant when queried.  MEMORY_INTER causes an
1598 // operand to generate four functions which return the Base Register, the
1599 // Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
1603 // Instructions specify two basic values for encoding.  They use the
1604 // ins_encode keyword to specify their encoding class (which must be one of
1605 // the class names specified in the encoding block), and they use the
1606 // opcode keyword to specify, in order, their primary, secondary, and
1607 // tertiary opcode.  Only the opcode sections which a particular instruction
1608 // needs for encoding need to be specified.
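//
// For example (an illustrative sketch, not one of the instructions defined
// here; effect, format and ins_pipe details are omitted), a register-register
// integer add could reference the generic encodings below:
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                        // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit opcode, then ModRM byte
//   %}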
1609 encode %{
1610   // Build emit functions for each basic byte or larger field in the intel
1611   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1612   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically.
1616 
1617   // Emit primary opcode
1618   enc_class OpcP %{
1619     emit_opcode(cbuf, $primary);
1620   %}
1621 
1622   // Emit secondary opcode
1623   enc_class OpcS %{
1624     emit_opcode(cbuf, $secondary);
1625   %}
1626 
1627   // Emit opcode directly
1628   enc_class Opcode(immI d8) %{
1629     emit_opcode(cbuf, $d8$$constant);
1630   %}
1631 
1632   enc_class SizePrefix %{
1633     emit_opcode(cbuf,0x66);
1634   %}
1635 
1636   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1637     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1638   %}
1639 
1640   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1641     emit_opcode(cbuf,$opcode$$constant);
1642     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1643   %}
1644 
1645   enc_class mov_r32_imm0( rRegI dst ) %{
1646     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1647     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1648   %}
1649 
1650   enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for the
    // special case as described in the JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
1663     //
1664     //  81 F8 00 00 00 80    cmp         rax,80000000h
1665     //  0F 85 0B 00 00 00    jne         normal_case
1666     //  33 D2                xor         rdx,edx
1667     //  83 F9 FF             cmp         rcx,0FFh
1668     //  0F 84 03 00 00 00    je          done
1669     //                  normal_case:
1670     //  99                   cdq
1671     //  F7 F9                idiv        rax,ecx
1672     //                  done:
1673     //
1674     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1675     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1676     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1677     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1678     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1679     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1680     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1681     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1682     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1683     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1685     // normal_case:
1686     emit_opcode(cbuf,0x99);                                         // cdq
1687     // idiv (note: must be emitted by the user of this rule)
1688     // normal:
1689   %}
1690 
1691   // Dense encoding for older common ops
1692   enc_class Opc_plus(immI opcode, rRegI reg) %{
1693     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1694   %}
1695 
1696 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1698   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1699     // Check for 8-bit immediate, and set sign extend bit in opcode
1700     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1701       emit_opcode(cbuf, $primary | 0x02);
1702     }
1703     else {                          // If 32-bit immediate
1704       emit_opcode(cbuf, $primary);
1705     }
1706   %}
1707 
1708   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1709     // Emit primary opcode and set sign-extend bit
1710     // Check for 8-bit immediate, and set sign extend bit in opcode
1711     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716     // Emit r/m byte with secondary opcode, after primary opcode.
1717     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1718   %}
1719 
1720   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1721     // Check for 8-bit immediate, and set sign extend bit in opcode
1722     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1723       $$$emit8$imm$$constant;
1724     }
1725     else {                          // If 32-bit immediate
1726       // Output immediate
1727       $$$emit32$imm$$constant;
1728     }
1729   %}
1730 
1731   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1732     // Emit primary opcode and set sign-extend bit
1733     // Check for 8-bit immediate, and set sign extend bit in opcode
1734     int con = (int)$imm$$constant; // Throw away top bits
1735     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1736     // Emit r/m byte with secondary opcode, after primary opcode.
1737     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1738     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1739     else                               emit_d32(cbuf,con);
1740   %}
1741 
1742   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1743     // Emit primary opcode and set sign-extend bit
1744     // Check for 8-bit immediate, and set sign extend bit in opcode
1745     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1746     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1747     // Emit r/m byte with tertiary opcode, after primary opcode.
1748     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1749     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1750     else                               emit_d32(cbuf,con);
1751   %}
1752 
1753   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1754     emit_cc(cbuf, $secondary, $dst$$reg );
1755   %}
1756 
1757   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1758     int destlo = $dst$$reg;
1759     int desthi = HIGH_FROM_LOW(destlo);
1760     // bswap lo
1761     emit_opcode(cbuf, 0x0F);
1762     emit_cc(cbuf, 0xC8, destlo);
1763     // bswap hi
1764     emit_opcode(cbuf, 0x0F);
1765     emit_cc(cbuf, 0xC8, desthi);
1766     // xchg lo and hi
1767     emit_opcode(cbuf, 0x87);
1768     emit_rm(cbuf, 0x3, destlo, desthi);
1769   %}
1770 
1771   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1772     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1773   %}
1774 
1775   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1776     $$$emit8$primary;
1777     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1778   %}
1779 
1780   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1781     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1782     emit_d8(cbuf, op >> 8 );
1783     emit_d8(cbuf, op & 255);
1784   %}
1785 
1786   // emulate a CMOV with a conditional branch around a MOV
1787   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1788     // Invert sense of branch from sense of CMOV
1789     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1790     emit_d8( cbuf, $brOffs$$constant );
1791   %}
1792 
1793   enc_class enc_PartialSubtypeCheck( ) %{
1794     Register Redi = as_Register(EDI_enc); // result register
1795     Register Reax = as_Register(EAX_enc); // super class
1796     Register Recx = as_Register(ECX_enc); // killed
1797     Register Resi = as_Register(ESI_enc); // sub class
1798     Label miss;
1799 
1800     MacroAssembler _masm(&cbuf);
1801     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1802                                      NULL, &miss,
1803                                      /*set_cond_codes:*/ true);
1804     if ($primary) {
1805       __ xorptr(Redi, Redi);
1806     }
1807     __ bind(miss);
1808   %}
1809 
1810   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1811     MacroAssembler masm(&cbuf);
1812     int start = masm.offset();
1813     if (UseSSE >= 2) {
1814       if (VerifyFPU) {
1815         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1816       }
1817     } else {
1818       // External c_calling_convention expects the FPU stack to be 'clean'.
1819       // Compiled code leaves it dirty.  Do cleanup now.
1820       masm.empty_FPU_stack();
1821     }
1822     if (sizeof_FFree_Float_Stack_All == -1) {
1823       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1824     } else {
1825       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1826     }
1827   %}
1828 
1829   enc_class Verify_FPU_For_Leaf %{
1830     if( VerifyFPU ) {
1831       MacroAssembler masm(&cbuf);
1832       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1833     }
1834   %}
1835 
1836   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1837     // This is the instruction starting address for relocation info.
1838     cbuf.set_insts_mark();
1839     $$$emit8$primary;
1840     // CALL directly to the runtime
1841     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1842                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1843 
1844     if (UseSSE >= 2) {
1845       MacroAssembler _masm(&cbuf);
1846       BasicType rt = tf()->return_type();
1847 
1848       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1849         // A C runtime call where the return value is unused.  In SSE2+
1850         // mode the result needs to be removed from the FPU stack.  It's
1851         // likely that this function call could be removed by the
1852         // optimizer if the C function is a pure function.
1853         __ ffree(0);
1854       } else if (rt == T_FLOAT) {
1855         __ lea(rsp, Address(rsp, -4));
1856         __ fstp_s(Address(rsp, 0));
1857         __ movflt(xmm0, Address(rsp, 0));
1858         __ lea(rsp, Address(rsp,  4));
1859       } else if (rt == T_DOUBLE) {
1860         __ lea(rsp, Address(rsp, -8));
1861         __ fstp_d(Address(rsp, 0));
1862         __ movdbl(xmm0, Address(rsp, 0));
1863         __ lea(rsp, Address(rsp,  8));
1864       }
1865     }
1866   %}
1867 
1868 
1869   enc_class pre_call_resets %{
1870     // If method sets FPU control word restore it here
1871     debug_only(int off0 = cbuf.insts_size());
1872     if (ra_->C->in_24_bit_fp_mode()) {
1873       MacroAssembler _masm(&cbuf);
1874       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1875     }
1876     if (ra_->C->max_vector_size() > 16) {
1877       // Clear upper bits of YMM registers when current compiled code uses
1878       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1879       MacroAssembler _masm(&cbuf);
1880       __ vzeroupper();
1881     }
1882     debug_only(int off1 = cbuf.insts_size());
1883     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1884   %}
1885 
1886   enc_class post_call_FPU %{
1887     // If method sets FPU control word do it here also
1888     if (Compile::current()->in_24_bit_fp_mode()) {
1889       MacroAssembler masm(&cbuf);
1890       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891     }
1892   %}
1893 
1894   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1895     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1896     // who we intended to call.
1897     cbuf.set_insts_mark();
1898     $$$emit8$primary;
1899     if (!_method) {
1900       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1901                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1902     } else if (_optimized_virtual) {
1903       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1904                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1905     } else {
1906       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1907                      static_call_Relocation::spec(), RELOC_IMM32 );
1908     }
1909     if (_method) {  // Emit stub for static call.
1910       CompiledStaticCall::emit_to_interp_stub(cbuf);
1911     }
1912   %}
1913 
1914   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1915     MacroAssembler _masm(&cbuf);
1916     __ ic_call((address)$meth$$method);
1917   %}
1918 
1919   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1920     int disp = in_bytes(Method::from_compiled_offset());
1921     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1922 
1923     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1924     cbuf.set_insts_mark();
1925     $$$emit8$primary;
1926     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1927     emit_d8(cbuf, disp);             // Displacement
1928 
1929   %}
1930 
1931 //   Following encoding is no longer used, but may be restored if calling
1932 //   convention changes significantly.
1933 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1934 //
1935 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1936 //     // int ic_reg     = Matcher::inline_cache_reg();
1937 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1938 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1939 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1940 //
1941 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1942 //     // // so we load it immediately before the call
1943 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1944 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1945 //
1946 //     // xor rbp,ebp
1947 //     emit_opcode(cbuf, 0x33);
1948 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1949 //
1950 //     // CALL to interpreter.
1951 //     cbuf.set_insts_mark();
1952 //     $$$emit8$primary;
1953 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1954 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1955 //   %}
1956 
1957   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1958     $$$emit8$primary;
1959     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1960     $$$emit8$shift$$constant;
1961   %}
1962 
1963   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1964     // Load immediate does not have a zero or sign extended version
1965     // for 8-bit immediates
1966     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1967     $$$emit32$src$$constant;
1968   %}
1969 
1970   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1971     // Load immediate does not have a zero or sign extended version
1972     // for 8-bit immediates
1973     emit_opcode(cbuf, $primary + $dst$$reg);
1974     $$$emit32$src$$constant;
1975   %}
1976 
1977   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1978     // Load immediate does not have a zero or sign extended version
1979     // for 8-bit immediates
1980     int dst_enc = $dst$$reg;
1981     int src_con = $src$$constant & 0x0FFFFFFFFL;
1982     if (src_con == 0) {
1983       // xor dst, dst
1984       emit_opcode(cbuf, 0x33);
1985       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1986     } else {
1987       emit_opcode(cbuf, $primary + dst_enc);
1988       emit_d32(cbuf, src_con);
1989     }
1990   %}
1991 
1992   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     int dst_enc = $dst$$reg + 2;
1996     int src_con = ((julong)($src$$constant)) >> 32;
1997     if (src_con == 0) {
1998       // xor dst, dst
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2001     } else {
2002       emit_opcode(cbuf, $primary + dst_enc);
2003       emit_d32(cbuf, src_con);
2004     }
2005   %}
2006 
2007 
2008   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2009   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2010     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2011   %}
2012 
2013   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2014     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2015   %}
2016 
2017   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2018     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2019   %}
2020 
2021   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2022     $$$emit8$primary;
2023     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2024   %}
2025 
2026   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2027     $$$emit8$secondary;
2028     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2029   %}
2030 
2031   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2033   %}
2034 
2035   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2040     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2044     // Output immediate
2045     $$$emit32$src$$constant;
2046   %}
2047 
2048   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2049     // Output Float immediate bits
2050     jfloat jf = $src$$constant;
2051     int    jf_as_bits = jint_cast( jf );
2052     emit_d32(cbuf, jf_as_bits);
2053   %}
2054 
2055   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2056     // Output Float immediate bits
2057     jfloat jf = $src$$constant;
2058     int    jf_as_bits = jint_cast( jf );
2059     emit_d32(cbuf, jf_as_bits);
2060   %}
2061 
2062   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2063     // Output immediate
2064     $$$emit16$src$$constant;
2065   %}
2066 
2067   enc_class Con_d32(immI src) %{
2068     emit_d32(cbuf,$src$$constant);
2069   %}
2070 
2071   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2072     // Output immediate memory reference
2073     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2074     emit_d32(cbuf, 0x00);
2075   %}
2076 
2077   enc_class lock_prefix( ) %{
2078     if( os::is_MP() )
2079       emit_opcode(cbuf,0xF0);         // [Lock]
2080   %}
2081 
2082   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2087   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2088 
2089     // XCHG  rbx,ecx
2090     emit_opcode(cbuf,0x87);
2091     emit_opcode(cbuf,0xD9);
2092     // [Lock]
2093     if( os::is_MP() )
2094       emit_opcode(cbuf,0xF0);
2095     // CMPXCHG8 [Eptr]
2096     emit_opcode(cbuf,0x0F);
2097     emit_opcode(cbuf,0xC7);
2098     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2099     // XCHG  rbx,ecx
2100     emit_opcode(cbuf,0x87);
2101     emit_opcode(cbuf,0xD9);
2102   %}
2103 
2104   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2105     // [Lock]
2106     if( os::is_MP() )
2107       emit_opcode(cbuf,0xF0);
2108 
2109     // CMPXCHG [Eptr]
2110     emit_opcode(cbuf,0x0F);
2111     emit_opcode(cbuf,0xB1);
2112     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2113   %}
2114 
2115   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2116     int res_encoding = $res$$reg;
2117 
2118     // MOV  res,0
2119     emit_opcode( cbuf, 0xB8 + res_encoding);
2120     emit_d32( cbuf, 0 );
2121     // JNE,s  fail
2122     emit_opcode(cbuf,0x75);
2123     emit_d8(cbuf, 5 );
2124     // MOV  res,1
2125     emit_opcode( cbuf, 0xB8 + res_encoding);
2126     emit_d32( cbuf, 1 );
2127     // fail:
2128   %}
2129 
2130   enc_class set_instruction_start( ) %{
2131     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2132   %}
2133 
2134   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2135     int reg_encoding = $ereg$$reg;
2136     int base  = $mem$$base;
2137     int index = $mem$$index;
2138     int scale = $mem$$scale;
2139     int displace = $mem$$disp;
2140     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2141     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2142   %}
2143 
2144   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2145     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2146     int base  = $mem$$base;
2147     int index = $mem$$index;
2148     int scale = $mem$$scale;
2149     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2150     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2151     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2152   %}
2153 
2154   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2155     int r1, r2;
2156     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2157     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2158     emit_opcode(cbuf,0x0F);
2159     emit_opcode(cbuf,$tertiary);
2160     emit_rm(cbuf, 0x3, r1, r2);
2161     emit_d8(cbuf,$cnt$$constant);
2162     emit_d8(cbuf,$primary);
2163     emit_rm(cbuf, 0x3, $secondary, r1);
2164     emit_d8(cbuf,$cnt$$constant);
2165   %}
2166 
2167   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2168     emit_opcode( cbuf, 0x8B ); // Move
2169     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2170     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2171       emit_d8(cbuf,$primary);
2172       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2173       emit_d8(cbuf,$cnt$$constant-32);
2174     }
2175     emit_d8(cbuf,$primary);
2176     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2177     emit_d8(cbuf,31);
2178   %}
2179 
2180   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2181     int r1, r2;
2182     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2183     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2184 
2185     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2186     emit_rm(cbuf, 0x3, r1, r2);
2187     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2188       emit_opcode(cbuf,$primary);
2189       emit_rm(cbuf, 0x3, $secondary, r1);
2190       emit_d8(cbuf,$cnt$$constant-32);
2191     }
2192     emit_opcode(cbuf,0x33);  // XOR r2,r2
2193     emit_rm(cbuf, 0x3, r2, r2);
2194   %}
2195 
2196   // Clone of RegMem but accepts an extra parameter to access each
2197   // half of a double in memory; it never needs relocation info.
2198   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2199     emit_opcode(cbuf,$opcode$$constant);
2200     int reg_encoding = $rm_reg$$reg;
2201     int base     = $mem$$base;
2202     int index    = $mem$$index;
2203     int scale    = $mem$$scale;
2204     int displace = $mem$$disp + $disp_for_half$$constant;
2205     relocInfo::relocType disp_reloc = relocInfo::none;
2206     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2207   %}
2208 
2209   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2210   //
2211   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2212   // and it never needs relocation information.
2213   // Frequently used to move data between FPU's Stack Top and memory.
2214   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2215     int rm_byte_opcode = $rm_opcode$$constant;
2216     int base     = $mem$$base;
2217     int index    = $mem$$index;
2218     int scale    = $mem$$scale;
2219     int displace = $mem$$disp;
2220     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2221     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2222   %}
2223 
2224   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2225     int rm_byte_opcode = $rm_opcode$$constant;
2226     int base     = $mem$$base;
2227     int index    = $mem$$index;
2228     int scale    = $mem$$scale;
2229     int displace = $mem$$disp;
2230     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2231     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2232   %}
2233 
2234   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2235     int reg_encoding = $dst$$reg;
2236     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2237     int index        = 0x04;            // 0x04 indicates no index
2238     int scale        = 0x00;            // 0x00 indicates no scale
2239     int displace     = $src1$$constant; // 0x00 indicates no displacement
2240     relocInfo::relocType disp_reloc = relocInfo::none;
2241     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2242   %}
2243 
2244   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2245     // Compare dst,src
2246     emit_opcode(cbuf,0x3B);
2247     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2248     // jmp dst < src around move
2249     emit_opcode(cbuf,0x7C);
2250     emit_d8(cbuf,2);
2251     // move dst,src
2252     emit_opcode(cbuf,0x8B);
2253     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2254   %}
2255 
2256   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2257     // Compare dst,src
2258     emit_opcode(cbuf,0x3B);
2259     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2260     // jmp dst > src around move
2261     emit_opcode(cbuf,0x7F);
2262     emit_d8(cbuf,2);
2263     // move dst,src
2264     emit_opcode(cbuf,0x8B);
2265     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2266   %}
2267 
2268   enc_class enc_FPR_store(memory mem, regDPR src) %{
2269     // If src is FPR1, we can just FST to store it.
2270     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2271     int reg_encoding = 0x2; // Just store
2272     int base  = $mem$$base;
2273     int index = $mem$$index;
2274     int scale = $mem$$scale;
2275     int displace = $mem$$disp;
2276     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2277     if( $src$$reg != FPR1L_enc ) {
2278       reg_encoding = 0x3;  // Store & pop
2279       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2280       emit_d8( cbuf, 0xC0-1+$src$$reg );
2281     }
2282     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2283     emit_opcode(cbuf,$primary);
2284     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2285   %}
2286 
2287   enc_class neg_reg(rRegI dst) %{
2288     // NEG $dst
2289     emit_opcode(cbuf,0xF7);
2290     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2291   %}
2292 
2293   enc_class setLT_reg(eCXRegI dst) %{
2294     // SETLT $dst
2295     emit_opcode(cbuf,0x0F);
2296     emit_opcode(cbuf,0x9C);
2297     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2298   %}
2299 
2300   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2301     int tmpReg = $tmp$$reg;
2302 
2303     // SUB $p,$q
2304     emit_opcode(cbuf,0x2B);
2305     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2306     // SBB $tmp,$tmp
2307     emit_opcode(cbuf,0x1B);
2308     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2309     // AND $tmp,$y
2310     emit_opcode(cbuf,0x23);
2311     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2312     // ADD $p,$tmp
2313     emit_opcode(cbuf,0x03);
2314     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2315   %}
2316 
2317   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2318     // TEST shift,32
2319     emit_opcode(cbuf,0xF7);
2320     emit_rm(cbuf, 0x3, 0, ECX_enc);
2321     emit_d32(cbuf,0x20);
2322     // JEQ,s small
2323     emit_opcode(cbuf, 0x74);
2324     emit_d8(cbuf, 0x04);
2325     // MOV    $dst.hi,$dst.lo
2326     emit_opcode( cbuf, 0x8B );
2327     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2328     // CLR    $dst.lo
2329     emit_opcode(cbuf, 0x33);
2330     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2331 // small:
2332     // SHLD   $dst.hi,$dst.lo,$shift
2333     emit_opcode(cbuf,0x0F);
2334     emit_opcode(cbuf,0xA5);
2335     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2336     // SHL    $dst.lo,$shift"
2337     emit_opcode(cbuf,0xD3);
2338     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2339   %}
2340 
2341   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2342     // TEST shift,32
2343     emit_opcode(cbuf,0xF7);
2344     emit_rm(cbuf, 0x3, 0, ECX_enc);
2345     emit_d32(cbuf,0x20);
2346     // JEQ,s small
2347     emit_opcode(cbuf, 0x74);
2348     emit_d8(cbuf, 0x04);
2349     // MOV    $dst.lo,$dst.hi
2350     emit_opcode( cbuf, 0x8B );
2351     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2352     // CLR    $dst.hi
2353     emit_opcode(cbuf, 0x33);
2354     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2355 // small:
2356     // SHRD   $dst.lo,$dst.hi,$shift
2357     emit_opcode(cbuf,0x0F);
2358     emit_opcode(cbuf,0xAD);
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2360     // SHR    $dst.hi,$shift"
2361     emit_opcode(cbuf,0xD3);
2362     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2363   %}
2364 
2365   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2366     // TEST shift,32
2367     emit_opcode(cbuf,0xF7);
2368     emit_rm(cbuf, 0x3, 0, ECX_enc);
2369     emit_d32(cbuf,0x20);
2370     // JEQ,s small
2371     emit_opcode(cbuf, 0x74);
2372     emit_d8(cbuf, 0x05);
2373     // MOV    $dst.lo,$dst.hi
2374     emit_opcode( cbuf, 0x8B );
2375     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2376     // SAR    $dst.hi,31
2377     emit_opcode(cbuf, 0xC1);
2378     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2379     emit_d8(cbuf, 0x1F );
2380 // small:
2381     // SHRD   $dst.lo,$dst.hi,$shift
2382     emit_opcode(cbuf,0x0F);
2383     emit_opcode(cbuf,0xAD);
2384     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2385     // SAR    $dst.hi,$shift"
2386     emit_opcode(cbuf,0xD3);
2387     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2388   %}
2389 
2390 
2391   // ----------------- Encodings for floating point unit -----------------
2392   // May leave result in FPU-TOS or FPU reg depending on opcodes
2393   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2394     $$$emit8$primary;
2395     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2396   %}
2397 
2398   // Pop argument in FPR0 with FSTP ST(0)
2399   enc_class PopFPU() %{
2400     emit_opcode( cbuf, 0xDD );
2401     emit_d8( cbuf, 0xD8 );
2402   %}
2403 
2404   // !!!!! equivalent to Pop_Reg_F
2405   enc_class Pop_Reg_DPR( regDPR dst ) %{
2406     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2407     emit_d8( cbuf, 0xD8+$dst$$reg );
2408   %}
2409 
2410   enc_class Push_Reg_DPR( regDPR dst ) %{
2411     emit_opcode( cbuf, 0xD9 );
2412     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2413   %}
2414 
2415   enc_class strictfp_bias1( regDPR dst ) %{
2416     emit_opcode( cbuf, 0xDB );           // FLD m80real
2417     emit_opcode( cbuf, 0x2D );
2418     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2419     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2420     emit_opcode( cbuf, 0xC8+$dst$$reg );
2421   %}
2422 
2423   enc_class strictfp_bias2( regDPR dst ) %{
2424     emit_opcode( cbuf, 0xDB );           // FLD m80real
2425     emit_opcode( cbuf, 0x2D );
2426     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2427     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2428     emit_opcode( cbuf, 0xC8+$dst$$reg );
2429   %}
2430 
2431   // Special case for moving an integer register to a stack slot.
2432   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2433     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2434   %}
2435 
2436   // Special case for moving a register to a stack slot.
2437   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2438     // Opcode already emitted
2439     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2440     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2441     emit_d32(cbuf, $dst$$disp);   // Displacement
2442   %}
2443 
2444   // Push the integer in stackSlot 'src' onto FP-stack
2445   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2446     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2447   %}
2448 
2449   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2450   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2451     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2452   %}
2453 
2454   // Same as Pop_Mem_F except for opcode
2455   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2456   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2457     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2458   %}
2459 
2460   enc_class Pop_Reg_FPR( regFPR dst ) %{
2461     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2462     emit_d8( cbuf, 0xD8+$dst$$reg );
2463   %}
2464 
2465   enc_class Push_Reg_FPR( regFPR dst ) %{
2466     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2467     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2468   %}
2469 
2470   // Push FPU's float to a stack-slot, and pop FPU-stack
2471   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2472     int pop = 0x02;
2473     if ($src$$reg != FPR1L_enc) {
2474       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2475       emit_d8( cbuf, 0xC0-1+$src$$reg );
2476       pop = 0x03;
2477     }
2478     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2479   %}
2480 
2481   // Push FPU's double to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2483     int pop = 0x02;
2484     if ($src$$reg != FPR1L_enc) {
2485       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2486       emit_d8( cbuf, 0xC0-1+$src$$reg );
2487       pop = 0x03;
2488     }
2489     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2490   %}
2491 
2492   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2493   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2494     int pop = 0xD0 - 1; // -1 since we skip FLD
2495     if ($src$$reg != FPR1L_enc) {
2496       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2497       emit_d8( cbuf, 0xC0-1+$src$$reg );
2498       pop = 0xD8;
2499     }
2500     emit_opcode( cbuf, 0xDD );
2501     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2502   %}
2503 
2504 
2505   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2506     // load dst in FPR0
2507     emit_opcode( cbuf, 0xD9 );
2508     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2509     if ($src$$reg != FPR1L_enc) {
2510       // fincstp
2511       emit_opcode (cbuf, 0xD9);
2512       emit_opcode (cbuf, 0xF7);
2513       // swap src with FPR1:
2514       // FXCH FPR1 with src
2515       emit_opcode(cbuf, 0xD9);
2516       emit_d8(cbuf, 0xC8-1+$src$$reg );
2517       // fdecstp
2518       emit_opcode (cbuf, 0xD9);
2519       emit_opcode (cbuf, 0xF6);
2520     }
2521   %}
2522 
2523   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2524     MacroAssembler _masm(&cbuf);
2525     __ subptr(rsp, 8);
2526     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2527     __ fld_d(Address(rsp, 0));
2528     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2529     __ fld_d(Address(rsp, 0));
2530   %}
2531 
2532   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2533     MacroAssembler _masm(&cbuf);
2534     __ subptr(rsp, 4);
2535     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2536     __ fld_s(Address(rsp, 0));
2537     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2538     __ fld_s(Address(rsp, 0));
2539   %}
2540 
2541   enc_class Push_ResultD(regD dst) %{
2542     MacroAssembler _masm(&cbuf);
2543     __ fstp_d(Address(rsp, 0));
2544     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2545     __ addptr(rsp, 8);
2546   %}
2547 
2548   enc_class Push_ResultF(regF dst, immI d8) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ fstp_s(Address(rsp, 0));
2551     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2552     __ addptr(rsp, $d8$$constant);
2553   %}
2554 
2555   enc_class Push_SrcD(regD src) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560   %}
2561 
2562   enc_class push_stack_temp_qword() %{
2563     MacroAssembler _masm(&cbuf);
2564     __ subptr(rsp, 8);
2565   %}
2566 
2567   enc_class pop_stack_temp_qword() %{
2568     MacroAssembler _masm(&cbuf);
2569     __ addptr(rsp, 8);
2570   %}
2571 
2572   enc_class push_xmm_to_fpr1(regD src) %{
2573     MacroAssembler _masm(&cbuf);
2574     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class Push_Result_Mod_DPR( regDPR src) %{
2579     if ($src$$reg != FPR1L_enc) {
2580       // fincstp
2581       emit_opcode (cbuf, 0xD9);
2582       emit_opcode (cbuf, 0xF7);
2583       // FXCH FPR1 with src
2584       emit_opcode(cbuf, 0xD9);
2585       emit_d8(cbuf, 0xC8-1+$src$$reg );
2586       // fdecstp
2587       emit_opcode (cbuf, 0xD9);
2588       emit_opcode (cbuf, 0xF6);
2589     }
2590     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2591     // // FSTP   FPR$dst$$reg
2592     // emit_opcode( cbuf, 0xDD );
2593     // emit_d8( cbuf, 0xD8+$dst$$reg );
2594   %}
2595 
2596   enc_class fnstsw_sahf_skip_parity() %{
2597     // fnstsw ax
2598     emit_opcode( cbuf, 0xDF );
2599     emit_opcode( cbuf, 0xE0 );
2600     // sahf
2601     emit_opcode( cbuf, 0x9E );
2602     // jnp  ::skip
2603     emit_opcode( cbuf, 0x7B );
2604     emit_opcode( cbuf, 0x05 );
2605   %}
2606 
2607   enc_class emitModDPR() %{
2608     // fprem must be iterative
2609     // :: loop
2610     // fprem
2611     emit_opcode( cbuf, 0xD9 );
2612     emit_opcode( cbuf, 0xF8 );
2613     // wait
2614     emit_opcode( cbuf, 0x9b );
2615     // fnstsw ax
2616     emit_opcode( cbuf, 0xDF );
2617     emit_opcode( cbuf, 0xE0 );
2618     // sahf
2619     emit_opcode( cbuf, 0x9E );
2620     // jp  ::loop
2621     emit_opcode( cbuf, 0x0F );
2622     emit_opcode( cbuf, 0x8A );
2623     emit_opcode( cbuf, 0xF4 );
2624     emit_opcode( cbuf, 0xFF );
2625     emit_opcode( cbuf, 0xFF );
2626     emit_opcode( cbuf, 0xFF );
2627   %}
2628 
2629   enc_class fpu_flags() %{
2630     // fnstsw_ax
2631     emit_opcode( cbuf, 0xDF);
2632     emit_opcode( cbuf, 0xE0);
2633     // test ax,0x0400
2634     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2635     emit_opcode( cbuf, 0xA9 );
2636     emit_d16   ( cbuf, 0x0400 );
2637     // // // This sequence works, but stalls for 12-16 cycles on PPro
2638     // // test rax,0x0400
2639     // emit_opcode( cbuf, 0xA9 );
2640     // emit_d32   ( cbuf, 0x00000400 );
2641     //
2642     // jz exit (no unordered comparison)
2643     emit_opcode( cbuf, 0x74 );
2644     emit_d8    ( cbuf, 0x02 );
2645     // mov ah,1 - treat as LT case (set carry flag)
2646     emit_opcode( cbuf, 0xB4 );
2647     emit_d8    ( cbuf, 0x01 );
2648     // sahf
2649     emit_opcode( cbuf, 0x9E);
2650   %}
2651 
2652   enc_class cmpF_P6_fixup() %{
2653     // Fixup the integer flags in case comparison involved a NaN
2654     //
2655     // JNP exit (no unordered comparison, P-flag is set by NaN)
2656     emit_opcode( cbuf, 0x7B );
2657     emit_d8    ( cbuf, 0x03 );
2658     // MOV AH,1 - treat as LT case (set carry flag)
2659     emit_opcode( cbuf, 0xB4 );
2660     emit_d8    ( cbuf, 0x01 );
2661     // SAHF
2662     emit_opcode( cbuf, 0x9E);
2663     // NOP     // target for branch to avoid branch to branch
2664     emit_opcode( cbuf, 0x90);
2665   %}
2666 
2667 //     fnstsw_ax();
2668 //     sahf();
2669 //     movl(dst, nan_result);
2670 //     jcc(Assembler::parity, exit);
2671 //     movl(dst, less_result);
2672 //     jcc(Assembler::below, exit);
2673 //     movl(dst, equal_result);
2674 //     jcc(Assembler::equal, exit);
2675 //     movl(dst, greater_result);
2676 
2677 // less_result     =  1;
2678 // greater_result  = -1;
2679 // equal_result    = 0;
2680 // nan_result      = -1;
2681 
2682   enc_class CmpF_Result(rRegI dst) %{
2683     // fnstsw_ax();
2684     emit_opcode( cbuf, 0xDF);
2685     emit_opcode( cbuf, 0xE0);
2686     // sahf
2687     emit_opcode( cbuf, 0x9E);
2688     // movl(dst, nan_result);
2689     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2690     emit_d32( cbuf, -1 );
2691     // jcc(Assembler::parity, exit);
2692     emit_opcode( cbuf, 0x7A );
2693     emit_d8    ( cbuf, 0x13 );
2694     // movl(dst, less_result);
2695     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2696     emit_d32( cbuf, -1 );
2697     // jcc(Assembler::below, exit);
2698     emit_opcode( cbuf, 0x72 );
2699     emit_d8    ( cbuf, 0x0C );
2700     // movl(dst, equal_result);
2701     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2702     emit_d32( cbuf, 0 );
2703     // jcc(Assembler::equal, exit);
2704     emit_opcode( cbuf, 0x74 );
2705     emit_d8    ( cbuf, 0x05 );
2706     // movl(dst, greater_result);
2707     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2708     emit_d32( cbuf, 1 );
2709   %}
2710 
2711 
2712   // Compare the longs and set flags
2713   // BROKEN!  Do Not use as-is
2714   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2715     // CMP    $src1.hi,$src2.hi
2716     emit_opcode( cbuf, 0x3B );
2717     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2718     // JNE,s  done
2719     emit_opcode(cbuf,0x75);
2720     emit_d8(cbuf, 2 );
2721     // CMP    $src1.lo,$src2.lo
2722     emit_opcode( cbuf, 0x3B );
2723     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2724 // done:
2725   %}
2726 
2727   enc_class convert_int_long( regL dst, rRegI src ) %{
2728     // mov $dst.lo,$src
2729     int dst_encoding = $dst$$reg;
2730     int src_encoding = $src$$reg;
2731     encode_Copy( cbuf, dst_encoding  , src_encoding );
2732     // mov $dst.hi,$src
2733     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2734     // sar $dst.hi,31
2735     emit_opcode( cbuf, 0xC1 );
2736     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2737     emit_d8(cbuf, 0x1F );
2738   %}
2739 
2740   enc_class convert_long_double( eRegL src ) %{
2741     // push $src.hi
2742     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2743     // push $src.lo
2744     emit_opcode(cbuf, 0x50+$src$$reg  );
2745     // fild 64-bits at [SP]
2746     emit_opcode(cbuf,0xdf);
2747     emit_d8(cbuf, 0x6C);
2748     emit_d8(cbuf, 0x24);
2749     emit_d8(cbuf, 0x00);
2750     // pop stack
2751     emit_opcode(cbuf, 0x83); // add  SP, #8
2752     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2753     emit_d8(cbuf, 0x8);
2754   %}
2755 
2756   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2757     // IMUL   EDX:EAX,$src1
2758     emit_opcode( cbuf, 0xF7 );
2759     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2760     // SAR    EDX,$cnt-32
2761     int shift_count = ((int)$cnt$$constant) - 32;
2762     if (shift_count > 0) {
2763       emit_opcode(cbuf, 0xC1);
2764       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2765       emit_d8(cbuf, shift_count);
2766     }
2767   %}
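       // IMUL leaves the full 64-bit product in EDX:EAX, so EDX already holds
       // the product shifted right by 32; the optional SAR EDX,($cnt-32)
       // completes an arithmetic shift by $cnt in [32,63].  For $cnt == 32
       // the shift count is zero and no SAR is emitted.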
2768 
2769   // Same as convert_long_double, but does not pop the stack (no ADD SP,8)
2770   enc_class convert_long_double2( eRegL src ) %{
2771     // push $src.hi
2772     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2773     // push $src.lo
2774     emit_opcode(cbuf, 0x50+$src$$reg  );
2775     // fild 64-bits at [SP]
2776     emit_opcode(cbuf,0xdf);
2777     emit_d8(cbuf, 0x6C);
2778     emit_d8(cbuf, 0x24);
2779     emit_d8(cbuf, 0x00);
2780   %}
2781 
2782   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2783     // Basic idea: long = (long)int * (long)int
2784     // IMUL EDX:EAX, src
2785     emit_opcode( cbuf, 0xF7 );
2786     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2787   %}
2788 
2789   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2790     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2791     // MUL EDX:EAX, src
2792     emit_opcode( cbuf, 0xF7 );
2793     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2794   %}
2795 
2796   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2797     // Basic idea: lo(result) = lo(x_lo * y_lo)
2798     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2799     // MOV    $tmp,$src.lo
2800     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2801     // IMUL   $tmp,EDX
2802     emit_opcode( cbuf, 0x0F );
2803     emit_opcode( cbuf, 0xAF );
2804     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2805     // MOV    EDX,$src.hi
2806     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2807     // IMUL   EDX,EAX
2808     emit_opcode( cbuf, 0x0F );
2809     emit_opcode( cbuf, 0xAF );
2810     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2811     // ADD    $tmp,EDX
2812     emit_opcode( cbuf, 0x03 );
2813     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2814     // MUL   EDX:EAX,$src.lo
2815     emit_opcode( cbuf, 0xF7 );
2816     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2817     // ADD    EDX,$tmp
2818     emit_opcode( cbuf, 0x03 );
2819     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2820   %}
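       // Derivation: with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
       //   x*y = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi).
       // The 2^64 term falls outside a 64-bit result, which yields the
       // lo/hi recipe encoded above.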
2821 
2822   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2823     // Basic idea: lo(result) = lo(src * y_lo)
2824     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2825     // IMUL   $tmp,EDX,$src
2826     emit_opcode( cbuf, 0x6B );
2827     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2828     emit_d8( cbuf, (int)$src$$constant );
2829     // MOV    EDX,$src
2830     emit_opcode(cbuf, 0xB8 + EDX_enc);
2831     emit_d32( cbuf, (int)$src$$constant );
2832     // MUL   EDX:EAX,EDX
2833     emit_opcode( cbuf, 0xF7 );
2834     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2835     // ADD    EDX,$tmp
2836     emit_opcode( cbuf, 0x03 );
2837     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2838   %}
2839 
2840   enc_class long_div( eRegL src1, eRegL src2 ) %{
2841     // PUSH src1.hi
2842     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2843     // PUSH src1.lo
2844     emit_opcode(cbuf,               0x50+$src1$$reg  );
2845     // PUSH src2.hi
2846     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2847     // PUSH src2.lo
2848     emit_opcode(cbuf,               0x50+$src2$$reg  );
2849     // CALL directly to the runtime
2850     cbuf.set_insts_mark();
2851     emit_opcode(cbuf,0xE8);       // Call into runtime
2852     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2853     // Restore stack
2854     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2855     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2856     emit_d8(cbuf, 4*4);
2857   %}
2858 
2859   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2860     // PUSH src1.hi
2861     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2862     // PUSH src1.lo
2863     emit_opcode(cbuf,               0x50+$src1$$reg  );
2864     // PUSH src2.hi
2865     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2866     // PUSH src2.lo
2867     emit_opcode(cbuf,               0x50+$src2$$reg  );
2868     // CALL directly to the runtime
2869     cbuf.set_insts_mark();
2870     emit_opcode(cbuf,0xE8);       // Call into runtime
2871     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2872     // Restore stack
2873     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2874     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2875     emit_d8(cbuf, 4*4);
2876   %}
2877 
2878   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2879     // MOV   $tmp,$src.lo
2880     emit_opcode(cbuf, 0x8B);
2881     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2882     // OR    $tmp,$src.hi
2883     emit_opcode(cbuf, 0x0B);
2884     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2885   %}
2886 
2887   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2888     // CMP    $src1.lo,$src2.lo
2889     emit_opcode( cbuf, 0x3B );
2890     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2891     // JNE,s  skip
2892     emit_cc(cbuf, 0x70, 0x5);
2893     emit_d8(cbuf,2);
2894     // CMP    $src1.hi,$src2.hi
2895     emit_opcode( cbuf, 0x3B );
2896     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2897   %}
2898 
2899   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2900     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2901     emit_opcode( cbuf, 0x3B );
2902     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2903     // MOV    $tmp,$src1.hi
2904     emit_opcode( cbuf, 0x8B );
2905     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2906     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2907     emit_opcode( cbuf, 0x1B );
2908     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2909   %}
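       // This is the usual widening compare: CMP sets the borrow from the low
       // words and SBB folds it into the high-word subtraction, so the flags
       // reflect the signed 64-bit result of $src1 - $src2 (LT/GE tests).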
2910 
2911   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2912     // XOR    $tmp,$tmp
2913     emit_opcode(cbuf,0x33);  // XOR
2914     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2915     // CMP    $tmp,$src.lo
2916     emit_opcode( cbuf, 0x3B );
2917     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2918     // SBB    $tmp,$src.hi
2919     emit_opcode( cbuf, 0x1B );
2920     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2921   %}
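       // Compares zero against $src by computing 0 - $src with borrow
       // propagation (CMP on the low word, SBB on the high word), again
       // leaving flags suitable for signed LT/GE tests of the long value.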
2922 
2923  // Sniff, sniff... smells like Gnu Superoptimizer
2924   enc_class neg_long( eRegL dst ) %{
2925     emit_opcode(cbuf,0xF7);    // NEG hi
2926     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2927     emit_opcode(cbuf,0xF7);    // NEG lo
2928     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2929     emit_opcode(cbuf,0x83);    // SBB hi,0
2930     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2931     emit_d8    (cbuf,0 );
2932   %}
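       // Two's-complement negation of hi:lo.  NEG lo computes 0-lo and sets
       // the carry flag exactly when lo != 0; SBB hi,0 then subtracts that
       // borrow from the already-negated high word, giving -(hi:lo).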
2933 
2934   enc_class enc_pop_rdx() %{
2935     emit_opcode(cbuf,0x5A);
2936   %}
2937 
2938   enc_class enc_rethrow() %{
2939     cbuf.set_insts_mark();
2940     emit_opcode(cbuf, 0xE9);        // jmp    entry
2941     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2942                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2943   %}
2944 
2945 
2946   // Convert a double to an int.  Java semantics require we do complex
2947   // manipulations in the corner cases.  So we set the rounding mode to
2948   // 'zero', store the darned double down as an int, and reset the
2949   // rounding mode to 'nearest'.  The hardware throws an exception which
2950   // patches up the correct value directly to the stack.
2951   enc_class DPR2I_encoding( regDPR src ) %{
2952     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2953     // exceptions here, so that a NaN or other corner-case value will
2954     // throw an exception (but normal values get converted at full speed).
2955     // However, I2C adapters and other float-stack manglers leave pending
2956     // invalid-op exceptions hanging.  We would have to clear them before
2957     // enabling them and that is more expensive than just testing for the
2958     // invalid value Intel stores down in the corner cases.
2959     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2960     emit_opcode(cbuf,0x2D);
2961     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2962     // Allocate a word
2963     emit_opcode(cbuf,0x83);            // SUB ESP,4
2964     emit_opcode(cbuf,0xEC);
2965     emit_d8(cbuf,0x04);
2966     // Encoding assumes a double has been pushed into FPR0.
2967     // Store down the double as an int, popping the FPU stack
2968     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2969     emit_opcode(cbuf,0x1C);
2970     emit_d8(cbuf,0x24);
2971     // Restore the rounding mode; mask the exception
2972     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2973     emit_opcode(cbuf,0x2D);
2974     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2975         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2976         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2977 
2978     // Load the converted int; adjust CPU stack
2979     emit_opcode(cbuf,0x58);       // POP EAX
2980     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2981     emit_d32   (cbuf,0x80000000); //         0x80000000
2982     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2983     emit_d8    (cbuf,0x07);       // Size of slow_call
2984     // Push src onto stack slow-path
2985     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2986     emit_d8    (cbuf,0xC0-1+$src$$reg );
2987     // CALL directly to the runtime
2988     cbuf.set_insts_mark();
2989     emit_opcode(cbuf,0xE8);       // Call into runtime
2990     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2991     // Carry on here...
2992   %}
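       // FISTP stores the x87 "integer indefinite" value 0x80000000 for NaN
       // and out-of-range inputs, so the CMP/JNE above only enters the slow
       // path (d2i_wrapper) when that sentinel appears.  The 7-byte skip is
       // FLD ST(i) (2 bytes) plus CALL rel32 (5 bytes).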
2993 
2994   enc_class DPR2L_encoding( regDPR src ) %{
2995     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2996     emit_opcode(cbuf,0x2D);
2997     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2998     // Allocate two words
2999     emit_opcode(cbuf,0x83);            // SUB ESP,8
3000     emit_opcode(cbuf,0xEC);
3001     emit_d8(cbuf,0x08);
3002     // Encoding assumes a double has been pushed into FPR0.
3003     // Store down the double as a long, popping the FPU stack
3004     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3005     emit_opcode(cbuf,0x3C);
3006     emit_d8(cbuf,0x24);
3007     // Restore the rounding mode; mask the exception
3008     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3009     emit_opcode(cbuf,0x2D);
3010     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3011         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3012         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3013 
3014     // Load the converted long; adjust CPU stack
3015     emit_opcode(cbuf,0x58);       // POP EAX
3016     emit_opcode(cbuf,0x5A);       // POP EDX
3017     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3018     emit_d8    (cbuf,0xFA);       // rdx
3019     emit_d32   (cbuf,0x80000000); //         0x80000000
3020     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3021     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3022     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3023     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3024     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3025     emit_d8    (cbuf,0x07);       // Size of slow_call
3026     // Push src onto stack slow-path
3027     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3028     emit_d8    (cbuf,0xC0-1+$src$$reg );
3029     // CALL directly to the runtime
3030     cbuf.set_insts_mark();
3031     emit_opcode(cbuf,0xE8);       // Call into runtime
3032     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3033     // Carry on here...
3034   %}
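       // Same sentinel check as DPR2I_encoding, but for the 64-bit indefinite
       // value 0x8000000000000000: EDX must equal 0x80000000 and EAX must be
       // zero before d2l_wrapper is called.  The first JNE skips 0x07+4 bytes:
       // TEST (2) + JNE (2) + FLD (2) + CALL (5).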
3035 
3036   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3037     // Operand was loaded from memory into fp ST (stack top)
3038     // FMUL   ST,$src  /* D8 C8+i */
3039     emit_opcode(cbuf, 0xD8);
3040     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3041   %}
3042 
3043   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3044     // FADD   ST,$src2  /* D8 C0+i */
3045     emit_opcode(cbuf, 0xD8);
3046     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3047     //could use FADDP  src2,fpST  /* DE C0+i */
3048   %}
3049 
3050   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3051     // FADDP  src2,ST  /* DE C0+i */
3052     emit_opcode(cbuf, 0xDE);
3053     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3054   %}
3055 
3056   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3057     // Operand has been loaded into fp ST (stack top)
3058       // FSUB   ST,$src1
3059       emit_opcode(cbuf, 0xD8);
3060       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3061 
3062       // FDIV
3063       emit_opcode(cbuf, 0xD8);
3064       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3065   %}
3066 
3067   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3068     // Operand was loaded from memory into fp ST (stack top)
3069     // FADD   ST,$src  /* D8 C0+i */
3070     emit_opcode(cbuf, 0xD8);
3071     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3072 
3073     // FMUL   ST,$src2  /* D8 C8+i */
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3076   %}
3077 
3078 
3079   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3080     // Operand was loaded from memory into fp ST (stack top)
3081     // FADD   ST,$src  /* D8 C0+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3084 
3085     // FMULP  src2,ST  /* DE C8+i */
3086     emit_opcode(cbuf, 0xDE);
3087     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3088   %}
3089 
3090   // Atomically load the volatile long
3091   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3092     emit_opcode(cbuf,0xDF);
3093     int rm_byte_opcode = 0x05;
3094     int base     = $mem$$base;
3095     int index    = $mem$$index;
3096     int scale    = $mem$$scale;
3097     int displace = $mem$$disp;
3098     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3099     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3100     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3101   %}
3102 
3103   // Volatile Store Long.  Must be atomic, so move it into
3104   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3105   // target address before the store (for null-ptr checks)
3106   // so the memory operand is used twice in the encoding.
3107   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3108     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3109     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3110     emit_opcode(cbuf,0xDF);
3111     int rm_byte_opcode = 0x07;
3112     int base     = $mem$$base;
3113     int index    = $mem$$index;
3114     int scale    = $mem$$scale;
3115     int displace = $mem$$disp;
3116     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3117     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3118   %}
3119 
3120   // Safepoint Poll.  This polls the safepoint page and causes an
3121   // exception if the page is not readable.  Unfortunately, it kills the
3122   // condition codes in the process.
3123   // We currently use TESTL [spp],EDI.
3124   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0.
3125 
3126   enc_class Safepoint_Poll() %{
3127     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3128     emit_opcode(cbuf,0x85);
3129     emit_rm (cbuf, 0x0, 0x7, 0x5);
3130     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3131   %}
3132 %}
3133 
3134 
3135 //----------FRAME--------------------------------------------------------------
3136 // Definition of frame structure and management information.
3137 //
3138 //  S T A C K   L A Y O U T    Allocators stack-slot number
3139 //                             |   (to get allocators register number
3140 //  G  Owned by    |        |  v    add OptoReg::stack0())
3141 //  r   CALLER     |        |
3142 //  o     |        +--------+      pad to even-align allocators stack-slot
3143 //  w     V        |  pad0  |        numbers; owned by CALLER
3144 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3145 //  h     ^        |   in   |  5
3146 //        |        |  args  |  4   Holes in incoming args owned by SELF
3147 //  |     |        |        |  3
3148 //  |     |        +--------+
3149 //  V     |        | old out|      Empty on Intel, window on Sparc
3150 //        |    old |preserve|      Must be even aligned.
3151 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3152 //        |        |   in   |  3   area for Intel ret address
3153 //     Owned by    |preserve|      Empty on Sparc.
3154 //       SELF      +--------+
3155 //        |        |  pad2  |  2   pad to align old SP
3156 //        |        +--------+  1
3157 //        |        | locks  |  0
3158 //        |        +--------+----> OptoReg::stack0(), even aligned
3159 //        |        |  pad1  | 11   pad to align new SP
3160 //        |        +--------+
3161 //        |        |        | 10
3162 //        |        | spills |  9   spills
3163 //        V        |        |  8   (pad0 slot for callee)
3164 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3165 //        ^        |  out   |  7
3166 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3167 //     Owned by    +--------+
3168 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3169 //        |    new |preserve|      Must be even-aligned.
3170 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3171 //        |        |        |
3172 //
3173 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3174 //         known from SELF's arguments and the Java calling convention.
3175 //         Region 6-7 is determined per call site.
3176 // Note 2: If the calling convention leaves holes in the incoming argument
3177 //         area, those holes are owned by SELF.  Holes in the outgoing area
3178 //         are owned by the CALLEE.  Holes should not be necessary in the
3179 //         incoming area, as the Java calling convention is completely under
3180 //         the control of the AD file.  Doubles can be sorted and packed to
3181 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3182 //         varargs C calling conventions.
3183 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3184 //         even aligned with pad0 as needed.
3185 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3186 //         region 6-11 is even aligned; it may be padded out more so that
3187 //         the region from SP to FP meets the minimum stack alignment.
3188 
3189 frame %{
3190   // Direction in which the stack grows (assumed to be the same for C & Java)
3191   stack_direction(TOWARDS_LOW);
3192 
3193   // These three registers define part of the calling convention
3194   // between compiled code and the interpreter.
3195   inline_cache_reg(EAX);                // Inline Cache Register
3196   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3197 
3198   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3199   cisc_spilling_operand_name(indOffset32);
3200 
3201   // Number of stack slots consumed by locking an object
3202   sync_stack_slots(1);
3203 
3204   // Compiled code's Frame Pointer
3205   frame_pointer(ESP);
3206   // Interpreter stores its frame pointer in a register which is
3207   // stored to the stack by I2CAdaptors.
3208   // I2CAdaptors convert from interpreted Java to compiled Java.
3209   interpreter_frame_pointer(EBP);
3210 
3211   // Stack alignment requirement
3212   // Alignment size in bytes (128-bit -> 16 bytes)
3213   stack_alignment(StackAlignmentInBytes);
3214 
3215   // Number of stack slots between incoming argument block and the start of
3216   // a new frame.  The PROLOG must add this many slots to the stack.  The
3217   // EPILOG must remove this many slots.  Intel needs one slot for
3218   // return address and one for rbp, (must save rbp)
3219   in_preserve_stack_slots(2+VerifyStackAtCalls);
3220 
3221   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3222   // for calls to C.  Supports the var-args backing area for register parms.
3223   varargs_C_out_slots_killed(0);
3224 
3225   // The after-PROLOG location of the return address.  Location of
3226   // return address specifies a type (REG or STACK) and a number
3227   // representing the register number (i.e. - use a register name) or
3228   // stack slot.
3229   // The return address is in stack slot 0 if there are no locks, verification
3230   // or alignment slots; otherwise it sits above those slots and the alignment word.
3231   return_addr(STACK - 1 +
3232               round_to((Compile::current()->in_preserve_stack_slots() +
3233                         Compile::current()->fixed_slots()),
3234                        stack_alignment_in_slots()));
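       // Worked example with illustrative values: if in_preserve_stack_slots()
       // is 2, fixed_slots() is 0 and the alignment is 4 slots, then
       // round_to(2, 4) == 4 and the return address is at STACK - 1 + 4,
       // i.e. stack slot 3.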
3235 
3236   // Body of function which returns an integer array locating
3237   // arguments either in registers or in stack slots.  Passed an array
3238   // of ideal registers called "sig" and a "length" count.  Stack-slot
3239   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3240   // arguments for a CALLEE.  Incoming stack arguments are
3241   // automatically biased by the preserve_stack_slots field above.
3242   calling_convention %{
3243     // No difference between incoming/outgoing, so just pass false
3244     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3245   %}
3246 
3247 
3248   // Body of function which returns an integer array locating
3249   // arguments either in registers or in stack slots.  Passed an array
3250   // of ideal registers called "sig" and a "length" count.  Stack-slot
3251   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3252   // arguments for a CALLEE.  Incoming stack arguments are
3253   // automatically biased by the preserve_stack_slots field above.
3254   c_calling_convention %{
3255     // This is obviously always outgoing
3256     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3257   %}
3258 
3259   // Location of C & interpreter return values
3260   c_return_value %{
3261     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3262     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3263     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3264 
3265     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3266     // that C functions return float and double results in XMM0.
3267     if( ideal_reg == Op_RegD && UseSSE>=2 )
3268       return OptoRegPair(XMM0b_num,XMM0_num);
3269     if( ideal_reg == Op_RegF && UseSSE>=2 )
3270       return OptoRegPair(OptoReg::Bad,XMM0_num);
3271 
3272     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3273   %}
3274 
3275   // Location of return values
3276   return_value %{
3277     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3278     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3279     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3280     if( ideal_reg == Op_RegD && UseSSE>=2 )
3281       return OptoRegPair(XMM0b_num,XMM0_num);
3282     if( ideal_reg == Op_RegF && UseSSE>=1 )
3283       return OptoRegPair(OptoReg::Bad,XMM0_num);
3284     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3285   %}
3286 
3287 %}
3288 
3289 //----------ATTRIBUTES---------------------------------------------------------
3290 //----------Operand Attributes-------------------------------------------------
3291 op_attrib op_cost(0);        // Required cost attribute
3292 
3293 //----------Instruction Attributes---------------------------------------------
3294 ins_attrib ins_cost(100);       // Required cost attribute
3295 ins_attrib ins_size(8);         // Required size attribute (in bits)
3296 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3297                                 // non-matching short branch variant of some
3298                                 // long branch?
3299 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3300                                 // specifies the alignment that some part of the instruction (not
3301                                 // necessarily the start) requires.  If > 1, a compute_padding()
3302                                 // function must be provided for the instruction
3303 
3304 //----------OPERANDS-----------------------------------------------------------
3305 // Operand definitions must precede instruction definitions for correct parsing
3306 // in the ADLC because operands constitute user defined types which are used in
3307 // instruction definitions.
3308 
3309 //----------Simple Operands----------------------------------------------------
3310 // Immediate Operands
3311 // Integer Immediate
3312 operand immI() %{
3313   match(ConI);
3314 
3315   op_cost(10);
3316   format %{ %}
3317   interface(CONST_INTER);
3318 %}
3319 
3320 // Constant for test vs zero
3321 operand immI0() %{
3322   predicate(n->get_int() == 0);
3323   match(ConI);
3324 
3325   op_cost(0);
3326   format %{ %}
3327   interface(CONST_INTER);
3328 %}
3329 
3330 // Constant for increment
3331 operand immI1() %{
3332   predicate(n->get_int() == 1);
3333   match(ConI);
3334 
3335   op_cost(0);
3336   format %{ %}
3337   interface(CONST_INTER);
3338 %}
3339 
3340 // Constant for decrement
3341 operand immI_M1() %{
3342   predicate(n->get_int() == -1);
3343   match(ConI);
3344 
3345   op_cost(0);
3346   format %{ %}
3347   interface(CONST_INTER);
3348 %}
3349 
3350 // Valid scale values for addressing modes
3351 operand immI2() %{
3352   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3353   match(ConI);
3354 
3355   format %{ %}
3356   interface(CONST_INTER);
3357 %}
3358 
3359 operand immI8() %{
3360   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3361   match(ConI);
3362 
3363   op_cost(5);
3364   format %{ %}
3365   interface(CONST_INTER);
3366 %}
3367 
3368 operand immI16() %{
3369   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3370   match(ConI);
3371 
3372   op_cost(10);
3373   format %{ %}
3374   interface(CONST_INTER);
3375 %}
3376 
3377 // Int Immediate non-negative
3378 operand immU31()
3379 %{
3380   predicate(n->get_int() >= 0);
3381   match(ConI);
3382 
3383   op_cost(0);
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 // Constant for long shifts
3389 operand immI_32() %{
3390   predicate( n->get_int() == 32 );
3391   match(ConI);
3392 
3393   op_cost(0);
3394   format %{ %}
3395   interface(CONST_INTER);
3396 %}
3397 
3398 operand immI_1_31() %{
3399   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3400   match(ConI);
3401 
3402   op_cost(0);
3403   format %{ %}
3404   interface(CONST_INTER);
3405 %}
3406 
3407 operand immI_32_63() %{
3408   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3409   match(ConI);
3410   op_cost(0);
3411 
3412   format %{ %}
3413   interface(CONST_INTER);
3414 %}
3415 
3416 operand immI_1() %{
3417   predicate( n->get_int() == 1 );
3418   match(ConI);
3419 
3420   op_cost(0);
3421   format %{ %}
3422   interface(CONST_INTER);
3423 %}
3424 
3425 operand immI_2() %{
3426   predicate( n->get_int() == 2 );
3427   match(ConI);
3428 
3429   op_cost(0);
3430   format %{ %}
3431   interface(CONST_INTER);
3432 %}
3433 
3434 operand immI_3() %{
3435   predicate( n->get_int() == 3 );
3436   match(ConI);
3437 
3438   op_cost(0);
3439   format %{ %}
3440   interface(CONST_INTER);
3441 %}
3442 
3443 // Pointer Immediate
3444 operand immP() %{
3445   match(ConP);
3446 
3447   op_cost(10);
3448   format %{ %}
3449   interface(CONST_INTER);
3450 %}
3451 
3452 // NULL Pointer Immediate
3453 operand immP0() %{
3454   predicate( n->get_ptr() == 0 );
3455   match(ConP);
3456   op_cost(0);
3457 
3458   format %{ %}
3459   interface(CONST_INTER);
3460 %}
3461 
3462 // Long Immediate
3463 operand immL() %{
3464   match(ConL);
3465 
3466   op_cost(20);
3467   format %{ %}
3468   interface(CONST_INTER);
3469 %}
3470 
3471 // Long Immediate zero
3472 operand immL0() %{
3473   predicate( n->get_long() == 0L );
3474   match(ConL);
3475   op_cost(0);
3476 
3477   format %{ %}
3478   interface(CONST_INTER);
3479 %}
3480 
3481 // Long Immediate minus one
3482 operand immL_M1() %{
3483   predicate( n->get_long() == -1L );
3484   match(ConL);
3485   op_cost(0);
3486 
3487   format %{ %}
3488   interface(CONST_INTER);
3489 %}
3490 
3491 // Long immediate from 0 to 127.
3492 // Used for a shorter form of long mul by 10.
3493 operand immL_127() %{
3494   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3495   match(ConL);
3496   op_cost(0);
3497 
3498   format %{ %}
3499   interface(CONST_INTER);
3500 %}
3501 
3502 // Long Immediate: low 32-bit mask
3503 operand immL_32bits() %{
3504   predicate(n->get_long() == 0xFFFFFFFFL);
3505   match(ConL);
3506   op_cost(0);
3507 
3508   format %{ %}
3509   interface(CONST_INTER);
3510 %}
3511 
3512 // Long Immediate: a 32-bit signed value
3513 operand immL32() %{
3514   predicate(n->get_long() == (int)(n->get_long()));
3515   match(ConL);
3516   op_cost(20);
3517 
3518   format %{ %}
3519   interface(CONST_INTER);
3520 %}
3521 
3522 // Double Immediate zero
3523 operand immDPR0() %{
3524   // Do additional (and counter-intuitive) test against NaN to work around VC++
3525   // bug that generates code such that NaNs compare equal to 0.0
3526   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3527   match(ConD);
3528 
3529   op_cost(5);
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Double Immediate one
3535 operand immDPR1() %{
3536   predicate( UseSSE<=1 && n->getd() == 1.0 );
3537   match(ConD);
3538 
3539   op_cost(5);
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Double Immediate
3545 operand immDPR() %{
3546   predicate(UseSSE<=1);
3547   match(ConD);
3548 
3549   op_cost(5);
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 operand immD() %{
3555   predicate(UseSSE>=2);
3556   match(ConD);
3557 
3558   op_cost(5);
3559   format %{ %}
3560   interface(CONST_INTER);
3561 %}
3562 
3563 // Double Immediate zero
3564 operand immD0() %{
3565   // Do additional (and counter-intuitive) test against NaN to work around VC++
3566   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3567   // compare equal to -0.0.
3568   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3569   match(ConD);
3570 
3571   format %{ %}
3572   interface(CONST_INTER);
3573 %}
3574 
3575 // Float Immediate zero
3576 operand immFPR0() %{
3577   predicate(UseSSE == 0 && n->getf() == 0.0F);
3578   match(ConF);
3579 
3580   op_cost(5);
3581   format %{ %}
3582   interface(CONST_INTER);
3583 %}
3584 
3585 // Float Immediate one
3586 operand immFPR1() %{
3587   predicate(UseSSE == 0 && n->getf() == 1.0F);
3588   match(ConF);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Float Immediate
3596 operand immFPR() %{
3597   predicate( UseSSE == 0 );
3598   match(ConF);
3599 
3600   op_cost(5);
3601   format %{ %}
3602   interface(CONST_INTER);
3603 %}
3604 
3605 // Float Immediate
3606 operand immF() %{
3607   predicate(UseSSE >= 1);
3608   match(ConF);
3609 
3610   op_cost(5);
3611   format %{ %}
3612   interface(CONST_INTER);
3613 %}
3614 
3615 // Float Immediate zero.  Zero and not -0.0
3616 operand immF0() %{
3617   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3618   match(ConF);
3619 
3620   op_cost(5);
3621   format %{ %}
3622   interface(CONST_INTER);
3623 %}
3624 
3625 // Immediates for special shifts (sign extend)
3626 
3627 // Shift counts used for sign extension
3628 operand immI_16() %{
3629   predicate( n->get_int() == 16 );
3630   match(ConI);
3631 
3632   format %{ %}
3633   interface(CONST_INTER);
3634 %}
3635 
3636 operand immI_24() %{
3637   predicate( n->get_int() == 24 );
3638   match(ConI);
3639 
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 // Constant for byte-wide masking
3645 operand immI_255() %{
3646   predicate( n->get_int() == 255 );
3647   match(ConI);
3648 
3649   format %{ %}
3650   interface(CONST_INTER);
3651 %}
3652 
3653 // Constant for short-wide masking
3654 operand immI_65535() %{
3655   predicate(n->get_int() == 65535);
3656   match(ConI);
3657 
3658   format %{ %}
3659   interface(CONST_INTER);
3660 %}
3661 
3662 // Register Operands
3663 // Integer Register
3664 operand rRegI() %{
3665   constraint(ALLOC_IN_RC(int_reg));
3666   match(RegI);
3667   match(xRegI);
3668   match(eAXRegI);
3669   match(eBXRegI);
3670   match(eCXRegI);
3671   match(eDXRegI);
3672   match(eDIRegI);
3673   match(eSIRegI);
3674 
3675   format %{ %}
3676   interface(REG_INTER);
3677 %}
3678 
3679 // Subset of Integer Register
3680 operand xRegI(rRegI reg) %{
3681   constraint(ALLOC_IN_RC(int_x_reg));
3682   match(reg);
3683   match(eAXRegI);
3684   match(eBXRegI);
3685   match(eCXRegI);
3686   match(eDXRegI);
3687 
3688   format %{ %}
3689   interface(REG_INTER);
3690 %}
3691 
3692 // Special Registers
3693 operand eAXRegI(xRegI reg) %{
3694   constraint(ALLOC_IN_RC(eax_reg));
3695   match(reg);
3696   match(rRegI);
3697 
3698   format %{ "EAX" %}
3699   interface(REG_INTER);
3700 %}
3701 
3702 // Special Registers
3703 operand eBXRegI(xRegI reg) %{
3704   constraint(ALLOC_IN_RC(ebx_reg));
3705   match(reg);
3706   match(rRegI);
3707 
3708   format %{ "EBX" %}
3709   interface(REG_INTER);
3710 %}
3711 
3712 operand eCXRegI(xRegI reg) %{
3713   constraint(ALLOC_IN_RC(ecx_reg));
3714   match(reg);
3715   match(rRegI);
3716 
3717   format %{ "ECX" %}
3718   interface(REG_INTER);
3719 %}
3720 
3721 operand eDXRegI(xRegI reg) %{
3722   constraint(ALLOC_IN_RC(edx_reg));
3723   match(reg);
3724   match(rRegI);
3725 
3726   format %{ "EDX" %}
3727   interface(REG_INTER);
3728 %}
3729 
3730 operand eDIRegI(xRegI reg) %{
3731   constraint(ALLOC_IN_RC(edi_reg));
3732   match(reg);
3733   match(rRegI);
3734 
3735   format %{ "EDI" %}
3736   interface(REG_INTER);
3737 %}
3738 
3739 operand naxRegI() %{
3740   constraint(ALLOC_IN_RC(nax_reg));
3741   match(RegI);
3742   match(eCXRegI);
3743   match(eDXRegI);
3744   match(eSIRegI);
3745   match(eDIRegI);
3746 
3747   format %{ %}
3748   interface(REG_INTER);
3749 %}
3750 
3751 operand nadxRegI() %{
3752   constraint(ALLOC_IN_RC(nadx_reg));
3753   match(RegI);
3754   match(eBXRegI);
3755   match(eCXRegI);
3756   match(eSIRegI);
3757   match(eDIRegI);
3758 
3759   format %{ %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 operand ncxRegI() %{
3764   constraint(ALLOC_IN_RC(ncx_reg));
3765   match(RegI);
3766   match(eAXRegI);
3767   match(eDXRegI);
3768   match(eSIRegI);
3769   match(eDIRegI);
3770 
3771   format %{ %}
3772   interface(REG_INTER);
3773 %}
3774 
3775 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3776 //
3777 operand eSIRegI(xRegI reg) %{
3778    constraint(ALLOC_IN_RC(esi_reg));
3779    match(reg);
3780    match(rRegI);
3781 
3782    format %{ "ESI" %}
3783    interface(REG_INTER);
3784 %}
3785 
3786 // Pointer Register
3787 operand anyRegP() %{
3788   constraint(ALLOC_IN_RC(any_reg));
3789   match(RegP);
3790   match(eAXRegP);
3791   match(eBXRegP);
3792   match(eCXRegP);
3793   match(eDIRegP);
3794   match(eRegP);
3795 
3796   format %{ %}
3797   interface(REG_INTER);
3798 %}
3799 
3800 operand eRegP() %{
3801   constraint(ALLOC_IN_RC(int_reg));
3802   match(RegP);
3803   match(eAXRegP);
3804   match(eBXRegP);
3805   match(eCXRegP);
3806   match(eDIRegP);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
3812 // On Windows 95, EBP is not safe to use for implicit null tests.
3813 operand eRegP_no_EBP() %{
3814   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3815   match(RegP);
3816   match(eAXRegP);
3817   match(eBXRegP);
3818   match(eCXRegP);
3819   match(eDIRegP);
3820 
3821   op_cost(100);
3822   format %{ %}
3823   interface(REG_INTER);
3824 %}
3825 
3826 operand naxRegP() %{
3827   constraint(ALLOC_IN_RC(nax_reg));
3828   match(RegP);
3829   match(eBXRegP);
3830   match(eDXRegP);
3831   match(eCXRegP);
3832   match(eSIRegP);
3833   match(eDIRegP);
3834 
3835   format %{ %}
3836   interface(REG_INTER);
3837 %}
3838 
3839 operand nabxRegP() %{
3840   constraint(ALLOC_IN_RC(nabx_reg));
3841   match(RegP);
3842   match(eCXRegP);
3843   match(eDXRegP);
3844   match(eSIRegP);
3845   match(eDIRegP);
3846 
3847   format %{ %}
3848   interface(REG_INTER);
3849 %}
3850 
3851 operand pRegP() %{
3852   constraint(ALLOC_IN_RC(p_reg));
3853   match(RegP);
3854   match(eBXRegP);
3855   match(eDXRegP);
3856   match(eSIRegP);
3857   match(eDIRegP);
3858 
3859   format %{ %}
3860   interface(REG_INTER);
3861 %}
3862 
3863 // Special Registers
3864 // Return a pointer value
3865 operand eAXRegP(eRegP reg) %{
3866   constraint(ALLOC_IN_RC(eax_reg));
3867   match(reg);
3868   format %{ "EAX" %}
3869   interface(REG_INTER);
3870 %}
3871 
3872 // Used in AtomicAdd
3873 operand eBXRegP(eRegP reg) %{
3874   constraint(ALLOC_IN_RC(ebx_reg));
3875   match(reg);
3876   format %{ "EBX" %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 // Tail-call (interprocedural jump) to interpreter
3881 operand eCXRegP(eRegP reg) %{
3882   constraint(ALLOC_IN_RC(ecx_reg));
3883   match(reg);
3884   format %{ "ECX" %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 operand eSIRegP(eRegP reg) %{
3889   constraint(ALLOC_IN_RC(esi_reg));
3890   match(reg);
3891   format %{ "ESI" %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Used in rep stosw
3896 operand eDIRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(edi_reg));
3898   match(reg);
3899   format %{ "EDI" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 operand eRegL() %{
3904   constraint(ALLOC_IN_RC(long_reg));
3905   match(RegL);
3906   match(eADXRegL);
3907 
3908   format %{ %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 operand eADXRegL( eRegL reg ) %{
3913   constraint(ALLOC_IN_RC(eadx_reg));
3914   match(reg);
3915 
3916   format %{ "EDX:EAX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eBCXRegL( eRegL reg ) %{
3921   constraint(ALLOC_IN_RC(ebcx_reg));
3922   match(reg);
3923 
3924   format %{ "EBX:ECX" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 // Special case for integer high multiply
3929 operand eADXRegL_low_only() %{
3930   constraint(ALLOC_IN_RC(eadx_reg));
3931   match(RegL);
3932 
3933   format %{ "EAX" %}
3934   interface(REG_INTER);
3935 %}
3936 
3937 // Flags register, used as output of compare instructions
3938 operand eFlagsReg() %{
3939   constraint(ALLOC_IN_RC(int_flags));
3940   match(RegFlags);
3941 
3942   format %{ "EFLAGS" %}
3943   interface(REG_INTER);
3944 %}
3945 
3946 // Flags register, used as output of FLOATING POINT compare instructions
3947 operand eFlagsRegU() %{
3948   constraint(ALLOC_IN_RC(int_flags));
3949   match(RegFlags);
3950 
3951   format %{ "EFLAGS_U" %}
3952   interface(REG_INTER);
3953 %}
3954 
3955 operand eFlagsRegUCF() %{
3956   constraint(ALLOC_IN_RC(int_flags));
3957   match(RegFlags);
3958   predicate(false);
3959 
3960   format %{ "EFLAGS_U_CF" %}
3961   interface(REG_INTER);
3962 %}
3963 
3964 // Condition Code Register used by long compare
3965 operand flagsReg_long_LTGE() %{
3966   constraint(ALLOC_IN_RC(int_flags));
3967   match(RegFlags);
3968   format %{ "FLAGS_LTGE" %}
3969   interface(REG_INTER);
3970 %}
3971 operand flagsReg_long_EQNE() %{
3972   constraint(ALLOC_IN_RC(int_flags));
3973   match(RegFlags);
3974   format %{ "FLAGS_EQNE" %}
3975   interface(REG_INTER);
3976 %}
3977 operand flagsReg_long_LEGT() %{
3978   constraint(ALLOC_IN_RC(int_flags));
3979   match(RegFlags);
3980   format %{ "FLAGS_LEGT" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 // Float register operands
3985 operand regDPR() %{
3986   predicate( UseSSE < 2 );
3987   constraint(ALLOC_IN_RC(fp_dbl_reg));
3988   match(RegD);
3989   match(regDPR1);
3990   match(regDPR2);
3991   format %{ %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 operand regDPR1(regDPR reg) %{
3996   predicate( UseSSE < 2 );
3997   constraint(ALLOC_IN_RC(fp_dbl_reg0));
3998   match(reg);
3999   format %{ "FPR1" %}
4000   interface(REG_INTER);
4001 %}
4002 
4003 operand regDPR2(regDPR reg) %{
4004   predicate( UseSSE < 2 );
4005   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4006   match(reg);
4007   format %{ "FPR2" %}
4008   interface(REG_INTER);
4009 %}
4010 
4011 operand regnotDPR1(regDPR reg) %{
4012   predicate( UseSSE < 2 );
4013   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4014   match(reg);
4015   format %{ %}
4016   interface(REG_INTER);
4017 %}
4018 
4019 // Float register operands
4020 operand regFPR() %{
4021   predicate( UseSSE < 2 );
4022   constraint(ALLOC_IN_RC(fp_flt_reg));
4023   match(RegF);
4024   match(regFPR1);
4025   format %{ %}
4026   interface(REG_INTER);
4027 %}
4028 
4029 // Float register operands
4030 operand regFPR1(regFPR reg) %{
4031   predicate( UseSSE < 2 );
4032   constraint(ALLOC_IN_RC(fp_flt_reg0));
4033   match(reg);
4034   format %{ "FPR1" %}
4035   interface(REG_INTER);
4036 %}
4037 
4038 // XMM Float register operands
4039 operand regF() %{
4040   predicate( UseSSE>=1 );
4041   constraint(ALLOC_IN_RC(float_reg_legacy));
4042   match(RegF);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 // XMM Double register operands
4048 operand regD() %{
4049   predicate( UseSSE>=2 );
4050   constraint(ALLOC_IN_RC(double_reg_legacy));
4051   match(RegD);
4052   format %{ %}
4053   interface(REG_INTER);
4054 %}
4055 
4056 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4057 // runtime code generation via reg_class_dynamic.
4058 operand vecS() %{
4059   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4060   match(VecS);
4061 
4062   format %{ %}
4063   interface(REG_INTER);
4064 %}
4065 
4066 operand vecD() %{
4067   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4068   match(VecD);
4069 
4070   format %{ %}
4071   interface(REG_INTER);
4072 %}
4073 
4074 operand vecX() %{
4075   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4076   match(VecX);
4077 
4078   format %{ %}
4079   interface(REG_INTER);
4080 %}
4081 
4082 operand vecY() %{
4083   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4084   match(VecY);
4085 
4086   format %{ %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 //----------Memory Operands----------------------------------------------------
4091 // Direct Memory Operand
4092 operand direct(immP addr) %{
4093   match(addr);
4094 
4095   format %{ "[$addr]" %}
4096   interface(MEMORY_INTER) %{
4097     base(0xFFFFFFFF);
4098     index(0x4);
4099     scale(0x0);
4100     disp($addr);
4101   %}
4102 %}
4103 
4104 // Indirect Memory Operand
4105 operand indirect(eRegP reg) %{
4106   constraint(ALLOC_IN_RC(int_reg));
4107   match(reg);
4108 
4109   format %{ "[$reg]" %}
4110   interface(MEMORY_INTER) %{
4111     base($reg);
4112     index(0x4);
4113     scale(0x0);
4114     disp(0x0);
4115   %}
4116 %}
4117 
4118 // Indirect Memory Plus Short Offset Operand
4119 operand indOffset8(eRegP reg, immI8 off) %{
4120   match(AddP reg off);
4121 
4122   format %{ "[$reg + $off]" %}
4123   interface(MEMORY_INTER) %{
4124     base($reg);
4125     index(0x4);
4126     scale(0x0);
4127     disp($off);
4128   %}
4129 %}
4130 
4131 // Indirect Memory Plus Long Offset Operand
4132 operand indOffset32(eRegP reg, immI off) %{
4133   match(AddP reg off);
4134 
4135   format %{ "[$reg + $off]" %}
4136   interface(MEMORY_INTER) %{
4137     base($reg);
4138     index(0x4);
4139     scale(0x0);
4140     disp($off);
4141   %}
4142 %}
4143 
4144 // Indirect Memory Plus Long Offset Operand
4145 operand indOffset32X(rRegI reg, immP off) %{
4146   match(AddP off reg);
4147 
4148   format %{ "[$reg + $off]" %}
4149   interface(MEMORY_INTER) %{
4150     base($reg);
4151     index(0x4);
4152     scale(0x0);
4153     disp($off);
4154   %}
4155 %}
4156 
4157 // Indirect Memory Plus Index Register Plus Offset Operand
4158 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4159   match(AddP (AddP reg ireg) off);
4160 
4161   op_cost(10);
4162   format %{"[$reg + $off + $ireg]" %}
4163   interface(MEMORY_INTER) %{
4164     base($reg);
4165     index($ireg);
4166     scale(0x0);
4167     disp($off);
4168   %}
4169 %}
4170 
4171 // Indirect Memory Plus Index Register Plus Offset Operand
4172 operand indIndex(eRegP reg, rRegI ireg) %{
4173   match(AddP reg ireg);
4174 
4175   op_cost(10);
4176   format %{"[$reg + $ireg]" %}
4177   interface(MEMORY_INTER) %{
4178     base($reg);
4179     index($ireg);
4180     scale(0x0);
4181     disp(0x0);
4182   %}
4183 %}
4184 
4185 // // -------------------------------------------------------------------------
4186 // // 486 architecture doesn't support "scale * index + offset" without a base
4187 // // -------------------------------------------------------------------------
4188 // // Scaled Memory Operands
4189 // // Indirect Memory Times Scale Plus Offset Operand
4190 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4191 //   match(AddP off (LShiftI ireg scale));
4192 //
4193 //   op_cost(10);
4194 //   format %{"[$off + $ireg << $scale]" %}
4195 //   interface(MEMORY_INTER) %{
4196 //     base(0x4);
4197 //     index($ireg);
4198 //     scale($scale);
4199 //     disp($off);
4200 //   %}
4201 // %}
4202 
4203 // Indirect Memory Times Scale Plus Index Register
4204 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4205   match(AddP reg (LShiftI ireg scale));
4206 
4207   op_cost(10);
4208   format %{"[$reg + $ireg << $scale]" %}
4209   interface(MEMORY_INTER) %{
4210     base($reg);
4211     index($ireg);
4212     scale($scale);
4213     disp(0x0);
4214   %}
4215 %}
4216 
4217 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4218 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4219   match(AddP (AddP reg (LShiftI ireg scale)) off);
4220 
4221   op_cost(10);
4222   format %{"[$reg + $off + $ireg << $scale]" %}
4223   interface(MEMORY_INTER) %{
4224     base($reg);
4225     index($ireg);
4226     scale($scale);
4227     disp($off);
4228   %}
4229 %}
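     // Example (illustrative layout): the address of int element i of an array
     // whose base oop is in $reg could match here as [$reg + $off + $ireg << 2],
     // with $off the array header size and $ireg holding i.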
4230 
4231 //----------Load Long Memory Operands------------------------------------------
4232 // The load-long idiom will use its address expression again after loading
4233 // the first word of the long.  If the load-long destination overlaps with
4234 // registers used in the addressing expression, the 2nd half will be loaded
4235 // from a clobbered address.  Fix this by requiring that load-long use
4236 // address registers that do not overlap with the load-long target.
4237 
4238 // load-long support
4239 operand load_long_RegP() %{
4240   constraint(ALLOC_IN_RC(esi_reg));
4241   match(RegP);
4242   match(eSIRegP);
4243   op_cost(100);
4244   format %{  %}
4245   interface(REG_INTER);
4246 %}
4247 
4248 // Indirect Memory Operand Long
4249 operand load_long_indirect(load_long_RegP reg) %{
4250   constraint(ALLOC_IN_RC(esi_reg));
4251   match(reg);
4252 
4253   format %{ "[$reg]" %}
4254   interface(MEMORY_INTER) %{
4255     base($reg);
4256     index(0x4);
4257     scale(0x0);
4258     disp(0x0);
4259   %}
4260 %}
4261 
4262 // Indirect Memory Plus Long Offset Operand
4263 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4264   match(AddP reg off);
4265 
4266   format %{ "[$reg + $off]" %}
4267   interface(MEMORY_INTER) %{
4268     base($reg);
4269     index(0x4);
4270     scale(0x0);
4271     disp($off);
4272   %}
4273 %}
4274 
4275 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4276 
4277 
4278 //----------Special Memory Operands--------------------------------------------
4279 // Stack Slot Operand - This operand is used for loading and storing temporary
4280 //                      values on the stack where a match requires a value to
4281 //                      flow through memory.
4282 operand stackSlotP(sRegP reg) %{
4283   constraint(ALLOC_IN_RC(stack_slots));
4284   // No match rule because this operand is only generated in matching
4285   format %{ "[$reg]" %}
4286   interface(MEMORY_INTER) %{
4287     base(0x4);   // ESP
4288     index(0x4);  // No Index
4289     scale(0x0);  // No Scale
4290     disp($reg);  // Stack Offset
4291   %}
4292 %}
4293 
4294 operand stackSlotI(sRegI reg) %{
4295   constraint(ALLOC_IN_RC(stack_slots));
4296   // No match rule because this operand is only generated in matching
4297   format %{ "[$reg]" %}
4298   interface(MEMORY_INTER) %{
4299     base(0x4);   // ESP
4300     index(0x4);  // No Index
4301     scale(0x0);  // No Scale
4302     disp($reg);  // Stack Offset
4303   %}
4304 %}
4305 
4306 operand stackSlotF(sRegF reg) %{
4307   constraint(ALLOC_IN_RC(stack_slots));
4308   // No match rule because this operand is only generated in matching
4309   format %{ "[$reg]" %}
4310   interface(MEMORY_INTER) %{
4311     base(0x4);   // ESP
4312     index(0x4);  // No Index
4313     scale(0x0);  // No Scale
4314     disp($reg);  // Stack Offset
4315   %}
4316 %}
4317 
4318 operand stackSlotD(sRegD reg) %{
4319   constraint(ALLOC_IN_RC(stack_slots));
4320   // No match rule because this operand is only generated in matching
4321   format %{ "[$reg]" %}
4322   interface(MEMORY_INTER) %{
4323     base(0x4);   // ESP
4324     index(0x4);  // No Index
4325     scale(0x0);  // No Scale
4326     disp($reg);  // Stack Offset
4327   %}
4328 %}
4329 
4330 operand stackSlotL(sRegL reg) %{
4331   constraint(ALLOC_IN_RC(stack_slots));
4332   // No match rule because this operand is only generated in matching
4333   format %{ "[$reg]" %}
4334   interface(MEMORY_INTER) %{
4335     base(0x4);   // ESP
4336     index(0x4);  // No Index
4337     scale(0x0);  // No Scale
4338     disp($reg);  // Stack Offset
4339   %}
4340 %}
4341 
4342 //----------Memory Operands - Win95 Implicit Null Variants----------------
4343 // Indirect Memory Operand
4344 operand indirect_win95_safe(eRegP_no_EBP reg)
4345 %{
4346   constraint(ALLOC_IN_RC(int_reg));
4347   match(reg);
4348 
4349   op_cost(100);
4350   format %{ "[$reg]" %}
4351   interface(MEMORY_INTER) %{
4352     base($reg);
4353     index(0x4);
4354     scale(0x0);
4355     disp(0x0);
4356   %}
4357 %}
4358 
4359 // Indirect Memory Plus Short Offset Operand
4360 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4361 %{
4362   match(AddP reg off);
4363 
4364   op_cost(100);
4365   format %{ "[$reg + $off]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp($off);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Plus Long Offset Operand
4375 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4376 %{
4377   match(AddP reg off);
4378 
4379   op_cost(100);
4380   format %{ "[$reg + $off]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index(0x4);
4384     scale(0x0);
4385     disp($off);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Plus Index Register Plus Offset Operand
4390 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4391 %{
4392   match(AddP (AddP reg ireg) off);
4393 
4394   op_cost(100);
4395   format %{"[$reg + $off + $ireg]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index($ireg);
4399     scale(0x0);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 // Indirect Memory Times Scale Plus Index Register
4405 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4406 %{
4407   match(AddP reg (LShiftI ireg scale));
4408 
4409   op_cost(100);
4410   format %{"[$reg + $ireg << $scale]" %}
4411   interface(MEMORY_INTER) %{
4412     base($reg);
4413     index($ireg);
4414     scale($scale);
4415     disp(0x0);
4416   %}
4417 %}
4418 
4419 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4420 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4421 %{
4422   match(AddP (AddP reg (LShiftI ireg scale)) off);
4423 
4424   op_cost(100);
4425   format %{"[$reg + $off + $ireg << $scale]" %}
4426   interface(MEMORY_INTER) %{
4427     base($reg);
4428     index($ireg);
4429     scale($scale);
4430     disp($off);
4431   %}
4432 %}
4433 
4434 //----------Conditional Branch Operands----------------------------------------
4435 // Comparison Op  - This is the operation of the comparison, and is limited to
4436 //                  the following set of codes:
4437 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4438 //
4439 // Other attributes of the comparison, such as unsignedness, are specified
4440 // by the comparison instruction that sets a condition code flags register.
4441 // That result is represented by a flags operand whose subtype is appropriate
4442 // to the unsignedness (etc.) of the comparison.
4443 //
4444 // Later, the instruction which matches both the Comparison Op (a Bool) and
4445 // the flags (produced by the Cmp) specifies the coding of the comparison op
4446 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4447 
4448 // Comparison Code
4449 operand cmpOp() %{
4450   match(Bool);
4451 
4452   format %{ "" %}
4453   interface(COND_INTER) %{
4454     equal(0x4, "e");
4455     not_equal(0x5, "ne");
4456     less(0xC, "l");
4457     greater_equal(0xD, "ge");
4458     less_equal(0xE, "le");
4459     greater(0xF, "g");
4460     overflow(0x0, "o");
4461     no_overflow(0x1, "no");
4462   %}
4463 %}
4464 
4465 // Comparison Code, unsigned compare.  Used by FP also, with
4466 // C2 (unordered) turned into GT or LT already.  The other bits
4467 // C0 and C3 are turned into Carry & Zero flags.
4468 operand cmpOpU() %{
4469   match(Bool);
4470 
4471   format %{ "" %}
4472   interface(COND_INTER) %{
4473     equal(0x4, "e");
4474     not_equal(0x5, "ne");
4475     less(0x2, "b");
4476     greater_equal(0x3, "nb");
4477     less_equal(0x6, "be");
4478     greater(0x7, "nbe");
4479     overflow(0x0, "o");
4480     no_overflow(0x1, "no");
4481   %}
4482 %}
4483 
4484 // Floating comparisons that don't require any fixup for the unordered case
4485 operand cmpOpUCF() %{
4486   match(Bool);
4487   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4488             n->as_Bool()->_test._test == BoolTest::ge ||
4489             n->as_Bool()->_test._test == BoolTest::le ||
4490             n->as_Bool()->_test._test == BoolTest::gt);
4491   format %{ "" %}
4492   interface(COND_INTER) %{
4493     equal(0x4, "e");
4494     not_equal(0x5, "ne");
4495     less(0x2, "b");
4496     greater_equal(0x3, "nb");
4497     less_equal(0x6, "be");
4498     greater(0x7, "nbe");
4499     overflow(0x0, "o");
4500     no_overflow(0x1, "no");
4501   %}
4502 %}
4503 
4504 
4505 // Floating comparisons that can be fixed up with extra conditional jumps
4506 operand cmpOpUCF2() %{
4507   match(Bool);
4508   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4509             n->as_Bool()->_test._test == BoolTest::eq);
4510   format %{ "" %}
4511   interface(COND_INTER) %{
4512     equal(0x4, "e");
4513     not_equal(0x5, "ne");
4514     less(0x2, "b");
4515     greater_equal(0x3, "nb");
4516     less_equal(0x6, "be");
4517     greater(0x7, "nbe");
4518     overflow(0x0, "o");
4519     no_overflow(0x1, "no");
4520   %}
4521 %}
4522 
4523 // Comparison Code for FP conditional move
4524 operand cmpOp_fcmov() %{
4525   match(Bool);
4526 
4527   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4528             n->as_Bool()->_test._test != BoolTest::no_overflow);
4529   format %{ "" %}
4530   interface(COND_INTER) %{
4531     equal        (0x0C8);
4532     not_equal    (0x1C8);
4533     less         (0x0C0);
4534     greater_equal(0x1C0);
4535     less_equal   (0x0D0);
4536     greater      (0x1D0);
4537     overflow(0x0, "o"); // not really supported by the instruction
4538     no_overflow(0x1, "no"); // not really supported by the instruction
4539   %}
4540 %}
4541 
4542 // Comparison Code used in long compares
4543 operand cmpOp_commute() %{
4544   match(Bool);
4545 
4546   format %{ "" %}
4547   interface(COND_INTER) %{
4548     equal(0x4, "e");
4549     not_equal(0x5, "ne");
4550     less(0xF, "g");
4551     greater_equal(0xE, "le");
4552     less_equal(0xD, "ge");
4553     greater(0xC, "l");
4554     overflow(0x0, "o");
4555     no_overflow(0x1, "no");
4556   %}
4557 %}
4558 
4559 //----------OPERAND CLASSES----------------------------------------------------
4560 // Operand Classes are groups of operands that are used to simplify
4561 // instruction definitions by not requiring the AD writer to specify separate
4562 // instructions for every form of operand when the instruction accepts
4563 // multiple operand types with the same basic encoding and format.  The classic
4564 // case of this is memory operands.
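     //
     // As a sketch of the benefit: an instruct declared with a single
     // "memory mem" operand (e.g. loadI further below) matches every
     // addressing form listed in the opclass (register-indirect, base+offset,
     // base+index*scale, and so on) with one definition.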
4565 
4566 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4567                indIndex, indIndexScale, indIndexScaleOffset);
4568 
4569 // Long memory operations are encoded in 2 instructions and a +4 offset.
4570 // This means some kind of offset is always required and you cannot use
4571 // an oop as the offset (done when working on static globals).
4572 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4573                     indIndex, indIndexScale, indIndexScaleOffset);
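     //
     // For example, loadL further below splits a 64-bit load into two 32-bit
     // moves, reading the low word at $mem and the high word at $mem+4, which
     // is why the displacement must remain a plain integer that can be biased
     // by 4.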
4574 
4575 
4576 //----------PIPELINE-----------------------------------------------------------
4577 // Rules which define the behavior of the target architecture's pipeline.
4578 pipeline %{
4579 
4580 //----------ATTRIBUTES---------------------------------------------------------
4581 attributes %{
4582   variable_size_instructions;        // Variable size instructions
4583   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4584   instruction_unit_size = 1;         // An instruction unit is 1 byte
4585   instruction_fetch_unit_size = 16;  // The processor fetches one line
4586   instruction_fetch_units = 1;       // of 16 bytes
4587 
4588   // List of nop instructions
4589   nops( MachNop );
4590 %}
4591 
4592 //----------RESOURCES----------------------------------------------------------
4593 // Resources are the functional units available to the machine
4594 
4595 // Generic P2/P3 pipeline
4596 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4597 // 3 instructions decoded per cycle.
4598 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4599 // 2 ALU op, only ALU0 handles mul/div instructions.
4600 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4601            MS0, MS1, MEM = MS0 | MS1,
4602            BR, FPU,
4603            ALU0, ALU1, ALU = ALU0 | ALU1 );
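     //
     // Note on usage: DECODE, MEM and ALU are unions, so a pipe_class stage
     // written as "ALU : S3" may be satisfied by either ALU0 or ALU1, while a
     // class that names D0 or ALU0 directly (the big-decoder and ALU0-only
     // classes below) is restricted to that single unit.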
4604 
4605 //----------PIPELINE DESCRIPTION-----------------------------------------------
4606 // Pipeline Description specifies the stages in the machine's pipeline
4607 
4608 // Generic P2/P3 pipeline
4609 pipe_desc(S0, S1, S2, S3, S4, S5);
4610 
4611 //----------PIPELINE CLASSES---------------------------------------------------
4612 // Pipeline Classes describe the stages in which input and output are
4613 // referenced by the hardware pipeline.
4614 
4615 // Naming convention: ialu or fpu
4616 // Then: _reg
4617 // Then: _reg if there is a 2nd register
4618 // Then: _long if it's a pair of instructions implementing a long
4619 // Then: _fat if it requires the big decoder
4620 //   Or: _mem if it requires the big decoder and a memory unit.
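     //
     // Reading an example name under this convention: ialu_reg_mem below is
     // an integer ALU operation with a register destination and a memory
     // source (hence it claims D0 plus a MEM unit), while ialu_reg_reg_long
     // is the two-instruction pair form operating on a long register pair.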
4621 
4622 // Integer ALU reg operation
4623 pipe_class ialu_reg(rRegI dst) %{
4624     single_instruction;
4625     dst    : S4(write);
4626     dst    : S3(read);
4627     DECODE : S0;        // any decoder
4628     ALU    : S3;        // any alu
4629 %}
4630 
4631 // Long ALU reg operation
4632 pipe_class ialu_reg_long(eRegL dst) %{
4633     instruction_count(2);
4634     dst    : S4(write);
4635     dst    : S3(read);
4636     DECODE : S0(2);     // any 2 decoders
4637     ALU    : S3(2);     // both alus
4638 %}
4639 
4640 // Integer ALU reg operation using big decoder
4641 pipe_class ialu_reg_fat(rRegI dst) %{
4642     single_instruction;
4643     dst    : S4(write);
4644     dst    : S3(read);
4645     D0     : S0;        // big decoder only
4646     ALU    : S3;        // any alu
4647 %}
4648 
4649 // Long ALU reg operation using big decoder
4650 pipe_class ialu_reg_long_fat(eRegL dst) %{
4651     instruction_count(2);
4652     dst    : S4(write);
4653     dst    : S3(read);
4654     D0     : S0(2);     // big decoder only; twice
4655     ALU    : S3(2);     // any 2 alus
4656 %}
4657 
4658 // Integer ALU reg-reg operation
4659 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4660     single_instruction;
4661     dst    : S4(write);
4662     src    : S3(read);
4663     DECODE : S0;        // any decoder
4664     ALU    : S3;        // any alu
4665 %}
4666 
4667 // Long ALU reg-reg operation
4668 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4669     instruction_count(2);
4670     dst    : S4(write);
4671     src    : S3(read);
4672     DECODE : S0(2);     // any 2 decoders
4673     ALU    : S3(2);     // both alus
4674 %}
4675 
4676 // Integer ALU reg-reg operation
4677 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4678     single_instruction;
4679     dst    : S4(write);
4680     src    : S3(read);
4681     D0     : S0;        // big decoder only
4682     ALU    : S3;        // any alu
4683 %}
4684 
4685 // Long ALU reg-reg operation
4686 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4687     instruction_count(2);
4688     dst    : S4(write);
4689     src    : S3(read);
4690     D0     : S0(2);     // big decoder only; twice
4691     ALU    : S3(2);     // both alus
4692 %}
4693 
4694 // Integer ALU reg-mem operation
4695 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4696     single_instruction;
4697     dst    : S5(write);
4698     mem    : S3(read);
4699     D0     : S0;        // big decoder only
4700     ALU    : S4;        // any alu
4701     MEM    : S3;        // any mem
4702 %}
4703 
4704 // Long ALU reg-mem operation
4705 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4706     instruction_count(2);
4707     dst    : S5(write);
4708     mem    : S3(read);
4709     D0     : S0(2);     // big decoder only; twice
4710     ALU    : S4(2);     // any 2 alus
4711     MEM    : S3(2);     // both mems
4712 %}
4713 
4714 // Integer mem operation (prefetch)
4715 pipe_class ialu_mem(memory mem)
4716 %{
4717     single_instruction;
4718     mem    : S3(read);
4719     D0     : S0;        // big decoder only
4720     MEM    : S3;        // any mem
4721 %}
4722 
4723 // Integer Store to Memory
4724 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4725     single_instruction;
4726     mem    : S3(read);
4727     src    : S5(read);
4728     D0     : S0;        // big decoder only
4729     ALU    : S4;        // any alu
4730     MEM    : S3;
4731 %}
4732 
4733 // Long Store to Memory
4734 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4735     instruction_count(2);
4736     mem    : S3(read);
4737     src    : S5(read);
4738     D0     : S0(2);     // big decoder only; twice
4739     ALU    : S4(2);     // any 2 alus
4740     MEM    : S3(2);     // Both mems
4741 %}
4742 
4743 // Integer Store to Memory
4744 pipe_class ialu_mem_imm(memory mem) %{
4745     single_instruction;
4746     mem    : S3(read);
4747     D0     : S0;        // big decoder only
4748     ALU    : S4;        // any alu
4749     MEM    : S3;
4750 %}
4751 
4752 // Integer ALU0 reg-reg operation
4753 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4754     single_instruction;
4755     dst    : S4(write);
4756     src    : S3(read);
4757     D0     : S0;        // Big decoder only
4758     ALU0   : S3;        // only alu0
4759 %}
4760 
4761 // Integer ALU0 reg-mem operation
4762 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4763     single_instruction;
4764     dst    : S5(write);
4765     mem    : S3(read);
4766     D0     : S0;        // big decoder only
4767     ALU0   : S4;        // ALU0 only
4768     MEM    : S3;        // any mem
4769 %}
4770 
4771 // Integer ALU reg-reg operation
4772 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4773     single_instruction;
4774     cr     : S4(write);
4775     src1   : S3(read);
4776     src2   : S3(read);
4777     DECODE : S0;        // any decoder
4778     ALU    : S3;        // any alu
4779 %}
4780 
4781 // Integer ALU reg-imm operation
4782 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4783     single_instruction;
4784     cr     : S4(write);
4785     src1   : S3(read);
4786     DECODE : S0;        // any decoder
4787     ALU    : S3;        // any alu
4788 %}
4789 
4790 // Integer ALU reg-mem operation
4791 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4792     single_instruction;
4793     cr     : S4(write);
4794     src1   : S3(read);
4795     src2   : S3(read);
4796     D0     : S0;        // big decoder only
4797     ALU    : S4;        // any alu
4798     MEM    : S3;
4799 %}
4800 
4801 // Conditional move reg-reg
4802 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4803     instruction_count(4);
4804     y      : S4(read);
4805     q      : S3(read);
4806     p      : S3(read);
4807     DECODE : S0(4);     // any decoder
4808 %}
4809 
4810 // Conditional move reg-reg
4811 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4812     single_instruction;
4813     dst    : S4(write);
4814     src    : S3(read);
4815     cr     : S3(read);
4816     DECODE : S0;        // any decoder
4817 %}
4818 
4819 // Conditional move reg-mem
4820 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4821     single_instruction;
4822     dst    : S4(write);
4823     src    : S3(read);
4824     cr     : S3(read);
4825     DECODE : S0;        // any decoder
4826     MEM    : S3;
4827 %}
4828 
4829 // Conditional move reg-reg long
4830 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4831     single_instruction;
4832     dst    : S4(write);
4833     src    : S3(read);
4834     cr     : S3(read);
4835     DECODE : S0(2);     // any 2 decoders
4836 %}
4837 
4838 // Conditional move double reg-reg
4839 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4840     single_instruction;
4841     dst    : S4(write);
4842     src    : S3(read);
4843     cr     : S3(read);
4844     DECODE : S0;        // any decoder
4845 %}
4846 
4847 // Float reg-reg operation
4848 pipe_class fpu_reg(regDPR dst) %{
4849     instruction_count(2);
4850     dst    : S3(read);
4851     DECODE : S0(2);     // any 2 decoders
4852     FPU    : S3;
4853 %}
4854 
4855 // Float reg-reg operation
4856 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4857     instruction_count(2);
4858     dst    : S4(write);
4859     src    : S3(read);
4860     DECODE : S0(2);     // any 2 decoders
4861     FPU    : S3;
4862 %}
4863 
4864 // Float reg-reg operation
4865 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4866     instruction_count(3);
4867     dst    : S4(write);
4868     src1   : S3(read);
4869     src2   : S3(read);
4870     DECODE : S0(3);     // any 3 decoders
4871     FPU    : S3(2);
4872 %}
4873 
4874 // Float reg-reg operation
4875 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4876     instruction_count(4);
4877     dst    : S4(write);
4878     src1   : S3(read);
4879     src2   : S3(read);
4880     src3   : S3(read);
4881     DECODE : S0(4);     // any 4 decoder slots
4882     FPU    : S3(2);
4883 %}
4884 
4885 // Float reg-reg operation
4886 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4887     instruction_count(4);
4888     dst    : S4(write);
4889     src1   : S3(read);
4890     src2   : S3(read);
4891     src3   : S3(read);
4892     DECODE : S1(3);     // any 3 decoders
4893     D0     : S0;        // Big decoder only
4894     FPU    : S3(2);
4895     MEM    : S3;
4896 %}
4897 
4898 // Float reg-mem operation
4899 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4900     instruction_count(2);
4901     dst    : S5(write);
4902     mem    : S3(read);
4903     D0     : S0;        // big decoder only
4904     DECODE : S1;        // any decoder for FPU POP
4905     FPU    : S4;
4906     MEM    : S3;        // any mem
4907 %}
4908 
4909 // Float reg-mem operation
4910 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4911     instruction_count(3);
4912     dst    : S5(write);
4913     src1   : S3(read);
4914     mem    : S3(read);
4915     D0     : S0;        // big decoder only
4916     DECODE : S1(2);     // any decoder for FPU POP
4917     FPU    : S4;
4918     MEM    : S3;        // any mem
4919 %}
4920 
4921 // Float mem-reg operation
4922 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4923     instruction_count(2);
4924     src    : S5(read);
4925     mem    : S3(read);
4926     DECODE : S0;        // any decoder for FPU PUSH
4927     D0     : S1;        // big decoder only
4928     FPU    : S4;
4929     MEM    : S3;        // any mem
4930 %}
4931 
4932 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4933     instruction_count(3);
4934     src1   : S3(read);
4935     src2   : S3(read);
4936     mem    : S3(read);
4937     DECODE : S0(2);     // any decoder for FPU PUSH
4938     D0     : S1;        // big decoder only
4939     FPU    : S4;
4940     MEM    : S3;        // any mem
4941 %}
4942 
4943 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4944     instruction_count(3);
4945     src1   : S3(read);
4946     src2   : S3(read);
4947     mem    : S4(read);
4948     DECODE : S0;        // any decoder for FPU PUSH
4949     D0     : S0(2);     // big decoder only
4950     FPU    : S4;
4951     MEM    : S3(2);     // any mem
4952 %}
4953 
4954 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4955     instruction_count(2);
4956     src1   : S3(read);
4957     dst    : S4(read);
4958     D0     : S0(2);     // big decoder only
4959     MEM    : S3(2);     // any mem
4960 %}
4961 
4962 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4963     instruction_count(3);
4964     src1   : S3(read);
4965     src2   : S3(read);
4966     dst    : S4(read);
4967     D0     : S0(3);     // big decoder only
4968     FPU    : S4;
4969     MEM    : S3(3);     // any mem
4970 %}
4971 
4972 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4973     instruction_count(3);
4974     src1   : S4(read);
4975     mem    : S4(read);
4976     DECODE : S0;        // any decoder for FPU PUSH
4977     D0     : S0(2);     // big decoder only
4978     FPU    : S4;
4979     MEM    : S3(2);     // any mem
4980 %}
4981 
4982 // Float load constant
4983 pipe_class fpu_reg_con(regDPR dst) %{
4984     instruction_count(2);
4985     dst    : S5(write);
4986     D0     : S0;        // big decoder only for the load
4987     DECODE : S1;        // any decoder for FPU POP
4988     FPU    : S4;
4989     MEM    : S3;        // any mem
4990 %}
4991 
4992 // Float load constant
4993 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4994     instruction_count(3);
4995     dst    : S5(write);
4996     src    : S3(read);
4997     D0     : S0;        // big decoder only for the load
4998     DECODE : S1(2);     // any decoder for FPU POP
4999     FPU    : S4;
5000     MEM    : S3;        // any mem
5001 %}
5002 
5003 // UnConditional branch
5004 pipe_class pipe_jmp( label labl ) %{
5005     single_instruction;
5006     BR   : S3;
5007 %}
5008 
5009 // Conditional branch
5010 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5011     single_instruction;
5012     cr    : S1(read);
5013     BR    : S3;
5014 %}
5015 
5016 // Allocation idiom
5017 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5018     instruction_count(1); force_serialization;
5019     fixed_latency(6);
5020     heap_ptr : S3(read);
5021     DECODE   : S0(3);
5022     D0       : S2;
5023     MEM      : S3;
5024     ALU      : S3(2);
5025     dst      : S5(write);
5026     BR       : S5;
5027 %}
5028 
5029 // Generic big/slow expanded idiom
5030 pipe_class pipe_slow(  ) %{
5031     instruction_count(10); multiple_bundles; force_serialization;
5032     fixed_latency(100);
5033     D0  : S0(2);
5034     MEM : S3(2);
5035 %}
5036 
5037 // The real do-nothing guy
5038 pipe_class empty( ) %{
5039     instruction_count(0);
5040 %}
5041 
5042 // Define the class for the Nop node
5043 define %{
5044    MachNop = empty;
5045 %}
5046 
5047 %}
5048 
5049 //----------INSTRUCTIONS-------------------------------------------------------
5050 //
5051 // match      -- States which machine-independent subtree may be replaced
5052 //               by this instruction.
5053 // ins_cost   -- The estimated cost of this instruction is used by instruction
5054 //               selection to identify a minimum cost tree of machine
5055 //               instructions that matches a tree of machine-independent
5056 //               instructions.
5057 // format     -- A string providing the disassembly for this instruction.
5058 //               The value of an instruction's operand may be inserted
5059 //               by referring to it with a '$' prefix.
5060 // opcode     -- Three instruction opcodes may be provided.  These are referred
5061 //               to within an encode class as $primary, $secondary, and $tertiary
5062 //               respectively.  The primary opcode is commonly used to
5063 //               indicate the type of machine instruction, while secondary
5064 //               and tertiary are often used for prefix options or addressing
5065 //               modes.
5066 // ins_encode -- A list of encode classes with parameters. The encode class
5067 //               name must have been defined in an 'enc_class' specification
5068 //               in the encode section of the architecture description.
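     //
     // Purely illustrative sketch of how the pieces fit together, modeled on
     // the integer-add rule defined later in this file and shown here only as
     // a commented example:
     //
     //   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
     //     match(Set dst (AddI dst src));       // ideal subtree being replaced
     //     effect(KILL cr);                     // extra side effect on flags
     //     format %{ "ADD    $dst,$src" %}      // disassembly, $-prefixed operands
     //     opcode(0x03);                        // $primary opcode
     //     ins_encode( OpcP, RegReg(dst,src) ); // encode classes with parameters
     //     ins_pipe( ialu_reg_reg );            // pipeline class defined above
     //   %}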
5069 
5070 //----------BSWAP-Instruction--------------------------------------------------
5071 instruct bytes_reverse_int(rRegI dst) %{
5072   match(Set dst (ReverseBytesI dst));
5073 
5074   format %{ "BSWAP  $dst" %}
5075   opcode(0x0F, 0xC8);
5076   ins_encode( OpcP, OpcSReg(dst) );
5077   ins_pipe( ialu_reg );
5078 %}
5079 
5080 instruct bytes_reverse_long(eRegL dst) %{
5081   match(Set dst (ReverseBytesL dst));
5082 
5083   format %{ "BSWAP  $dst.lo\n\t"
5084             "BSWAP  $dst.hi\n\t"
5085             "XCHG   $dst.lo $dst.hi" %}
5086 
5087   ins_cost(125);
5088   ins_encode( bswap_long_bytes(dst) );
5089   ins_pipe( ialu_reg_reg);
5090 %}
5091 
5092 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5093   match(Set dst (ReverseBytesUS dst));
5094   effect(KILL cr);
5095 
5096   format %{ "BSWAP  $dst\n\t"
5097             "SHR    $dst,16" %}
5098   ins_encode %{
5099     __ bswapl($dst$$Register);
5100     __ shrl($dst$$Register, 16);
5101   %}
5102   ins_pipe( ialu_reg );
5103 %}
5104 
5105 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5106   match(Set dst (ReverseBytesS dst));
5107   effect(KILL cr);
5108 
5109   format %{ "BSWAP  $dst\n\t"
5110             "SAR    $dst,16" %}
5111   ins_encode %{
5112     __ bswapl($dst$$Register);
5113     __ sarl($dst$$Register, 16);
5114   %}
5115   ins_pipe( ialu_reg );
5116 %}
5117 
5118 
5119 //---------- Zeros Count Instructions ------------------------------------------
5120 
5121 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5122   predicate(UseCountLeadingZerosInstruction);
5123   match(Set dst (CountLeadingZerosI src));
5124   effect(KILL cr);
5125 
5126   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5127   ins_encode %{
5128     __ lzcntl($dst$$Register, $src$$Register);
5129   %}
5130   ins_pipe(ialu_reg);
5131 %}
5132 
5133 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5134   predicate(!UseCountLeadingZerosInstruction);
5135   match(Set dst (CountLeadingZerosI src));
5136   effect(KILL cr);
5137 
5138   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5139             "JNZ    skip\n\t"
5140             "MOV    $dst, -1\n"
5141       "skip:\n\t"
5142             "NEG    $dst\n\t"
5143             "ADD    $dst, 31" %}
5144   ins_encode %{
5145     Register Rdst = $dst$$Register;
5146     Register Rsrc = $src$$Register;
5147     Label skip;
5148     __ bsrl(Rdst, Rsrc);
5149     __ jccb(Assembler::notZero, skip);
5150     __ movl(Rdst, -1);
5151     __ bind(skip);
5152     __ negl(Rdst);
5153     __ addl(Rdst, BitsPerInt - 1);
5154   %}
5155   ins_pipe(ialu_reg);
5156 %}
5157 
5158 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5159   predicate(UseCountLeadingZerosInstruction);
5160   match(Set dst (CountLeadingZerosL src));
5161   effect(TEMP dst, KILL cr);
5162 
5163   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5164             "JNC    done\n\t"
5165             "LZCNT  $dst, $src.lo\n\t"
5166             "ADD    $dst, 32\n"
5167       "done:" %}
5168   ins_encode %{
5169     Register Rdst = $dst$$Register;
5170     Register Rsrc = $src$$Register;
5171     Label done;
5172     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5173     __ jccb(Assembler::carryClear, done);
5174     __ lzcntl(Rdst, Rsrc);
5175     __ addl(Rdst, BitsPerInt);
5176     __ bind(done);
5177   %}
5178   ins_pipe(ialu_reg);
5179 %}
5180 
5181 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5182   predicate(!UseCountLeadingZerosInstruction);
5183   match(Set dst (CountLeadingZerosL src));
5184   effect(TEMP dst, KILL cr);
5185 
5186   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5187             "JZ     msw_is_zero\n\t"
5188             "ADD    $dst, 32\n\t"
5189             "JMP    not_zero\n"
5190       "msw_is_zero:\n\t"
5191             "BSR    $dst, $src.lo\n\t"
5192             "JNZ    not_zero\n\t"
5193             "MOV    $dst, -1\n"
5194       "not_zero:\n\t"
5195             "NEG    $dst\n\t"
5196             "ADD    $dst, 63\n" %}
5197   ins_encode %{
5198     Register Rdst = $dst$$Register;
5199     Register Rsrc = $src$$Register;
5200     Label msw_is_zero;
5201     Label not_zero;
5202     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5203     __ jccb(Assembler::zero, msw_is_zero);
5204     __ addl(Rdst, BitsPerInt);
5205     __ jmpb(not_zero);
5206     __ bind(msw_is_zero);
5207     __ bsrl(Rdst, Rsrc);
5208     __ jccb(Assembler::notZero, not_zero);
5209     __ movl(Rdst, -1);
5210     __ bind(not_zero);
5211     __ negl(Rdst);
5212     __ addl(Rdst, BitsPerLong - 1);
5213   %}
5214   ins_pipe(ialu_reg);
5215 %}
5216 
5217 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5218   predicate(UseCountTrailingZerosInstruction);
5219   match(Set dst (CountTrailingZerosI src));
5220   effect(KILL cr);
5221 
5222   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5223   ins_encode %{
5224     __ tzcntl($dst$$Register, $src$$Register);
5225   %}
5226   ins_pipe(ialu_reg);
5227 %}
5228 
5229 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5230   predicate(!UseCountTrailingZerosInstruction);
5231   match(Set dst (CountTrailingZerosI src));
5232   effect(KILL cr);
5233 
5234   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5235             "JNZ    done\n\t"
5236             "MOV    $dst, 32\n"
5237       "done:" %}
5238   ins_encode %{
5239     Register Rdst = $dst$$Register;
5240     Label done;
5241     __ bsfl(Rdst, $src$$Register);
5242     __ jccb(Assembler::notZero, done);
5243     __ movl(Rdst, BitsPerInt);
5244     __ bind(done);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5250   predicate(UseCountTrailingZerosInstruction);
5251   match(Set dst (CountTrailingZerosL src));
5252   effect(TEMP dst, KILL cr);
5253 
5254   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5255             "JNC    done\n\t"
5256             "TZCNT  $dst, $src.hi\n\t"
5257             "ADD    $dst, 32\n"
5258             "done:" %}
5259   ins_encode %{
5260     Register Rdst = $dst$$Register;
5261     Register Rsrc = $src$$Register;
5262     Label done;
5263     __ tzcntl(Rdst, Rsrc);
5264     __ jccb(Assembler::carryClear, done);
5265     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5266     __ addl(Rdst, BitsPerInt);
5267     __ bind(done);
5268   %}
5269   ins_pipe(ialu_reg);
5270 %}
5271 
5272 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5273   predicate(!UseCountTrailingZerosInstruction);
5274   match(Set dst (CountTrailingZerosL src));
5275   effect(TEMP dst, KILL cr);
5276 
5277   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5278             "JNZ    done\n\t"
5279             "BSF    $dst, $src.hi\n\t"
5280             "JNZ    msw_not_zero\n\t"
5281             "MOV    $dst, 32\n"
5282       "msw_not_zero:\n\t"
5283             "ADD    $dst, 32\n"
5284       "done:" %}
5285   ins_encode %{
5286     Register Rdst = $dst$$Register;
5287     Register Rsrc = $src$$Register;
5288     Label msw_not_zero;
5289     Label done;
5290     __ bsfl(Rdst, Rsrc);
5291     __ jccb(Assembler::notZero, done);
5292     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5293     __ jccb(Assembler::notZero, msw_not_zero);
5294     __ movl(Rdst, BitsPerInt);
5295     __ bind(msw_not_zero);
5296     __ addl(Rdst, BitsPerInt);
5297     __ bind(done);
5298   %}
5299   ins_pipe(ialu_reg);
5300 %}
5301 
5302 
5303 //---------- Population Count Instructions -------------------------------------
5304 
5305 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5306   predicate(UsePopCountInstruction);
5307   match(Set dst (PopCountI src));
5308   effect(KILL cr);
5309 
5310   format %{ "POPCNT $dst, $src" %}
5311   ins_encode %{
5312     __ popcntl($dst$$Register, $src$$Register);
5313   %}
5314   ins_pipe(ialu_reg);
5315 %}
5316 
5317 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5318   predicate(UsePopCountInstruction);
5319   match(Set dst (PopCountI (LoadI mem)));
5320   effect(KILL cr);
5321 
5322   format %{ "POPCNT $dst, $mem" %}
5323   ins_encode %{
5324     __ popcntl($dst$$Register, $mem$$Address);
5325   %}
5326   ins_pipe(ialu_reg);
5327 %}
5328 
5329 // Note: Long.bitCount(long) returns an int.
5330 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5331   predicate(UsePopCountInstruction);
5332   match(Set dst (PopCountL src));
5333   effect(KILL cr, TEMP tmp, TEMP dst);
5334 
5335   format %{ "POPCNT $dst, $src.lo\n\t"
5336             "POPCNT $tmp, $src.hi\n\t"
5337             "ADD    $dst, $tmp" %}
5338   ins_encode %{
5339     __ popcntl($dst$$Register, $src$$Register);
5340     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5341     __ addl($dst$$Register, $tmp$$Register);
5342   %}
5343   ins_pipe(ialu_reg);
5344 %}
5345 
5346 // Note: Long.bitCount(long) returns an int.
5347 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5348   predicate(UsePopCountInstruction);
5349   match(Set dst (PopCountL (LoadL mem)));
5350   effect(KILL cr, TEMP tmp, TEMP dst);
5351 
5352   format %{ "POPCNT $dst, $mem\n\t"
5353             "POPCNT $tmp, $mem+4\n\t"
5354             "ADD    $dst, $tmp" %}
5355   ins_encode %{
5356     //__ popcntl($dst$$Register, $mem$$Address$$first);
5357     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5358     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5359     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5360     __ addl($dst$$Register, $tmp$$Register);
5361   %}
5362   ins_pipe(ialu_reg);
5363 %}
5364 
5365 
5366 //----------Load/Store/Move Instructions---------------------------------------
5367 //----------Load Instructions--------------------------------------------------
5368 // Load Byte (8bit signed)
5369 instruct loadB(xRegI dst, memory mem) %{
5370   match(Set dst (LoadB mem));
5371 
5372   ins_cost(125);
5373   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5374 
5375   ins_encode %{
5376     __ movsbl($dst$$Register, $mem$$Address);
5377   %}
5378 
5379   ins_pipe(ialu_reg_mem);
5380 %}
5381 
5382 // Load Byte (8bit signed) into Long Register
5383 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5384   match(Set dst (ConvI2L (LoadB mem)));
5385   effect(KILL cr);
5386 
5387   ins_cost(375);
5388   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5389             "MOV    $dst.hi,$dst.lo\n\t"
5390             "SAR    $dst.hi,7" %}
5391 
5392   ins_encode %{
5393     __ movsbl($dst$$Register, $mem$$Address);
5394     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5395     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
5396   %}
5397 
5398   ins_pipe(ialu_reg_mem);
5399 %}
5400 
5401 // Load Unsigned Byte (8bit UNsigned)
5402 instruct loadUB(xRegI dst, memory mem) %{
5403   match(Set dst (LoadUB mem));
5404 
5405   ins_cost(125);
5406   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5407 
5408   ins_encode %{
5409     __ movzbl($dst$$Register, $mem$$Address);
5410   %}
5411 
5412   ins_pipe(ialu_reg_mem);
5413 %}
5414 
5415 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5416 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5417   match(Set dst (ConvI2L (LoadUB mem)));
5418   effect(KILL cr);
5419 
5420   ins_cost(250);
5421   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5422             "XOR    $dst.hi,$dst.hi" %}
5423 
5424   ins_encode %{
5425     Register Rdst = $dst$$Register;
5426     __ movzbl(Rdst, $mem$$Address);
5427     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5428   %}
5429 
5430   ins_pipe(ialu_reg_mem);
5431 %}
5432 
5433 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5434 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5435   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5436   effect(KILL cr);
5437 
5438   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5439             "XOR    $dst.hi,$dst.hi\n\t"
5440             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5441   ins_encode %{
5442     Register Rdst = $dst$$Register;
5443     __ movzbl(Rdst, $mem$$Address);
5444     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5445     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5446   %}
5447   ins_pipe(ialu_reg_mem);
5448 %}
5449 
5450 // Load Short (16bit signed)
5451 instruct loadS(rRegI dst, memory mem) %{
5452   match(Set dst (LoadS mem));
5453 
5454   ins_cost(125);
5455   format %{ "MOVSX  $dst,$mem\t# short" %}
5456 
5457   ins_encode %{
5458     __ movswl($dst$$Register, $mem$$Address);
5459   %}
5460 
5461   ins_pipe(ialu_reg_mem);
5462 %}
5463 
5464 // Load Short (16 bit signed) to Byte (8 bit signed)
5465 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5466   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5467 
5468   ins_cost(125);
5469   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5470   ins_encode %{
5471     __ movsbl($dst$$Register, $mem$$Address);
5472   %}
5473   ins_pipe(ialu_reg_mem);
5474 %}
5475 
5476 // Load Short (16bit signed) into Long Register
5477 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5478   match(Set dst (ConvI2L (LoadS mem)));
5479   effect(KILL cr);
5480 
5481   ins_cost(375);
5482   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5483             "MOV    $dst.hi,$dst.lo\n\t"
5484             "SAR    $dst.hi,15" %}
5485 
5486   ins_encode %{
5487     __ movswl($dst$$Register, $mem$$Address);
5488     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5489     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
5490   %}
5491 
5492   ins_pipe(ialu_reg_mem);
5493 %}
5494 
5495 // Load Unsigned Short/Char (16bit unsigned)
5496 instruct loadUS(rRegI dst, memory mem) %{
5497   match(Set dst (LoadUS mem));
5498 
5499   ins_cost(125);
5500   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5501 
5502   ins_encode %{
5503     __ movzwl($dst$$Register, $mem$$Address);
5504   %}
5505 
5506   ins_pipe(ialu_reg_mem);
5507 %}
5508 
5509 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5510 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5511   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5512 
5513   ins_cost(125);
5514   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5515   ins_encode %{
5516     __ movsbl($dst$$Register, $mem$$Address);
5517   %}
5518   ins_pipe(ialu_reg_mem);
5519 %}
5520 
5521 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5522 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5523   match(Set dst (ConvI2L (LoadUS mem)));
5524   effect(KILL cr);
5525 
5526   ins_cost(250);
5527   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5528             "XOR    $dst.hi,$dst.hi" %}
5529 
5530   ins_encode %{
5531     __ movzwl($dst$$Register, $mem$$Address);
5532     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5533   %}
5534 
5535   ins_pipe(ialu_reg_mem);
5536 %}
5537 
5538 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5539 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5540   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5541   effect(KILL cr);
5542 
5543   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5544             "XOR    $dst.hi,$dst.hi" %}
5545   ins_encode %{
5546     Register Rdst = $dst$$Register;
5547     __ movzbl(Rdst, $mem$$Address);
5548     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5549   %}
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5554 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5555   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5556   effect(KILL cr);
5557 
5558   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5559             "XOR    $dst.hi,$dst.hi\n\t"
5560             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5561   ins_encode %{
5562     Register Rdst = $dst$$Register;
5563     __ movzwl(Rdst, $mem$$Address);
5564     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5565     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5566   %}
5567   ins_pipe(ialu_reg_mem);
5568 %}
5569 
5570 // Load Integer
5571 instruct loadI(rRegI dst, memory mem) %{
5572   match(Set dst (LoadI mem));
5573 
5574   ins_cost(125);
5575   format %{ "MOV    $dst,$mem\t# int" %}
5576 
5577   ins_encode %{
5578     __ movl($dst$$Register, $mem$$Address);
5579   %}
5580 
5581   ins_pipe(ialu_reg_mem);
5582 %}
5583 
5584 // Load Integer (32 bit signed) to Byte (8 bit signed)
5585 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5586   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5587 
5588   ins_cost(125);
5589   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5590   ins_encode %{
5591     __ movsbl($dst$$Register, $mem$$Address);
5592   %}
5593   ins_pipe(ialu_reg_mem);
5594 %}
5595 
5596 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5597 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5598   match(Set dst (AndI (LoadI mem) mask));
5599 
5600   ins_cost(125);
5601   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5602   ins_encode %{
5603     __ movzbl($dst$$Register, $mem$$Address);
5604   %}
5605   ins_pipe(ialu_reg_mem);
5606 %}
5607 
5608 // Load Integer (32 bit signed) to Short (16 bit signed)
5609 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5610   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5611 
5612   ins_cost(125);
5613   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5614   ins_encode %{
5615     __ movswl($dst$$Register, $mem$$Address);
5616   %}
5617   ins_pipe(ialu_reg_mem);
5618 %}
5619 
5620 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5621 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5622   match(Set dst (AndI (LoadI mem) mask));
5623 
5624   ins_cost(125);
5625   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5626   ins_encode %{
5627     __ movzwl($dst$$Register, $mem$$Address);
5628   %}
5629   ins_pipe(ialu_reg_mem);
5630 %}
5631 
5632 // Load Integer into Long Register
5633 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5634   match(Set dst (ConvI2L (LoadI mem)));
5635   effect(KILL cr);
5636 
5637   ins_cost(375);
5638   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5639             "MOV    $dst.hi,$dst.lo\n\t"
5640             "SAR    $dst.hi,31" %}
5641 
5642   ins_encode %{
5643     __ movl($dst$$Register, $mem$$Address);
5644     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5645     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5646   %}
5647 
5648   ins_pipe(ialu_reg_mem);
5649 %}
5650 
5651 // Load Integer with mask 0xFF into Long Register
5652 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5653   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5654   effect(KILL cr);
5655 
5656   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5657             "XOR    $dst.hi,$dst.hi" %}
5658   ins_encode %{
5659     Register Rdst = $dst$$Register;
5660     __ movzbl(Rdst, $mem$$Address);
5661     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5662   %}
5663   ins_pipe(ialu_reg_mem);
5664 %}
5665 
5666 // Load Integer with mask 0xFFFF into Long Register
5667 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5668   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5669   effect(KILL cr);
5670 
5671   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5672             "XOR    $dst.hi,$dst.hi" %}
5673   ins_encode %{
5674     Register Rdst = $dst$$Register;
5675     __ movzwl(Rdst, $mem$$Address);
5676     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5677   %}
5678   ins_pipe(ialu_reg_mem);
5679 %}
5680 
5681 // Load Integer with 31-bit mask into Long Register
5682 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5683   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5684   effect(KILL cr);
5685 
5686   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5687             "XOR    $dst.hi,$dst.hi\n\t"
5688             "AND    $dst.lo,$mask" %}
5689   ins_encode %{
5690     Register Rdst = $dst$$Register;
5691     __ movl(Rdst, $mem$$Address);
5692     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5693     __ andl(Rdst, $mask$$constant);
5694   %}
5695   ins_pipe(ialu_reg_mem);
5696 %}
5697 
5698 // Load Unsigned Integer into Long Register
5699 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5700   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5701   effect(KILL cr);
5702 
5703   ins_cost(250);
5704   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5705             "XOR    $dst.hi,$dst.hi" %}
5706 
5707   ins_encode %{
5708     __ movl($dst$$Register, $mem$$Address);
5709     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5710   %}
5711 
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Long.  Cannot clobber address while loading, so restrict address
5716 // register to ESI
5717 instruct loadL(eRegL dst, load_long_memory mem) %{
5718   predicate(!((LoadLNode*)n)->require_atomic_access());
5719   match(Set dst (LoadL mem));
5720 
5721   ins_cost(250);
5722   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5723             "MOV    $dst.hi,$mem+4" %}
5724 
5725   ins_encode %{
5726     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5727     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5728     __ movl($dst$$Register, Amemlo);
5729     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5730   %}
5731 
5732   ins_pipe(ialu_reg_long_mem);
5733 %}
5734 
5735 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5736 // then store it down to the stack and reload on the int
5737 // side.
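     // (The 64-bit FILD/FISTP pair touches all 8 bytes in a single memory
     // access, which is what makes this sequence atomic where two 32-bit MOVs
     // would not be.)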
5738 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5739   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5740   match(Set dst (LoadL mem));
5741 
5742   ins_cost(200);
5743   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5744             "FISTp  $dst" %}
5745   ins_encode(enc_loadL_volatile(mem,dst));
5746   ins_pipe( fpu_reg_mem );
5747 %}
5748 
5749 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5750   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5751   match(Set dst (LoadL mem));
5752   effect(TEMP tmp);
5753   ins_cost(180);
5754   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5755             "MOVSD  $dst,$tmp" %}
5756   ins_encode %{
5757     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5758     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5759   %}
5760   ins_pipe( pipe_slow );
5761 %}
5762 
5763 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5764   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5765   match(Set dst (LoadL mem));
5766   effect(TEMP tmp);
5767   ins_cost(160);
5768   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5769             "MOVD   $dst.lo,$tmp\n\t"
5770             "PSRLQ  $tmp,32\n\t"
5771             "MOVD   $dst.hi,$tmp" %}
5772   ins_encode %{
5773     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5774     __ movdl($dst$$Register, $tmp$$XMMRegister);
5775     __ psrlq($tmp$$XMMRegister, 32);
5776     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5777   %}
5778   ins_pipe( pipe_slow );
5779 %}
5780 
5781 // Load Range
5782 instruct loadRange(rRegI dst, memory mem) %{
5783   match(Set dst (LoadRange mem));
5784 
5785   ins_cost(125);
5786   format %{ "MOV    $dst,$mem" %}
5787   opcode(0x8B);
5788   ins_encode( OpcP, RegMem(dst,mem));
5789   ins_pipe( ialu_reg_mem );
5790 %}
5791 
5792 
5793 // Load Pointer
5794 instruct loadP(eRegP dst, memory mem) %{
5795   match(Set dst (LoadP mem));
5796 
5797   ins_cost(125);
5798   format %{ "MOV    $dst,$mem" %}
5799   opcode(0x8B);
5800   ins_encode( OpcP, RegMem(dst,mem));
5801   ins_pipe( ialu_reg_mem );
5802 %}
5803 
5804 // Load Klass Pointer
5805 instruct loadKlass(eRegP dst, memory mem) %{
5806   match(Set dst (LoadKlass mem));
5807 
5808   ins_cost(125);
5809   format %{ "MOV    $dst,$mem" %}
5810   opcode(0x8B);
5811   ins_encode( OpcP, RegMem(dst,mem));
5812   ins_pipe( ialu_reg_mem );
5813 %}
5814 
5815 // Load Double
5816 instruct loadDPR(regDPR dst, memory mem) %{
5817   predicate(UseSSE<=1);
5818   match(Set dst (LoadD mem));
5819 
5820   ins_cost(150);
5821   format %{ "FLD_D  ST,$mem\n\t"
5822             "FSTP   $dst" %}
5823   opcode(0xDD);               /* DD /0 */
5824   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5825               Pop_Reg_DPR(dst) );
5826   ins_pipe( fpu_reg_mem );
5827 %}
5828 
5829 // Load Double to XMM
5830 instruct loadD(regD dst, memory mem) %{
5831   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5832   match(Set dst (LoadD mem));
5833   ins_cost(145);
5834   format %{ "MOVSD  $dst,$mem" %}
5835   ins_encode %{
5836     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5837   %}
5838   ins_pipe( pipe_slow );
5839 %}
5840 
5841 instruct loadD_partial(regD dst, memory mem) %{
5842   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5843   match(Set dst (LoadD mem));
5844   ins_cost(145);
5845   format %{ "MOVLPD $dst,$mem" %}
5846   ins_encode %{
5847     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5848   %}
5849   ins_pipe( pipe_slow );
5850 %}
5851 
5852 // Load to XMM register (single-precision floating point)
5853 // MOVSS instruction
5854 instruct loadF(regF dst, memory mem) %{
5855   predicate(UseSSE>=1);
5856   match(Set dst (LoadF mem));
5857   ins_cost(145);
5858   format %{ "MOVSS  $dst,$mem" %}
5859   ins_encode %{
5860     __ movflt ($dst$$XMMRegister, $mem$$Address);
5861   %}
5862   ins_pipe( pipe_slow );
5863 %}
5864 
5865 // Load Float
5866 instruct loadFPR(regFPR dst, memory mem) %{
5867   predicate(UseSSE==0);
5868   match(Set dst (LoadF mem));
5869 
5870   ins_cost(150);
5871   format %{ "FLD_S  ST,$mem\n\t"
5872             "FSTP   $dst" %}
5873   opcode(0xD9);               /* D9 /0 */
5874   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5875               Pop_Reg_FPR(dst) );
5876   ins_pipe( fpu_reg_mem );
5877 %}
5878 
5879 // Load Effective Address
5880 instruct leaP8(eRegP dst, indOffset8 mem) %{
5881   match(Set dst mem);
5882 
5883   ins_cost(110);
5884   format %{ "LEA    $dst,$mem" %}
5885   opcode(0x8D);
5886   ins_encode( OpcP, RegMem(dst,mem));
5887   ins_pipe( ialu_reg_reg_fat );
5888 %}
5889 
5890 instruct leaP32(eRegP dst, indOffset32 mem) %{
5891   match(Set dst mem);
5892 
5893   ins_cost(110);
5894   format %{ "LEA    $dst,$mem" %}
5895   opcode(0x8D);
5896   ins_encode( OpcP, RegMem(dst,mem));
5897   ins_pipe( ialu_reg_reg_fat );
5898 %}
5899 
5900 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5901   match(Set dst mem);
5902 
5903   ins_cost(110);
5904   format %{ "LEA    $dst,$mem" %}
5905   opcode(0x8D);
5906   ins_encode( OpcP, RegMem(dst,mem));
5907   ins_pipe( ialu_reg_reg_fat );
5908 %}
5909 
5910 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5911   match(Set dst mem);
5912 
5913   ins_cost(110);
5914   format %{ "LEA    $dst,$mem" %}
5915   opcode(0x8D);
5916   ins_encode( OpcP, RegMem(dst,mem));
5917   ins_pipe( ialu_reg_reg_fat );
5918 %}
5919 
5920 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5921   match(Set dst mem);
5922 
5923   ins_cost(110);
5924   format %{ "LEA    $dst,$mem" %}
5925   opcode(0x8D);
5926   ins_encode( OpcP, RegMem(dst,mem));
5927   ins_pipe( ialu_reg_reg_fat );
5928 %}
5929 
5930 // Load Constant
5931 instruct loadConI(rRegI dst, immI src) %{
5932   match(Set dst src);
5933 
5934   format %{ "MOV    $dst,$src" %}
5935   ins_encode( LdImmI(dst, src) );
5936   ins_pipe( ialu_reg_fat );
5937 %}
5938 
5939 // Load Constant zero
5940 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5941   match(Set dst src);
5942   effect(KILL cr);
5943 
5944   ins_cost(50);
5945   format %{ "XOR    $dst,$dst" %}
5946   opcode(0x33);  /* + rd */
5947   ins_encode( OpcP, RegReg( dst, dst ) );
5948   ins_pipe( ialu_reg );
5949 %}
5950 
5951 instruct loadConP(eRegP dst, immP src) %{
5952   match(Set dst src);
5953 
5954   format %{ "MOV    $dst,$src" %}
5955   opcode(0xB8);  /* + rd */
5956   ins_encode( LdImmP(dst, src) );
5957   ins_pipe( ialu_reg_fat );
5958 %}
5959 
5960 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5961   match(Set dst src);
5962   effect(KILL cr);
5963   ins_cost(200);
5964   format %{ "MOV    $dst.lo,$src.lo\n\t"
5965             "MOV    $dst.hi,$src.hi" %}
5966   opcode(0xB8);
5967   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5968   ins_pipe( ialu_reg_long_fat );
5969 %}
5970 
5971 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5972   match(Set dst src);
5973   effect(KILL cr);
5974   ins_cost(150);
5975   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5976             "XOR    $dst.hi,$dst.hi" %}
5977   opcode(0x33,0x33);
5978   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5979   ins_pipe( ialu_reg_long );
5980 %}
5981 
5982 // The instruction usage is guarded by predicate in operand immFPR().
5983 instruct loadConFPR(regFPR dst, immFPR con) %{
5984   match(Set dst con);
5985   ins_cost(125);
5986   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5987             "FSTP   $dst" %}
5988   ins_encode %{
5989     __ fld_s($constantaddress($con));
5990     __ fstp_d($dst$$reg);
5991   %}
5992   ins_pipe(fpu_reg_con);
5993 %}
5994 
5995 // The instruction usage is guarded by predicate in operand immFPR0().
5996 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5997   match(Set dst con);
5998   ins_cost(125);
5999   format %{ "FLDZ   ST\n\t"
6000             "FSTP   $dst" %}
6001   ins_encode %{
6002     __ fldz();
6003     __ fstp_d($dst$$reg);
6004   %}
6005   ins_pipe(fpu_reg_con);
6006 %}
6007 
6008 // The instruction usage is guarded by predicate in operand immFPR1().
6009 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6010   match(Set dst con);
6011   ins_cost(125);
6012   format %{ "FLD1   ST\n\t"
6013             "FSTP   $dst" %}
6014   ins_encode %{
6015     __ fld1();
6016     __ fstp_d($dst$$reg);
6017   %}
6018   ins_pipe(fpu_reg_con);
6019 %}
6020 
6021 // The instruction usage is guarded by predicate in operand immF().
6022 instruct loadConF(regF dst, immF con) %{
6023   match(Set dst con);
6024   ins_cost(125);
6025   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6026   ins_encode %{
6027     __ movflt($dst$$XMMRegister, $constantaddress($con));
6028   %}
6029   ins_pipe(pipe_slow);
6030 %}
6031 
6032 // The instruction usage is guarded by predicate in operand immF0().
6033 instruct loadConF0(regF dst, immF0 src) %{
6034   match(Set dst src);
6035   ins_cost(100);
6036   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6037   ins_encode %{
6038     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6039   %}
6040   ins_pipe(pipe_slow);
6041 %}
6042 
6043 // The instruction usage is guarded by predicate in operand immDPR().
6044 instruct loadConDPR(regDPR dst, immDPR con) %{
6045   match(Set dst con);
6046   ins_cost(125);
6047 
6048   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6049             "FSTP   $dst" %}
6050   ins_encode %{
6051     __ fld_d($constantaddress($con));
6052     __ fstp_d($dst$$reg);
6053   %}
6054   ins_pipe(fpu_reg_con);
6055 %}
6056 
6057 // The instruction usage is guarded by predicate in operand immDPR0().
6058 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6059   match(Set dst con);
6060   ins_cost(125);
6061 
6062   format %{ "FLDZ   ST\n\t"
6063             "FSTP   $dst" %}
6064   ins_encode %{
6065     __ fldz();
6066     __ fstp_d($dst$$reg);
6067   %}
6068   ins_pipe(fpu_reg_con);
6069 %}
6070 
6071 // The instruction usage is guarded by predicate in operand immDPR1().
6072 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6073   match(Set dst con);
6074   ins_cost(125);
6075 
6076   format %{ "FLD1   ST\n\t"
6077             "FSTP   $dst" %}
6078   ins_encode %{
6079     __ fld1();
6080     __ fstp_d($dst$$reg);
6081   %}
6082   ins_pipe(fpu_reg_con);
6083 %}
6084 
6085 // The instruction usage is guarded by predicate in operand immD().
6086 instruct loadConD(regD dst, immD con) %{
6087   match(Set dst con);
6088   ins_cost(125);
6089   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6090   ins_encode %{
6091     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6092   %}
6093   ins_pipe(pipe_slow);
6094 %}
6095 
6096 // The instruction usage is guarded by predicate in operand immD0().
6097 instruct loadConD0(regD dst, immD0 src) %{
6098   match(Set dst src);
6099   ins_cost(100);
6100   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6101   ins_encode %{
6102     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6103   %}
6104   ins_pipe( pipe_slow );
6105 %}
6106 
6107 // Load Stack Slot
6108 instruct loadSSI(rRegI dst, stackSlotI src) %{
6109   match(Set dst src);
6110   ins_cost(125);
6111 
6112   format %{ "MOV    $dst,$src" %}
6113   opcode(0x8B);
6114   ins_encode( OpcP, RegMem(dst,src));
6115   ins_pipe( ialu_reg_mem );
6116 %}
6117 
6118 instruct loadSSL(eRegL dst, stackSlotL src) %{
6119   match(Set dst src);
6120 
6121   ins_cost(200);
6122   format %{ "MOV    $dst,$src.lo\n\t"
6123             "MOV    $dst+4,$src.hi" %}
6124   opcode(0x8B, 0x8B);
6125   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6126   ins_pipe( ialu_mem_long_reg );
6127 %}
6128 
6129 // Load Stack Slot
6130 instruct loadSSP(eRegP dst, stackSlotP src) %{
6131   match(Set dst src);
6132   ins_cost(125);
6133 
6134   format %{ "MOV    $dst,$src" %}
6135   opcode(0x8B);
6136   ins_encode( OpcP, RegMem(dst,src));
6137   ins_pipe( ialu_reg_mem );
6138 %}
6139 
6140 // Load Stack Slot
6141 instruct loadSSF(regFPR dst, stackSlotF src) %{
6142   match(Set dst src);
6143   ins_cost(125);
6144 
6145   format %{ "FLD_S  $src\n\t"
6146             "FSTP   $dst" %}
6147   opcode(0xD9);               /* D9 /0, FLD m32real */
6148   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6149               Pop_Reg_FPR(dst) );
6150   ins_pipe( fpu_reg_mem );
6151 %}
6152 
6153 // Load Stack Slot
6154 instruct loadSSD(regDPR dst, stackSlotD src) %{
6155   match(Set dst src);
6156   ins_cost(125);
6157 
6158   format %{ "FLD_D  $src\n\t"
6159             "FSTP   $dst" %}
6160   opcode(0xDD);               /* DD /0, FLD m64real */
6161   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6162               Pop_Reg_DPR(dst) );
6163   ins_pipe( fpu_reg_mem );
6164 %}
6165 
6166 // Prefetch instructions for allocation.
6167 // Must be safe to execute with invalid address (cannot fault).
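     // (The PREFETCH* forms below are hints: the processor may drop them and
     // they never fault, so they can be issued against an address that has not
     // been committed yet.)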
6168 
6169 instruct prefetchAlloc0( memory mem ) %{
6170   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6171   match(PrefetchAllocation mem);
6172   ins_cost(0);
6173   size(0);
6174   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6175   ins_encode();
6176   ins_pipe(empty);
6177 %}
6178 
6179 instruct prefetchAlloc( memory mem ) %{
6180   predicate(AllocatePrefetchInstr==3);
6181   match( PrefetchAllocation mem );
6182   ins_cost(100);
6183 
6184   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6185   ins_encode %{
6186     __ prefetchw($mem$$Address);
6187   %}
6188   ins_pipe(ialu_mem);
6189 %}
6190 
6191 instruct prefetchAllocNTA( memory mem ) %{
6192   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6193   match(PrefetchAllocation mem);
6194   ins_cost(100);
6195 
6196   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6197   ins_encode %{
6198     __ prefetchnta($mem$$Address);
6199   %}
6200   ins_pipe(ialu_mem);
6201 %}
6202 
6203 instruct prefetchAllocT0( memory mem ) %{
6204   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6205   match(PrefetchAllocation mem);
6206   ins_cost(100);
6207 
6208   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6209   ins_encode %{
6210     __ prefetcht0($mem$$Address);
6211   %}
6212   ins_pipe(ialu_mem);
6213 %}
6214 
6215 instruct prefetchAllocT2( memory mem ) %{
6216   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6217   match(PrefetchAllocation mem);
6218   ins_cost(100);
6219 
6220   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6221   ins_encode %{
6222     __ prefetcht2($mem$$Address);
6223   %}
6224   ins_pipe(ialu_mem);
6225 %}
6226 
6227 //----------Store Instructions-------------------------------------------------
6228 
6229 // Store Byte
6230 instruct storeB(memory mem, xRegI src) %{
6231   match(Set mem (StoreB mem src));
6232 
6233   ins_cost(125);
6234   format %{ "MOV8   $mem,$src" %}
6235   opcode(0x88);
6236   ins_encode( OpcP, RegMem( src, mem ) );
6237   ins_pipe( ialu_mem_reg );
6238 %}
6239 
6240 // Store Char/Short
6241 instruct storeC(memory mem, rRegI src) %{
6242   match(Set mem (StoreC mem src));
6243 
6244   ins_cost(125);
6245   format %{ "MOV16  $mem,$src" %}
6246   opcode(0x89, 0x66);
6247   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6248   ins_pipe( ialu_mem_reg );
6249 %}
6250 
6251 // Store Integer
6252 instruct storeI(memory mem, rRegI src) %{
6253   match(Set mem (StoreI mem src));
6254 
6255   ins_cost(125);
6256   format %{ "MOV    $mem,$src" %}
6257   opcode(0x89);
6258   ins_encode( OpcP, RegMem( src, mem ) );
6259   ins_pipe( ialu_mem_reg );
6260 %}
6261 
6262 // Store Long
6263 instruct storeL(long_memory mem, eRegL src) %{
6264   predicate(!((StoreLNode*)n)->require_atomic_access());
6265   match(Set mem (StoreL mem src));
6266 
6267   ins_cost(200);
6268   format %{ "MOV    $mem,$src.lo\n\t"
6269             "MOV    $mem+4,$src.hi" %}
6270   opcode(0x89, 0x89);
6271   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6272   ins_pipe( ialu_mem_long_reg );
6273 %}
6274 
6275 // Store Long to Integer
6276 instruct storeL2I(memory mem, eRegL src) %{
6277   match(Set mem (StoreI mem (ConvL2I src)));
6278 
6279   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6280   ins_encode %{
6281     __ movl($mem$$Address, $src$$Register);
6282   %}
6283   ins_pipe(ialu_mem_reg);
6284 %}
6285 
6286 // Volatile Store Long.  Must be atomic, so move it into
6287 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6288 // target address before the store (for null-ptr checks)
6289 // so the memory operand is used twice in the encoding.
6290 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6291   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6292   match(Set mem (StoreL mem src));
6293   effect( KILL cr );
6294   ins_cost(400);
6295   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6296             "FILD   $src\n\t"
6297             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6298   opcode(0x3B);
6299   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6300   ins_pipe( fpu_reg_mem );
6301 %}
6302 
6303 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6304   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6305   match(Set mem (StoreL mem src));
6306   effect( TEMP tmp, KILL cr );
6307   ins_cost(380);
6308   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6309             "MOVSD  $tmp,$src\n\t"
6310             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6311   ins_encode %{
6312     __ cmpl(rax, $mem$$Address);
6313     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6314     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6315   %}
6316   ins_pipe( pipe_slow );
6317 %}
6318 
6319 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6320   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6321   match(Set mem (StoreL mem src));
6322   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6323   ins_cost(360);
6324   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6325             "MOVD   $tmp,$src.lo\n\t"
6326             "MOVD   $tmp2,$src.hi\n\t"
6327             "PUNPCKLDQ $tmp,$tmp2\n\t"
6328             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6329   ins_encode %{
6330     __ cmpl(rax, $mem$$Address);
6331     __ movdl($tmp$$XMMRegister, $src$$Register);
6332     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6333     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6334     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6335   %}
6336   ins_pipe( pipe_slow );
6337 %}
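
// The three volatile long stores above exist because a pair of 32-bit MOVs is not
// atomic: a concurrent reader could observe one new half and one stale half.  A
// minimal C sketch (illustrative only, not part of the ADL) of the tearing hazard
// that the single 8-byte FIST/MOVSD store avoids:
//
//   #include <stdint.h>
//   volatile uint32_t field_lo, field_hi;    // two halves of a 64-bit Java field
//   void torn_store(uint64_t v) {
//     field_lo = (uint32_t)v;                // a reader scheduled here would see
//     field_hi = (uint32_t)(v >> 32);        // the new low half with the old high half
//   }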
6338 
6339 // Store Pointer; for storing unknown oops and raw pointers
6340 instruct storeP(memory mem, anyRegP src) %{
6341   match(Set mem (StoreP mem src));
6342 
6343   ins_cost(125);
6344   format %{ "MOV    $mem,$src" %}
6345   opcode(0x89);
6346   ins_encode( OpcP, RegMem( src, mem ) );
6347   ins_pipe( ialu_mem_reg );
6348 %}
6349 
6350 // Store Integer Immediate
6351 instruct storeImmI(memory mem, immI src) %{
6352   match(Set mem (StoreI mem src));
6353 
6354   ins_cost(150);
6355   format %{ "MOV    $mem,$src" %}
6356   opcode(0xC7);               /* C7 /0 */
6357   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6358   ins_pipe( ialu_mem_imm );
6359 %}
6360 
6361 // Store Short/Char Immediate
6362 instruct storeImmI16(memory mem, immI16 src) %{
6363   predicate(UseStoreImmI16);
6364   match(Set mem (StoreC mem src));
6365 
6366   ins_cost(150);
6367   format %{ "MOV16  $mem,$src" %}
6368   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6369   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6370   ins_pipe( ialu_mem_imm );
6371 %}
6372 
6373 // Store Pointer Immediate; null pointers or constant oops that do not
6374 // need card-mark barriers.
6375 instruct storeImmP(memory mem, immP src) %{
6376   match(Set mem (StoreP mem src));
6377 
6378   ins_cost(150);
6379   format %{ "MOV    $mem,$src" %}
6380   opcode(0xC7);               /* C7 /0 */
6381   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6382   ins_pipe( ialu_mem_imm );
6383 %}
6384 
6385 // Store Byte Immediate
6386 instruct storeImmB(memory mem, immI8 src) %{
6387   match(Set mem (StoreB mem src));
6388 
6389   ins_cost(150);
6390   format %{ "MOV8   $mem,$src" %}
6391   opcode(0xC6);               /* C6 /0 */
6392   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6393   ins_pipe( ialu_mem_imm );
6394 %}
6395 
6396 // Store CMS card-mark Immediate
6397 instruct storeImmCM(memory mem, immI8 src) %{
6398   match(Set mem (StoreCM mem src));
6399 
6400   ins_cost(150);
6401   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6402   opcode(0xC6);               /* C6 /0 */
6403   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6404   ins_pipe( ialu_mem_imm );
6405 %}
6406 
6407 // Store Double
6408 instruct storeDPR( memory mem, regDPR1 src) %{
6409   predicate(UseSSE<=1);
6410   match(Set mem (StoreD mem src));
6411 
6412   ins_cost(100);
6413   format %{ "FST_D  $mem,$src" %}
6414   opcode(0xDD);       /* DD /2 */
6415   ins_encode( enc_FPR_store(mem,src) );
6416   ins_pipe( fpu_mem_reg );
6417 %}
6418 
6419 // Store double does rounding on x86
6420 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6421   predicate(UseSSE<=1);
6422   match(Set mem (StoreD mem (RoundDouble src)));
6423 
6424   ins_cost(100);
6425   format %{ "FST_D  $mem,$src\t# round" %}
6426   opcode(0xDD);       /* DD /2 */
6427   ins_encode( enc_FPR_store(mem,src) );
6428   ins_pipe( fpu_mem_reg );
6429 %}
6430 
6431 // Store XMM register to memory (double-precision floating point)
6432 // MOVSD instruction
6433 instruct storeD(memory mem, regD src) %{
6434   predicate(UseSSE>=2);
6435   match(Set mem (StoreD mem src));
6436   ins_cost(95);
6437   format %{ "MOVSD  $mem,$src" %}
6438   ins_encode %{
6439     __ movdbl($mem$$Address, $src$$XMMRegister);
6440   %}
6441   ins_pipe( pipe_slow );
6442 %}
6443 
6444 // Store XMM register to memory (single-precision floating point)
6445 // MOVSS instruction
6446 instruct storeF(memory mem, regF src) %{
6447   predicate(UseSSE>=1);
6448   match(Set mem (StoreF mem src));
6449   ins_cost(95);
6450   format %{ "MOVSS  $mem,$src" %}
6451   ins_encode %{
6452     __ movflt($mem$$Address, $src$$XMMRegister);
6453   %}
6454   ins_pipe( pipe_slow );
6455 %}
6456 
6457 // Store Float
6458 instruct storeFPR( memory mem, regFPR1 src) %{
6459   predicate(UseSSE==0);
6460   match(Set mem (StoreF mem src));
6461 
6462   ins_cost(100);
6463   format %{ "FST_S  $mem,$src" %}
6464   opcode(0xD9);       /* D9 /2 */
6465   ins_encode( enc_FPR_store(mem,src) );
6466   ins_pipe( fpu_mem_reg );
6467 %}
6468 
6469 // Store Float does rounding on x86
6470 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6471   predicate(UseSSE==0);
6472   match(Set mem (StoreF mem (RoundFloat src)));
6473 
6474   ins_cost(100);
6475   format %{ "FST_S  $mem,$src\t# round" %}
6476   opcode(0xD9);       /* D9 /2 */
6477   ins_encode( enc_FPR_store(mem,src) );
6478   ins_pipe( fpu_mem_reg );
6479 %}
6480 
6481 // Store Double converted to Float does rounding on x86
6482 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6483   predicate(UseSSE<=1);
6484   match(Set mem (StoreF mem (ConvD2F src)));
6485 
6486   ins_cost(100);
6487   format %{ "FST_S  $mem,$src\t# D-round" %}
6488   opcode(0xD9);       /* D9 /2 */
6489   ins_encode( enc_FPR_store(mem,src) );
6490   ins_pipe( fpu_mem_reg );
6491 %}
6492 
6493 // Store immediate Float value (it is faster than store from FPU register)
6494 // The instruction usage is guarded by predicate in operand immFPR().
6495 instruct storeFPR_imm( memory mem, immFPR src) %{
6496   match(Set mem (StoreF mem src));
6497 
6498   ins_cost(50);
6499   format %{ "MOV    $mem,$src\t# store float" %}
6500   opcode(0xC7);               /* C7 /0 */
6501   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6502   ins_pipe( ialu_mem_imm );
6503 %}
6504 
6505 // Store immediate Float value (it is faster than store from XMM register)
6506 // The instruction usage is guarded by predicate in operand immF().
6507 instruct storeF_imm( memory mem, immF src) %{
6508   match(Set mem (StoreF mem src));
6509 
6510   ins_cost(50);
6511   format %{ "MOV    $mem,$src\t# store float" %}
6512   opcode(0xC7);               /* C7 /0 */
6513   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6514   ins_pipe( ialu_mem_imm );
6515 %}
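
// Both float-immediate stores above are valid because a MOV of the raw 32-bit
// IEEE-754 bit pattern leaves exactly the same bytes in memory as an FPU or XMM
// store of that value.  A minimal C sketch (illustrative only, not part of the ADL)
// of the equivalence:
//
//   #include <stdint.h>
//   #include <string.h>
//   void store_float_as_bits(uint32_t* slot, float f) {
//     uint32_t bits;
//     memcpy(&bits, &f, sizeof bits);   // reinterpret the float's bit pattern
//     *slot = bits;                     // plain integer store, identical bytes
//   }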
6516 
6517 // Store Integer to stack slot
6518 instruct storeSSI(stackSlotI dst, rRegI src) %{
6519   match(Set dst src);
6520 
6521   ins_cost(100);
6522   format %{ "MOV    $dst,$src" %}
6523   opcode(0x89);
6524   ins_encode( OpcPRegSS( dst, src ) );
6525   ins_pipe( ialu_mem_reg );
6526 %}
6527 
6528 // Store Pointer to stack slot
6529 instruct storeSSP(stackSlotP dst, eRegP src) %{
6530   match(Set dst src);
6531 
6532   ins_cost(100);
6533   format %{ "MOV    $dst,$src" %}
6534   opcode(0x89);
6535   ins_encode( OpcPRegSS( dst, src ) );
6536   ins_pipe( ialu_mem_reg );
6537 %}
6538 
6539 // Store Long to stack slot
6540 instruct storeSSL(stackSlotL dst, eRegL src) %{
6541   match(Set dst src);
6542 
6543   ins_cost(200);
6544   format %{ "MOV    $dst,$src.lo\n\t"
6545             "MOV    $dst+4,$src.hi" %}
6546   opcode(0x89, 0x89);
6547   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6548   ins_pipe( ialu_mem_long_reg );
6549 %}
6550 
6551 //----------MemBar Instructions-----------------------------------------------
6552 // Memory barrier flavors
6553 
6554 instruct membar_acquire() %{
6555   match(MemBarAcquire);
6556   match(LoadFence);
6557   ins_cost(400);
6558 
6559   size(0);
6560   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6561   ins_encode();
6562   ins_pipe(empty);
6563 %}
6564 
6565 instruct membar_acquire_lock() %{
6566   match(MemBarAcquireLock);
6567   ins_cost(0);
6568 
6569   size(0);
6570   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6571   ins_encode( );
6572   ins_pipe(empty);
6573 %}
6574 
6575 instruct membar_release() %{
6576   match(MemBarRelease);
6577   match(StoreFence);
6578   ins_cost(400);
6579 
6580   size(0);
6581   format %{ "MEMBAR-release ! (empty encoding)" %}
6582   ins_encode( );
6583   ins_pipe(empty);
6584 %}
6585 
6586 instruct membar_release_lock() %{
6587   match(MemBarReleaseLock);
6588   ins_cost(0);
6589 
6590   size(0);
6591   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6592   ins_encode( );
6593   ins_pipe(empty);
6594 %}
6595 
6596 instruct membar_volatile(eFlagsReg cr) %{
6597   match(MemBarVolatile);
6598   effect(KILL cr);
6599   ins_cost(400);
6600 
6601   format %{
6602     $$template
6603     if (os::is_MP()) {
6604       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6605     } else {
6606       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6607     }
6608   %}
6609   ins_encode %{
6610     __ membar(Assembler::StoreLoad);
6611   %}
6612   ins_pipe(pipe_slow);
6613 %}
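
// On MP systems the membar_volatile above is encoded as LOCK ADDL [ESP + #0],0: a
// locked no-op that acts as a full StoreLoad fence, the only ordering x86 does not
// provide by itself.  A minimal C sketch (illustrative only) of the Dekker-style
// pattern that needs a StoreLoad fence between a store and a later load:
//
//   static volatile int flag1, flag2;
//   int thread1(void) { flag1 = 1; /* StoreLoad fence here */ return flag2; }
//   int thread2(void) { flag2 = 1; /* StoreLoad fence here */ return flag1; }
//
// Without the fence, each load may complete before the other thread's store becomes
// visible, so both calls can return 0.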
6614 
6615 instruct unnecessary_membar_volatile() %{
6616   match(MemBarVolatile);
6617   predicate(Matcher::post_store_load_barrier(n));
6618   ins_cost(0);
6619 
6620   size(0);
6621   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6622   ins_encode( );
6623   ins_pipe(empty);
6624 %}
6625 
6626 instruct membar_storestore() %{
6627   match(MemBarStoreStore);
6628   ins_cost(0);
6629 
6630   size(0);
6631   format %{ "MEMBAR-storestore (empty encoding)" %}
6632   ins_encode( );
6633   ins_pipe(empty);
6634 %}
6635 
6636 //----------Move Instructions--------------------------------------------------
6637 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6638   match(Set dst (CastX2P src));
6639   format %{ "# X2P  $dst, $src" %}
6640   ins_encode( /*empty encoding*/ );
6641   ins_cost(0);
6642   ins_pipe(empty);
6643 %}
6644 
6645 instruct castP2X(rRegI dst, eRegP src ) %{
6646   match(Set dst (CastP2X src));
6647   ins_cost(50);
6648   format %{ "MOV    $dst, $src\t# CastP2X" %}
6649   ins_encode( enc_Copy( dst, src) );
6650   ins_pipe( ialu_reg_reg );
6651 %}
6652 
6653 //----------Conditional Move---------------------------------------------------
6654 // Conditional move
6655 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6656   predicate(!VM_Version::supports_cmov() );
6657   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6658   ins_cost(200);
6659   format %{ "J$cop,us skip\t# signed cmove\n\t"
6660             "MOV    $dst,$src\n"
6661       "skip:" %}
6662   ins_encode %{
6663     Label Lskip;
6664     // Invert sense of branch from sense of CMOV
6665     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6666     __ movl($dst$$Register, $src$$Register);
6667     __ bind(Lskip);
6668   %}
6669   ins_pipe( pipe_cmov_reg );
6670 %}
6671 
6672 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6673   predicate(!VM_Version::supports_cmov() );
6674   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6675   ins_cost(200);
6676   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6677             "MOV    $dst,$src\n"
6678       "skip:" %}
6679   ins_encode %{
6680     Label Lskip;
6681     // Invert sense of branch from sense of CMOV
6682     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6683     __ movl($dst$$Register, $src$$Register);
6684     __ bind(Lskip);
6685   %}
6686   ins_pipe( pipe_cmov_reg );
6687 %}
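
// When CMOV is not supported, the two instructs above emulate it with a conditional
// branch of the *inverted* sense around a plain MOV.  A minimal C sketch
// (illustrative only) of the transformation:
//
//   int cmove_emulated(int cond, int dst, int src) {
//     // hardware CMOV would compute: dst = cond ? src : dst
//     if (cond) {          // emitted as: jcc !cond, skip
//       dst = src;         //             mov  dst, src
//     }                    // skip:
//     return dst;
//   }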
6688 
6689 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6690   predicate(VM_Version::supports_cmov() );
6691   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6692   ins_cost(200);
6693   format %{ "CMOV$cop $dst,$src" %}
6694   opcode(0x0F,0x40);
6695   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6696   ins_pipe( pipe_cmov_reg );
6697 %}
6698 
6699 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6700   predicate(VM_Version::supports_cmov() );
6701   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6702   ins_cost(200);
6703   format %{ "CMOV$cop $dst,$src" %}
6704   opcode(0x0F,0x40);
6705   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6706   ins_pipe( pipe_cmov_reg );
6707 %}
6708 
6709 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6710   predicate(VM_Version::supports_cmov() );
6711   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6712   ins_cost(200);
6713   expand %{
6714     cmovI_regU(cop, cr, dst, src);
6715   %}
6716 %}
6717 
6718 // Conditional move
6719 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6720   predicate(VM_Version::supports_cmov() );
6721   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6722   ins_cost(250);
6723   format %{ "CMOV$cop $dst,$src" %}
6724   opcode(0x0F,0x40);
6725   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6726   ins_pipe( pipe_cmov_mem );
6727 %}
6728 
6729 // Conditional move
6730 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6731   predicate(VM_Version::supports_cmov() );
6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6733   ins_cost(250);
6734   format %{ "CMOV$cop $dst,$src" %}
6735   opcode(0x0F,0x40);
6736   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6737   ins_pipe( pipe_cmov_mem );
6738 %}
6739 
6740 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6741   predicate(VM_Version::supports_cmov() );
6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6743   ins_cost(250);
6744   expand %{
6745     cmovI_memU(cop, cr, dst, src);
6746   %}
6747 %}
6748 
6749 // Conditional move
6750 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6751   predicate(VM_Version::supports_cmov() );
6752   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6753   ins_cost(200);
6754   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6755   opcode(0x0F,0x40);
6756   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6757   ins_pipe( pipe_cmov_reg );
6758 %}
6759 
6760 // Conditional move (non-P6 version)
6761 // Note: a CMoveP is generated for stubs and native wrappers
6762 //        regardless of whether we are on a P6, so we
6763 //        emulate a cmov here
6764 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6765   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6766   ins_cost(300);
6767   format %{ "Jn$cop   skip\n\t"
6768           "MOV    $dst,$src\t# pointer\n"
6769       "skip:" %}
6770   opcode(0x8b);
6771   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6772   ins_pipe( pipe_cmov_reg );
6773 %}
6774 
6775 // Conditional move
6776 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6777   predicate(VM_Version::supports_cmov() );
6778   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6779   ins_cost(200);
6780   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6781   opcode(0x0F,0x40);
6782   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6783   ins_pipe( pipe_cmov_reg );
6784 %}
6785 
6786 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6787   predicate(VM_Version::supports_cmov() );
6788   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6789   ins_cost(200);
6790   expand %{
6791     cmovP_regU(cop, cr, dst, src);
6792   %}
6793 %}
6794 
6795 // DISABLED: Requires the ADLC to emit a bottom_type call that
6796 // correctly meets the two pointer arguments; one is an incoming
6797 // register but the other is a memory operand.  ALSO appears to
6798 // be buggy with implicit null checks.
6799 //
6800 //// Conditional move
6801 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6802 //  predicate(VM_Version::supports_cmov() );
6803 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6804 //  ins_cost(250);
6805 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6806 //  opcode(0x0F,0x40);
6807 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6808 //  ins_pipe( pipe_cmov_mem );
6809 //%}
6810 //
6811 //// Conditional move
6812 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6813 //  predicate(VM_Version::supports_cmov() );
6814 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6815 //  ins_cost(250);
6816 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6817 //  opcode(0x0F,0x40);
6818 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6819 //  ins_pipe( pipe_cmov_mem );
6820 //%}
6821 
6822 // Conditional move
6823 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6824   predicate(UseSSE<=1);
6825   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6826   ins_cost(200);
6827   format %{ "FCMOV$cop $dst,$src\t# double" %}
6828   opcode(0xDA);
6829   ins_encode( enc_cmov_dpr(cop,src) );
6830   ins_pipe( pipe_cmovDPR_reg );
6831 %}
6832 
6833 // Conditional move
6834 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6835   predicate(UseSSE==0);
6836   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6837   ins_cost(200);
6838   format %{ "FCMOV$cop $dst,$src\t# float" %}
6839   opcode(0xDA);
6840   ins_encode( enc_cmov_dpr(cop,src) );
6841   ins_pipe( pipe_cmovDPR_reg );
6842 %}
6843 
6844 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6845 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6846   predicate(UseSSE<=1);
6847   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6848   ins_cost(200);
6849   format %{ "Jn$cop   skip\n\t"
6850             "MOV    $dst,$src\t# double\n"
6851       "skip:" %}
6852   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6853   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6854   ins_pipe( pipe_cmovDPR_reg );
6855 %}
6856 
6857 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6858 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6859   predicate(UseSSE==0);
6860   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6861   ins_cost(200);
6862   format %{ "Jn$cop    skip\n\t"
6863             "MOV    $dst,$src\t# float\n"
6864       "skip:" %}
6865   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6866   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6867   ins_pipe( pipe_cmovDPR_reg );
6868 %}
6869 
6870 // No CMOV for XMM registers with SSE/SSE2, so emulate with a short branch
6871 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6872   predicate (UseSSE>=1);
6873   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6874   ins_cost(200);
6875   format %{ "Jn$cop   skip\n\t"
6876             "MOVSS  $dst,$src\t# float\n"
6877       "skip:" %}
6878   ins_encode %{
6879     Label skip;
6880     // Invert sense of branch from sense of CMOV
6881     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6882     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6883     __ bind(skip);
6884   %}
6885   ins_pipe( pipe_slow );
6886 %}
6887 
6888 // No CMOV for XMM registers with SSE/SSE2, so emulate with a short branch
6889 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6890   predicate (UseSSE>=2);
6891   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6892   ins_cost(200);
6893   format %{ "Jn$cop   skip\n\t"
6894             "MOVSD  $dst,$src\t# double\n"
6895       "skip:" %}
6896   ins_encode %{
6897     Label skip;
6898     // Invert sense of branch from sense of CMOV
6899     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6900     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6901     __ bind(skip);
6902   %}
6903   ins_pipe( pipe_slow );
6904 %}
6905 
6906 // unsigned version
6907 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6908   predicate (UseSSE>=1);
6909   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6910   ins_cost(200);
6911   format %{ "Jn$cop   skip\n\t"
6912             "MOVSS  $dst,$src\t# float\n"
6913       "skip:" %}
6914   ins_encode %{
6915     Label skip;
6916     // Invert sense of branch from sense of CMOV
6917     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6918     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6919     __ bind(skip);
6920   %}
6921   ins_pipe( pipe_slow );
6922 %}
6923 
6924 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6925   predicate (UseSSE>=1);
6926   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6927   ins_cost(200);
6928   expand %{
6929     fcmovF_regU(cop, cr, dst, src);
6930   %}
6931 %}
6932 
6933 // unsigned version
6934 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6935   predicate (UseSSE>=2);
6936   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6937   ins_cost(200);
6938   format %{ "Jn$cop   skip\n\t"
6939             "MOVSD  $dst,$src\t# double\n"
6940       "skip:" %}
6941   ins_encode %{
6942     Label skip;
6943     // Invert sense of branch from sense of CMOV
6944     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6945     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6946     __ bind(skip);
6947   %}
6948   ins_pipe( pipe_slow );
6949 %}
6950 
6951 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6952   predicate (UseSSE>=2);
6953   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6954   ins_cost(200);
6955   expand %{
6956     fcmovD_regU(cop, cr, dst, src);
6957   %}
6958 %}
6959 
6960 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6961   predicate(VM_Version::supports_cmov() );
6962   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6963   ins_cost(200);
6964   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6965             "CMOV$cop $dst.hi,$src.hi" %}
6966   opcode(0x0F,0x40);
6967   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6968   ins_pipe( pipe_cmov_reg_long );
6969 %}
6970 
6971 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6972   predicate(VM_Version::supports_cmov() );
6973   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6974   ins_cost(200);
6975   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6976             "CMOV$cop $dst.hi,$src.hi" %}
6977   opcode(0x0F,0x40);
6978   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6979   ins_pipe( pipe_cmov_reg_long );
6980 %}
6981 
6982 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6983   predicate(VM_Version::supports_cmov() );
6984   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6985   ins_cost(200);
6986   expand %{
6987     cmovL_regU(cop, cr, dst, src);
6988   %}
6989 %}
6990 
6991 //----------Arithmetic Instructions--------------------------------------------
6992 //----------Addition Instructions----------------------------------------------
6993 
6994 // Integer Addition Instructions
6995 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
6996   match(Set dst (AddI dst src));
6997   effect(KILL cr);
6998 
6999   size(2);
7000   format %{ "ADD    $dst,$src" %}
7001   opcode(0x03);
7002   ins_encode( OpcP, RegReg( dst, src) );
7003   ins_pipe( ialu_reg_reg );
7004 %}
7005 
7006 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7007   match(Set dst (AddI dst src));
7008   effect(KILL cr);
7009 
7010   format %{ "ADD    $dst,$src" %}
7011   opcode(0x81, 0x00); /* /0 id */
7012   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7013   ins_pipe( ialu_reg );
7014 %}
7015 
7016 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7017   predicate(UseIncDec);
7018   match(Set dst (AddI dst src));
7019   effect(KILL cr);
7020 
7021   size(1);
7022   format %{ "INC    $dst" %}
7023   opcode(0x40); /*  */
7024   ins_encode( Opc_plus( primary, dst ) );
7025   ins_pipe( ialu_reg );
7026 %}
7027 
7028 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7029   match(Set dst (AddI src0 src1));
7030   ins_cost(110);
7031 
7032   format %{ "LEA    $dst,[$src0 + $src1]" %}
7033   opcode(0x8D); /* 0x8D /r */
7034   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7035   ins_pipe( ialu_reg_reg );
7036 %}
7037 
7038 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7039   match(Set dst (AddP src0 src1));
7040   ins_cost(110);
7041 
7042   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7043   opcode(0x8D); /* 0x8D /r */
7044   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7045   ins_pipe( ialu_reg_reg );
7046 %}
7047 
7048 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7049   predicate(UseIncDec);
7050   match(Set dst (AddI dst src));
7051   effect(KILL cr);
7052 
7053   size(1);
7054   format %{ "DEC    $dst" %}
7055   opcode(0x48); /*  */
7056   ins_encode( Opc_plus( primary, dst ) );
7057   ins_pipe( ialu_reg );
7058 %}
7059 
7060 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7061   match(Set dst (AddP dst src));
7062   effect(KILL cr);
7063 
7064   size(2);
7065   format %{ "ADD    $dst,$src" %}
7066   opcode(0x03);
7067   ins_encode( OpcP, RegReg( dst, src) );
7068   ins_pipe( ialu_reg_reg );
7069 %}
7070 
7071 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7072   match(Set dst (AddP dst src));
7073   effect(KILL cr);
7074 
7075   format %{ "ADD    $dst,$src" %}
7076   opcode(0x81,0x00); /* Opcode 81 /0 id */
7077   // ins_encode( RegImm( dst, src) );
7078   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7079   ins_pipe( ialu_reg );
7080 %}
7081 
7082 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7083   match(Set dst (AddI dst (LoadI src)));
7084   effect(KILL cr);
7085 
7086   ins_cost(125);
7087   format %{ "ADD    $dst,$src" %}
7088   opcode(0x03);
7089   ins_encode( OpcP, RegMem( dst, src) );
7090   ins_pipe( ialu_reg_mem );
7091 %}
7092 
7093 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7094   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7095   effect(KILL cr);
7096 
7097   ins_cost(150);
7098   format %{ "ADD    $dst,$src" %}
7099   opcode(0x01);  /* Opcode 01 /r */
7100   ins_encode( OpcP, RegMem( src, dst ) );
7101   ins_pipe( ialu_mem_reg );
7102 %}
7103 
7104 // Add Memory with Immediate
7105 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7106   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7107   effect(KILL cr);
7108 
7109   ins_cost(125);
7110   format %{ "ADD    $dst,$src" %}
7111   opcode(0x81);               /* Opcode 81 /0 id */
7112   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7113   ins_pipe( ialu_mem_imm );
7114 %}
7115 
7116 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7117   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7118   effect(KILL cr);
7119 
7120   ins_cost(125);
7121   format %{ "INC    $dst" %}
7122   opcode(0xFF);               /* Opcode FF /0 */
7123   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7124   ins_pipe( ialu_mem_imm );
7125 %}
7126 
7127 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7128   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7129   effect(KILL cr);
7130 
7131   ins_cost(125);
7132   format %{ "DEC    $dst" %}
7133   opcode(0xFF);               /* Opcode FF /1 */
7134   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7135   ins_pipe( ialu_mem_imm );
7136 %}
7137 
7138 
7139 instruct checkCastPP( eRegP dst ) %{
7140   match(Set dst (CheckCastPP dst));
7141 
7142   size(0);
7143   format %{ "#checkcastPP of $dst" %}
7144   ins_encode( /*empty encoding*/ );
7145   ins_pipe( empty );
7146 %}
7147 
7148 instruct castPP( eRegP dst ) %{
7149   match(Set dst (CastPP dst));
7150   format %{ "#castPP of $dst" %}
7151   ins_encode( /*empty encoding*/ );
7152   ins_pipe( empty );
7153 %}
7154 
7155 instruct castII( rRegI dst ) %{
7156   match(Set dst (CastII dst));
7157   format %{ "#castII of $dst" %}
7158   ins_encode( /*empty encoding*/ );
7159   ins_cost(0);
7160   ins_pipe( empty );
7161 %}
7162 
7163 
7164 // Load-locked - same as a regular pointer load when used with compare-swap
7165 instruct loadPLocked(eRegP dst, memory mem) %{
7166   match(Set dst (LoadPLocked mem));
7167 
7168   ins_cost(125);
7169   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7170   opcode(0x8B);
7171   ins_encode( OpcP, RegMem(dst,mem));
7172   ins_pipe( ialu_reg_mem );
7173 %}
7174 
7175 // Conditional-store of the updated heap-top.
7176 // Used during allocation of the shared heap.
7177 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7178 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7179   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7180   // EAX is killed if there is contention, but then it's also unused.
7181   // In the common case of no contention, EAX holds the new oop address.
7182   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7183   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7184   ins_pipe( pipe_cmpxchg );
7185 %}
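
// The conditional store above leans on CMPXCHG semantics: the new top is stored only
// if the location still holds the value loaded into EAX, and ZF reports the outcome.
// A minimal C sketch (illustrative only; the real instruction performs this
// atomically) of what the node computes:
//
//   #include <stdbool.h>
//   bool store_p_conditional(void** heap_top, void* expected, void* new_top) {
//     if (*heap_top == expected) {   // compare against the previously loaded top
//       *heap_top = new_top;         // publish the bumped allocation pointer
//       return true;                 // ZF set: this thread won the race
//     }
//     return false;                  // ZF clear: retry with a fresh heap top
//   }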
7186 
7187 // Conditional-store of an int value.
7188 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7189 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7190   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7191   effect(KILL oldval);
7192   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7193   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7194   ins_pipe( pipe_cmpxchg );
7195 %}
7196 
7197 // Conditional-store of a long value.
7198 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7199 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7200   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7201   effect(KILL oldval);
7202   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7203             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7204             "XCHG   EBX,ECX"
7205   %}
7206   ins_encode %{
7207     // Note: we need to swap rbx, and rcx before and after the
7208     //       cmpxchg8 instruction because the instruction uses
7209     //       rcx as the high order word of the new value to store but
7210     //       our register encoding uses rbx.
7211     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7212     if( os::is_MP() )
7213       __ lock();
7214     __ cmpxchg8($mem$$Address);
7215     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7216   %}
7217   ins_pipe( pipe_cmpxchg );
7218 %}
7219 
7220 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7221 
7222 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7223   predicate(VM_Version::supports_cx8());
7224   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7225   effect(KILL cr, KILL oldval);
7226   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7227             "MOV    $res,0\n\t"
7228             "JNE,s  fail\n\t"
7229             "MOV    $res,1\n"
7230           "fail:" %}
7231   ins_encode( enc_cmpxchg8(mem_ptr),
7232               enc_flags_ne_to_boolean(res) );
7233   ins_pipe( pipe_cmpxchg );
7234 %}
7235 
7236 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7237   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7238   effect(KILL cr, KILL oldval);
7239   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7240             "MOV    $res,0\n\t"
7241             "JNE,s  fail\n\t"
7242             "MOV    $res,1\n"
7243           "fail:" %}
7244   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7245   ins_pipe( pipe_cmpxchg );
7246 %}
7247 
7248 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7249   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7250   effect(KILL cr, KILL oldval);
7251   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7252             "MOV    $res,0\n\t"
7253             "JNE,s  fail\n\t"
7254             "MOV    $res,1\n"
7255           "fail:" %}
7256   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7257   ins_pipe( pipe_cmpxchg );
7258 %}
7259 
7260 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7261   predicate(n->as_LoadStore()->result_not_used());
7262   match(Set dummy (GetAndAddI mem add));
7263   effect(KILL cr);
7264   format %{ "ADDL  [$mem],$add" %}
7265   ins_encode %{
7266     if (os::is_MP()) { __ lock(); }
7267     __ addl($mem$$Address, $add$$constant);
7268   %}
7269   ins_pipe( pipe_cmpxchg );
7270 %}
7271 
7272 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7273   match(Set newval (GetAndAddI mem newval));
7274   effect(KILL cr);
7275   format %{ "XADDL  [$mem],$newval" %}
7276   ins_encode %{
7277     if (os::is_MP()) { __ lock(); }
7278     __ xaddl($mem$$Address, $newval$$Register);
7279   %}
7280   ins_pipe( pipe_cmpxchg );
7281 %}
7282 
7283 instruct xchgI( memory mem, rRegI newval) %{
7284   match(Set newval (GetAndSetI mem newval));
7285   format %{ "XCHGL  $newval,[$mem]" %}
7286   ins_encode %{
7287     __ xchgl($newval$$Register, $mem$$Address);
7288   %}
7289   ins_pipe( pipe_cmpxchg );
7290 %}
7291 
7292 instruct xchgP( memory mem, pRegP newval) %{
7293   match(Set newval (GetAndSetP mem newval));
7294   format %{ "XCHGL  $newval,[$mem]" %}
7295   ins_encode %{
7296     __ xchgl($newval$$Register, $mem$$Address);
7297   %}
7298   ins_pipe( pipe_cmpxchg );
7299 %}
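
// The XADD/XCHG instructs above implement atomic fetch-and-add and atomic swap; XCHG
// with a memory operand is implicitly locked, while XADD needs an explicit LOCK
// prefix on MP systems.  A minimal C11 sketch (illustrative only, assuming
// <stdatomic.h>) of the same semantics:
//
//   #include <stdatomic.h>
//   int fetch_and_add(atomic_int* p, int add)    { return atomic_fetch_add(p, add); }    // LOCK XADDL
//   int get_and_set  (atomic_int* p, int newval) { return atomic_exchange(p, newval); }  // XCHGL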
7300 
7301 //----------Subtraction Instructions-------------------------------------------
7302 
7303 // Integer Subtraction Instructions
7304 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7305   match(Set dst (SubI dst src));
7306   effect(KILL cr);
7307 
7308   size(2);
7309   format %{ "SUB    $dst,$src" %}
7310   opcode(0x2B);
7311   ins_encode( OpcP, RegReg( dst, src) );
7312   ins_pipe( ialu_reg_reg );
7313 %}
7314 
7315 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7316   match(Set dst (SubI dst src));
7317   effect(KILL cr);
7318 
7319   format %{ "SUB    $dst,$src" %}
7320   opcode(0x81,0x05);  /* Opcode 81 /5 */
7321   // ins_encode( RegImm( dst, src) );
7322   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7323   ins_pipe( ialu_reg );
7324 %}
7325 
7326 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7327   match(Set dst (SubI dst (LoadI src)));
7328   effect(KILL cr);
7329 
7330   ins_cost(125);
7331   format %{ "SUB    $dst,$src" %}
7332   opcode(0x2B);
7333   ins_encode( OpcP, RegMem( dst, src) );
7334   ins_pipe( ialu_reg_mem );
7335 %}
7336 
7337 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7338   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7339   effect(KILL cr);
7340 
7341   ins_cost(150);
7342   format %{ "SUB    $dst,$src" %}
7343   opcode(0x29);  /* Opcode 29 /r */
7344   ins_encode( OpcP, RegMem( src, dst ) );
7345   ins_pipe( ialu_mem_reg );
7346 %}
7347 
7348 // Subtract from a pointer
7349 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7350   match(Set dst (AddP dst (SubI zero src)));
7351   effect(KILL cr);
7352 
7353   size(2);
7354   format %{ "SUB    $dst,$src" %}
7355   opcode(0x2B);
7356   ins_encode( OpcP, RegReg( dst, src) );
7357   ins_pipe( ialu_reg_reg );
7358 %}
7359 
7360 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7361   match(Set dst (SubI zero dst));
7362   effect(KILL cr);
7363 
7364   size(2);
7365   format %{ "NEG    $dst" %}
7366   opcode(0xF7,0x03);  // Opcode F7 /3
7367   ins_encode( OpcP, RegOpc( dst ) );
7368   ins_pipe( ialu_reg );
7369 %}
7370 
7371 //----------Multiplication/Division Instructions-------------------------------
7372 // Integer Multiplication Instructions
7373 // Multiply Register
7374 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7375   match(Set dst (MulI dst src));
7376   effect(KILL cr);
7377 
7378   size(3);
7379   ins_cost(300);
7380   format %{ "IMUL   $dst,$src" %}
7381   opcode(0xAF, 0x0F);
7382   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7383   ins_pipe( ialu_reg_reg_alu0 );
7384 %}
7385 
7386 // Multiply 32-bit Immediate
7387 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7388   match(Set dst (MulI src imm));
7389   effect(KILL cr);
7390 
7391   ins_cost(300);
7392   format %{ "IMUL   $dst,$src,$imm" %}
7393   opcode(0x69);  /* 69 /r id */
7394   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7395   ins_pipe( ialu_reg_reg_alu0 );
7396 %}
7397 
7398 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7399   match(Set dst src);
7400   effect(KILL cr);
7401 
7402   // Note that this is artificially increased to make it more expensive than loadConL
7403   ins_cost(250);
7404   format %{ "MOV    EAX,$src\t// low word only" %}
7405   opcode(0xB8);
7406   ins_encode( LdImmL_Lo(dst, src) );
7407   ins_pipe( ialu_reg_fat );
7408 %}
7409 
7410 // Multiply by 32-bit Immediate, taking the shifted high order results
7411 //  (special case for shift by 32)
7412 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7413   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7414   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7415              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7416              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7417   effect(USE src1, KILL cr);
7418 
7419   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7420   ins_cost(0*100 + 1*400 - 150);
7421   format %{ "IMUL   EDX:EAX,$src1" %}
7422   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7423   ins_pipe( pipe_slow );
7424 %}
7425 
7426 // Multiply by 32-bit Immediate, taking the shifted high order results
7427 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7428   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7429   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7430              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7431              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7432   effect(USE src1, KILL cr);
7433 
7434   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7435   ins_cost(1*100 + 1*400 - 150);
7436   format %{ "IMUL   EDX:EAX,$src1\n\t"
7437             "SAR    EDX,$cnt-32" %}
7438   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7439   ins_pipe( pipe_slow );
7440 %}
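
// The two instructs above match (int)(((long)src1 * con) >> cnt) for 32 <= cnt <= 63.
// One-operand IMUL leaves the full 64-bit product in EDX:EAX, so a shift by exactly
// 32 is just "take EDX", and a larger shift becomes SAR EDX, cnt-32.  A minimal C
// sketch (illustrative only) of the value being computed:
//
//   #include <stdint.h>
//   int32_t mul_shift_high(int32_t src1, int32_t con, int cnt) {  // 32 <= cnt <= 63
//     int64_t prod = (int64_t)src1 * (int64_t)con;                // IMUL -> EDX:EAX
//     int32_t hi   = (int32_t)(prod >> 32);                       // EDX
//     return hi >> (cnt - 32);                                    // SAR EDX, cnt-32 (arithmetic shift assumed)
//   }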
7441 
7442 // Multiply Memory 32-bit Immediate
7443 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7444   match(Set dst (MulI (LoadI src) imm));
7445   effect(KILL cr);
7446 
7447   ins_cost(300);
7448   format %{ "IMUL   $dst,$src,$imm" %}
7449   opcode(0x69);  /* 69 /r id */
7450   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7451   ins_pipe( ialu_reg_mem_alu0 );
7452 %}
7453 
7454 // Multiply Memory
7455 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7456   match(Set dst (MulI dst (LoadI src)));
7457   effect(KILL cr);
7458 
7459   ins_cost(350);
7460   format %{ "IMUL   $dst,$src" %}
7461   opcode(0xAF, 0x0F);
7462   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7463   ins_pipe( ialu_reg_mem_alu0 );
7464 %}
7465 
7466 // Multiply Register Int to Long
7467 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7468   // Basic Idea: long = (long)int * (long)int
7469   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7470   effect(DEF dst, USE src, USE src1, KILL flags);
7471 
7472   ins_cost(300);
7473   format %{ "IMUL   $dst,$src1" %}
7474 
7475   ins_encode( long_int_multiply( dst, src1 ) );
7476   ins_pipe( ialu_reg_reg_alu0 );
7477 %}
7478 
7479 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7480   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7481   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7482   effect(KILL flags);
7483 
7484   ins_cost(300);
7485   format %{ "MUL    $dst,$src1" %}
7486 
7487   ins_encode( long_uint_multiply(dst, src1) );
7488   ins_pipe( ialu_reg_reg_alu0 );
7489 %}
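
// mulI2L uses one-operand IMUL (signed) and mulIS_eReg uses one-operand MUL
// (unsigned); both widen two 32-bit operands into a 64-bit product in EDX:EAX.  A
// minimal C sketch (illustrative only) of the two widenings:
//
//   #include <stdint.h>
//   int64_t  widen_mul_signed  (int32_t a,  int32_t b)  { return (int64_t)a  * (int64_t)b;  }  // IMUL
//   uint64_t widen_mul_unsigned(uint32_t a, uint32_t b) { return (uint64_t)a * (uint64_t)b; }  // MUL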
7490 
7491 // Multiply Register Long
7492 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7493   match(Set dst (MulL dst src));
7494   effect(KILL cr, TEMP tmp);
7495   ins_cost(4*100+3*400);
7496 // Basic idea: lo(result) = lo(x_lo * y_lo)
7497 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7498   format %{ "MOV    $tmp,$src.lo\n\t"
7499             "IMUL   $tmp,EDX\n\t"
7500             "MOV    EDX,$src.hi\n\t"
7501             "IMUL   EDX,EAX\n\t"
7502             "ADD    $tmp,EDX\n\t"
7503             "MUL    EDX:EAX,$src.lo\n\t"
7504             "ADD    EDX,$tmp" %}
7505   ins_encode( long_multiply( dst, src, tmp ) );
7506   ins_pipe( pipe_slow );
7507 %}
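
// A minimal C sketch (illustrative only) of the "Basic idea" decomposition above:
// only the low 32 bits of the two cross products can reach the high word, since
// their upper halves would belong to bits 64..127 of the full product.
//
//   #include <stdint.h>
//   uint64_t mul64(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
//     uint64_t lo_prod = (uint64_t)x_lo * y_lo;       // MUL: full 64-bit low product
//     uint32_t hi = (uint32_t)(lo_prod >> 32)
//                 + x_lo * y_hi                       // IMUL: low 32 bits only
//                 + x_hi * y_lo;                      // IMUL: low 32 bits only
//     return ((uint64_t)hi << 32) | (uint32_t)lo_prod;
//   }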
7508 
7509 // Multiply Register Long where the left operand's high 32 bits are zero
7510 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7511   predicate(is_operand_hi32_zero(n->in(1)));
7512   match(Set dst (MulL dst src));
7513   effect(KILL cr, TEMP tmp);
7514   ins_cost(2*100+2*400);
7515 // Basic idea: lo(result) = lo(x_lo * y_lo)
7516 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7517   format %{ "MOV    $tmp,$src.hi\n\t"
7518             "IMUL   $tmp,EAX\n\t"
7519             "MUL    EDX:EAX,$src.lo\n\t"
7520             "ADD    EDX,$tmp" %}
7521   ins_encode %{
7522     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7523     __ imull($tmp$$Register, rax);
7524     __ mull($src$$Register);
7525     __ addl(rdx, $tmp$$Register);
7526   %}
7527   ins_pipe( pipe_slow );
7528 %}
7529 
7530 // Multiply Register Long where the right operand's high 32 bits are zero
7531 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7532   predicate(is_operand_hi32_zero(n->in(2)));
7533   match(Set dst (MulL dst src));
7534   effect(KILL cr, TEMP tmp);
7535   ins_cost(2*100+2*400);
7536 // Basic idea: lo(result) = lo(x_lo * y_lo)
7537 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7538   format %{ "MOV    $tmp,$src.lo\n\t"
7539             "IMUL   $tmp,EDX\n\t"
7540             "MUL    EDX:EAX,$src.lo\n\t"
7541             "ADD    EDX,$tmp" %}
7542   ins_encode %{
7543     __ movl($tmp$$Register, $src$$Register);
7544     __ imull($tmp$$Register, rdx);
7545     __ mull($src$$Register);
7546     __ addl(rdx, $tmp$$Register);
7547   %}
7548   ins_pipe( pipe_slow );
7549 %}
7550 
7551 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7552 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7553   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7554   match(Set dst (MulL dst src));
7555   effect(KILL cr);
7556   ins_cost(1*400);
7557 // Basic idea: lo(result) = lo(x_lo * y_lo)
7558 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7559   format %{ "MUL    EDX:EAX,$src.lo" %}
7560   ins_encode %{
7561     __ mull($src$$Register);
7562   %}
7563   ins_pipe( pipe_slow );
7564 %}
7565 
7566 // Multiply Register Long by small constant
7567 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7568   match(Set dst (MulL dst src));
7569   effect(KILL cr, TEMP tmp);
7570   ins_cost(2*100+2*400);
7571   size(12);
7572 // Basic idea: lo(result) = lo(src * EAX)
7573 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7574   format %{ "IMUL   $tmp,EDX,$src\n\t"
7575             "MOV    EDX,$src\n\t"
7576             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7577             "ADD    EDX,$tmp" %}
7578   ins_encode( long_multiply_con( dst, src, tmp ) );
7579   ins_pipe( pipe_slow );
7580 %}
7581 
7582 // Integer DIV with Register
7583 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7584   match(Set rax (DivI rax div));
7585   effect(KILL rdx, KILL cr);
7586   size(26);
7587   ins_cost(30*100+10*100);
7588   format %{ "CMP    EAX,0x80000000\n\t"
7589             "JNE,s  normal\n\t"
7590             "XOR    EDX,EDX\n\t"
7591             "CMP    ECX,-1\n\t"
7592             "JE,s   done\n"
7593     "normal: CDQ\n\t"
7594             "IDIV   $div\n\t"
7595     "done:"        %}
7596   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7597   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7598   ins_pipe( ialu_reg_reg_alu0 );
7599 %}
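
// The compare/branch prologue above guards IDIV against the one overflowing case,
// min_jint / -1: IDIV would raise #DE, while Java defines that quotient as min_jint
// (with remainder 0).  A minimal C sketch (illustrative only) of the guarded
// division:
//
//   #include <limits.h>
//   int java_idiv(int dividend, int divisor) {   // divisor != 0 is checked elsewhere
//     if (dividend == INT_MIN && divisor == -1) {
//       return INT_MIN;                          // -INT_MIN is not representable
//     }
//     return dividend / divisor;                 // CDQ + IDIV
//   }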
7600 
7601 // Divide Register Long
7602 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7603   match(Set dst (DivL src1 src2));
7604   effect( KILL cr, KILL cx, KILL bx );
7605   ins_cost(10000);
7606   format %{ "PUSH   $src1.hi\n\t"
7607             "PUSH   $src1.lo\n\t"
7608             "PUSH   $src2.hi\n\t"
7609             "PUSH   $src2.lo\n\t"
7610             "CALL   SharedRuntime::ldiv\n\t"
7611             "ADD    ESP,16" %}
7612   ins_encode( long_div(src1,src2) );
7613   ins_pipe( pipe_slow );
7614 %}
7615 
7616 // Integer DIVMOD with Register, both quotient and mod results
7617 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7618   match(DivModI rax div);
7619   effect(KILL cr);
7620   size(26);
7621   ins_cost(30*100+10*100);
7622   format %{ "CMP    EAX,0x80000000\n\t"
7623             "JNE,s  normal\n\t"
7624             "XOR    EDX,EDX\n\t"
7625             "CMP    ECX,-1\n\t"
7626             "JE,s   done\n"
7627     "normal: CDQ\n\t"
7628             "IDIV   $div\n\t"
7629     "done:"        %}
7630   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7631   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7632   ins_pipe( pipe_slow );
7633 %}
7634 
7635 // Integer MOD with Register
7636 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7637   match(Set rdx (ModI rax div));
7638   effect(KILL rax, KILL cr);
7639 
7640   size(26);
7641   ins_cost(300);
7642   format %{ "CDQ\n\t"
7643             "IDIV   $div" %}
7644   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7645   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7646   ins_pipe( ialu_reg_reg_alu0 );
7647 %}
7648 
7649 // Remainder Register Long
7650 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7651   match(Set dst (ModL src1 src2));
7652   effect( KILL cr, KILL cx, KILL bx );
7653   ins_cost(10000);
7654   format %{ "PUSH   $src1.hi\n\t"
7655             "PUSH   $src1.lo\n\t"
7656             "PUSH   $src2.hi\n\t"
7657             "PUSH   $src2.lo\n\t"
7658             "CALL   SharedRuntime::lrem\n\t"
7659             "ADD    ESP,16" %}
7660   ins_encode( long_mod(src1,src2) );
7661   ins_pipe( pipe_slow );
7662 %}
7663 
7664 // Divide Register Long (no special case since divisor != -1)
7665 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7666   match(Set dst (DivL dst imm));
7667   effect( TEMP tmp, TEMP tmp2, KILL cr );
7668   ins_cost(1000);
7669   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7670             "XOR    $tmp2,$tmp2\n\t"
7671             "CMP    $tmp,EDX\n\t"
7672             "JA,s   fast\n\t"
7673             "MOV    $tmp2,EAX\n\t"
7674             "MOV    EAX,EDX\n\t"
7675             "MOV    EDX,0\n\t"
7676             "JLE,s  pos\n\t"
7677             "LNEG   EAX : $tmp2\n\t"
7678             "DIV    $tmp # unsigned division\n\t"
7679             "XCHG   EAX,$tmp2\n\t"
7680             "DIV    $tmp\n\t"
7681             "LNEG   $tmp2 : EAX\n\t"
7682             "JMP,s  done\n"
7683     "pos:\n\t"
7684             "DIV    $tmp\n\t"
7685             "XCHG   EAX,$tmp2\n"
7686     "fast:\n\t"
7687             "DIV    $tmp\n"
7688     "done:\n\t"
7689             "MOV    EDX,$tmp2\n\t"
7690             "NEG    EDX:EAX # if $imm < 0" %}
7691   ins_encode %{
7692     int con = (int)$imm$$constant;
7693     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7694     int pcon = (con > 0) ? con : -con;
7695     Label Lfast, Lpos, Ldone;
7696 
7697     __ movl($tmp$$Register, pcon);
7698     __ xorl($tmp2$$Register,$tmp2$$Register);
7699     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7700     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7701 
7702     __ movl($tmp2$$Register, $dst$$Register); // save
7703     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7704     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7705     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7706 
7707     // Negative dividend.
7708     // convert value to positive to use unsigned division
7709     __ lneg($dst$$Register, $tmp2$$Register);
7710     __ divl($tmp$$Register);
7711     __ xchgl($dst$$Register, $tmp2$$Register);
7712     __ divl($tmp$$Register);
7713     // revert result back to negative
7714     __ lneg($tmp2$$Register, $dst$$Register);
7715     __ jmpb(Ldone);
7716 
7717     __ bind(Lpos);
7718     __ divl($tmp$$Register); // Use unsigned division
7719     __ xchgl($dst$$Register, $tmp2$$Register);
7720     // Fall through to the final divide; tmp2 has the 32-bit hi result
7721 
7722     __ bind(Lfast);
7723     // fast path: src is positive
7724     __ divl($tmp$$Register); // Use unsigned division
7725 
7726     __ bind(Ldone);
7727     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7728     if (con < 0) {
7729       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7730     }
7731   %}
7732   ins_pipe( pipe_slow );
7733 %}
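
// The constant-divisor long division above is schoolbook division of a two-"digit"
// number (the digits are 32-bit words) by a one-"digit" divisor: divide the high word
// first, then divide the low word combined with the remainder left in EDX.  The fast
// path applies when the positive divisor exceeds the high word, so a single DIV
// suffices; the sign is handled by negating before and after.  A minimal C sketch
// (illustrative only) of the unsigned core:
//
//   #include <stdint.h>
//   uint64_t udiv64_by_u32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     if (d > n_hi) {                                  // fast path: quotient fits in 32 bits
//       uint64_t n = ((uint64_t)n_hi << 32) | n_lo;
//       return (uint32_t)(n / d);                      // one DIV
//     }
//     uint32_t q_hi = n_hi / d;                        // first DIV
//     uint32_t r    = n_hi % d;                        //   remainder stays in EDX
//     uint64_t low  = ((uint64_t)r << 32) | n_lo;
//     uint32_t q_lo = (uint32_t)(low / d);             // second DIV; cannot overflow since r < d
//     return ((uint64_t)q_hi << 32) | q_lo;
//   }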
7734 
7735 // Remainder Register Long (remainder fits into 32 bits)
7736 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7737   match(Set dst (ModL dst imm));
7738   effect( TEMP tmp, TEMP tmp2, KILL cr );
7739   ins_cost(1000);
7740   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7741             "CMP    $tmp,EDX\n\t"
7742             "JA,s   fast\n\t"
7743             "MOV    $tmp2,EAX\n\t"
7744             "MOV    EAX,EDX\n\t"
7745             "MOV    EDX,0\n\t"
7746             "JLE,s  pos\n\t"
7747             "LNEG   EAX : $tmp2\n\t"
7748             "DIV    $tmp # unsigned division\n\t"
7749             "MOV    EAX,$tmp2\n\t"
7750             "DIV    $tmp\n\t"
7751             "NEG    EDX\n\t"
7752             "JMP,s  done\n"
7753     "pos:\n\t"
7754             "DIV    $tmp\n\t"
7755             "MOV    EAX,$tmp2\n"
7756     "fast:\n\t"
7757             "DIV    $tmp\n"
7758     "done:\n\t"
7759             "MOV    EAX,EDX\n\t"
7760             "SAR    EDX,31" %}
7761   ins_encode %{
7762     int con = (int)$imm$$constant;
7763     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7764     int pcon = (con > 0) ? con : -con;
7765     Label  Lfast, Lpos, Ldone;
7766 
7767     __ movl($tmp$$Register, pcon);
7768     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7769     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7770 
7771     __ movl($tmp2$$Register, $dst$$Register); // save
7772     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7773     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7774     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7775 
7776     // Negative dividend.
7777     // convert value to positive to use unsigned division
7778     __ lneg($dst$$Register, $tmp2$$Register);
7779     __ divl($tmp$$Register);
7780     __ movl($dst$$Register, $tmp2$$Register);
7781     __ divl($tmp$$Register);
7782     // revert remainder back to negative
7783     __ negl(HIGH_FROM_LOW($dst$$Register));
7784     __ jmpb(Ldone);
7785 
7786     __ bind(Lpos);
7787     __ divl($tmp$$Register);
7788     __ movl($dst$$Register, $tmp2$$Register);
7789 
7790     __ bind(Lfast);
7791     // fast path: src is positive
7792     __ divl($tmp$$Register);
7793 
7794     __ bind(Ldone);
7795     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7796     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7797 
7798   %}
7799   ins_pipe( pipe_slow );
7800 %}
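
// The remainder version reuses the same two-DIV scheme, but only the final remainder
// is kept; it always fits in 32 bits because its magnitude is smaller than the 32-bit
// divisor, so the high word is just the sign produced by SAR EDX,31.  A minimal C
// sketch (illustrative only) of the unsigned core:
//
//   #include <stdint.h>
//   uint32_t urem64_by_u32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     uint32_t r   = n_hi % d;                 // first DIV (on the fast path n_hi < d, so r == n_hi)
//     uint64_t low = ((uint64_t)r << 32) | n_lo;
//     return (uint32_t)(low % d);              // second DIV; remainder < d fits in 32 bits
//   }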
7801 
7802 // Integer Shift Instructions
7803 // Shift Left by one
7804 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7805   match(Set dst (LShiftI dst shift));
7806   effect(KILL cr);
7807 
7808   size(2);
7809   format %{ "SHL    $dst,$shift" %}
7810   opcode(0xD1, 0x4);  /* D1 /4 */
7811   ins_encode( OpcP, RegOpc( dst ) );
7812   ins_pipe( ialu_reg );
7813 %}
7814 
7815 // Shift Left by 8-bit immediate
7816 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7817   match(Set dst (LShiftI dst shift));
7818   effect(KILL cr);
7819 
7820   size(3);
7821   format %{ "SHL    $dst,$shift" %}
7822   opcode(0xC1, 0x4);  /* C1 /4 ib */
7823   ins_encode( RegOpcImm( dst, shift) );
7824   ins_pipe( ialu_reg );
7825 %}
7826 
7827 // Shift Left by variable
7828 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7829   match(Set dst (LShiftI dst shift));
7830   effect(KILL cr);
7831 
7832   size(2);
7833   format %{ "SHL    $dst,$shift" %}
7834   opcode(0xD3, 0x4);  /* D3 /4 */
7835   ins_encode( OpcP, RegOpc( dst ) );
7836   ins_pipe( ialu_reg_reg );
7837 %}
7838 
7839 // Arithmetic shift right by one
7840 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7841   match(Set dst (RShiftI dst shift));
7842   effect(KILL cr);
7843 
7844   size(2);
7845   format %{ "SAR    $dst,$shift" %}
7846   opcode(0xD1, 0x7);  /* D1 /7 */
7847   ins_encode( OpcP, RegOpc( dst ) );
7848   ins_pipe( ialu_reg );
7849 %}
7850 
7851 // Arithmetic shift right by one
7852 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7853   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7854   effect(KILL cr);
7855   format %{ "SAR    $dst,$shift" %}
7856   opcode(0xD1, 0x7);  /* D1 /7 */
7857   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7858   ins_pipe( ialu_mem_imm );
7859 %}
7860 
7861 // Arithmetic Shift Right by 8-bit immediate
7862 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7863   match(Set dst (RShiftI dst shift));
7864   effect(KILL cr);
7865 
7866   size(3);
7867   format %{ "SAR    $dst,$shift" %}
7868   opcode(0xC1, 0x7);  /* C1 /7 ib */
7869   ins_encode( RegOpcImm( dst, shift ) );
7870   ins_pipe( ialu_mem_imm );
7871 %}
7872 
7873 // Arithmetic Shift Right by 8-bit immediate
7874 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7875   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7876   effect(KILL cr);
7877 
7878   format %{ "SAR    $dst,$shift" %}
7879   opcode(0xC1, 0x7);  /* C1 /7 ib */
7880   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7881   ins_pipe( ialu_mem_imm );
7882 %}
7883 
7884 // Arithmetic Shift Right by variable
7885 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7886   match(Set dst (RShiftI dst shift));
7887   effect(KILL cr);
7888 
7889   size(2);
7890   format %{ "SAR    $dst,$shift" %}
7891   opcode(0xD3, 0x7);  /* D3 /7 */
7892   ins_encode( OpcP, RegOpc( dst ) );
7893   ins_pipe( ialu_reg_reg );
7894 %}
7895 
7896 // Logical shift right by one
7897 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7898   match(Set dst (URShiftI dst shift));
7899   effect(KILL cr);
7900 
7901   size(2);
7902   format %{ "SHR    $dst,$shift" %}
7903   opcode(0xD1, 0x5);  /* D1 /5 */
7904   ins_encode( OpcP, RegOpc( dst ) );
7905   ins_pipe( ialu_reg );
7906 %}
7907 
7908 // Logical Shift Right by 8-bit immediate
7909 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7910   match(Set dst (URShiftI dst shift));
7911   effect(KILL cr);
7912 
7913   size(3);
7914   format %{ "SHR    $dst,$shift" %}
7915   opcode(0xC1, 0x5);  /* C1 /5 ib */
7916   ins_encode( RegOpcImm( dst, shift) );
7917   ins_pipe( ialu_reg );
7918 %}
7919 
7920 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
7922 // This idiom is used by the compiler for the i2b bytecode.
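// As a sketch of where the pattern comes from: a (byte) cast in Java source
// becomes the i2b bytecode, which the compiler expands to (x << 24) >> 24;
// this rule collapses that shift pair into one sign-extending MOVSX move.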
7923 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7924   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7925 
7926   size(3);
7927   format %{ "MOVSX  $dst,$src :8" %}
7928   ins_encode %{
7929     __ movsbl($dst$$Register, $src$$Register);
7930   %}
7931   ins_pipe(ialu_reg_reg);
7932 %}
7933 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
7936 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7937   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7938 
7939   size(3);
7940   format %{ "MOVSX  $dst,$src :16" %}
7941   ins_encode %{
7942     __ movswl($dst$$Register, $src$$Register);
7943   %}
7944   ins_pipe(ialu_reg_reg);
7945 %}
7946 
7947 
7948 // Logical Shift Right by variable
7949 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7950   match(Set dst (URShiftI dst shift));
7951   effect(KILL cr);
7952 
7953   size(2);
7954   format %{ "SHR    $dst,$shift" %}
7955   opcode(0xD3, 0x5);  /* D3 /5 */
7956   ins_encode( OpcP, RegOpc( dst ) );
7957   ins_pipe( ialu_reg_reg );
7958 %}
7959 
7960 
7961 //----------Logical Instructions-----------------------------------------------
7962 //----------Integer Logical Instructions---------------------------------------
7963 // And Instructions
7964 // And Register with Register
7965 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7966   match(Set dst (AndI dst src));
7967   effect(KILL cr);
7968 
7969   size(2);
7970   format %{ "AND    $dst,$src" %}
7971   opcode(0x23);
7972   ins_encode( OpcP, RegReg( dst, src) );
7973   ins_pipe( ialu_reg_reg );
7974 %}
7975 
7976 // And Register with Immediate
7977 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7978   match(Set dst (AndI dst src));
7979   effect(KILL cr);
7980 
7981   format %{ "AND    $dst,$src" %}
7982   opcode(0x81,0x04);  /* Opcode 81 /4 */
7983   // ins_encode( RegImm( dst, src) );
7984   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7985   ins_pipe( ialu_reg );
7986 %}
7987 
7988 // And Register with Memory
7989 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7990   match(Set dst (AndI dst (LoadI src)));
7991   effect(KILL cr);
7992 
7993   ins_cost(125);
7994   format %{ "AND    $dst,$src" %}
7995   opcode(0x23);
7996   ins_encode( OpcP, RegMem( dst, src) );
7997   ins_pipe( ialu_reg_mem );
7998 %}
7999 
8000 // And Memory with Register
8001 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8002   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8003   effect(KILL cr);
8004 
8005   ins_cost(150);
8006   format %{ "AND    $dst,$src" %}
8007   opcode(0x21);  /* Opcode 21 /r */
8008   ins_encode( OpcP, RegMem( src, dst ) );
8009   ins_pipe( ialu_mem_reg );
8010 %}
8011 
8012 // And Memory with Immediate
8013 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8014   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8015   effect(KILL cr);
8016 
8017   ins_cost(125);
8018   format %{ "AND    $dst,$src" %}
8019   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8020   // ins_encode( MemImm( dst, src) );
8021   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8022   ins_pipe( ialu_mem_imm );
8023 %}
8024 
8025 // BMI1 instructions
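// These rules recognize the BMI1 idioms by their plain-x86 tree shape:
// ANDN computes ~src1 & src2, BLSI isolates the lowest set bit (x & -x),
// BLSMSK builds a mask up to and including the lowest set bit (x ^ (x - 1)),
// and BLSR clears the lowest set bit (x & (x - 1)).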
8026 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8027   match(Set dst (AndI (XorI src1 minus_1) src2));
8028   predicate(UseBMI1Instructions);
8029   effect(KILL cr);
8030 
8031   format %{ "ANDNL  $dst, $src1, $src2" %}
8032 
8033   ins_encode %{
8034     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8035   %}
8036   ins_pipe(ialu_reg);
8037 %}
8038 
8039 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8040   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8041   predicate(UseBMI1Instructions);
8042   effect(KILL cr);
8043 
8044   ins_cost(125);
8045   format %{ "ANDNL  $dst, $src1, $src2" %}
8046 
8047   ins_encode %{
8048     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8049   %}
8050   ins_pipe(ialu_reg_mem);
8051 %}
8052 
8053 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8054   match(Set dst (AndI (SubI imm_zero src) src));
8055   predicate(UseBMI1Instructions);
8056   effect(KILL cr);
8057 
8058   format %{ "BLSIL  $dst, $src" %}
8059 
8060   ins_encode %{
8061     __ blsil($dst$$Register, $src$$Register);
8062   %}
8063   ins_pipe(ialu_reg);
8064 %}
8065 
8066 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8067   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8068   predicate(UseBMI1Instructions);
8069   effect(KILL cr);
8070 
8071   ins_cost(125);
8072   format %{ "BLSIL  $dst, $src" %}
8073 
8074   ins_encode %{
8075     __ blsil($dst$$Register, $src$$Address);
8076   %}
8077   ins_pipe(ialu_reg_mem);
8078 %}
8079 
8080 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8081 %{
8082   match(Set dst (XorI (AddI src minus_1) src));
8083   predicate(UseBMI1Instructions);
8084   effect(KILL cr);
8085 
8086   format %{ "BLSMSKL $dst, $src" %}
8087 
8088   ins_encode %{
8089     __ blsmskl($dst$$Register, $src$$Register);
8090   %}
8091 
8092   ins_pipe(ialu_reg);
8093 %}
8094 
8095 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8096 %{
8097   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8098   predicate(UseBMI1Instructions);
8099   effect(KILL cr);
8100 
8101   ins_cost(125);
8102   format %{ "BLSMSKL $dst, $src" %}
8103 
8104   ins_encode %{
8105     __ blsmskl($dst$$Register, $src$$Address);
8106   %}
8107 
8108   ins_pipe(ialu_reg_mem);
8109 %}
8110 
8111 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8112 %{
8113   match(Set dst (AndI (AddI src minus_1) src) );
8114   predicate(UseBMI1Instructions);
8115   effect(KILL cr);
8116 
8117   format %{ "BLSRL  $dst, $src" %}
8118 
8119   ins_encode %{
8120     __ blsrl($dst$$Register, $src$$Register);
8121   %}
8122 
8123   ins_pipe(ialu_reg);
8124 %}
8125 
8126 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8127 %{
8128   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8129   predicate(UseBMI1Instructions);
8130   effect(KILL cr);
8131 
8132   ins_cost(125);
8133   format %{ "BLSRL  $dst, $src" %}
8134 
8135   ins_encode %{
8136     __ blsrl($dst$$Register, $src$$Address);
8137   %}
8138 
8139   ins_pipe(ialu_reg_mem);
8140 %}
8141 
8142 // Or Instructions
8143 // Or Register with Register
8144 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8145   match(Set dst (OrI dst src));
8146   effect(KILL cr);
8147 
8148   size(2);
8149   format %{ "OR     $dst,$src" %}
8150   opcode(0x0B);
8151   ins_encode( OpcP, RegReg( dst, src) );
8152   ins_pipe( ialu_reg_reg );
8153 %}
8154 
8155 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8156   match(Set dst (OrI dst (CastP2X src)));
8157   effect(KILL cr);
8158 
8159   size(2);
8160   format %{ "OR     $dst,$src" %}
8161   opcode(0x0B);
8162   ins_encode( OpcP, RegReg( dst, src) );
8163   ins_pipe( ialu_reg_reg );
8164 %}
8165 
8166 
8167 // Or Register with Immediate
8168 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8169   match(Set dst (OrI dst src));
8170   effect(KILL cr);
8171 
8172   format %{ "OR     $dst,$src" %}
8173   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8174   // ins_encode( RegImm( dst, src) );
8175   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8176   ins_pipe( ialu_reg );
8177 %}
8178 
8179 // Or Register with Memory
8180 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8181   match(Set dst (OrI dst (LoadI src)));
8182   effect(KILL cr);
8183 
8184   ins_cost(125);
8185   format %{ "OR     $dst,$src" %}
8186   opcode(0x0B);
8187   ins_encode( OpcP, RegMem( dst, src) );
8188   ins_pipe( ialu_reg_mem );
8189 %}
8190 
8191 // Or Memory with Register
8192 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8193   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8194   effect(KILL cr);
8195 
8196   ins_cost(150);
8197   format %{ "OR     $dst,$src" %}
8198   opcode(0x09);  /* Opcode 09 /r */
8199   ins_encode( OpcP, RegMem( src, dst ) );
8200   ins_pipe( ialu_mem_reg );
8201 %}
8202 
8203 // Or Memory with Immediate
8204 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8205   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8206   effect(KILL cr);
8207 
8208   ins_cost(125);
8209   format %{ "OR     $dst,$src" %}
8210   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8211   // ins_encode( MemImm( dst, src) );
8212   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8213   ins_pipe( ialu_mem_imm );
8214 %}
8215 
8216 // ROL/ROR
8217 // ROL expand
8218 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8219   effect(USE_DEF dst, USE shift, KILL cr);
8220 
8221   format %{ "ROL    $dst, $shift" %}
8222   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8223   ins_encode( OpcP, RegOpc( dst ));
8224   ins_pipe( ialu_reg );
8225 %}
8226 
8227 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8228   effect(USE_DEF dst, USE shift, KILL cr);
8229 
8230   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8232   ins_encode( RegOpcImm(dst, shift) );
8233   ins_pipe(ialu_reg);
8234 %}
8235 
8236 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8237   effect(USE_DEF dst, USE shift, KILL cr);
8238 
8239   format %{ "ROL    $dst, $shift" %}
8240   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8241   ins_encode(OpcP, RegOpc(dst));
8242   ins_pipe( ialu_reg_reg );
8243 %}
8244 // end of ROL expand
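
// The match rules below (and the ROR rules further down) recognize the rotate
// idiom -- an OR of a left shift and an unsigned right shift whose counts sum
// to 32, as produced by e.g. Integer.rotateLeft -- either structurally via
// (SubI zero shift)/(SubI c32 shift) for the variable forms, or for the 8-bit
// immediate form via the predicate that checks the two shift constants sum to
// 0 mod 32.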
8245 
8246 // ROL 32bit by one once
8247 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8248   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8249 
8250   expand %{
8251     rolI_eReg_imm1(dst, lshift, cr);
8252   %}
8253 %}
8254 
8255 // ROL 32bit var by imm8 once
8256 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8257   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8258   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8259 
8260   expand %{
8261     rolI_eReg_imm8(dst, lshift, cr);
8262   %}
8263 %}
8264 
8265 // ROL 32bit var by var once
8266 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8267   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8268 
8269   expand %{
8270     rolI_eReg_CL(dst, shift, cr);
8271   %}
8272 %}
8273 
8274 // ROL 32bit var by var once
8275 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8276   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8277 
8278   expand %{
8279     rolI_eReg_CL(dst, shift, cr);
8280   %}
8281 %}
8282 
8283 // ROR expand
8284 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8285   effect(USE_DEF dst, USE shift, KILL cr);
8286 
8287   format %{ "ROR    $dst, $shift" %}
8288   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8289   ins_encode( OpcP, RegOpc( dst ) );
8290   ins_pipe( ialu_reg );
8291 %}
8292 
8293 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8294   effect (USE_DEF dst, USE shift, KILL cr);
8295 
8296   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8298   ins_encode( RegOpcImm(dst, shift) );
8299   ins_pipe( ialu_reg );
8300 %}
8301 
8302 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8303   effect(USE_DEF dst, USE shift, KILL cr);
8304 
8305   format %{ "ROR    $dst, $shift" %}
8306   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8307   ins_encode(OpcP, RegOpc(dst));
8308   ins_pipe( ialu_reg_reg );
8309 %}
8310 // end of ROR expand
8311 
8312 // ROR right once
8313 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8314   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8315 
8316   expand %{
8317     rorI_eReg_imm1(dst, rshift, cr);
8318   %}
8319 %}
8320 
8321 // ROR 32bit by immI8 once
8322 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8323   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8324   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8325 
8326   expand %{
8327     rorI_eReg_imm8(dst, rshift, cr);
8328   %}
8329 %}
8330 
8331 // ROR 32bit var by var once
8332 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8333   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8334 
8335   expand %{
8336     rorI_eReg_CL(dst, shift, cr);
8337   %}
8338 %}
8339 
8340 // ROR 32bit var by var once
8341 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8342   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8343 
8344   expand %{
8345     rorI_eReg_CL(dst, shift, cr);
8346   %}
8347 %}
8348 
8349 // Xor Instructions
8350 // Xor Register with Register
8351 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8352   match(Set dst (XorI dst src));
8353   effect(KILL cr);
8354 
8355   size(2);
8356   format %{ "XOR    $dst,$src" %}
8357   opcode(0x33);
8358   ins_encode( OpcP, RegReg( dst, src) );
8359   ins_pipe( ialu_reg_reg );
8360 %}
8361 
8362 // Xor Register with Immediate -1
8363 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8364   match(Set dst (XorI dst imm));
8365 
8366   size(2);
8367   format %{ "NOT    $dst" %}
8368   ins_encode %{
8369      __ notl($dst$$Register);
8370   %}
8371   ins_pipe( ialu_reg );
8372 %}
8373 
8374 // Xor Register with Immediate
8375 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8376   match(Set dst (XorI dst src));
8377   effect(KILL cr);
8378 
8379   format %{ "XOR    $dst,$src" %}
8380   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8381   // ins_encode( RegImm( dst, src) );
8382   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8383   ins_pipe( ialu_reg );
8384 %}
8385 
8386 // Xor Register with Memory
8387 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8388   match(Set dst (XorI dst (LoadI src)));
8389   effect(KILL cr);
8390 
8391   ins_cost(125);
8392   format %{ "XOR    $dst,$src" %}
8393   opcode(0x33);
8394   ins_encode( OpcP, RegMem(dst, src) );
8395   ins_pipe( ialu_reg_mem );
8396 %}
8397 
8398 // Xor Memory with Register
8399 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8400   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8401   effect(KILL cr);
8402 
8403   ins_cost(150);
8404   format %{ "XOR    $dst,$src" %}
8405   opcode(0x31);  /* Opcode 31 /r */
8406   ins_encode( OpcP, RegMem( src, dst ) );
8407   ins_pipe( ialu_mem_reg );
8408 %}
8409 
8410 // Xor Memory with Immediate
8411 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8412   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8413   effect(KILL cr);
8414 
8415   ins_cost(125);
8416   format %{ "XOR    $dst,$src" %}
8417   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8418   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8419   ins_pipe( ialu_mem_imm );
8420 %}
8421 
8422 //----------Convert Int to Boolean---------------------------------------------
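// Conv2B maps zero to 0 and any non-zero value to 1.  The expansions below
// copy the source, then use NEG (which sets CF exactly when the value was
// non-zero) followed by ADC with the original value, leaving
// -src + src + CF == CF in the destination.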
8423 
8424 instruct movI_nocopy(rRegI dst, rRegI src) %{
8425   effect( DEF dst, USE src );
8426   format %{ "MOV    $dst,$src" %}
8427   ins_encode( enc_Copy( dst, src) );
8428   ins_pipe( ialu_reg_reg );
8429 %}
8430 
8431 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8432   effect( USE_DEF dst, USE src, KILL cr );
8433 
8434   size(4);
8435   format %{ "NEG    $dst\n\t"
8436             "ADC    $dst,$src" %}
8437   ins_encode( neg_reg(dst),
8438               OpcRegReg(0x13,dst,src) );
8439   ins_pipe( ialu_reg_reg_long );
8440 %}
8441 
8442 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8443   match(Set dst (Conv2B src));
8444 
8445   expand %{
8446     movI_nocopy(dst,src);
8447     ci2b(dst,src,cr);
8448   %}
8449 %}
8450 
8451 instruct movP_nocopy(rRegI dst, eRegP src) %{
8452   effect( DEF dst, USE src );
8453   format %{ "MOV    $dst,$src" %}
8454   ins_encode( enc_Copy( dst, src) );
8455   ins_pipe( ialu_reg_reg );
8456 %}
8457 
8458 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8459   effect( USE_DEF dst, USE src, KILL cr );
8460   format %{ "NEG    $dst\n\t"
8461             "ADC    $dst,$src" %}
8462   ins_encode( neg_reg(dst),
8463               OpcRegReg(0x13,dst,src) );
8464   ins_pipe( ialu_reg_reg_long );
8465 %}
8466 
8467 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8468   match(Set dst (Conv2B src));
8469 
8470   expand %{
8471     movP_nocopy(dst,src);
8472     cp2b(dst,src,cr);
8473   %}
8474 %}
8475 
8476 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8477   match(Set dst (CmpLTMask p q));
8478   effect(KILL cr);
8479   ins_cost(400);
8480 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8482   format %{ "XOR    $dst,$dst\n\t"
8483             "CMP    $p,$q\n\t"
8484             "SETlt  $dst\n\t"
8485             "NEG    $dst" %}
8486   ins_encode %{
8487     Register Rp = $p$$Register;
8488     Register Rq = $q$$Register;
8489     Register Rd = $dst$$Register;
8491     __ xorl(Rd, Rd);
8492     __ cmpl(Rp, Rq);
8493     __ setb(Assembler::less, Rd);
8494     __ negl(Rd);
8495   %}
8496 
8497   ins_pipe(pipe_slow);
8498 %}
8499 
8500 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8501   match(Set dst (CmpLTMask dst zero));
8502   effect(DEF dst, KILL cr);
8503   ins_cost(100);
8504 
8505   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8506   ins_encode %{
8507   __ sarl($dst$$Register, 31);
8508   %}
8509   ins_pipe(ialu_reg);
8510 %}
8511 
8512 /* better to save a register than avoid a branch */
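// CmpLTMask yields -1 when p < q and 0 otherwise (see cmpLTMask above), so
// this pattern computes p = (p < q) ? p - q + y : p - q.  Rather than
// materializing the mask, the encoding uses SUB and a short forward branch
// around the conditional ADD.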
8513 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8514   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8515   effect(KILL cr);
8516   ins_cost(400);
8517   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8518             "JGE    done\n\t"
8519             "ADD    $p,$y\n"
8520             "done:  " %}
8521   ins_encode %{
8522     Register Rp = $p$$Register;
8523     Register Rq = $q$$Register;
8524     Register Ry = $y$$Register;
8525     Label done;
8526     __ subl(Rp, Rq);
8527     __ jccb(Assembler::greaterEqual, done);
8528     __ addl(Rp, Ry);
8529     __ bind(done);
8530   %}
8531 
8532   ins_pipe(pipe_cmplt);
8533 %}
8534 
8535 /* better to save a register than avoid a branch */
8536 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8537   match(Set y (AndI (CmpLTMask p q) y));
8538   effect(KILL cr);
8539 
8540   ins_cost(300);
8541 
8542   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8543             "JLT      done\n\t"
8544             "XORL     $y, $y\n"
8545             "done:  " %}
8546   ins_encode %{
8547     Register Rp = $p$$Register;
8548     Register Rq = $q$$Register;
8549     Register Ry = $y$$Register;
8550     Label done;
8551     __ cmpl(Rp, Rq);
8552     __ jccb(Assembler::less, done);
8553     __ xorl(Ry, Ry);
8554     __ bind(done);
8555   %}
8556 
8557   ins_pipe(pipe_cmplt);
8558 %}
8559 
8560 /* If I enable this, I encourage spilling in the inner loop of compress.
8561 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8562   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8563 */
8564 //----------Overflow Math Instructions-----------------------------------------
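// These rules produce only a flags result: the ADD, CMP (subtract without
// writeback), NEG and IMUL forms are chosen so that OF reflects signed
// overflow of the corresponding ideal operation, which a following overflow
// branch then tests (used, for example, by the Math.*Exact intrinsics).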
8565 
8566 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8567 %{
8568   match(Set cr (OverflowAddI op1 op2));
8569   effect(DEF cr, USE_KILL op1, USE op2);
8570 
8571   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8572 
8573   ins_encode %{
8574     __ addl($op1$$Register, $op2$$Register);
8575   %}
8576   ins_pipe(ialu_reg_reg);
8577 %}
8578 
8579 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8580 %{
8581   match(Set cr (OverflowAddI op1 op2));
8582   effect(DEF cr, USE_KILL op1, USE op2);
8583 
8584   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8585 
8586   ins_encode %{
8587     __ addl($op1$$Register, $op2$$constant);
8588   %}
8589   ins_pipe(ialu_reg_reg);
8590 %}
8591 
8592 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8593 %{
8594   match(Set cr (OverflowSubI op1 op2));
8595 
8596   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8597   ins_encode %{
8598     __ cmpl($op1$$Register, $op2$$Register);
8599   %}
8600   ins_pipe(ialu_reg_reg);
8601 %}
8602 
8603 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8604 %{
8605   match(Set cr (OverflowSubI op1 op2));
8606 
8607   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8608   ins_encode %{
8609     __ cmpl($op1$$Register, $op2$$constant);
8610   %}
8611   ins_pipe(ialu_reg_reg);
8612 %}
8613 
8614 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8615 %{
8616   match(Set cr (OverflowSubI zero op2));
8617   effect(DEF cr, USE_KILL op2);
8618 
8619   format %{ "NEG    $op2\t# overflow check int" %}
8620   ins_encode %{
8621     __ negl($op2$$Register);
8622   %}
8623   ins_pipe(ialu_reg_reg);
8624 %}
8625 
8626 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8627 %{
8628   match(Set cr (OverflowMulI op1 op2));
8629   effect(DEF cr, USE_KILL op1, USE op2);
8630 
8631   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8632   ins_encode %{
8633     __ imull($op1$$Register, $op2$$Register);
8634   %}
8635   ins_pipe(ialu_reg_reg_alu0);
8636 %}
8637 
8638 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8639 %{
8640   match(Set cr (OverflowMulI op1 op2));
8641   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8642 
8643   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8644   ins_encode %{
8645     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8646   %}
8647   ins_pipe(ialu_reg_reg_alu0);
8648 %}
8649 
8650 //----------Long Instructions------------------------------------------------
8651 // Add Long Register with Register
8652 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8653   match(Set dst (AddL dst src));
8654   effect(KILL cr);
8655   ins_cost(200);
8656   format %{ "ADD    $dst.lo,$src.lo\n\t"
8657             "ADC    $dst.hi,$src.hi" %}
8658   opcode(0x03, 0x13);
8659   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8660   ins_pipe( ialu_reg_reg_long );
8661 %}
8662 
8663 // Add Long Register with Immediate
8664 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8665   match(Set dst (AddL dst src));
8666   effect(KILL cr);
8667   format %{ "ADD    $dst.lo,$src.lo\n\t"
8668             "ADC    $dst.hi,$src.hi" %}
8669   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8670   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8671   ins_pipe( ialu_reg_long );
8672 %}
8673 
8674 // Add Long Register with Memory
8675 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8676   match(Set dst (AddL dst (LoadL mem)));
8677   effect(KILL cr);
8678   ins_cost(125);
8679   format %{ "ADD    $dst.lo,$mem\n\t"
8680             "ADC    $dst.hi,$mem+4" %}
8681   opcode(0x03, 0x13);
8682   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8683   ins_pipe( ialu_reg_long_mem );
8684 %}
8685 
8686 // Subtract Long Register with Register.
8687 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8688   match(Set dst (SubL dst src));
8689   effect(KILL cr);
8690   ins_cost(200);
8691   format %{ "SUB    $dst.lo,$src.lo\n\t"
8692             "SBB    $dst.hi,$src.hi" %}
8693   opcode(0x2B, 0x1B);
8694   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8695   ins_pipe( ialu_reg_reg_long );
8696 %}
8697 
8698 // Subtract Long Register with Immediate
8699 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8700   match(Set dst (SubL dst src));
8701   effect(KILL cr);
8702   format %{ "SUB    $dst.lo,$src.lo\n\t"
8703             "SBB    $dst.hi,$src.hi" %}
8704   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8705   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8706   ins_pipe( ialu_reg_long );
8707 %}
8708 
8709 // Subtract Long Register with Memory
8710 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8711   match(Set dst (SubL dst (LoadL mem)));
8712   effect(KILL cr);
8713   ins_cost(125);
8714   format %{ "SUB    $dst.lo,$mem\n\t"
8715             "SBB    $dst.hi,$mem+4" %}
8716   opcode(0x2B, 0x1B);
8717   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8718   ins_pipe( ialu_reg_long_mem );
8719 %}
8720 
8721 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8722   match(Set dst (SubL zero dst));
8723   effect(KILL cr);
8724   ins_cost(300);
8725   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8726   ins_encode( neg_long(dst) );
8727   ins_pipe( ialu_reg_reg_long );
8728 %}
8729 
8730 // And Long Register with Register
8731 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8732   match(Set dst (AndL dst src));
8733   effect(KILL cr);
8734   format %{ "AND    $dst.lo,$src.lo\n\t"
8735             "AND    $dst.hi,$src.hi" %}
8736   opcode(0x23,0x23);
8737   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8738   ins_pipe( ialu_reg_reg_long );
8739 %}
8740 
8741 // And Long Register with Immediate
8742 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8743   match(Set dst (AndL dst src));
8744   effect(KILL cr);
8745   format %{ "AND    $dst.lo,$src.lo\n\t"
8746             "AND    $dst.hi,$src.hi" %}
8747   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8748   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8749   ins_pipe( ialu_reg_long );
8750 %}
8751 
8752 // And Long Register with Memory
8753 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8754   match(Set dst (AndL dst (LoadL mem)));
8755   effect(KILL cr);
8756   ins_cost(125);
8757   format %{ "AND    $dst.lo,$mem\n\t"
8758             "AND    $dst.hi,$mem+4" %}
8759   opcode(0x23, 0x23);
8760   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8761   ins_pipe( ialu_reg_long_mem );
8762 %}
8763 
8764 // BMI1 instructions
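// Long (64-bit) forms of the BMI1 patterns above.  The 32-bit instruction is
// applied to the low word first; the high word is only processed when the low
// word alone does not determine the result, which the encodings detect from
// the flags set by the low-word instruction (hence the short conditional
// jumps).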
8765 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8766   match(Set dst (AndL (XorL src1 minus_1) src2));
8767   predicate(UseBMI1Instructions);
8768   effect(KILL cr, TEMP dst);
8769 
8770   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8771             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8772          %}
8773 
8774   ins_encode %{
8775     Register Rdst = $dst$$Register;
8776     Register Rsrc1 = $src1$$Register;
8777     Register Rsrc2 = $src2$$Register;
8778     __ andnl(Rdst, Rsrc1, Rsrc2);
8779     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8780   %}
8781   ins_pipe(ialu_reg_reg_long);
8782 %}
8783 
8784 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8785   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8786   predicate(UseBMI1Instructions);
8787   effect(KILL cr, TEMP dst);
8788 
8789   ins_cost(125);
8790   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8791             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8792          %}
8793 
8794   ins_encode %{
8795     Register Rdst = $dst$$Register;
8796     Register Rsrc1 = $src1$$Register;
8797     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8798 
8799     __ andnl(Rdst, Rsrc1, $src2$$Address);
8800     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8801   %}
8802   ins_pipe(ialu_reg_mem);
8803 %}
8804 
8805 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8806   match(Set dst (AndL (SubL imm_zero src) src));
8807   predicate(UseBMI1Instructions);
8808   effect(KILL cr, TEMP dst);
8809 
8810   format %{ "MOVL   $dst.hi, 0\n\t"
8811             "BLSIL  $dst.lo, $src.lo\n\t"
8812             "JNZ    done\n\t"
8813             "BLSIL  $dst.hi, $src.hi\n"
8814             "done:"
8815          %}
8816 
8817   ins_encode %{
8818     Label done;
8819     Register Rdst = $dst$$Register;
8820     Register Rsrc = $src$$Register;
8821     __ movl(HIGH_FROM_LOW(Rdst), 0);
8822     __ blsil(Rdst, Rsrc);
8823     __ jccb(Assembler::notZero, done);
8824     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8825     __ bind(done);
8826   %}
8827   ins_pipe(ialu_reg);
8828 %}
8829 
8830 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8831   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8832   predicate(UseBMI1Instructions);
8833   effect(KILL cr, TEMP dst);
8834 
8835   ins_cost(125);
8836   format %{ "MOVL   $dst.hi, 0\n\t"
8837             "BLSIL  $dst.lo, $src\n\t"
8838             "JNZ    done\n\t"
8839             "BLSIL  $dst.hi, $src+4\n"
8840             "done:"
8841          %}
8842 
8843   ins_encode %{
8844     Label done;
8845     Register Rdst = $dst$$Register;
8846     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8847 
8848     __ movl(HIGH_FROM_LOW(Rdst), 0);
8849     __ blsil(Rdst, $src$$Address);
8850     __ jccb(Assembler::notZero, done);
8851     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8852     __ bind(done);
8853   %}
8854   ins_pipe(ialu_reg_mem);
8855 %}
8856 
8857 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8858 %{
8859   match(Set dst (XorL (AddL src minus_1) src));
8860   predicate(UseBMI1Instructions);
8861   effect(KILL cr, TEMP dst);
8862 
8863   format %{ "MOVL    $dst.hi, 0\n\t"
8864             "BLSMSKL $dst.lo, $src.lo\n\t"
8865             "JNC     done\n\t"
8866             "BLSMSKL $dst.hi, $src.hi\n"
8867             "done:"
8868          %}
8869 
8870   ins_encode %{
8871     Label done;
8872     Register Rdst = $dst$$Register;
8873     Register Rsrc = $src$$Register;
8874     __ movl(HIGH_FROM_LOW(Rdst), 0);
8875     __ blsmskl(Rdst, Rsrc);
8876     __ jccb(Assembler::carryClear, done);
8877     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8878     __ bind(done);
8879   %}
8880 
8881   ins_pipe(ialu_reg);
8882 %}
8883 
8884 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8885 %{
8886   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8887   predicate(UseBMI1Instructions);
8888   effect(KILL cr, TEMP dst);
8889 
8890   ins_cost(125);
8891   format %{ "MOVL    $dst.hi, 0\n\t"
8892             "BLSMSKL $dst.lo, $src\n\t"
8893             "JNC     done\n\t"
8894             "BLSMSKL $dst.hi, $src+4\n"
8895             "done:"
8896          %}
8897 
8898   ins_encode %{
8899     Label done;
8900     Register Rdst = $dst$$Register;
8901     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8902 
8903     __ movl(HIGH_FROM_LOW(Rdst), 0);
8904     __ blsmskl(Rdst, $src$$Address);
8905     __ jccb(Assembler::carryClear, done);
8906     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8907     __ bind(done);
8908   %}
8909 
8910   ins_pipe(ialu_reg_mem);
8911 %}
8912 
8913 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8914 %{
8915   match(Set dst (AndL (AddL src minus_1) src) );
8916   predicate(UseBMI1Instructions);
8917   effect(KILL cr, TEMP dst);
8918 
8919   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8920             "BLSRL  $dst.lo, $src.lo\n\t"
8921             "JNC    done\n\t"
8922             "BLSRL  $dst.hi, $src.hi\n"
8923             "done:"
8924   %}
8925 
8926   ins_encode %{
8927     Label done;
8928     Register Rdst = $dst$$Register;
8929     Register Rsrc = $src$$Register;
8930     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8931     __ blsrl(Rdst, Rsrc);
8932     __ jccb(Assembler::carryClear, done);
8933     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8934     __ bind(done);
8935   %}
8936 
8937   ins_pipe(ialu_reg);
8938 %}
8939 
8940 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8941 %{
8942   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8943   predicate(UseBMI1Instructions);
8944   effect(KILL cr, TEMP dst);
8945 
8946   ins_cost(125);
8947   format %{ "MOVL   $dst.hi, $src+4\n\t"
8948             "BLSRL  $dst.lo, $src\n\t"
8949             "JNC    done\n\t"
8950             "BLSRL  $dst.hi, $src+4\n"
8951             "done:"
8952   %}
8953 
8954   ins_encode %{
8955     Label done;
8956     Register Rdst = $dst$$Register;
8957     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8958     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8959     __ blsrl(Rdst, $src$$Address);
8960     __ jccb(Assembler::carryClear, done);
8961     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8962     __ bind(done);
8963   %}
8964 
8965   ins_pipe(ialu_reg_mem);
8966 %}
8967 
8968 // Or Long Register with Register
8969 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8970   match(Set dst (OrL dst src));
8971   effect(KILL cr);
8972   format %{ "OR     $dst.lo,$src.lo\n\t"
8973             "OR     $dst.hi,$src.hi" %}
8974   opcode(0x0B,0x0B);
8975   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8976   ins_pipe( ialu_reg_reg_long );
8977 %}
8978 
8979 // Or Long Register with Immediate
8980 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8981   match(Set dst (OrL dst src));
8982   effect(KILL cr);
8983   format %{ "OR     $dst.lo,$src.lo\n\t"
8984             "OR     $dst.hi,$src.hi" %}
8985   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8986   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8987   ins_pipe( ialu_reg_long );
8988 %}
8989 
8990 // Or Long Register with Memory
8991 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8992   match(Set dst (OrL dst (LoadL mem)));
8993   effect(KILL cr);
8994   ins_cost(125);
8995   format %{ "OR     $dst.lo,$mem\n\t"
8996             "OR     $dst.hi,$mem+4" %}
8997   opcode(0x0B,0x0B);
8998   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8999   ins_pipe( ialu_reg_long_mem );
9000 %}
9001 
9002 // Xor Long Register with Register
9003 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9004   match(Set dst (XorL dst src));
9005   effect(KILL cr);
9006   format %{ "XOR    $dst.lo,$src.lo\n\t"
9007             "XOR    $dst.hi,$src.hi" %}
9008   opcode(0x33,0x33);
9009   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9010   ins_pipe( ialu_reg_reg_long );
9011 %}
9012 
9013 // Xor Long Register with Immediate -1
9014 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9015   match(Set dst (XorL dst imm));
9016   format %{ "NOT    $dst.lo\n\t"
9017             "NOT    $dst.hi" %}
9018   ins_encode %{
9019      __ notl($dst$$Register);
9020      __ notl(HIGH_FROM_LOW($dst$$Register));
9021   %}
9022   ins_pipe( ialu_reg_long );
9023 %}
9024 
9025 // Xor Long Register with Immediate
9026 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9027   match(Set dst (XorL dst src));
9028   effect(KILL cr);
9029   format %{ "XOR    $dst.lo,$src.lo\n\t"
9030             "XOR    $dst.hi,$src.hi" %}
9031   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9032   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9033   ins_pipe( ialu_reg_long );
9034 %}
9035 
9036 // Xor Long Register with Memory
9037 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9038   match(Set dst (XorL dst (LoadL mem)));
9039   effect(KILL cr);
9040   ins_cost(125);
9041   format %{ "XOR    $dst.lo,$mem\n\t"
9042             "XOR    $dst.hi,$mem+4" %}
9043   opcode(0x33,0x33);
9044   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9045   ins_pipe( ialu_reg_long_mem );
9046 %}
9047 
9048 // Shift Left Long by 1
9049 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9050   predicate(UseNewLongLShift);
9051   match(Set dst (LShiftL dst cnt));
9052   effect(KILL cr);
9053   ins_cost(100);
9054   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9055             "ADC    $dst.hi,$dst.hi" %}
9056   ins_encode %{
9057     __ addl($dst$$Register,$dst$$Register);
9058     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9059   %}
9060   ins_pipe( ialu_reg_long );
9061 %}
9062 
9063 // Shift Left Long by 2
9064 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9065   predicate(UseNewLongLShift);
9066   match(Set dst (LShiftL dst cnt));
9067   effect(KILL cr);
9068   ins_cost(100);
9069   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9070             "ADC    $dst.hi,$dst.hi\n\t"
9071             "ADD    $dst.lo,$dst.lo\n\t"
9072             "ADC    $dst.hi,$dst.hi" %}
9073   ins_encode %{
9074     __ addl($dst$$Register,$dst$$Register);
9075     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9076     __ addl($dst$$Register,$dst$$Register);
9077     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9078   %}
9079   ins_pipe( ialu_reg_long );
9080 %}
9081 
9082 // Shift Left Long by 3
9083 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9084   predicate(UseNewLongLShift);
9085   match(Set dst (LShiftL dst cnt));
9086   effect(KILL cr);
9087   ins_cost(100);
9088   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9089             "ADC    $dst.hi,$dst.hi\n\t"
9090             "ADD    $dst.lo,$dst.lo\n\t"
9091             "ADC    $dst.hi,$dst.hi\n\t"
9092             "ADD    $dst.lo,$dst.lo\n\t"
9093             "ADC    $dst.hi,$dst.hi" %}
9094   ins_encode %{
9095     __ addl($dst$$Register,$dst$$Register);
9096     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9097     __ addl($dst$$Register,$dst$$Register);
9098     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9099     __ addl($dst$$Register,$dst$$Register);
9100     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9101   %}
9102   ins_pipe( ialu_reg_long );
9103 %}
9104 
9105 // Shift Left Long by 1-31
9106 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9107   match(Set dst (LShiftL dst cnt));
9108   effect(KILL cr);
9109   ins_cost(200);
9110   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9111             "SHL    $dst.lo,$cnt" %}
9112   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9113   ins_encode( move_long_small_shift(dst,cnt) );
9114   ins_pipe( ialu_reg_long );
9115 %}
9116 
9117 // Shift Left Long by 32-63
9118 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9119   match(Set dst (LShiftL dst cnt));
9120   effect(KILL cr);
9121   ins_cost(300);
9122   format %{ "MOV    $dst.hi,$dst.lo\n"
9123           "\tSHL    $dst.hi,$cnt-32\n"
9124           "\tXOR    $dst.lo,$dst.lo" %}
9125   opcode(0xC1, 0x4);  /* C1 /4 ib */
9126   ins_encode( move_long_big_shift_clr(dst,cnt) );
9127   ins_pipe( ialu_reg_long );
9128 %}
9129 
9130 // Shift Left Long by variable
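// The hardware shift instructions only use the low five bits of CL, so a
// variable 64-bit shift (here and in the right-shift forms below) first tests
// bit 5 of the count: if the count is 32 or more the words are pre-moved (and
// the vacated word cleared or sign-filled), then SHLD/SHRD plus a plain shift
// handle the remaining count mod 32.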
9131 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9132   match(Set dst (LShiftL dst shift));
9133   effect(KILL cr);
9134   ins_cost(500+200);
9135   size(17);
9136   format %{ "TEST   $shift,32\n\t"
9137             "JEQ,s  small\n\t"
9138             "MOV    $dst.hi,$dst.lo\n\t"
9139             "XOR    $dst.lo,$dst.lo\n"
9140     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9141             "SHL    $dst.lo,$shift" %}
9142   ins_encode( shift_left_long( dst, shift ) );
9143   ins_pipe( pipe_slow );
9144 %}
9145 
9146 // Shift Right Long by 1-31
9147 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9148   match(Set dst (URShiftL dst cnt));
9149   effect(KILL cr);
9150   ins_cost(200);
9151   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9152             "SHR    $dst.hi,$cnt" %}
9153   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9154   ins_encode( move_long_small_shift(dst,cnt) );
9155   ins_pipe( ialu_reg_long );
9156 %}
9157 
9158 // Shift Right Long by 32-63
9159 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9160   match(Set dst (URShiftL dst cnt));
9161   effect(KILL cr);
9162   ins_cost(300);
9163   format %{ "MOV    $dst.lo,$dst.hi\n"
9164           "\tSHR    $dst.lo,$cnt-32\n"
9165           "\tXOR    $dst.hi,$dst.hi" %}
9166   opcode(0xC1, 0x5);  /* C1 /5 ib */
9167   ins_encode( move_long_big_shift_clr(dst,cnt) );
9168   ins_pipe( ialu_reg_long );
9169 %}
9170 
9171 // Shift Right Long by variable
9172 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9173   match(Set dst (URShiftL dst shift));
9174   effect(KILL cr);
9175   ins_cost(600);
9176   size(17);
9177   format %{ "TEST   $shift,32\n\t"
9178             "JEQ,s  small\n\t"
9179             "MOV    $dst.lo,$dst.hi\n\t"
9180             "XOR    $dst.hi,$dst.hi\n"
9181     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9182             "SHR    $dst.hi,$shift" %}
9183   ins_encode( shift_right_long( dst, shift ) );
9184   ins_pipe( pipe_slow );
9185 %}
9186 
9187 // Shift Right Long by 1-31
9188 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9189   match(Set dst (RShiftL dst cnt));
9190   effect(KILL cr);
9191   ins_cost(200);
9192   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9193             "SAR    $dst.hi,$cnt" %}
9194   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9195   ins_encode( move_long_small_shift(dst,cnt) );
9196   ins_pipe( ialu_reg_long );
9197 %}
9198 
9199 // Shift Right Long by 32-63
9200 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9201   match(Set dst (RShiftL dst cnt));
9202   effect(KILL cr);
9203   ins_cost(300);
9204   format %{ "MOV    $dst.lo,$dst.hi\n"
9205           "\tSAR    $dst.lo,$cnt-32\n"
9206           "\tSAR    $dst.hi,31" %}
9207   opcode(0xC1, 0x7);  /* C1 /7 ib */
9208   ins_encode( move_long_big_shift_sign(dst,cnt) );
9209   ins_pipe( ialu_reg_long );
9210 %}
9211 
9212 // Shift Right arithmetic Long by variable
9213 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9214   match(Set dst (RShiftL dst shift));
9215   effect(KILL cr);
9216   ins_cost(600);
9217   size(18);
9218   format %{ "TEST   $shift,32\n\t"
9219             "JEQ,s  small\n\t"
9220             "MOV    $dst.lo,$dst.hi\n\t"
9221             "SAR    $dst.hi,31\n"
9222     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9223             "SAR    $dst.hi,$shift" %}
9224   ins_encode( shift_right_arith_long( dst, shift ) );
9225   ins_pipe( pipe_slow );
9226 %}
9227 
9228 
9229 //----------Double Instructions------------------------------------------------
9230 // Double Math
9231 
9232 // Compare & branch
9233 
// P6 version of double compare, sets condition codes in EFLAGS
9235 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9236   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9237   match(Set cr (CmpD src1 src2));
9238   effect(KILL rax);
9239   ins_cost(150);
9240   format %{ "FLD    $src1\n\t"
9241             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9242             "JNP    exit\n\t"
9243             "MOV    ah,1       // saw a NaN, set CF\n\t"
9244             "SAHF\n"
9245      "exit:\tNOP               // avoid branch to branch" %}
9246   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9247   ins_encode( Push_Reg_DPR(src1),
9248               OpcP, RegOpc(src2),
9249               cmpF_P6_fixup );
9250   ins_pipe( pipe_slow );
9251 %}
9252 
9253 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9254   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9255   match(Set cr (CmpD src1 src2));
9256   ins_cost(150);
9257   format %{ "FLD    $src1\n\t"
9258             "FUCOMIP ST,$src2  // P6 instruction" %}
9259   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9260   ins_encode( Push_Reg_DPR(src1),
9261               OpcP, RegOpc(src2));
9262   ins_pipe( pipe_slow );
9263 %}
9264 
9265 // Compare & branch
9266 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9267   predicate(UseSSE<=1);
9268   match(Set cr (CmpD src1 src2));
9269   effect(KILL rax);
9270   ins_cost(200);
9271   format %{ "FLD    $src1\n\t"
9272             "FCOMp  $src2\n\t"
9273             "FNSTSW AX\n\t"
9274             "TEST   AX,0x400\n\t"
9275             "JZ,s   flags\n\t"
9276             "MOV    AH,1\t# unordered treat as LT\n"
9277     "flags:\tSAHF" %}
9278   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9279   ins_encode( Push_Reg_DPR(src1),
9280               OpcP, RegOpc(src2),
9281               fpu_flags);
9282   ins_pipe( pipe_slow );
9283 %}
9284 
9285 // Compare vs zero into -1,0,1
9286 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9287   predicate(UseSSE<=1);
9288   match(Set dst (CmpD3 src1 zero));
9289   effect(KILL cr, KILL rax);
9290   ins_cost(280);
9291   format %{ "FTSTD  $dst,$src1" %}
9292   opcode(0xE4, 0xD9);
9293   ins_encode( Push_Reg_DPR(src1),
9294               OpcS, OpcP, PopFPU,
9295               CmpF_Result(dst));
9296   ins_pipe( pipe_slow );
9297 %}
9298 
9299 // Compare into -1,0,1
9300 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9301   predicate(UseSSE<=1);
9302   match(Set dst (CmpD3 src1 src2));
9303   effect(KILL cr, KILL rax);
9304   ins_cost(300);
9305   format %{ "FCMPD  $dst,$src1,$src2" %}
9306   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9307   ins_encode( Push_Reg_DPR(src1),
9308               OpcP, RegOpc(src2),
9309               CmpF_Result(dst));
9310   ins_pipe( pipe_slow );
9311 %}
9312 
// double compare and set condition codes in EFLAGS by XMM regs
9314 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9315   predicate(UseSSE>=2);
9316   match(Set cr (CmpD src1 src2));
9317   ins_cost(145);
9318   format %{ "UCOMISD $src1,$src2\n\t"
9319             "JNP,s   exit\n\t"
9320             "PUSHF\t# saw NaN, set CF\n\t"
9321             "AND     [rsp], #0xffffff2b\n\t"
9322             "POPF\n"
9323     "exit:" %}
9324   ins_encode %{
9325     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9326     emit_cmpfp_fixup(_masm);
9327   %}
9328   ins_pipe( pipe_slow );
9329 %}
9330 
9331 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9332   predicate(UseSSE>=2);
9333   match(Set cr (CmpD src1 src2));
9334   ins_cost(100);
9335   format %{ "UCOMISD $src1,$src2" %}
9336   ins_encode %{
9337     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9338   %}
9339   ins_pipe( pipe_slow );
9340 %}
9341 
// double compare and set condition codes in EFLAGS by XMM regs
9343 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9344   predicate(UseSSE>=2);
9345   match(Set cr (CmpD src1 (LoadD src2)));
9346   ins_cost(145);
9347   format %{ "UCOMISD $src1,$src2\n\t"
9348             "JNP,s   exit\n\t"
9349             "PUSHF\t# saw NaN, set CF\n\t"
9350             "AND     [rsp], #0xffffff2b\n\t"
9351             "POPF\n"
9352     "exit:" %}
9353   ins_encode %{
9354     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9355     emit_cmpfp_fixup(_masm);
9356   %}
9357   ins_pipe( pipe_slow );
9358 %}
9359 
9360 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9361   predicate(UseSSE>=2);
9362   match(Set cr (CmpD src1 (LoadD src2)));
9363   ins_cost(100);
9364   format %{ "UCOMISD $src1,$src2" %}
9365   ins_encode %{
9366     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9367   %}
9368   ins_pipe( pipe_slow );
9369 %}
9370 
9371 // Compare into -1,0,1 in XMM
9372 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9373   predicate(UseSSE>=2);
9374   match(Set dst (CmpD3 src1 src2));
9375   effect(KILL cr);
9376   ins_cost(255);
9377   format %{ "UCOMISD $src1, $src2\n\t"
9378             "MOV     $dst, #-1\n\t"
9379             "JP,s    done\n\t"
9380             "JB,s    done\n\t"
9381             "SETNE   $dst\n\t"
9382             "MOVZB   $dst, $dst\n"
9383     "done:" %}
9384   ins_encode %{
9385     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9386     emit_cmpfp3(_masm, $dst$$Register);
9387   %}
9388   ins_pipe( pipe_slow );
9389 %}
9390 
9391 // Compare into -1,0,1 in XMM and memory
9392 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9393   predicate(UseSSE>=2);
9394   match(Set dst (CmpD3 src1 (LoadD src2)));
9395   effect(KILL cr);
9396   ins_cost(275);
9397   format %{ "UCOMISD $src1, $src2\n\t"
9398             "MOV     $dst, #-1\n\t"
9399             "JP,s    done\n\t"
9400             "JB,s    done\n\t"
9401             "SETNE   $dst\n\t"
9402             "MOVZB   $dst, $dst\n"
9403     "done:" %}
9404   ins_encode %{
9405     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9406     emit_cmpfp3(_masm, $dst$$Register);
9407   %}
9408   ins_pipe( pipe_slow );
9409 %}
9410 
9411 
9412 instruct subDPR_reg(regDPR dst, regDPR src) %{
9413   predicate (UseSSE <=1);
9414   match(Set dst (SubD dst src));
9415 
9416   format %{ "FLD    $src\n\t"
9417             "DSUBp  $dst,ST" %}
9418   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9419   ins_cost(150);
9420   ins_encode( Push_Reg_DPR(src),
9421               OpcP, RegOpc(dst) );
9422   ins_pipe( fpu_reg_reg );
9423 %}
9424 
9425 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9426   predicate (UseSSE <=1);
9427   match(Set dst (RoundDouble (SubD src1 src2)));
9428   ins_cost(250);
9429 
9430   format %{ "FLD    $src2\n\t"
9431             "DSUB   ST,$src1\n\t"
9432             "FSTP_D $dst\t# D-round" %}
9433   opcode(0xD8, 0x5);
9434   ins_encode( Push_Reg_DPR(src2),
9435               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9436   ins_pipe( fpu_mem_reg_reg );
9437 %}
9438 
9439 
9440 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9441   predicate (UseSSE <=1);
9442   match(Set dst (SubD dst (LoadD src)));
9443   ins_cost(150);
9444 
9445   format %{ "FLD    $src\n\t"
9446             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9448   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9449               OpcP, RegOpc(dst) );
9450   ins_pipe( fpu_reg_mem );
9451 %}
9452 
9453 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9454   predicate (UseSSE<=1);
9455   match(Set dst (AbsD src));
9456   ins_cost(100);
9457   format %{ "FABS" %}
9458   opcode(0xE1, 0xD9);
9459   ins_encode( OpcS, OpcP );
9460   ins_pipe( fpu_reg_reg );
9461 %}
9462 
9463 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9464   predicate(UseSSE<=1);
9465   match(Set dst (NegD src));
9466   ins_cost(100);
9467   format %{ "FCHS" %}
9468   opcode(0xE0, 0xD9);
9469   ins_encode( OpcS, OpcP );
9470   ins_pipe( fpu_reg_reg );
9471 %}
9472 
9473 instruct addDPR_reg(regDPR dst, regDPR src) %{
9474   predicate(UseSSE<=1);
9475   match(Set dst (AddD dst src));
9476   format %{ "FLD    $src\n\t"
9477             "DADD   $dst,ST" %}
9478   size(4);
9479   ins_cost(150);
9480   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9481   ins_encode( Push_Reg_DPR(src),
9482               OpcP, RegOpc(dst) );
9483   ins_pipe( fpu_reg_reg );
9484 %}
9485 
9486 
9487 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9488   predicate(UseSSE<=1);
9489   match(Set dst (RoundDouble (AddD src1 src2)));
9490   ins_cost(250);
9491 
9492   format %{ "FLD    $src2\n\t"
9493             "DADD   ST,$src1\n\t"
9494             "FSTP_D $dst\t# D-round" %}
9495   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9496   ins_encode( Push_Reg_DPR(src2),
9497               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9498   ins_pipe( fpu_mem_reg_reg );
9499 %}
9500 
9501 
9502 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9503   predicate(UseSSE<=1);
9504   match(Set dst (AddD dst (LoadD src)));
9505   ins_cost(150);
9506 
9507   format %{ "FLD    $src\n\t"
9508             "DADDp  $dst,ST" %}
9509   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9510   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9511               OpcP, RegOpc(dst) );
9512   ins_pipe( fpu_reg_mem );
9513 %}
9514 
9515 // add-to-memory
9516 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9517   predicate(UseSSE<=1);
9518   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9519   ins_cost(150);
9520 
9521   format %{ "FLD_D  $dst\n\t"
9522             "DADD   ST,$src\n\t"
9523             "FST_D  $dst" %}
9524   opcode(0xDD, 0x0);
9525   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9526               Opcode(0xD8), RegOpc(src),
9527               set_instruction_start,
9528               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9529   ins_pipe( fpu_reg_mem );
9530 %}
9531 
9532 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9533   predicate(UseSSE<=1);
9534   match(Set dst (AddD dst con));
9535   ins_cost(125);
9536   format %{ "FLD1\n\t"
9537             "DADDp  $dst,ST" %}
9538   ins_encode %{
9539     __ fld1();
9540     __ faddp($dst$$reg);
9541   %}
9542   ins_pipe(fpu_reg);
9543 %}
9544 
9545 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9546   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9547   match(Set dst (AddD dst con));
9548   ins_cost(200);
9549   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9550             "DADDp  $dst,ST" %}
9551   ins_encode %{
9552     __ fld_d($constantaddress($con));
9553     __ faddp($dst$$reg);
9554   %}
9555   ins_pipe(fpu_reg_mem);
9556 %}
9557 
9558 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9559   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9560   match(Set dst (RoundDouble (AddD src con)));
9561   ins_cost(200);
9562   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9563             "DADD   ST,$src\n\t"
9564             "FSTP_D $dst\t# D-round" %}
9565   ins_encode %{
9566     __ fld_d($constantaddress($con));
9567     __ fadd($src$$reg);
9568     __ fstp_d(Address(rsp, $dst$$disp));
9569   %}
9570   ins_pipe(fpu_mem_reg_con);
9571 %}
9572 
9573 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9574   predicate(UseSSE<=1);
9575   match(Set dst (MulD dst src));
9576   format %{ "FLD    $src\n\t"
9577             "DMULp  $dst,ST" %}
9578   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9579   ins_cost(150);
9580   ins_encode( Push_Reg_DPR(src),
9581               OpcP, RegOpc(dst) );
9582   ins_pipe( fpu_reg_reg );
9583 %}
9584 
9585 // Strict FP instruction biases argument before multiply then
9586 // biases result to avoid double rounding of subnormals.
9587 //
9588 // scale arg1 by multiplying arg1 by 2^(-15360)
9589 // load arg2
9590 // multiply scaled arg1 by arg2
9591 // rescale product by 2^(15360)
9592 //
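// (Illustrative sketch, not the emitted code: per the comment above,
//  StubRoutines::_fpu_subnormal_bias1 holds 2^-15360 and _fpu_subnormal_bias2
//  holds 2^+15360, where 15360 = 16383 - 1023 is the difference between the
//  x87 extended and IEEE double exponent biases.  Pre-scaling lets a product
//  that would be a double subnormal denormalize correctly in the FPU register,
//  and the final power-of-two rescale is exact, so the value is rounded once:
//    t      = (arg1 * 2^-15360) * arg2;   // rounding happens here
//    result =  t * 2^+15360;              // exact rescale
//  )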
9593 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9594   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9595   match(Set dst (MulD dst src));
9596   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9597 
9598   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9599             "DMULp  $dst,ST\n\t"
9600             "FLD    $src\n\t"
9601             "DMULp  $dst,ST\n\t"
9602             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9603             "DMULp  $dst,ST\n\t" %}
9604   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9605   ins_encode( strictfp_bias1(dst),
9606               Push_Reg_DPR(src),
9607               OpcP, RegOpc(dst),
9608               strictfp_bias2(dst) );
9609   ins_pipe( fpu_reg_reg );
9610 %}
9611 
9612 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9613   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9614   match(Set dst (MulD dst con));
9615   ins_cost(200);
9616   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9617             "DMULp  $dst,ST" %}
9618   ins_encode %{
9619     __ fld_d($constantaddress($con));
9620     __ fmulp($dst$$reg);
9621   %}
9622   ins_pipe(fpu_reg_mem);
9623 %}
9624 
9625 
9626 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9627   predicate( UseSSE<=1 );
9628   match(Set dst (MulD dst (LoadD src)));
9629   ins_cost(200);
9630   format %{ "FLD_D  $src\n\t"
9631             "DMULp  $dst,ST" %}
9632   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9633   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9634               OpcP, RegOpc(dst) );
9635   ins_pipe( fpu_reg_mem );
9636 %}
9637 
9638 //
9639 // Cisc-alternate to reg-reg multiply
9640 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9641   predicate( UseSSE<=1 );
9642   match(Set dst (MulD src (LoadD mem)));
9643   ins_cost(250);
9644   format %{ "FLD_D  $mem\n\t"
9645             "DMUL   ST,$src\n\t"
9646             "FSTP_D $dst" %}
9647   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
9648   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9649               OpcReg_FPR(src),
9650               Pop_Reg_DPR(dst) );
9651   ins_pipe( fpu_reg_reg_mem );
9652 %}
9653 
9654 
9655 // MACRO3 -- addDPR a mulDPR
9656 // This instruction is a '2-address' instruction in that the result goes
9657 // back to src2.  This eliminates a move from the macro; possibly the
9658 // register allocator will have to add it back (and maybe not).
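// In effect the rule computes src2 = (src0 * src1) + src2 via the x87 sequence
// FLD src0; FMUL ST,src1; FADDP src2,ST shown in the format below.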
9659 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9660   predicate( UseSSE<=1 );
9661   match(Set src2 (AddD (MulD src0 src1) src2));
9662   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9663             "DMUL   ST,$src1\n\t"
9664             "DADDp  $src2,ST" %}
9665   ins_cost(250);
9666   opcode(0xDD); /* LoadD DD /0 */
9667   ins_encode( Push_Reg_FPR(src0),
9668               FMul_ST_reg(src1),
9669               FAddP_reg_ST(src2) );
9670   ins_pipe( fpu_reg_reg_reg );
9671 %}
9672 
9673 
9674 // MACRO3 -- subDPR a mulDPR
9675 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9676   predicate( UseSSE<=1 );
9677   match(Set src2 (SubD (MulD src0 src1) src2));
9678   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9679             "DMUL   ST,$src1\n\t"
9680             "DSUBRp $src2,ST" %}
9681   ins_cost(250);
9682   ins_encode( Push_Reg_FPR(src0),
9683               FMul_ST_reg(src1),
9684               Opcode(0xDE), Opc_plus(0xE0,src2));
9685   ins_pipe( fpu_reg_reg_reg );
9686 %}
9687 
9688 
9689 instruct divDPR_reg(regDPR dst, regDPR src) %{
9690   predicate( UseSSE<=1 );
9691   match(Set dst (DivD dst src));
9692 
9693   format %{ "FLD    $src\n\t"
9694             "FDIVp  $dst,ST" %}
9695   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9696   ins_cost(150);
9697   ins_encode( Push_Reg_DPR(src),
9698               OpcP, RegOpc(dst) );
9699   ins_pipe( fpu_reg_reg );
9700 %}
9701 
9702 // Strict FP instruction biases argument before division then
9703 // biases result, to avoid double rounding of subnormals.
9704 //
9705 // scale dividend by multiplying dividend by 2^(-15360)
9706 // load divisor
9707 // divide scaled dividend by divisor
9708 // rescale quotient by 2^(15360)
9709 //
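// (Same 2^(+/-15360) biasing trick as strictfp_mulDPR_reg above, applied here
//  to the dividend before the divide and to the quotient afterwards.)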
9710 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9711   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9712   match(Set dst (DivD dst src));
9714   ins_cost(1);   // Select this instruction for all strict FP double divides
9715 
9716   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9717             "DMULp  $dst,ST\n\t"
9718             "FLD    $src\n\t"
9719             "FDIVp  $dst,ST\n\t"
9720             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9721             "DMULp  $dst,ST\n\t" %}
9722   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9723   ins_encode( strictfp_bias1(dst),
9724               Push_Reg_DPR(src),
9725               OpcP, RegOpc(dst),
9726               strictfp_bias2(dst) );
9727   ins_pipe( fpu_reg_reg );
9728 %}
9729 
9730 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9731   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9732   match(Set dst (RoundDouble (DivD src1 src2)));
9733 
9734   format %{ "FLD    $src1\n\t"
9735             "FDIV   ST,$src2\n\t"
9736             "FSTP_D $dst\t# D-round" %}
9737   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9738   ins_encode( Push_Reg_DPR(src1),
9739               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9740   ins_pipe( fpu_mem_reg_reg );
9741 %}
9742 
9743 
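// Java's '%' on doubles is the truncated (fmod-style) remainder, which is what
// FPREM computes.  FPREM may leave only a partial remainder, signalled by C2 in
// the FPU status word; the loop shown in the DMOD/FMOD formats below
// (FNSTSW AX; SAHF; JP loop) copies C2 into PF and repeats FPREM until the
// reduction is complete.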
9744 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9745   predicate(UseSSE<=1);
9746   match(Set dst (ModD dst src));
9747   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9748 
9749   format %{ "DMOD   $dst,$src" %}
9750   ins_cost(250);
9751   ins_encode(Push_Reg_Mod_DPR(dst, src),
9752               emitModDPR(),
9753               Push_Result_Mod_DPR(src),
9754               Pop_Reg_DPR(dst));
9755   ins_pipe( pipe_slow );
9756 %}
9757 
9758 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9759   predicate(UseSSE>=2);
9760   match(Set dst (ModD src0 src1));
9761   effect(KILL rax, KILL cr);
9762 
9763   format %{ "SUB    ESP,8\t # DMOD\n"
9764           "\tMOVSD  [ESP+0],$src1\n"
9765           "\tFLD_D  [ESP+0]\n"
9766           "\tMOVSD  [ESP+0],$src0\n"
9767           "\tFLD_D  [ESP+0]\n"
9768      "loop:\tFPREM\n"
9769           "\tFWAIT\n"
9770           "\tFNSTSW AX\n"
9771           "\tSAHF\n"
9772           "\tJP     loop\n"
9773           "\tFSTP_D [ESP+0]\n"
9774           "\tMOVSD  $dst,[ESP+0]\n"
9775           "\tADD    ESP,8\n"
9776           "\tFSTP   ST0\t # Restore FPU Stack"
9777     %}
9778   ins_cost(250);
9779   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9780   ins_pipe( pipe_slow );
9781 %}
9782 
9783 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9784   predicate (UseSSE<=1);
9785   match(Set dst (SinD src));
9786   ins_cost(1800);
9787   format %{ "DSIN   $dst" %}
9788   opcode(0xD9, 0xFE);
9789   ins_encode( OpcP, OpcS );
9790   ins_pipe( pipe_slow );
9791 %}
9792 
9793 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9794   predicate (UseSSE>=2);
9795   match(Set dst (SinD dst));
9796   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9797   ins_cost(1800);
9798   format %{ "DSIN   $dst" %}
9799   opcode(0xD9, 0xFE);
9800   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9801   ins_pipe( pipe_slow );
9802 %}
9803 
9804 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9805   predicate (UseSSE<=1);
9806   match(Set dst (CosD src));
9807   ins_cost(1800);
9808   format %{ "DCOS   $dst" %}
9809   opcode(0xD9, 0xFF);
9810   ins_encode( OpcP, OpcS );
9811   ins_pipe( pipe_slow );
9812 %}
9813 
9814 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9815   predicate (UseSSE>=2);
9816   match(Set dst (CosD dst));
9817   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9818   ins_cost(1800);
9819   format %{ "DCOS   $dst" %}
9820   opcode(0xD9, 0xFF);
9821   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9822   ins_pipe( pipe_slow );
9823 %}
9824 
9825 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9826   predicate (UseSSE<=1);
9827   match(Set dst(TanD src));
9828   format %{ "DTAN   $dst" %}
9829   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9830               Opcode(0xDD), Opcode(0xD8));   // fstp st
9831   ins_pipe( pipe_slow );
9832 %}
9833 
9834 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9835   predicate (UseSSE>=2);
9836   match(Set dst(TanD dst));
9837   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9838   format %{ "DTAN   $dst" %}
9839   ins_encode( Push_SrcD(dst),
9840               Opcode(0xD9), Opcode(0xF2),    // fptan
9841               Opcode(0xDD), Opcode(0xD8),   // fstp st
9842               Push_ResultD(dst) );
9843   ins_pipe( pipe_slow );
9844 %}
9845 
9846 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9847   predicate (UseSSE<=1);
9848   match(Set dst(AtanD dst src));
9849   format %{ "DATA   $dst,$src" %}
9850   opcode(0xD9, 0xF3);
9851   ins_encode( Push_Reg_DPR(src),
9852               OpcP, OpcS, RegOpc(dst) );
9853   ins_pipe( pipe_slow );
9854 %}
9855 
9856 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9857   predicate (UseSSE>=2);
9858   match(Set dst(AtanD dst src));
9859   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9860   format %{ "DATA   $dst,$src" %}
9861   opcode(0xD9, 0xF3);
9862   ins_encode( Push_SrcD(src),
9863               OpcP, OpcS, Push_ResultD(dst) );
9864   ins_pipe( pipe_slow );
9865 %}
9866 
9867 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9868   predicate (UseSSE<=1);
9869   match(Set dst (SqrtD src));
9870   format %{ "DSQRT  $dst,$src" %}
9871   opcode(0xFA, 0xD9);
9872   ins_encode( Push_Reg_DPR(src),
9873               OpcS, OpcP, Pop_Reg_DPR(dst) );
9874   ins_pipe( pipe_slow );
9875 %}
9876 
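// fast_pow() is a MacroAssembler routine that evaluates X^Y on the x87 stack
// (conceptually as 2^(Y*log2(X))); it clobbers EAX, ECX, EDX and EFLAGS, hence
// the KILL effects on the pow rules below.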
9877 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9878   predicate (UseSSE<=1);
9879   match(Set Y (PowD X Y));  // Raise X to the Yth power
9880   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9881   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9882   ins_encode %{
9883     __ subptr(rsp, 8);
9884     __ fld_s($X$$reg - 1);
9885     __ fast_pow();
9886     __ addptr(rsp, 8);
9887   %}
9888   ins_pipe( pipe_slow );
9889 %}
9890 
9891 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9892   predicate (UseSSE>=2);
9893   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9894   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9895   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9896   ins_encode %{
9897     __ subptr(rsp, 8);
9898     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9899     __ fld_d(Address(rsp, 0));
9900     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9901     __ fld_d(Address(rsp, 0));
9902     __ fast_pow();
9903     __ fstp_d(Address(rsp, 0));
9904     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9905     __ addptr(rsp, 8);
9906   %}
9907   ins_pipe( pipe_slow );
9908 %}
9909 
9910 
9911 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9912   predicate (UseSSE<=1);
9913   match(Set dpr1 (ExpD dpr1));
9914   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9915   format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
9916   ins_encode %{
9917     __ fast_exp();
9918   %}
9919   ins_pipe( pipe_slow );
9920 %}
9921 
9922 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9923   predicate (UseSSE>=2);
9924   match(Set dst (ExpD src));
9925   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9926   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
9927   ins_encode %{
9928     __ subptr(rsp, 8);
9929     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9930     __ fld_d(Address(rsp, 0));
9931     __ fast_exp();
9932     __ fstp_d(Address(rsp, 0));
9933     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9934     __ addptr(rsp, 8);
9935   %}
9936   ins_pipe( pipe_slow );
9937 %}
9938 
9939 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9940   predicate (UseSSE<=1);
9941   // The source Double operand on FPU stack
9942   match(Set dst (Log10D src));
9943   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9944   // fxch         ; swap ST(0) with ST(1)
9945   // fyl2x        ; compute log_10(2) * log_2(x)
9946   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9947             "FXCH   \n\t"
9948             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9949          %}
9950   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9951               Opcode(0xD9), Opcode(0xC9),   // fxch
9952               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9953 
9954   ins_pipe( pipe_slow );
9955 %}
9956 
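// Both log10 flavors (and the natural-log rules below) rely on the identity
// log_b(x) = log_b(2) * log_2(x): FLDLG2/FLDLN2 push the constant and FYL2X
// computes ST(1) * log_2(ST(0)), popping the stack.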
9957 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9958   predicate (UseSSE>=2);
9959   effect(KILL cr);
9960   match(Set dst (Log10D src));
9961   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9962   // fyl2x        ; compute log_10(2) * log_2(x)
9963   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9964             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9965          %}
9966   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9967               Push_SrcD(src),
9968               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9969               Push_ResultD(dst));
9970 
9971   ins_pipe( pipe_slow );
9972 %}
9973 
9974 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9975   predicate (UseSSE<=1);
9976   // The source Double operand on FPU stack
9977   match(Set dst (LogD src));
9978   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9979   // fxch         ; swap ST(0) with ST(1)
9980   // fyl2x        ; compute log_e(2) * log_2(x)
9981   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9982             "FXCH   \n\t"
9983             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9984          %}
9985   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9986               Opcode(0xD9), Opcode(0xC9),   // fxch
9987               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9988 
9989   ins_pipe( pipe_slow );
9990 %}
9991 
9992 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9993   predicate (UseSSE>=2);
9994   effect(KILL cr);
9995   // The source and result Double operands in XMM registers
9996   match(Set dst (LogD src));
9997   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9998   // fyl2x        ; compute log_e(2) * log_2(x)
9999   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10000             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10001          %}
10002   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10003               Push_SrcD(src),
10004               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10005               Push_ResultD(dst));
10006   ins_pipe( pipe_slow );
10007 %}
10008 
10009 //-------------Float Instructions-------------------------------
10010 // Float Math
10011 
10012 // Code for float compare:
10013 //     fcompp();
10014 //     fwait(); fnstsw_ax();
10015 //     sahf();
10016 //     movl(dst, unordered_result);
10017 //     jcc(Assembler::parity, exit);
10018 //     movl(dst, less_result);
10019 //     jcc(Assembler::below, exit);
10020 //     movl(dst, equal_result);
10021 //     jcc(Assembler::equal, exit);
10022 //     movl(dst, greater_result);
10023 //   exit:
10024 
10025 // P6 version of float compare, sets condition codes in EFLAGS
10026 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10027   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10028   match(Set cr (CmpF src1 src2));
10029   effect(KILL rax);
10030   ins_cost(150);
10031   format %{ "FLD    $src1\n\t"
10032             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10033             "JNP    exit\n\t"
10034             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10035             "SAHF\n"
10036      "exit:\tNOP               // avoid branch to branch" %}
10037   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10038   ins_encode( Push_Reg_DPR(src1),
10039               OpcP, RegOpc(src2),
10040               cmpF_P6_fixup );
10041   ins_pipe( pipe_slow );
10042 %}
10043 
10044 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10045   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10046   match(Set cr (CmpF src1 src2));
10047   ins_cost(100);
10048   format %{ "FLD    $src1\n\t"
10049             "FUCOMIP ST,$src2  // P6 instruction" %}
10050   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10051   ins_encode( Push_Reg_DPR(src1),
10052               OpcP, RegOpc(src2));
10053   ins_pipe( pipe_slow );
10054 %}
10055 
10056 
10057 // Compare & branch
10058 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10059   predicate(UseSSE == 0);
10060   match(Set cr (CmpF src1 src2));
10061   effect(KILL rax);
10062   ins_cost(200);
10063   format %{ "FLD    $src1\n\t"
10064             "FCOMp  $src2\n\t"
10065             "FNSTSW AX\n\t"
10066             "TEST   AX,0x400\n\t"
10067             "JZ,s   flags\n\t"
10068             "MOV    AH,1\t# unordered treat as LT\n"
10069     "flags:\tSAHF" %}
10070   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10071   ins_encode( Push_Reg_DPR(src1),
10072               OpcP, RegOpc(src2),
10073               fpu_flags);
10074   ins_pipe( pipe_slow );
10075 %}
10076 
10077 // Compare vs zero into -1,0,1
10078 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10079   predicate(UseSSE == 0);
10080   match(Set dst (CmpF3 src1 zero));
10081   effect(KILL cr, KILL rax);
10082   ins_cost(280);
10083   format %{ "FTSTF  $dst,$src1" %}
10084   opcode(0xE4, 0xD9);
10085   ins_encode( Push_Reg_DPR(src1),
10086               OpcS, OpcP, PopFPU,
10087               CmpF_Result(dst));
10088   ins_pipe( pipe_slow );
10089 %}
10090 
10091 // Compare into -1,0,1
10092 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10093   predicate(UseSSE == 0);
10094   match(Set dst (CmpF3 src1 src2));
10095   effect(KILL cr, KILL rax);
10096   ins_cost(300);
10097   format %{ "FCMPF  $dst,$src1,$src2" %}
10098   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10099   ins_encode( Push_Reg_DPR(src1),
10100               OpcP, RegOpc(src2),
10101               CmpF_Result(dst));
10102   ins_pipe( pipe_slow );
10103 %}
10104 
10105 // float compare and set condition codes in EFLAGS by XMM regs
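// UCOMISS sets ZF=PF=CF=1 for an unordered (NaN) compare; the PUSHF/AND/POPF
// fixup shown in the format below clears ZF and PF and leaves CF set, so a NaN
// operand is treated as 'less than' (same convention as the FPR compares above).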
10106 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10107   predicate(UseSSE>=1);
10108   match(Set cr (CmpF src1 src2));
10109   ins_cost(145);
10110   format %{ "UCOMISS $src1,$src2\n\t"
10111             "JNP,s   exit\n\t"
10112             "PUSHF\t# saw NaN, set CF\n\t"
10113             "AND     [rsp], #0xffffff2b\n\t"
10114             "POPF\n"
10115     "exit:" %}
10116   ins_encode %{
10117     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10118     emit_cmpfp_fixup(_masm);
10119   %}
10120   ins_pipe( pipe_slow );
10121 %}
10122 
10123 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10124   predicate(UseSSE>=1);
10125   match(Set cr (CmpF src1 src2));
10126   ins_cost(100);
10127   format %{ "UCOMISS $src1,$src2" %}
10128   ins_encode %{
10129     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10130   %}
10131   ins_pipe( pipe_slow );
10132 %}
10133 
10134 // float compare and set condition codes in EFLAGS by XMM regs
10135 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10136   predicate(UseSSE>=1);
10137   match(Set cr (CmpF src1 (LoadF src2)));
10138   ins_cost(165);
10139   format %{ "UCOMISS $src1,$src2\n\t"
10140             "JNP,s   exit\n\t"
10141             "PUSHF\t# saw NaN, set CF\n\t"
10142             "AND     [rsp], #0xffffff2b\n\t"
10143             "POPF\n"
10144     "exit:" %}
10145   ins_encode %{
10146     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10147     emit_cmpfp_fixup(_masm);
10148   %}
10149   ins_pipe( pipe_slow );
10150 %}
10151 
10152 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10153   predicate(UseSSE>=1);
10154   match(Set cr (CmpF src1 (LoadF src2)));
10155   ins_cost(100);
10156   format %{ "UCOMISS $src1,$src2" %}
10157   ins_encode %{
10158     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10159   %}
10160   ins_pipe( pipe_slow );
10161 %}
10162 
10163 // Compare into -1,0,1 in XMM
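// (A NaN operand takes the JP branch with $dst still -1, which is the value
//  Java's fcmpl produces for NaN.)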
10164 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10165   predicate(UseSSE>=1);
10166   match(Set dst (CmpF3 src1 src2));
10167   effect(KILL cr);
10168   ins_cost(255);
10169   format %{ "UCOMISS $src1, $src2\n\t"
10170             "MOV     $dst, #-1\n\t"
10171             "JP,s    done\n\t"
10172             "JB,s    done\n\t"
10173             "SETNE   $dst\n\t"
10174             "MOVZB   $dst, $dst\n"
10175     "done:" %}
10176   ins_encode %{
10177     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10178     emit_cmpfp3(_masm, $dst$$Register);
10179   %}
10180   ins_pipe( pipe_slow );
10181 %}
10182 
10183 // Compare into -1,0,1 in XMM and memory
10184 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10185   predicate(UseSSE>=1);
10186   match(Set dst (CmpF3 src1 (LoadF src2)));
10187   effect(KILL cr);
10188   ins_cost(275);
10189   format %{ "UCOMISS $src1, $src2\n\t"
10190             "MOV     $dst, #-1\n\t"
10191             "JP,s    done\n\t"
10192             "JB,s    done\n\t"
10193             "SETNE   $dst\n\t"
10194             "MOVZB   $dst, $dst\n"
10195     "done:" %}
10196   ins_encode %{
10197     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10198     emit_cmpfp3(_masm, $dst$$Register);
10199   %}
10200   ins_pipe( pipe_slow );
10201 %}
10202 
10203 // Spill to obtain 24-bit precision
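// (Storing the extended-precision x87 result to a 32-bit stack slot is what
//  forces rounding to IEEE single precision; the "does not round to 24-bits"
//  variants below omit that store when the explicit rounding is not required.)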
10204 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10205   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10206   match(Set dst (SubF src1 src2));
10207 
10208   format %{ "FSUB   $dst,$src1 - $src2" %}
10209   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10210   ins_encode( Push_Reg_FPR(src1),
10211               OpcReg_FPR(src2),
10212               Pop_Mem_FPR(dst) );
10213   ins_pipe( fpu_mem_reg_reg );
10214 %}
10215 //
10216 // This instruction does not round to 24-bits
10217 instruct subFPR_reg(regFPR dst, regFPR src) %{
10218   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10219   match(Set dst (SubF dst src));
10220 
10221   format %{ "FSUB   $dst,$src" %}
10222   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10223   ins_encode( Push_Reg_FPR(src),
10224               OpcP, RegOpc(dst) );
10225   ins_pipe( fpu_reg_reg );
10226 %}
10227 
10228 // Spill to obtain 24-bit precision
10229 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10230   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10231   match(Set dst (AddF src1 src2));
10232 
10233   format %{ "FADD   $dst,$src1,$src2" %}
10234   opcode(0xD8, 0x0); /* D8 C0+i */
10235   ins_encode( Push_Reg_FPR(src2),
10236               OpcReg_FPR(src1),
10237               Pop_Mem_FPR(dst) );
10238   ins_pipe( fpu_mem_reg_reg );
10239 %}
10240 //
10241 // This instruction does not round to 24-bits
10242 instruct addFPR_reg(regFPR dst, regFPR src) %{
10243   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10244   match(Set dst (AddF dst src));
10245 
10246   format %{ "FLD    $src\n\t"
10247             "FADDp  $dst,ST" %}
10248   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10249   ins_encode( Push_Reg_FPR(src),
10250               OpcP, RegOpc(dst) );
10251   ins_pipe( fpu_reg_reg );
10252 %}
10253 
10254 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10255   predicate(UseSSE==0);
10256   match(Set dst (AbsF src));
10257   ins_cost(100);
10258   format %{ "FABS" %}
10259   opcode(0xE1, 0xD9);
10260   ins_encode( OpcS, OpcP );
10261   ins_pipe( fpu_reg_reg );
10262 %}
10263 
10264 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10265   predicate(UseSSE==0);
10266   match(Set dst (NegF src));
10267   ins_cost(100);
10268   format %{ "FCHS" %}
10269   opcode(0xE0, 0xD9);
10270   ins_encode( OpcS, OpcP );
10271   ins_pipe( fpu_reg_reg );
10272 %}
10273 
10274 // Cisc-alternate to addFPR_reg
10275 // Spill to obtain 24-bit precision
10276 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10277   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10278   match(Set dst (AddF src1 (LoadF src2)));
10279 
10280   format %{ "FLD    $src2\n\t"
10281             "FADD   ST,$src1\n\t"
10282             "FSTP_S $dst" %}
10283   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10284   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10285               OpcReg_FPR(src1),
10286               Pop_Mem_FPR(dst) );
10287   ins_pipe( fpu_mem_reg_mem );
10288 %}
10289 //
10290 // Cisc-alternate to addFPR_reg
10291 // This instruction does not round to 24-bits
10292 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10293   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10294   match(Set dst (AddF dst (LoadF src)));
10295 
10296   format %{ "FADD   $dst,$src" %}
10297   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10298   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10299               OpcP, RegOpc(dst) );
10300   ins_pipe( fpu_reg_mem );
10301 %}
10302 
10303 // Following two instructions for _222_mpegaudio
10304 // Spill to obtain 24-bit precision
10305 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10306   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10307   match(Set dst (AddF src1 src2));
10308 
10309   format %{ "FADD   $dst,$src1,$src2" %}
10310   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10311   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10312               OpcReg_FPR(src2),
10313               Pop_Mem_FPR(dst) );
10314   ins_pipe( fpu_mem_reg_mem );
10315 %}
10316 
10317 // Cisc-spill variant
10318 // Spill to obtain 24-bit precision
10319 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10320   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10321   match(Set dst (AddF src1 (LoadF src2)));
10322 
10323   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10324   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10325   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10326               set_instruction_start,
10327               OpcP, RMopc_Mem(secondary,src1),
10328               Pop_Mem_FPR(dst) );
10329   ins_pipe( fpu_mem_mem_mem );
10330 %}
10331 
10332 // Spill to obtain 24-bit precision
10333 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10334   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10335   match(Set dst (AddF src1 src2));
10336 
10337   format %{ "FADD   $dst,$src1,$src2" %}
10338   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10339   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10340               set_instruction_start,
10341               OpcP, RMopc_Mem(secondary,src1),
10342               Pop_Mem_FPR(dst) );
10343   ins_pipe( fpu_mem_mem_mem );
10344 %}
10345 
10346 
10347 // Spill to obtain 24-bit precision
10348 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10349   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10350   match(Set dst (AddF src con));
10351   format %{ "FLD    $src\n\t"
10352             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10353             "FSTP_S $dst"  %}
10354   ins_encode %{
10355     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10356     __ fadd_s($constantaddress($con));
10357     __ fstp_s(Address(rsp, $dst$$disp));
10358   %}
10359   ins_pipe(fpu_mem_reg_con);
10360 %}
10361 //
10362 // This instruction does not round to 24-bits
10363 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10364   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10365   match(Set dst (AddF src con));
10366   format %{ "FLD    $src\n\t"
10367             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10368             "FSTP   $dst"  %}
10369   ins_encode %{
10370     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10371     __ fadd_s($constantaddress($con));
10372     __ fstp_d($dst$$reg);
10373   %}
10374   ins_pipe(fpu_reg_reg_con);
10375 %}
10376 
10377 // Spill to obtain 24-bit precision
10378 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10379   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10380   match(Set dst (MulF src1 src2));
10381 
10382   format %{ "FLD    $src1\n\t"
10383             "FMUL   $src2\n\t"
10384             "FSTP_S $dst"  %}
10385   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10386   ins_encode( Push_Reg_FPR(src1),
10387               OpcReg_FPR(src2),
10388               Pop_Mem_FPR(dst) );
10389   ins_pipe( fpu_mem_reg_reg );
10390 %}
10391 //
10392 // This instruction does not round to 24-bits
10393 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10394   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10395   match(Set dst (MulF src1 src2));
10396 
10397   format %{ "FLD    $src1\n\t"
10398             "FMUL   $src2\n\t"
10399             "FSTP_S $dst"  %}
10400   opcode(0xD8, 0x1); /* D8 C8+i */
10401   ins_encode( Push_Reg_FPR(src2),
10402               OpcReg_FPR(src1),
10403               Pop_Reg_FPR(dst) );
10404   ins_pipe( fpu_reg_reg_reg );
10405 %}
10406 
10407 
10408 // Spill to obtain 24-bit precision
10409 // Cisc-alternate to reg-reg multiply
10410 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10411   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10412   match(Set dst (MulF src1 (LoadF src2)));
10413 
10414   format %{ "FLD_S  $src2\n\t"
10415             "FMUL   $src1\n\t"
10416             "FSTP_S $dst"  %}
10417   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10418   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10419               OpcReg_FPR(src1),
10420               Pop_Mem_FPR(dst) );
10421   ins_pipe( fpu_mem_reg_mem );
10422 %}
10423 //
10424 // This instruction does not round to 24-bits
10425 // Cisc-alternate to reg-reg multiply
10426 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10427   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10428   match(Set dst (MulF src1 (LoadF src2)));
10429 
10430   format %{ "FMUL   $dst,$src1,$src2" %}
10431   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10432   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10433               OpcReg_FPR(src1),
10434               Pop_Reg_FPR(dst) );
10435   ins_pipe( fpu_reg_reg_mem );
10436 %}
10437 
10438 // Spill to obtain 24-bit precision
10439 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10440   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10441   match(Set dst (MulF src1 src2));
10442 
10443   format %{ "FMUL   $dst,$src1,$src2" %}
10444   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10445   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10446               set_instruction_start,
10447               OpcP, RMopc_Mem(secondary,src1),
10448               Pop_Mem_FPR(dst) );
10449   ins_pipe( fpu_mem_mem_mem );
10450 %}
10451 
10452 // Spill to obtain 24-bit precision
10453 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10454   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10455   match(Set dst (MulF src con));
10456 
10457   format %{ "FLD    $src\n\t"
10458             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10459             "FSTP_S $dst"  %}
10460   ins_encode %{
10461     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10462     __ fmul_s($constantaddress($con));
10463     __ fstp_s(Address(rsp, $dst$$disp));
10464   %}
10465   ins_pipe(fpu_mem_reg_con);
10466 %}
10467 //
10468 // This instruction does not round to 24-bits
10469 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10470   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10471   match(Set dst (MulF src con));
10472 
10473   format %{ "FLD    $src\n\t"
10474             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10475             "FSTP   $dst"  %}
10476   ins_encode %{
10477     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10478     __ fmul_s($constantaddress($con));
10479     __ fstp_d($dst$$reg);
10480   %}
10481   ins_pipe(fpu_reg_reg_con);
10482 %}
10483 
10484 
10485 //
10486 // MACRO1 -- subsume unshared load into mulFPR
10487 // This instruction does not round to 24-bits
10488 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10489   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10490   match(Set dst (MulF (LoadF mem1) src));
10491 
10492   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10493             "FMUL   ST,$src\n\t"
10494             "FSTP   $dst" %}
10495   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10496   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10497               OpcReg_FPR(src),
10498               Pop_Reg_FPR(dst) );
10499   ins_pipe( fpu_reg_reg_mem );
10500 %}
10501 //
10502 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10503 // This instruction does not round to 24-bits
10504 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10505   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10506   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10507   ins_cost(95);
10508 
10509   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10510             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10511             "FADD   ST,$src2\n\t"
10512             "FSTP   $dst" %}
10513   opcode(0xD9); /* LoadF D9 /0 */
10514   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10515               FMul_ST_reg(src1),
10516               FAdd_ST_reg(src2),
10517               Pop_Reg_FPR(dst) );
10518   ins_pipe( fpu_reg_mem_reg_reg );
10519 %}
10520 
10521 // MACRO3 -- addFPR a mulFPR
10522 // This instruction does not round to 24-bits.  It is a '2-address'
10523 // instruction in that the result goes back to src2.  This eliminates
10524 // a move from the macro; possibly the register allocator will have
10525 // to add it back (and maybe not).
10526 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10527   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10528   match(Set src2 (AddF (MulF src0 src1) src2));
10529 
10530   format %{ "FLD    $src0     ===MACRO3===\n\t"
10531             "FMUL   ST,$src1\n\t"
10532             "FADDP  $src2,ST" %}
10533   opcode(0xD9); /* LoadF D9 /0 */
10534   ins_encode( Push_Reg_FPR(src0),
10535               FMul_ST_reg(src1),
10536               FAddP_reg_ST(src2) );
10537   ins_pipe( fpu_reg_reg_reg );
10538 %}
10539 
10540 // MACRO4 -- divFPR subFPR
10541 // This instruction does not round to 24-bits
10542 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10543   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10544   match(Set dst (DivF (SubF src2 src1) src3));
10545 
10546   format %{ "FLD    $src2   ===MACRO4===\n\t"
10547             "FSUB   ST,$src1\n\t"
10548             "FDIV   ST,$src3\n\t"
10549             "FSTP  $dst" %}
10550   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10551   ins_encode( Push_Reg_FPR(src2),
10552               subFPR_divFPR_encode(src1,src3),
10553               Pop_Reg_FPR(dst) );
10554   ins_pipe( fpu_reg_reg_reg_reg );
10555 %}
10556 
10557 // Spill to obtain 24-bit precision
10558 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10559   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10560   match(Set dst (DivF src1 src2));
10561 
10562   format %{ "FDIV   $dst,$src1,$src2" %}
10563   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10564   ins_encode( Push_Reg_FPR(src1),
10565               OpcReg_FPR(src2),
10566               Pop_Mem_FPR(dst) );
10567   ins_pipe( fpu_mem_reg_reg );
10568 %}
10569 //
10570 // This instruction does not round to 24-bits
10571 instruct divFPR_reg(regFPR dst, regFPR src) %{
10572   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10573   match(Set dst (DivF dst src));
10574 
10575   format %{ "FDIV   $dst,$src" %}
10576   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10577   ins_encode( Push_Reg_FPR(src),
10578               OpcP, RegOpc(dst) );
10579   ins_pipe( fpu_reg_reg );
10580 %}
10581 
10582 
10583 // Spill to obtain 24-bit precision
10584 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10585   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10586   match(Set dst (ModF src1 src2));
10587   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10588 
10589   format %{ "FMOD   $dst,$src1,$src2" %}
10590   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10591               emitModDPR(),
10592               Push_Result_Mod_DPR(src2),
10593               Pop_Mem_FPR(dst));
10594   ins_pipe( pipe_slow );
10595 %}
10596 //
10597 // This instruction does not round to 24-bits
10598 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10599   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10600   match(Set dst (ModF dst src));
10601   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10602 
10603   format %{ "FMOD   $dst,$src" %}
10604   ins_encode(Push_Reg_Mod_DPR(dst, src),
10605               emitModDPR(),
10606               Push_Result_Mod_DPR(src),
10607               Pop_Reg_FPR(dst));
10608   ins_pipe( pipe_slow );
10609 %}
10610 
10611 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10612   predicate(UseSSE>=1);
10613   match(Set dst (ModF src0 src1));
10614   effect(KILL rax, KILL cr);
10615   format %{ "SUB    ESP,4\t # FMOD\n"
10616           "\tMOVSS  [ESP+0],$src1\n"
10617           "\tFLD_S  [ESP+0]\n"
10618           "\tMOVSS  [ESP+0],$src0\n"
10619           "\tFLD_S  [ESP+0]\n"
10620      "loop:\tFPREM\n"
10621           "\tFWAIT\n"
10622           "\tFNSTSW AX\n"
10623           "\tSAHF\n"
10624           "\tJP     loop\n"
10625           "\tFSTP_S [ESP+0]\n"
10626           "\tMOVSS  $dst,[ESP+0]\n"
10627           "\tADD    ESP,4\n"
10628           "\tFSTP   ST0\t # Restore FPU Stack"
10629     %}
10630   ins_cost(250);
10631   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10632   ins_pipe( pipe_slow );
10633 %}
10634 
10635 
10636 //----------Arithmetic Conversion Instructions---------------------------------
10637 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10638 
10639 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10640   predicate(UseSSE==0);
10641   match(Set dst (RoundFloat src));
10642   ins_cost(125);
10643   format %{ "FST_S  $dst,$src\t# F-round" %}
10644   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10645   ins_pipe( fpu_mem_reg );
10646 %}
10647 
10648 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10649   predicate(UseSSE<=1);
10650   match(Set dst (RoundDouble src));
10651   ins_cost(125);
10652   format %{ "FST_D  $dst,$src\t# D-round" %}
10653   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10654   ins_pipe( fpu_mem_reg );
10655 %}
10656 
10657 // Force rounding to 24-bit precision and 8-bit exponent
10658 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10659   predicate(UseSSE==0);
10660   match(Set dst (ConvD2F src));
10661   format %{ "FST_S  $dst,$src\t# F-round" %}
10662   expand %{
10663     roundFloat_mem_reg(dst,src);
10664   %}
10665 %}
10666 
10667 // Force rounding to 24-bit precision and 8-bit exponent
10668 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10669   predicate(UseSSE==1);
10670   match(Set dst (ConvD2F src));
10671   effect( KILL cr );
10672   format %{ "SUB    ESP,4\n\t"
10673             "FST_S  [ESP],$src\t# F-round\n\t"
10674             "MOVSS  $dst,[ESP]\n\t"
10675             "ADD ESP,4" %}
10676   ins_encode %{
10677     __ subptr(rsp, 4);
10678     if ($src$$reg != FPR1L_enc) {
10679       __ fld_s($src$$reg-1);
10680       __ fstp_s(Address(rsp, 0));
10681     } else {
10682       __ fst_s(Address(rsp, 0));
10683     }
10684     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10685     __ addptr(rsp, 4);
10686   %}
10687   ins_pipe( pipe_slow );
10688 %}
10689 
10690 // Force rounding double precision to single precision
10691 instruct convD2F_reg(regF dst, regD src) %{
10692   predicate(UseSSE>=2);
10693   match(Set dst (ConvD2F src));
10694   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10695   ins_encode %{
10696     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10697   %}
10698   ins_pipe( pipe_slow );
10699 %}
10700 
10701 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10702   predicate(UseSSE==0);
10703   match(Set dst (ConvF2D src));
10704   format %{ "FST_S  $dst,$src\t# D-round" %}
10705   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10706   ins_pipe( fpu_reg_reg );
10707 %}
10708 
10709 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10710   predicate(UseSSE==1);
10711   match(Set dst (ConvF2D src));
10712   format %{ "FST_D  $dst,$src\t# D-round" %}
10713   expand %{
10714     roundDouble_mem_reg(dst,src);
10715   %}
10716 %}
10717 
10718 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10719   predicate(UseSSE==1);
10720   match(Set dst (ConvF2D src));
10721   effect( KILL cr );
10722   format %{ "SUB    ESP,4\n\t"
10723             "MOVSS  [ESP] $src\n\t"
10724             "FLD_S  [ESP]\n\t"
10725             "ADD    ESP,4\n\t"
10726             "FSTP   $dst\t# D-round" %}
10727   ins_encode %{
10728     __ subptr(rsp, 4);
10729     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10730     __ fld_s(Address(rsp, 0));
10731     __ addptr(rsp, 4);
10732     __ fstp_d($dst$$reg);
10733   %}
10734   ins_pipe( pipe_slow );
10735 %}
10736 
10737 instruct convF2D_reg(regD dst, regF src) %{
10738   predicate(UseSSE>=2);
10739   match(Set dst (ConvF2D src));
10740   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10741   ins_encode %{
10742     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10743   %}
10744   ins_pipe( pipe_slow );
10745 %}
10746 
10747 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
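// (FIST and CVTTSD2SI produce the 'integer indefinite' value 0x80000000 for a
//  NaN or out-of-range input, so the CMP against 0x80000000 below routes those
//  cases to the d2i_wrapper slow path.)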
10748 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10749   predicate(UseSSE<=1);
10750   match(Set dst (ConvD2I src));
10751   effect( KILL tmp, KILL cr );
10752   format %{ "FLD    $src\t# Convert double to int \n\t"
10753             "FLDCW  trunc mode\n\t"
10754             "SUB    ESP,4\n\t"
10755             "FISTp  [ESP + #0]\n\t"
10756             "FLDCW  std/24-bit mode\n\t"
10757             "POP    EAX\n\t"
10758             "CMP    EAX,0x80000000\n\t"
10759             "JNE,s  fast\n\t"
10760             "FLD_D  $src\n\t"
10761             "CALL   d2i_wrapper\n"
10762       "fast:" %}
10763   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10764   ins_pipe( pipe_slow );
10765 %}
10766 
10767 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10768 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10769   predicate(UseSSE>=2);
10770   match(Set dst (ConvD2I src));
10771   effect( KILL tmp, KILL cr );
10772   format %{ "CVTTSD2SI $dst, $src\n\t"
10773             "CMP    $dst,0x80000000\n\t"
10774             "JNE,s  fast\n\t"
10775             "SUB    ESP, 8\n\t"
10776             "MOVSD  [ESP], $src\n\t"
10777             "FLD_D  [ESP]\n\t"
10778             "ADD    ESP, 8\n\t"
10779             "CALL   d2i_wrapper\n"
10780       "fast:" %}
10781   ins_encode %{
10782     Label fast;
10783     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10784     __ cmpl($dst$$Register, 0x80000000);
10785     __ jccb(Assembler::notEqual, fast);
10786     __ subptr(rsp, 8);
10787     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10788     __ fld_d(Address(rsp, 0));
10789     __ addptr(rsp, 8);
10790     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10791     __ bind(fast);
10792   %}
10793   ins_pipe( pipe_slow );
10794 %}
10795 
10796 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10797   predicate(UseSSE<=1);
10798   match(Set dst (ConvD2L src));
10799   effect( KILL cr );
10800   format %{ "FLD    $src\t# Convert double to long\n\t"
10801             "FLDCW  trunc mode\n\t"
10802             "SUB    ESP,8\n\t"
10803             "FISTp  [ESP + #0]\n\t"
10804             "FLDCW  std/24-bit mode\n\t"
10805             "POP    EAX\n\t"
10806             "POP    EDX\n\t"
10807             "CMP    EDX,0x80000000\n\t"
10808             "JNE,s  fast\n\t"
10809             "TEST   EAX,EAX\n\t"
10810             "JNE,s  fast\n\t"
10811             "FLD    $src\n\t"
10812             "CALL   d2l_wrapper\n"
10813       "fast:" %}
10814   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10815   ins_pipe( pipe_slow );
10816 %}
10817 
10818 // XMM lacks a float/double->long conversion, so use the old FPU stack.
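// (In 32-bit mode CVTTSD2SI can only produce a 32-bit result, so the value is
//  bounced through memory and converted with FISTP, which stores all 64 bits.)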
10819 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10820   predicate (UseSSE>=2);
10821   match(Set dst (ConvD2L src));
10822   effect( KILL cr );
10823   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10824             "MOVSD  [ESP],$src\n\t"
10825             "FLD_D  [ESP]\n\t"
10826             "FLDCW  trunc mode\n\t"
10827             "FISTp  [ESP + #0]\n\t"
10828             "FLDCW  std/24-bit mode\n\t"
10829             "POP    EAX\n\t"
10830             "POP    EDX\n\t"
10831             "CMP    EDX,0x80000000\n\t"
10832             "JNE,s  fast\n\t"
10833             "TEST   EAX,EAX\n\t"
10834             "JNE,s  fast\n\t"
10835             "SUB    ESP,8\n\t"
10836             "MOVSD  [ESP],$src\n\t"
10837             "FLD_D  [ESP]\n\t"
10838             "ADD    ESP,8\n\t"
10839             "CALL   d2l_wrapper\n"
10840       "fast:" %}
10841   ins_encode %{
10842     Label fast;
10843     __ subptr(rsp, 8);
10844     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10845     __ fld_d(Address(rsp, 0));
10846     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10847     __ fistp_d(Address(rsp, 0));
10848     // Restore the rounding mode, mask the exception
10849     if (Compile::current()->in_24_bit_fp_mode()) {
10850       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10851     } else {
10852       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10853     }
10854     // Load the converted long, adjust CPU stack
10855     __ pop(rax);
10856     __ pop(rdx);
10857     __ cmpl(rdx, 0x80000000);
10858     __ jccb(Assembler::notEqual, fast);
10859     __ testl(rax, rax);
10860     __ jccb(Assembler::notEqual, fast);
10861     __ subptr(rsp, 8);
10862     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10863     __ fld_d(Address(rsp, 0));
10864     __ addptr(rsp, 8);
10865     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10866     __ bind(fast);
10867   %}
10868   ins_pipe( pipe_slow );
10869 %}
10870 
10871 // Convert a double to an int.  Java semantics require we do complex
10872 // manipulations in the corner cases.  So we set the rounding mode to
10873 // 'zero', store the darned double down as an int, and reset the
10874 // rounding mode to 'nearest'.  The hardware stores a flag value down
10875 // if we would overflow or converted a NAN; we check for this
10876 // and take the slow path if needed.
10877 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10878   predicate(UseSSE==0);
10879   match(Set dst (ConvF2I src));
10880   effect( KILL tmp, KILL cr );
10881   format %{ "FLD    $src\t# Convert float to int \n\t"
10882             "FLDCW  trunc mode\n\t"
10883             "SUB    ESP,4\n\t"
10884             "FISTp  [ESP + #0]\n\t"
10885             "FLDCW  std/24-bit mode\n\t"
10886             "POP    EAX\n\t"
10887             "CMP    EAX,0x80000000\n\t"
10888             "JNE,s  fast\n\t"
10889             "FLD    $src\n\t"
10890             "CALL   d2i_wrapper\n"
10891       "fast:" %}
10892   // DPR2I_encoding works for FPR2I
10893   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10894   ins_pipe( pipe_slow );
10895 %}
10896 
10897 // Convert a float in xmm to an int reg.
10898 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10899   predicate(UseSSE>=1);
10900   match(Set dst (ConvF2I src));
10901   effect( KILL tmp, KILL cr );
10902   format %{ "CVTTSS2SI $dst, $src\n\t"
10903             "CMP    $dst,0x80000000\n\t"
10904             "JNE,s  fast\n\t"
10905             "SUB    ESP, 4\n\t"
10906             "MOVSS  [ESP], $src\n\t"
10907             "FLD    [ESP]\n\t"
10908             "ADD    ESP, 4\n\t"
10909             "CALL   d2i_wrapper\n"
10910       "fast:" %}
10911   ins_encode %{
10912     Label fast;
10913     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10914     __ cmpl($dst$$Register, 0x80000000);
10915     __ jccb(Assembler::notEqual, fast);
10916     __ subptr(rsp, 4);
10917     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10918     __ fld_s(Address(rsp, 0));
10919     __ addptr(rsp, 4);
10920     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10921     __ bind(fast);
10922   %}
10923   ins_pipe( pipe_slow );
10924 %}
10925 
10926 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10927   predicate(UseSSE==0);
10928   match(Set dst (ConvF2L src));
10929   effect( KILL cr );
10930   format %{ "FLD    $src\t# Convert float to long\n\t"
10931             "FLDCW  trunc mode\n\t"
10932             "SUB    ESP,8\n\t"
10933             "FISTp  [ESP + #0]\n\t"
10934             "FLDCW  std/24-bit mode\n\t"
10935             "POP    EAX\n\t"
10936             "POP    EDX\n\t"
10937             "CMP    EDX,0x80000000\n\t"
10938             "JNE,s  fast\n\t"
10939             "TEST   EAX,EAX\n\t"
10940             "JNE,s  fast\n\t"
10941             "FLD    $src\n\t"
10942             "CALL   d2l_wrapper\n"
10943       "fast:" %}
10944   // DPR2L_encoding works for FPR2L
10945   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10946   ins_pipe( pipe_slow );
10947 %}
10948 
10949 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10950 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10951   predicate (UseSSE>=1);
10952   match(Set dst (ConvF2L src));
10953   effect( KILL cr );
10954   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10955             "MOVSS  [ESP],$src\n\t"
10956             "FLD_S  [ESP]\n\t"
10957             "FLDCW  trunc mode\n\t"
10958             "FISTp  [ESP + #0]\n\t"
10959             "FLDCW  std/24-bit mode\n\t"
10960             "POP    EAX\n\t"
10961             "POP    EDX\n\t"
10962             "CMP    EDX,0x80000000\n\t"
10963             "JNE,s  fast\n\t"
10964             "TEST   EAX,EAX\n\t"
10965             "JNE,s  fast\n\t"
10966             "SUB    ESP,4\t# Convert float to long\n\t"
10967             "MOVSS  [ESP],$src\n\t"
10968             "FLD_S  [ESP]\n\t"
10969             "ADD    ESP,4\n\t"
10970             "CALL   d2l_wrapper\n"
10971       "fast:" %}
10972   ins_encode %{
10973     Label fast;
10974     __ subptr(rsp, 8);
10975     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10976     __ fld_s(Address(rsp, 0));
10977     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10978     __ fistp_d(Address(rsp, 0));
10979     // Restore the rounding mode, mask the exception
10980     if (Compile::current()->in_24_bit_fp_mode()) {
10981       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10982     } else {
10983       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10984     }
10985     // Load the converted long, adjust CPU stack
10986     __ pop(rax);
10987     __ pop(rdx);
10988     __ cmpl(rdx, 0x80000000);
10989     __ jccb(Assembler::notEqual, fast);
10990     __ testl(rax, rax);
10991     __ jccb(Assembler::notEqual, fast);
10992     __ subptr(rsp, 4);
10993     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10994     __ fld_s(Address(rsp, 0));
10995     __ addptr(rsp, 4);
10996     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10997     __ bind(fast);
10998   %}
10999   ins_pipe( pipe_slow );
11000 %}
11001 
11002 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11003   predicate( UseSSE<=1 );
11004   match(Set dst (ConvI2D src));
11005   format %{ "FILD   $src\n\t"
11006             "FSTP   $dst" %}
11007   opcode(0xDB, 0x0);  /* DB /0 */
11008   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11009   ins_pipe( fpu_reg_mem );
11010 %}
11011 
11012 instruct convI2D_reg(regD dst, rRegI src) %{
11013   predicate( UseSSE>=2 && !UseXmmI2D );
11014   match(Set dst (ConvI2D src));
11015   format %{ "CVTSI2SD $dst,$src" %}
11016   ins_encode %{
11017     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11018   %}
11019   ins_pipe( pipe_slow );
11020 %}
11021 
11022 instruct convI2D_mem(regD dst, memory mem) %{
11023   predicate( UseSSE>=2 );
11024   match(Set dst (ConvI2D (LoadI mem)));
11025   format %{ "CVTSI2SD $dst,$mem" %}
11026   ins_encode %{
11027     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11028   %}
11029   ins_pipe( pipe_slow );
11030 %}
11031 
11032 instruct convXI2D_reg(regD dst, rRegI src)
11033 %{
11034   predicate( UseSSE>=2 && UseXmmI2D );
11035   match(Set dst (ConvI2D src));
11036 
11037   format %{ "MOVD  $dst,$src\n\t"
11038             "CVTDQ2PD $dst,$dst\t# i2d" %}
11039   ins_encode %{
11040     __ movdl($dst$$XMMRegister, $src$$Register);
11041     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11042   %}
11043   ins_pipe(pipe_slow); // XXX
11044 %}
11045 
11046 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11047   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11048   match(Set dst (ConvI2D (LoadI mem)));
11049   format %{ "FILD   $mem\n\t"
11050             "FSTP   $dst" %}
11051   opcode(0xDB);      /* DB /0 */
11052   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11053               Pop_Reg_DPR(dst));
11054   ins_pipe( fpu_reg_mem );
11055 %}
11056 
11057 // Convert a byte to a float; no rounding step needed.
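// (Any value masked with 0xFF fits exactly in a float's 24-bit significand, so
//  the usual 24-bit rounding spill can be skipped.)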
11058 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11059   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11060   match(Set dst (ConvI2F src));
11061   format %{ "FILD   $src\n\t"
11062             "FSTP   $dst" %}
11063 
11064   opcode(0xDB, 0x0);  /* DB /0 */
11065   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11066   ins_pipe( fpu_reg_mem );
11067 %}
11068 
11069 // In 24-bit mode, force exponent rounding by storing back out
11070 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11071   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11072   match(Set dst (ConvI2F src));
11073   ins_cost(200);
11074   format %{ "FILD   $src\n\t"
11075             "FSTP_S $dst" %}
11076   opcode(0xDB, 0x0);  /* DB /0 */
11077   ins_encode( Push_Mem_I(src),
11078               Pop_Mem_FPR(dst));
11079   ins_pipe( fpu_mem_mem );
11080 %}
11081 
11082 // In 24-bit mode, force exponent rounding by storing back out
11083 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11084   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11085   match(Set dst (ConvI2F (LoadI mem)));
11086   ins_cost(200);
11087   format %{ "FILD   $mem\n\t"
11088             "FSTP_S $dst" %}
11089   opcode(0xDB);  /* DB /0 */
11090   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11091               Pop_Mem_FPR(dst));
11092   ins_pipe( fpu_mem_mem );
11093 %}
11094 
11095 // This instruction does not round to 24-bits
11096 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11097   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11098   match(Set dst (ConvI2F src));
11099   format %{ "FILD   $src\n\t"
11100             "FSTP   $dst" %}
11101   opcode(0xDB, 0x0);  /* DB /0 */
11102   ins_encode( Push_Mem_I(src),
11103               Pop_Reg_FPR(dst));
11104   ins_pipe( fpu_reg_mem );
11105 %}
11106 
11107 // This instruction does not round to 24-bits
11108 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11109   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11110   match(Set dst (ConvI2F (LoadI mem)));
11111   format %{ "FILD   $mem\n\t"
11112             "FSTP   $dst" %}
11113   opcode(0xDB);      /* DB /0 */
11114   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11115               Pop_Reg_FPR(dst));
11116   ins_pipe( fpu_reg_mem );
11117 %}
11118 
11119 // Convert an int to a float in xmm; no rounding step needed.
11120 instruct convI2F_reg(regF dst, rRegI src) %{
11121   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11122   match(Set dst (ConvI2F src));
11123   format %{ "CVTSI2SS $dst, $src" %}
11124   ins_encode %{
11125     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11126   %}
11127   ins_pipe( pipe_slow );
11128 %}
11129 
instruct convXI2F_reg(regF dst, rRegI src)
11131 %{
11132   predicate( UseSSE>=2 && UseXmmI2F );
11133   match(Set dst (ConvI2F src));
11134 
11135   format %{ "MOVD  $dst,$src\n\t"
11136             "CVTDQ2PS $dst,$dst\t# i2f" %}
11137   ins_encode %{
11138     __ movdl($dst$$XMMRegister, $src$$Register);
11139     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11140   %}
11141   ins_pipe(pipe_slow); // XXX
11142 %}
11143 
11144 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11145   match(Set dst (ConvI2L src));
11146   effect(KILL cr);
11147   ins_cost(375);
11148   format %{ "MOV    $dst.lo,$src\n\t"
11149             "MOV    $dst.hi,$src\n\t"
11150             "SAR    $dst.hi,31" %}
11151   ins_encode(convert_int_long(dst,src));
11152   ins_pipe( ialu_reg_reg_long );
11153 %}
11154 
11155 // Zero-extend convert int to long
11156 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11157   match(Set dst (AndL (ConvI2L src) mask) );
11158   effect( KILL flags );
11159   ins_cost(250);
11160   format %{ "MOV    $dst.lo,$src\n\t"
11161             "XOR    $dst.hi,$dst.hi" %}
11162   opcode(0x33); // XOR
11163   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11164   ins_pipe( ialu_reg_reg_long );
11165 %}
11166 
11167 // Zero-extend long
11168 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11169   match(Set dst (AndL src mask) );
11170   effect( KILL flags );
11171   ins_cost(250);
11172   format %{ "MOV    $dst.lo,$src.lo\n\t"
11173             "XOR    $dst.hi,$dst.hi\n\t" %}
11174   opcode(0x33); // XOR
11175   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11180   predicate (UseSSE<=1);
11181   match(Set dst (ConvL2D src));
11182   effect( KILL cr );
11183   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11184             "PUSH   $src.lo\n\t"
11185             "FILD   ST,[ESP + #0]\n\t"
11186             "ADD    ESP,8\n\t"
11187             "FSTP_D $dst\t# D-round" %}
11188   opcode(0xDF, 0x5);  /* DF /5 */
11189   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11190   ins_pipe( pipe_slow );
11191 %}
11192 
11193 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11194   predicate (UseSSE>=2);
11195   match(Set dst (ConvL2D src));
11196   effect( KILL cr );
11197   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11198             "PUSH   $src.lo\n\t"
11199             "FILD_D [ESP]\n\t"
11200             "FSTP_D [ESP]\n\t"
11201             "MOVSD  $dst,[ESP]\n\t"
11202             "ADD    ESP,8" %}
11203   opcode(0xDF, 0x5);  /* DF /5 */
11204   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11205   ins_pipe( pipe_slow );
11206 %}
11207 
11208 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11209   predicate (UseSSE>=1);
11210   match(Set dst (ConvL2F src));
11211   effect( KILL cr );
11212   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11213             "PUSH   $src.lo\n\t"
11214             "FILD_D [ESP]\n\t"
11215             "FSTP_S [ESP]\n\t"
11216             "MOVSS  $dst,[ESP]\n\t"
11217             "ADD    ESP,8" %}
11218   opcode(0xDF, 0x5);  /* DF /5 */
11219   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11220   ins_pipe( pipe_slow );
11221 %}
11222 
11223 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11224   match(Set dst (ConvL2F src));
11225   effect( KILL cr );
11226   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11227             "PUSH   $src.lo\n\t"
11228             "FILD   ST,[ESP + #0]\n\t"
11229             "ADD    ESP,8\n\t"
11230             "FSTP_S $dst\t# F-round" %}
11231   opcode(0xDF, 0x5);  /* DF /5 */
11232   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11233   ins_pipe( pipe_slow );
11234 %}
11235 
11236 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11237   match(Set dst (ConvL2I src));
11238   effect( DEF dst, USE src );
11239   format %{ "MOV    $dst,$src.lo" %}
11240   ins_encode(enc_CopyL_Lo(dst,src));
11241   ins_pipe( ialu_reg_reg );
11242 %}
11243 
11244 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11245   match(Set dst (MoveF2I src));
11246   effect( DEF dst, USE src );
11247   ins_cost(100);
11248   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11249   ins_encode %{
11250     __ movl($dst$$Register, Address(rsp, $src$$disp));
11251   %}
11252   ins_pipe( ialu_reg_mem );
11253 %}
11254 
11255 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11256   predicate(UseSSE==0);
11257   match(Set dst (MoveF2I src));
11258   effect( DEF dst, USE src );
11259 
11260   ins_cost(125);
11261   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11262   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11263   ins_pipe( fpu_mem_reg );
11264 %}
11265 
11266 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11267   predicate(UseSSE>=1);
11268   match(Set dst (MoveF2I src));
11269   effect( DEF dst, USE src );
11270 
11271   ins_cost(95);
11272   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11273   ins_encode %{
11274     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11275   %}
11276   ins_pipe( pipe_slow );
11277 %}
11278 
11279 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11280   predicate(UseSSE>=2);
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283   ins_cost(85);
11284   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11285   ins_encode %{
11286     __ movdl($dst$$Register, $src$$XMMRegister);
11287   %}
11288   ins_pipe( pipe_slow );
11289 %}
11290 
11291 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11292   match(Set dst (MoveI2F src));
11293   effect( DEF dst, USE src );
11294 
11295   ins_cost(100);
11296   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11297   ins_encode %{
11298     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11299   %}
11300   ins_pipe( ialu_mem_reg );
11301 %}
11302 
11303 
11304 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11305   predicate(UseSSE==0);
11306   match(Set dst (MoveI2F src));
11307   effect(DEF dst, USE src);
11308 
11309   ins_cost(125);
11310   format %{ "FLD_S  $src\n\t"
11311             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11312   opcode(0xD9);               /* D9 /0, FLD m32real */
11313   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11314               Pop_Reg_FPR(dst) );
11315   ins_pipe( fpu_reg_mem );
11316 %}
11317 
11318 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11319   predicate(UseSSE>=1);
11320   match(Set dst (MoveI2F src));
11321   effect( DEF dst, USE src );
11322 
11323   ins_cost(95);
11324   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11325   ins_encode %{
11326     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11327   %}
11328   ins_pipe( pipe_slow );
11329 %}
11330 
11331 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11332   predicate(UseSSE>=2);
11333   match(Set dst (MoveI2F src));
11334   effect( DEF dst, USE src );
11335 
11336   ins_cost(85);
11337   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11338   ins_encode %{
11339     __ movdl($dst$$XMMRegister, $src$$Register);
11340   %}
11341   ins_pipe( pipe_slow );
11342 %}
11343 
11344 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11345   match(Set dst (MoveD2L src));
11346   effect(DEF dst, USE src);
11347 
11348   ins_cost(250);
11349   format %{ "MOV    $dst.lo,$src\n\t"
11350             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11351   opcode(0x8B, 0x8B);
11352   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11353   ins_pipe( ialu_mem_long_reg );
11354 %}
11355 
11356 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11357   predicate(UseSSE<=1);
11358   match(Set dst (MoveD2L src));
11359   effect(DEF dst, USE src);
11360 
11361   ins_cost(125);
11362   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11363   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11364   ins_pipe( fpu_mem_reg );
11365 %}
11366 
11367 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11368   predicate(UseSSE>=2);
11369   match(Set dst (MoveD2L src));
11370   effect(DEF dst, USE src);
11371   ins_cost(95);
11372   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11373   ins_encode %{
11374     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11375   %}
11376   ins_pipe( pipe_slow );
11377 %}
11378 
11379 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11380   predicate(UseSSE>=2);
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src, TEMP tmp);
11383   ins_cost(85);
11384   format %{ "MOVD   $dst.lo,$src\n\t"
11385             "PSHUFLW $tmp,$src,0x4E\n\t"
11386             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11387   ins_encode %{
11388     __ movdl($dst$$Register, $src$$XMMRegister);
11389     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11390     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11391   %}
11392   ins_pipe( pipe_slow );
11393 %}
11394 
11395 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11396   match(Set dst (MoveL2D src));
11397   effect(DEF dst, USE src);
11398 
11399   ins_cost(200);
11400   format %{ "MOV    $dst,$src.lo\n\t"
11401             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11402   opcode(0x89, 0x89);
11403   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11404   ins_pipe( ialu_mem_long_reg );
11405 %}
11406 
11407 
11408 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11409   predicate(UseSSE<=1);
11410   match(Set dst (MoveL2D src));
11411   effect(DEF dst, USE src);
11412   ins_cost(125);
11413 
11414   format %{ "FLD_D  $src\n\t"
11415             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11416   opcode(0xDD);               /* DD /0, FLD m64real */
11417   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11418               Pop_Reg_DPR(dst) );
11419   ins_pipe( fpu_reg_mem );
11420 %}
11421 
11422 
11423 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11424   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11425   match(Set dst (MoveL2D src));
11426   effect(DEF dst, USE src);
11427 
11428   ins_cost(95);
11429   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11430   ins_encode %{
11431     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11432   %}
11433   ins_pipe( pipe_slow );
11434 %}
11435 
11436 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11437   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11438   match(Set dst (MoveL2D src));
11439   effect(DEF dst, USE src);
11440 
11441   ins_cost(95);
11442   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11443   ins_encode %{
11444     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11445   %}
11446   ins_pipe( pipe_slow );
11447 %}
11448 
11449 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11450   predicate(UseSSE>=2);
11451   match(Set dst (MoveL2D src));
11452   effect(TEMP dst, USE src, TEMP tmp);
11453   ins_cost(85);
11454   format %{ "MOVD   $dst,$src.lo\n\t"
11455             "MOVD   $tmp,$src.hi\n\t"
11456             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11457   ins_encode %{
11458     __ movdl($dst$$XMMRegister, $src$$Register);
11459     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11460     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11461   %}
11462   ins_pipe( pipe_slow );
11463 %}
11464 
11465 
11466 // =======================================================================
11467 // fast clearing of an array
11468 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11469   predicate(!UseFastStosb);
11470   match(Set dummy (ClearArray cnt base));
11471   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11472   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11473             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11474             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11475   ins_encode %{
11476     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11477   %}
11478   ins_pipe( pipe_slow );
11479 %}
11480 
11481 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11482   predicate(UseFastStosb);
11483   match(Set dummy (ClearArray cnt base));
11484   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11485   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11486             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11487             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11488   ins_encode %{
11489     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11490   %}
11491   ins_pipe( pipe_slow );
11492 %}
11493 
11494 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11495                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11496   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11497   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11498 
11499   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11500   ins_encode %{
11501     __ string_compare($str1$$Register, $str2$$Register,
11502                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11503                       $tmp1$$XMMRegister);
11504   %}
11505   ins_pipe( pipe_slow );
11506 %}
11507 
11508 // fast string equals
11509 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11510                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11511   match(Set result (StrEquals (Binary str1 str2) cnt));
11512   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11513 
11514   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11515   ins_encode %{
11516     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11517                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11518                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11519   %}
11520   ins_pipe( pipe_slow );
11521 %}
11522 
11523 // fast search of substring with known size.
11524 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11525                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11526   predicate(UseSSE42Intrinsics);
11527   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11528   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11529 
11530   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11531   ins_encode %{
11532     int icnt2 = (int)$int_cnt2$$constant;
11533     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
11536       __ string_indexofC8($str1$$Register, $str2$$Register,
11537                           $cnt1$$Register, $cnt2$$Register,
11538                           icnt2, $result$$Register,
11539                           $vec$$XMMRegister, $tmp$$Register);
11540     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11542       __ string_indexof($str1$$Register, $str2$$Register,
11543                         $cnt1$$Register, $cnt2$$Register,
11544                         icnt2, $result$$Register,
11545                         $vec$$XMMRegister, $tmp$$Register);
11546     }
11547   %}
11548   ins_pipe( pipe_slow );
11549 %}
11550 
11551 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11552                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11553   predicate(UseSSE42Intrinsics);
11554   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11555   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11556 
11557   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11558   ins_encode %{
11559     __ string_indexof($str1$$Register, $str2$$Register,
11560                       $cnt1$$Register, $cnt2$$Register,
11561                       (-1), $result$$Register,
11562                       $vec$$XMMRegister, $tmp$$Register);
11563   %}
11564   ins_pipe( pipe_slow );
11565 %}
11566 
11567 // fast array equals
11568 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11569                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11570 %{
11571   match(Set result (AryEq ary1 ary2));
11572   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11573   //ins_cost(300);
11574 
11575   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11576   ins_encode %{
11577     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11578                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11579                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11580   %}
11581   ins_pipe( pipe_slow );
11582 %}
11583 
11584 // encode char[] to byte[] in ISO_8859_1
11585 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11586                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11587                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11588   match(Set result (EncodeISOArray src (Binary dst len)));
11589   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11590 
11591   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11592   ins_encode %{
11593     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11594                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11595                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11596   %}
11597   ins_pipe( pipe_slow );
11598 %}
11599 
11600 
11601 //----------Control Flow Instructions------------------------------------------
11602 // Signed compare Instructions
11603 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11604   match(Set cr (CmpI op1 op2));
11605   effect( DEF cr, USE op1, USE op2 );
11606   format %{ "CMP    $op1,$op2" %}
11607   opcode(0x3B);  /* Opcode 3B /r */
11608   ins_encode( OpcP, RegReg( op1, op2) );
11609   ins_pipe( ialu_cr_reg_reg );
11610 %}
11611 
11612 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11613   match(Set cr (CmpI op1 op2));
11614   effect( DEF cr, USE op1 );
11615   format %{ "CMP    $op1,$op2" %}
11616   opcode(0x81,0x07);  /* Opcode 81 /7 */
11617   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11618   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11619   ins_pipe( ialu_cr_reg_imm );
11620 %}
11621 
11622 // Cisc-spilled version of cmpI_eReg
11623 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11624   match(Set cr (CmpI op1 (LoadI op2)));
11625 
11626   format %{ "CMP    $op1,$op2" %}
11627   ins_cost(500);
11628   opcode(0x3B);  /* Opcode 3B /r */
11629   ins_encode( OpcP, RegMem( op1, op2) );
11630   ins_pipe( ialu_cr_reg_mem );
11631 %}
11632 
11633 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11634   match(Set cr (CmpI src zero));
11635   effect( DEF cr, USE src );
11636 
11637   format %{ "TEST   $src,$src" %}
11638   opcode(0x85);
11639   ins_encode( OpcP, RegReg( src, src ) );
11640   ins_pipe( ialu_cr_reg_imm );
11641 %}
11642 
11643 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11644   match(Set cr (CmpI (AndI src con) zero));
11645 
11646   format %{ "TEST   $src,$con" %}
11647   opcode(0xF7,0x00);
11648   ins_encode( OpcP, RegOpc(src), Con32(con) );
11649   ins_pipe( ialu_cr_reg_imm );
11650 %}
11651 
11652 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11653   match(Set cr (CmpI (AndI src mem) zero));
11654 
11655   format %{ "TEST   $src,$mem" %}
11656   opcode(0x85);
11657   ins_encode( OpcP, RegMem( src, mem ) );
11658   ins_pipe( ialu_cr_reg_mem );
11659 %}
11660 
11661 // Unsigned compare Instructions; really, same as signed except they
11662 // produce an eFlagsRegU instead of eFlagsReg.
11663 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11664   match(Set cr (CmpU op1 op2));
11665 
11666   format %{ "CMPu   $op1,$op2" %}
11667   opcode(0x3B);  /* Opcode 3B /r */
11668   ins_encode( OpcP, RegReg( op1, op2) );
11669   ins_pipe( ialu_cr_reg_reg );
11670 %}
11671 
11672 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11673   match(Set cr (CmpU op1 op2));
11674 
11675   format %{ "CMPu   $op1,$op2" %}
11676   opcode(0x81,0x07);  /* Opcode 81 /7 */
11677   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11678   ins_pipe( ialu_cr_reg_imm );
11679 %}
11680 
11681 // // Cisc-spilled version of cmpU_eReg
11682 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11683   match(Set cr (CmpU op1 (LoadI op2)));
11684 
11685   format %{ "CMPu   $op1,$op2" %}
11686   ins_cost(500);
11687   opcode(0x3B);  /* Opcode 3B /r */
11688   ins_encode( OpcP, RegMem( op1, op2) );
11689   ins_pipe( ialu_cr_reg_mem );
11690 %}
11691 
11692 // // Cisc-spilled version of cmpU_eReg
11693 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11694 //  match(Set cr (CmpU (LoadI op1) op2));
11695 //
11696 //  format %{ "CMPu   $op1,$op2" %}
11697 //  ins_cost(500);
11698 //  opcode(0x39);  /* Opcode 39 /r */
11699 //  ins_encode( OpcP, RegMem( op1, op2) );
11700 //%}
11701 
11702 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11703   match(Set cr (CmpU src zero));
11704 
11705   format %{ "TESTu  $src,$src" %}
11706   opcode(0x85);
11707   ins_encode( OpcP, RegReg( src, src ) );
11708   ins_pipe( ialu_cr_reg_imm );
11709 %}
11710 
11711 // Unsigned pointer compare Instructions
11712 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11713   match(Set cr (CmpP op1 op2));
11714 
11715   format %{ "CMPu   $op1,$op2" %}
11716   opcode(0x3B);  /* Opcode 3B /r */
11717   ins_encode( OpcP, RegReg( op1, op2) );
11718   ins_pipe( ialu_cr_reg_reg );
11719 %}
11720 
11721 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11722   match(Set cr (CmpP op1 op2));
11723 
11724   format %{ "CMPu   $op1,$op2" %}
11725   opcode(0x81,0x07);  /* Opcode 81 /7 */
11726   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11727   ins_pipe( ialu_cr_reg_imm );
11728 %}
11729 
11730 // // Cisc-spilled version of cmpP_eReg
11731 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11732   match(Set cr (CmpP op1 (LoadP op2)));
11733 
11734   format %{ "CMPu   $op1,$op2" %}
11735   ins_cost(500);
11736   opcode(0x3B);  /* Opcode 3B /r */
11737   ins_encode( OpcP, RegMem( op1, op2) );
11738   ins_pipe( ialu_cr_reg_mem );
11739 %}
11740 
11741 // // Cisc-spilled version of cmpP_eReg
11742 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11743 //  match(Set cr (CmpP (LoadP op1) op2));
11744 //
11745 //  format %{ "CMPu   $op1,$op2" %}
11746 //  ins_cost(500);
11747 //  opcode(0x39);  /* Opcode 39 /r */
11748 //  ins_encode( OpcP, RegMem( op1, op2) );
11749 //%}
11750 
11751 // Compare raw pointer (used in out-of-heap check).
11752 // Only works because non-oop pointers must be raw pointers
11753 // and raw pointers have no anti-dependencies.
11754 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11755   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11756   match(Set cr (CmpP op1 (LoadP op2)));
11757 
11758   format %{ "CMPu   $op1,$op2" %}
11759   opcode(0x3B);  /* Opcode 3B /r */
11760   ins_encode( OpcP, RegMem( op1, op2) );
11761   ins_pipe( ialu_cr_reg_mem );
11762 %}
11763 
11764 //
11765 // This will generate a signed flags result. This should be ok
11766 // since any compare to a zero should be eq/neq.
11767 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11768   match(Set cr (CmpP src zero));
11769 
11770   format %{ "TEST   $src,$src" %}
11771   opcode(0x85);
11772   ins_encode( OpcP, RegReg( src, src ) );
11773   ins_pipe( ialu_cr_reg_imm );
11774 %}
11775 
11776 // Cisc-spilled version of testP_reg
11777 // This will generate a signed flags result. This should be ok
11778 // since any compare to a zero should be eq/neq.
11779 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11780   match(Set cr (CmpP (LoadP op) zero));
11781 
11782   format %{ "TEST   $op,0xFFFFFFFF" %}
11783   ins_cost(500);
11784   opcode(0xF7);               /* Opcode F7 /0 */
11785   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11786   ins_pipe( ialu_cr_reg_imm );
11787 %}
11788 
11789 // Yanked all unsigned pointer compare operations.
11790 // Pointer compares are done with CmpP which is already unsigned.
11791 
11792 //----------Max and Min--------------------------------------------------------
11793 // Min Instructions
11794 ////
11795 //   *** Min and Max using the conditional move are slower than the
11796 //   *** branch version on a Pentium III.
11797 // // Conditional move for min
11798 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11799 //  effect( USE_DEF op2, USE op1, USE cr );
11800 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11801 //  opcode(0x4C,0x0F);
11802 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11803 //  ins_pipe( pipe_cmov_reg );
11804 //%}
11805 //
11806 //// Min Register with Register (P6 version)
11807 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11808 //  predicate(VM_Version::supports_cmov() );
11809 //  match(Set op2 (MinI op1 op2));
11810 //  ins_cost(200);
11811 //  expand %{
11812 //    eFlagsReg cr;
11813 //    compI_eReg(cr,op1,op2);
11814 //    cmovI_reg_lt(op2,op1,cr);
11815 //  %}
11816 //%}
11817 
11818 // Min Register with Register (generic version)
11819 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11820   match(Set dst (MinI dst src));
11821   effect(KILL flags);
11822   ins_cost(300);
11823 
11824   format %{ "MIN    $dst,$src" %}
11825   opcode(0xCC);
11826   ins_encode( min_enc(dst,src) );
11827   ins_pipe( pipe_slow );
11828 %}
11829 
11830 // Max Register with Register
11831 //   *** Min and Max using the conditional move are slower than the
11832 //   *** branch version on a Pentium III.
11833 // // Conditional move for max
11834 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11835 //  effect( USE_DEF op2, USE op1, USE cr );
11836 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11837 //  opcode(0x4F,0x0F);
11838 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11839 //  ins_pipe( pipe_cmov_reg );
11840 //%}
11841 //
11842 // // Max Register with Register (P6 version)
11843 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11844 //  predicate(VM_Version::supports_cmov() );
11845 //  match(Set op2 (MaxI op1 op2));
11846 //  ins_cost(200);
11847 //  expand %{
11848 //    eFlagsReg cr;
11849 //    compI_eReg(cr,op1,op2);
11850 //    cmovI_reg_gt(op2,op1,cr);
11851 //  %}
11852 //%}
11853 
11854 // Max Register with Register (generic version)
11855 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11856   match(Set dst (MaxI dst src));
11857   effect(KILL flags);
11858   ins_cost(300);
11859 
11860   format %{ "MAX    $dst,$src" %}
11861   opcode(0xCC);
11862   ins_encode( max_enc(dst,src) );
11863   ins_pipe( pipe_slow );
11864 %}
11865 
11866 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since counted
// loops have a limit check on overflow.
11870 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11871   match(Set limit (LoopLimit (Binary init limit) stride));
11872   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11873   ins_cost(300);
11874 
11875   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11876   ins_encode %{
11877     int strd = (int)$stride$$constant;
11878     assert(strd != 1 && strd != -1, "sanity");
11879     int m1 = (strd > 0) ? 1 : -1;
11880     // Convert limit to long (EAX:EDX)
11881     __ cdql();
11882     // Convert init to long (init:tmp)
11883     __ movl($tmp$$Register, $init$$Register);
11884     __ sarl($tmp$$Register, 31);
11885     // $limit - $init
11886     __ subl($limit$$Register, $init$$Register);
11887     __ sbbl($limit_hi$$Register, $tmp$$Register);
11888     // + ($stride - 1)
11889     if (strd > 0) {
11890       __ addl($limit$$Register, (strd - 1));
11891       __ adcl($limit_hi$$Register, 0);
11892       __ movl($tmp$$Register, strd);
11893     } else {
11894       __ addl($limit$$Register, (strd + 1));
11895       __ adcl($limit_hi$$Register, -1);
11896       __ lneg($limit_hi$$Register, $limit$$Register);
11897       __ movl($tmp$$Register, -strd);
11898     }
    // signed division: (EAX:EDX) / pos_stride
11900     __ idivl($tmp$$Register);
11901     if (strd < 0) {
11902       // restore sign
11903       __ negl($tmp$$Register);
11904     }
11905     // (EAX) * stride
11906     __ mull($tmp$$Register);
11907     // + init (ignore upper bits)
11908     __ addl($limit$$Register, $init$$Register);
11909   %}
11910   ins_pipe( pipe_slow );
11911 %}
11912 
11913 // ============================================================================
11914 // Branch Instructions
11915 // Jump Table
11916 instruct jumpXtnd(rRegI switch_val) %{
11917   match(Jump switch_val);
11918   ins_cost(350);
11919   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11920   ins_encode %{
11921     // Jump to Address(table_base + switch_reg)
11922     Address index(noreg, $switch_val$$Register, Address::times_1);
11923     __ jump(ArrayAddress($constantaddress, index));
11924   %}
11925   ins_pipe(pipe_jmp);
11926 %}
11927 
11928 // Jump Direct - Label defines a relative address from JMP+1
11929 instruct jmpDir(label labl) %{
11930   match(Goto);
11931   effect(USE labl);
11932 
11933   ins_cost(300);
11934   format %{ "JMP    $labl" %}
11935   size(5);
11936   ins_encode %{
11937     Label* L = $labl$$label;
11938     __ jmp(*L, false); // Always long jump
11939   %}
11940   ins_pipe( pipe_jmp );
11941 %}
11942 
11943 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11944 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11945   match(If cop cr);
11946   effect(USE labl);
11947 
11948   ins_cost(300);
11949   format %{ "J$cop    $labl" %}
11950   size(6);
11951   ins_encode %{
11952     Label* L = $labl$$label;
11953     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11954   %}
11955   ins_pipe( pipe_jcc );
11956 %}
11957 
11958 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11959 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11960   match(CountedLoopEnd cop cr);
11961   effect(USE labl);
11962 
11963   ins_cost(300);
11964   format %{ "J$cop    $labl\t# Loop end" %}
11965   size(6);
11966   ins_encode %{
11967     Label* L = $labl$$label;
11968     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11969   %}
11970   ins_pipe( pipe_jcc );
11971 %}
11972 
11973 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11974 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11975   match(CountedLoopEnd cop cmp);
11976   effect(USE labl);
11977 
11978   ins_cost(300);
11979   format %{ "J$cop,u  $labl\t# Loop end" %}
11980   size(6);
11981   ins_encode %{
11982     Label* L = $labl$$label;
11983     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11984   %}
11985   ins_pipe( pipe_jcc );
11986 %}
11987 
11988 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11989   match(CountedLoopEnd cop cmp);
11990   effect(USE labl);
11991 
11992   ins_cost(200);
11993   format %{ "J$cop,u  $labl\t# Loop end" %}
11994   size(6);
11995   ins_encode %{
11996     Label* L = $labl$$label;
11997     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11998   %}
11999   ins_pipe( pipe_jcc );
12000 %}
12001 
12002 // Jump Direct Conditional - using unsigned comparison
12003 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12004   match(If cop cmp);
12005   effect(USE labl);
12006 
12007   ins_cost(300);
12008   format %{ "J$cop,u  $labl" %}
12009   size(6);
12010   ins_encode %{
12011     Label* L = $labl$$label;
12012     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12013   %}
12014   ins_pipe(pipe_jcc);
12015 %}
12016 
12017 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12018   match(If cop cmp);
12019   effect(USE labl);
12020 
12021   ins_cost(200);
12022   format %{ "J$cop,u  $labl" %}
12023   size(6);
12024   ins_encode %{
12025     Label* L = $labl$$label;
12026     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12027   %}
12028   ins_pipe(pipe_jcc);
12029 %}
12030 
12031 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12032   match(If cop cmp);
12033   effect(USE labl);
12034 
12035   ins_cost(200);
12036   format %{ $$template
12037     if ($cop$$cmpcode == Assembler::notEqual) {
12038       $$emit$$"JP,u   $labl\n\t"
12039       $$emit$$"J$cop,u   $labl"
12040     } else {
12041       $$emit$$"JP,u   done\n\t"
12042       $$emit$$"J$cop,u   $labl\n\t"
12043       $$emit$$"done:"
12044     }
12045   %}
12046   ins_encode %{
12047     Label* l = $labl$$label;
12048     if ($cop$$cmpcode == Assembler::notEqual) {
12049       __ jcc(Assembler::parity, *l, false);
12050       __ jcc(Assembler::notEqual, *l, false);
12051     } else if ($cop$$cmpcode == Assembler::equal) {
12052       Label done;
12053       __ jccb(Assembler::parity, done);
12054       __ jcc(Assembler::equal, *l, false);
12055       __ bind(done);
12056     } else {
12057        ShouldNotReachHere();
12058     }
12059   %}
12060   ins_pipe(pipe_jcc);
12061 %}
12062 
12063 // ============================================================================
12064 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12065 // array for an instance of the superklass.  Set a hidden internal cache on a
12066 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12067 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12068 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12069   match(Set result (PartialSubtypeCheck sub super));
12070   effect( KILL rcx, KILL cr );
12071 
12072   ins_cost(1100);  // slightly larger than the next version
12073   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12074             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12075             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12076             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12077             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12078             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12079             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12080      "miss:\t" %}
12081 
12082   opcode(0x1); // Force a XOR of EDI
12083   ins_encode( enc_PartialSubtypeCheck() );
12084   ins_pipe( pipe_slow );
12085 %}
12086 
12087 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12088   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12089   effect( KILL rcx, KILL result );
12090 
12091   ins_cost(1000);
12092   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12093             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12094             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12095             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12096             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12097             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12098      "miss:\t" %}
12099 
12100   opcode(0x0);  // No need to XOR EDI
12101   ins_encode( enc_PartialSubtypeCheck() );
12102   ins_pipe( pipe_slow );
12103 %}
12104 
12105 // ============================================================================
12106 // Branch Instructions -- short offset versions
12107 //
12108 // These instructions are used to replace jumps of a long offset (the default
12109 // match) with jumps of a shorter offset.  These instructions are all tagged
12110 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12111 // match rules in general matching.  Instead, the ADLC generates a conversion
12112 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
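//
// For reference, the encodings behind the size() attributes used here:
//
//   JMP  rel8    EB cb       2 bytes   (jmpb / "JMP,s")
//   JMP  rel32   E9 cd       5 bytes   (jmp  / "JMP")
//   Jcc  rel8    7x cb       2 bytes   (jccb / "Jcc,s")
//   Jcc  rel32   0F 8x cd    6 bytes   (jcc  / "Jcc")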
12116 
12117 // Jump Direct - Label defines a relative address from JMP+1
12118 instruct jmpDir_short(label labl) %{
12119   match(Goto);
12120   effect(USE labl);
12121 
12122   ins_cost(300);
12123   format %{ "JMP,s  $labl" %}
12124   size(2);
12125   ins_encode %{
12126     Label* L = $labl$$label;
12127     __ jmpb(*L);
12128   %}
12129   ins_pipe( pipe_jmp );
12130   ins_short_branch(1);
12131 %}
12132 
12133 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12134 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12135   match(If cop cr);
12136   effect(USE labl);
12137 
12138   ins_cost(300);
12139   format %{ "J$cop,s  $labl" %}
12140   size(2);
12141   ins_encode %{
12142     Label* L = $labl$$label;
12143     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12144   %}
12145   ins_pipe( pipe_jcc );
12146   ins_short_branch(1);
12147 %}
12148 
12149 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12150 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12151   match(CountedLoopEnd cop cr);
12152   effect(USE labl);
12153 
12154   ins_cost(300);
12155   format %{ "J$cop,s  $labl\t# Loop end" %}
12156   size(2);
12157   ins_encode %{
12158     Label* L = $labl$$label;
12159     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12160   %}
12161   ins_pipe( pipe_jcc );
12162   ins_short_branch(1);
12163 %}
12164 
12165 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12166 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12167   match(CountedLoopEnd cop cmp);
12168   effect(USE labl);
12169 
12170   ins_cost(300);
12171   format %{ "J$cop,us $labl\t# Loop end" %}
12172   size(2);
12173   ins_encode %{
12174     Label* L = $labl$$label;
12175     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12176   %}
12177   ins_pipe( pipe_jcc );
12178   ins_short_branch(1);
12179 %}
12180 
12181 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12182   match(CountedLoopEnd cop cmp);
12183   effect(USE labl);
12184 
12185   ins_cost(300);
12186   format %{ "J$cop,us $labl\t# Loop end" %}
12187   size(2);
12188   ins_encode %{
12189     Label* L = $labl$$label;
12190     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12191   %}
12192   ins_pipe( pipe_jcc );
12193   ins_short_branch(1);
12194 %}
12195 
12196 // Jump Direct Conditional - using unsigned comparison
12197 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12198   match(If cop cmp);
12199   effect(USE labl);
12200 
12201   ins_cost(300);
12202   format %{ "J$cop,us $labl" %}
12203   size(2);
12204   ins_encode %{
12205     Label* L = $labl$$label;
12206     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12207   %}
12208   ins_pipe( pipe_jcc );
12209   ins_short_branch(1);
12210 %}
12211 
12212 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12213   match(If cop cmp);
12214   effect(USE labl);
12215 
12216   ins_cost(300);
12217   format %{ "J$cop,us $labl" %}
12218   size(2);
12219   ins_encode %{
12220     Label* L = $labl$$label;
12221     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12222   %}
12223   ins_pipe( pipe_jcc );
12224   ins_short_branch(1);
12225 %}
12226 
12227 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12228   match(If cop cmp);
12229   effect(USE labl);
12230 
12231   ins_cost(300);
12232   format %{ $$template
12233     if ($cop$$cmpcode == Assembler::notEqual) {
12234       $$emit$$"JP,u,s   $labl\n\t"
12235       $$emit$$"J$cop,u,s   $labl"
12236     } else {
12237       $$emit$$"JP,u,s   done\n\t"
12238       $$emit$$"J$cop,u,s  $labl\n\t"
12239       $$emit$$"done:"
12240     }
12241   %}
12242   size(4);
12243   ins_encode %{
12244     Label* l = $labl$$label;
12245     if ($cop$$cmpcode == Assembler::notEqual) {
12246       __ jccb(Assembler::parity, *l);
12247       __ jccb(Assembler::notEqual, *l);
12248     } else if ($cop$$cmpcode == Assembler::equal) {
12249       Label done;
12250       __ jccb(Assembler::parity, done);
12251       __ jccb(Assembler::equal, *l);
12252       __ bind(done);
12253     } else {
12254        ShouldNotReachHere();
12255     }
12256   %}
12257   ins_pipe(pipe_jcc);
12258   ins_short_branch(1);
12259 %}
12260 
12261 // ============================================================================
12262 // Long Compare
12263 //
12264 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12265 // is tricky.  The flavor of compare used depends on whether we are testing
12266 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12267 // The GE test is the negated LT test.  The LE test can be had by commuting
12268 // the operands (yielding a GE test) and then negating; negate again for the
12269 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12270 // NE test is negated from that.
12271 
12272 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12273 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12274 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12275 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12276 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12277 // foo match ends up with the wrong leaf.  One fix is to not match both
12278 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12279 // both forms beat the trinary form of long-compare and both are very useful
12280 // on Intel which has so few registers.
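//
// A minimal C++ sketch of the two-register comparisons described above, with
// a long held as {lo, hi} 32-bit halves (illustrative, independent of the
// matcher):
//
//   #include <cstdint>
//   struct Long2 { uint32_t lo; int32_t hi; };
//
//   bool lt(Long2 a, Long2 b) {                    // signed a < b
//     if (a.hi != b.hi) return a.hi < b.hi;        // high halves, signed
//     return a.lo < b.lo;                          // low halves, unsigned
//   }
//   bool eq(Long2 a, Long2 b) {                    // a == b
//     return ((a.lo ^ b.lo) | uint32_t(a.hi ^ b.hi)) == 0;  // OR the halves
//   }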
12281 
12282 // Manifest a CmpL result in an integer register.  Very painful.
12283 // This is the test to avoid.
12284 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12285   match(Set dst (CmpL3 src1 src2));
12286   effect( KILL flags );
12287   ins_cost(1000);
12288   format %{ "XOR    $dst,$dst\n\t"
12289             "CMP    $src1.hi,$src2.hi\n\t"
12290             "JLT,s  m_one\n\t"
12291             "JGT,s  p_one\n\t"
12292             "CMP    $src1.lo,$src2.lo\n\t"
12293             "JB,s   m_one\n\t"
12294             "JEQ,s  done\n"
12295     "p_one:\tINC    $dst\n\t"
12296             "JMP,s  done\n"
12297     "m_one:\tDEC    $dst\n"
12298      "done:" %}
12299   ins_encode %{
12300     Label p_one, m_one, done;
12301     __ xorptr($dst$$Register, $dst$$Register);
12302     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12303     __ jccb(Assembler::less,    m_one);
12304     __ jccb(Assembler::greater, p_one);
12305     __ cmpl($src1$$Register, $src2$$Register);
12306     __ jccb(Assembler::below,   m_one);
12307     __ jccb(Assembler::equal,   done);
12308     __ bind(p_one);
12309     __ incrementl($dst$$Register);
12310     __ jmpb(done);
12311     __ bind(m_one);
12312     __ decrementl($dst$$Register);
12313     __ bind(done);
12314   %}
12315   ins_pipe( pipe_slow );
12316 %}
12317 
12318 //======
12319 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12320 // compares.  Can be used for LE or GT compares by reversing arguments.
12321 // NOT GOOD FOR EQ/NE tests.
12322 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12323   match( Set flags (CmpL src zero ));
12324   ins_cost(100);
12325   format %{ "TEST   $src.hi,$src.hi" %}
12326   opcode(0x85);
12327   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12328   ins_pipe( ialu_cr_reg_reg );
12329 %}
12330 
12331 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12332 // compares.  Can be used for LE or GT compares by reversing arguments.
12333 // NOT GOOD FOR EQ/NE tests.
12334 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12335   match( Set flags (CmpL src1 src2 ));
12336   effect( TEMP tmp );
12337   ins_cost(300);
12338   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12339             "MOV    $tmp,$src1.hi\n\t"
12340             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12341   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12342   ins_pipe( ialu_cr_reg_reg );
12343 %}
12344 
12345 // Long compares reg < zero/req OR reg >= zero/req.
12346 // Just a wrapper for a normal branch, plus the predicate test.
12347 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12348   match(If cmp flags);
12349   effect(USE labl);
12350   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12351   expand %{
12352     jmpCon(cmp,flags,labl);    // JLT or JGE...
12353   %}
12354 %}
12355 
12356 // Compare 2 longs and CMOVE longs.
12357 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12358   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12359   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12360   ins_cost(400);
12361   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12362             "CMOV$cmp $dst.hi,$src.hi" %}
12363   opcode(0x0F,0x40);
12364   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12365   ins_pipe( pipe_cmov_reg_long );
12366 %}
12367 
12368 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12369   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12370   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12371   ins_cost(500);
12372   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12373             "CMOV$cmp $dst.hi,$src.hi" %}
12374   opcode(0x0F,0x40);
12375   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12376   ins_pipe( pipe_cmov_reg_long );
12377 %}
12378 
12379 // Compare 2 longs and CMOVE ints.
12380 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12381   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12382   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12383   ins_cost(200);
12384   format %{ "CMOV$cmp $dst,$src" %}
12385   opcode(0x0F,0x40);
12386   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12387   ins_pipe( pipe_cmov_reg );
12388 %}
12389 
12390 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12391   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12392   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12393   ins_cost(250);
12394   format %{ "CMOV$cmp $dst,$src" %}
12395   opcode(0x0F,0x40);
12396   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12397   ins_pipe( pipe_cmov_mem );
12398 %}
12399 
12400 // Compare 2 longs and CMOVE ints.
12401 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12402   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12403   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12404   ins_cost(200);
12405   format %{ "CMOV$cmp $dst,$src" %}
12406   opcode(0x0F,0x40);
12407   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12408   ins_pipe( pipe_cmov_reg );
12409 %}
12410 
12411 // Compare 2 longs and CMOVE doubles
12412 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12414   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12415   ins_cost(200);
12416   expand %{
12417     fcmovDPR_regS(cmp,flags,dst,src);
12418   %}
12419 %}
12420 
12421 // Compare 2 longs and CMOVE doubles
12422 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12424   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12425   ins_cost(200);
12426   expand %{
12427     fcmovD_regS(cmp,flags,dst,src);
12428   %}
12429 %}
12430 
12431 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12433   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12434   ins_cost(200);
12435   expand %{
12436     fcmovFPR_regS(cmp,flags,dst,src);
12437   %}
12438 %}
12439 
12440 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12442   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12443   ins_cost(200);
12444   expand %{
12445     fcmovF_regS(cmp,flags,dst,src);
12446   %}
12447 %}
12448 
12449 //======
12450 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12451 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12452   match( Set flags (CmpL src zero ));
12453   effect(TEMP tmp);
12454   ins_cost(200);
12455   format %{ "MOV    $tmp,$src.lo\n\t"
12456             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12457   ins_encode( long_cmp_flags0( src, tmp ) );
12458   ins_pipe( ialu_reg_reg_long );
12459 %}
12460 
12461 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12462 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12463   match( Set flags (CmpL src1 src2 ));
12464   ins_cost(200+300);
12465   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12466             "JNE,s  skip\n\t"
12467             "CMP    $src1.hi,$src2.hi\n\t"
12468      "skip:\t" %}
12469   ins_encode( long_cmp_flags1( src1, src2 ) );
12470   ins_pipe( ialu_cr_reg_reg );
12471 %}
12472 
12473 // Long compare reg == zero/reg OR reg != zero/reg
12474 // Just a wrapper for a normal branch, plus the predicate test.
12475 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12476   match(If cmp flags);
12477   effect(USE labl);
12478   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12479   expand %{
12480     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12481   %}
12482 %}
12483 
12484 // Compare 2 longs and CMOVE longs.
12485 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12486   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12487   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12488   ins_cost(400);
12489   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12490             "CMOV$cmp $dst.hi,$src.hi" %}
12491   opcode(0x0F,0x40);
12492   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12493   ins_pipe( pipe_cmov_reg_long );
12494 %}
12495 
12496 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12497   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12498   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12499   ins_cost(500);
12500   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12501             "CMOV$cmp $dst.hi,$src.hi" %}
12502   opcode(0x0F,0x40);
12503   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12504   ins_pipe( pipe_cmov_reg_long );
12505 %}
12506 
12507 // Compare 2 longs and CMOVE ints.
12508 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12509   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12510   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12511   ins_cost(200);
12512   format %{ "CMOV$cmp $dst,$src" %}
12513   opcode(0x0F,0x40);
12514   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12515   ins_pipe( pipe_cmov_reg );
12516 %}
12517 
12518 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12519   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12520   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12521   ins_cost(250);
12522   format %{ "CMOV$cmp $dst,$src" %}
12523   opcode(0x0F,0x40);
12524   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12525   ins_pipe( pipe_cmov_mem );
12526 %}
12527 
// Compare 2 longs and CMOVE ptrs.
12529 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12530   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12531   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12532   ins_cost(200);
12533   format %{ "CMOV$cmp $dst,$src" %}
12534   opcode(0x0F,0x40);
12535   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12536   ins_pipe( pipe_cmov_reg );
12537 %}
12538 
12539 // Compare 2 longs and CMOVE doubles
12540 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12542   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12543   ins_cost(200);
12544   expand %{
12545     fcmovDPR_regS(cmp,flags,dst,src);
12546   %}
12547 %}
12548 
12549 // Compare 2 longs and CMOVE doubles
12550 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12552   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12553   ins_cost(200);
12554   expand %{
12555     fcmovD_regS(cmp,flags,dst,src);
12556   %}
12557 %}
12558 
12559 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12561   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12562   ins_cost(200);
12563   expand %{
12564     fcmovFPR_regS(cmp,flags,dst,src);
12565   %}
12566 %}
12567 
12568 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12570   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12571   ins_cost(200);
12572   expand %{
12573     fcmovF_regS(cmp,flags,dst,src);
12574   %}
12575 %}
12576 
12577 //======
12578 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12579 // Same as cmpL_reg_flags_LEGT except must negate src
12580 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12581   match( Set flags (CmpL src zero ));
12582   effect( TEMP tmp );
12583   ins_cost(300);
12584   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12585             "CMP    $tmp,$src.lo\n\t"
12586             "SBB    $tmp,$src.hi\n\t" %}
12587   ins_encode( long_cmp_flags3(src, tmp) );
12588   ins_pipe( ialu_reg_reg_long );
12589 %}
12590 
12591 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12592 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12593 // requires a commuted test to get the same result.
12594 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12595   match( Set flags (CmpL src1 src2 ));
12596   effect( TEMP tmp );
12597   ins_cost(300);
12598   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12599             "MOV    $tmp,$src2.hi\n\t"
12600             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12601   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12602   ins_pipe( ialu_cr_reg_reg );
12603 %}
12604 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
12607 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12608   match(If cmp flags);
12609   effect(USE labl);
12610   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12611   ins_cost(300);
12612   expand %{
12613     jmpCon(cmp,flags,labl);    // JGT or JLE...
12614   %}
12615 %}
12616 
12617 // Compare 2 longs and CMOVE longs.
12618 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12619   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12620   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12621   ins_cost(400);
12622   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12623             "CMOV$cmp $dst.hi,$src.hi" %}
12624   opcode(0x0F,0x40);
12625   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12626   ins_pipe( pipe_cmov_reg_long );
12627 %}
12628 
12629 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12630   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12631   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12632   ins_cost(500);
12633   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12634             "CMOV$cmp $dst.hi,$src.hi+4" %}
12635   opcode(0x0F,0x40);
12636   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12637   ins_pipe( pipe_cmov_reg_long );
12638 %}
12639 
12640 // Compare 2 longs and CMOVE ints.
12641 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12642   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12643   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12644   ins_cost(200);
12645   format %{ "CMOV$cmp $dst,$src" %}
12646   opcode(0x0F,0x40);
12647   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12648   ins_pipe( pipe_cmov_reg );
12649 %}
12650 
12651 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12652   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12653   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12654   ins_cost(250);
12655   format %{ "CMOV$cmp $dst,$src" %}
12656   opcode(0x0F,0x40);
12657   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12658   ins_pipe( pipe_cmov_mem );
12659 %}
12660 
12661 // Compare 2 longs and CMOVE ptrs.
12662 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12663   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12664   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12665   ins_cost(200);
12666   format %{ "CMOV$cmp $dst,$src" %}
12667   opcode(0x0F,0x40);
12668   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12669   ins_pipe( pipe_cmov_reg );
12670 %}
12671 
12672 // Compare 2 longs and CMOVE doubles
12673 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12675   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12676   ins_cost(200);
12677   expand %{
12678     fcmovDPR_regS(cmp,flags,dst,src);
12679   %}
12680 %}
12681 
12682 // Compare 2 longs and CMOVE doubles
12683 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12685   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12686   ins_cost(200);
12687   expand %{
12688     fcmovD_regS(cmp,flags,dst,src);
12689   %}
12690 %}
12691 
12692 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12694   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12695   ins_cost(200);
12696   expand %{
12697     fcmovFPR_regS(cmp,flags,dst,src);
12698   %}
12699 %}
12700 
12701 
12702 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12704   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12705   ins_cost(200);
12706   expand %{
12707     fcmovF_regS(cmp,flags,dst,src);
12708   %}
12709 %}
12710 
12711 
12712 // ============================================================================
12713 // Procedure Call/Return Instructions
12714 // Call Java Static Instruction
12715 // Note: If this code changes, the corresponding ret_addr_offset() and
12716 //       compute_padding() functions will have to be adjusted.
12717 instruct CallStaticJavaDirect(method meth) %{
12718   match(CallStaticJava);
12719   effect(USE meth);
12720 
12721   ins_cost(300);
12722   format %{ "CALL,static " %}
12723   opcode(0xE8); /* E8 cd */
12724   ins_encode( pre_call_resets,
12725               Java_Static_Call( meth ),
12726               call_epilog,
12727               post_call_FPU );
12728   ins_pipe( pipe_slow );
12729   ins_alignment(4);
12730 %}
12731 
12732 // Call Java Dynamic Instruction
12733 // Note: If this code changes, the corresponding ret_addr_offset() and
12734 //       compute_padding() functions will have to be adjusted.
12735 instruct CallDynamicJavaDirect(method meth) %{
12736   match(CallDynamicJava);
12737   effect(USE meth);
12738 
12739   ins_cost(300);
12740   format %{ "MOV    EAX,(oop)-1\n\t"
12741             "CALL,dynamic" %}
12742   opcode(0xE8); /* E8 cd */
12743   ins_encode( pre_call_resets,
12744               Java_Dynamic_Call( meth ),
12745               call_epilog,
12746               post_call_FPU );
12747   ins_pipe( pipe_slow );
12748   ins_alignment(4);
12749 %}
12750 
12751 // Call Runtime Instruction
12752 instruct CallRuntimeDirect(method meth) %{
12753   match(CallRuntime );
12754   effect(USE meth);
12755 
12756   ins_cost(300);
12757   format %{ "CALL,runtime " %}
12758   opcode(0xE8); /* E8 cd */
12759   // Use FFREEs to clear entries in float stack
12760   ins_encode( pre_call_resets,
12761               FFree_Float_Stack_All,
12762               Java_To_Runtime( meth ),
12763               post_call_FPU );
12764   ins_pipe( pipe_slow );
12765 %}
12766 
12767 // Call runtime without safepoint
12768 instruct CallLeafDirect(method meth) %{
12769   match(CallLeaf);
12770   effect(USE meth);
12771 
12772   ins_cost(300);
12773   format %{ "CALL_LEAF,runtime " %}
12774   opcode(0xE8); /* E8 cd */
12775   ins_encode( pre_call_resets,
12776               FFree_Float_Stack_All,
12777               Java_To_Runtime( meth ),
12778               Verify_FPU_For_Leaf, post_call_FPU );
12779   ins_pipe( pipe_slow );
12780 %}
12781 
12782 instruct CallLeafNoFPDirect(method meth) %{
12783   match(CallLeafNoFP);
12784   effect(USE meth);
12785 
12786   ins_cost(300);
12787   format %{ "CALL_LEAF_NOFP,runtime " %}
12788   opcode(0xE8); /* E8 cd */
12789   ins_encode(Java_To_Runtime(meth));
12790   ins_pipe( pipe_slow );
12791 %}
12792 
12793 
12794 // Return Instruction
12795 // Remove the return address & jump to it.
12796 instruct Ret() %{
12797   match(Return);
12798   format %{ "RET" %}
12799   opcode(0xC3);
12800   ins_encode(OpcP);
12801   ins_pipe( pipe_jmp );
12802 %}
12803 
12804 // Tail Call; Jump from runtime stub to Java code.
12805 // Also known as an 'interprocedural jump'.
12806 // Target of jump will eventually return to caller.
12807 // TailJump below removes the return address.
12808 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12809   match(TailCall jump_target method_oop );
12810   ins_cost(300);
12811   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12812   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12813   ins_encode( OpcP, RegOpc(jump_target) );
12814   ins_pipe( pipe_jmp );
12815 %}
12816 
12817 
12818 // Tail Jump; remove the return address; jump to target.
12819 // TailCall above leaves the return address around.
12820 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12821   match( TailJump jump_target ex_oop );
12822   ins_cost(300);
12823   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12824             "JMP    $jump_target " %}
12825   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12826   ins_encode( enc_pop_rdx,
12827               OpcP, RegOpc(jump_target) );
12828   ins_pipe( pipe_jmp );
12829 %}
12830 
12831 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
12834 instruct CreateException( eAXRegP ex_oop )
12835 %{
12836   match(Set ex_oop (CreateEx));
12837 
12838   size(0);
12839   // use the following format syntax
12840   format %{ "# exception oop is in EAX; no code emitted" %}
12841   ins_encode();
12842   ins_pipe( empty );
12843 %}
12844 
12845 
12846 // Rethrow exception:
12847 // The exception oop will come in the first argument position.
12848 // Then JUMP (not call) to the rethrow stub code.
12849 instruct RethrowException()
12850 %{
12851   match(Rethrow);
12852 
12853   // use the following format syntax
12854   format %{ "JMP    rethrow_stub" %}
12855   ins_encode(enc_rethrow);
12856   ins_pipe( pipe_jmp );
12857 %}
12858 
12859 // inlined locking and unlocking
12860 
12861 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12862   predicate(Compile::current()->use_rtm());
12863   match(Set cr (FastLock object box));
12864   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12865   ins_cost(300);
12866   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12867   ins_encode %{
12868     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12869                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12870                  _counters, _rtm_counters, _stack_rtm_counters,
12871                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12872                  true, ra_->C->profile_rtm());
12873   %}
12874   ins_pipe(pipe_slow);
12875 %}
12876 
12877 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12878   predicate(!Compile::current()->use_rtm());
12879   match(Set cr (FastLock object box));
12880   effect(TEMP tmp, TEMP scr, USE_KILL box);
12881   ins_cost(300);
12882   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12883   ins_encode %{
12884     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12885                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12886   %}
12887   ins_pipe(pipe_slow);
12888 %}
12889 
12890 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12891   match(Set cr (FastUnlock object box));
12892   effect(TEMP tmp, USE_KILL box);
12893   ins_cost(300);
12894   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12895   ins_encode %{
12896     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12897   %}
12898   ins_pipe(pipe_slow);
12899 %}
12900 
12901 
12902 
12903 // ============================================================================
12904 // Safepoint Instruction
12905 instruct safePoint_poll(eFlagsReg cr) %{
12906   match(SafePoint);
12907   effect(KILL cr);
12908 
  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on cache index 0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last cache line in the polling page.
12916 
12917   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12918   ins_cost(125);
  size(6);
12920   ins_encode( Safepoint_Poll() );
12921   ins_pipe( ialu_reg_mem );
12922 %}
12923 
12924 
12925 // ============================================================================
12926 // This name is KNOWN by the ADLC and cannot be changed.
12927 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12928 // for this guy.
12929 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12930   match(Set dst (ThreadLocal));
12931   effect(DEF dst, KILL cr);
12932 
12933   format %{ "MOV    $dst, Thread::current()" %}
12934   ins_encode %{
12935     Register dstReg = as_Register($dst$$reg);
12936     __ get_thread(dstReg);
12937   %}
12938   ins_pipe( ialu_reg_fat );
12939 %}
12940 
12941 
12942 
12943 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
12946 //
12947 // peepmatch ( root_instr_name [preceding_instruction]* );
12948 //
12949 // peepconstraint %{
12950 // (instruction_number.operand_name relational_op instruction_number.operand_name
12951 //  [, ...] );
12952 // // instruction numbers are zero-based using left to right order in peepmatch
12953 //
12954 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12955 // // provide an instruction_number.operand_name for each operand that appears
12956 // // in the replacement instruction's match rule
12957 //
12958 // ---------VM FLAGS---------------------------------------------------------
12959 //
12960 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12961 //
12962 // Each peephole rule is given an identifying number starting with zero and
12963 // increasing by one in the order seen by the parser.  An individual peephole
12964 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12965 // on the command-line.
12966 //
12967 // ---------CURRENT LIMITATIONS----------------------------------------------
12968 //
12969 // Only match adjacent instructions in same basic block
12970 // Only equality constraints
12971 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12972 // Only one replacement instruction
12973 //
12974 // ---------EXAMPLE----------------------------------------------------------
12975 //
12976 // // pertinent parts of existing instructions in architecture description
12977 // instruct movI(rRegI dst, rRegI src) %{
12978 //   match(Set dst (CopyI src));
12979 // %}
12980 //
12981 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12982 //   match(Set dst (AddI dst src));
12983 //   effect(KILL cr);
12984 // %}
12985 //
12986 // // Change (inc mov) to lea
12987 // peephole %{
//   // increment preceded by register-register move
12989 //   peepmatch ( incI_eReg movI );
12990 //   // require that the destination register of the increment
12991 //   // match the destination register of the move
12992 //   peepconstraint ( 0.dst == 1.dst );
12993 //   // construct a replacement instruction that sets
12994 //   // the destination to ( move's source register + one )
12995 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12996 // %}
12997 //
12998 // Implementation no longer uses movX instructions since
12999 // machine-independent system no longer uses CopyX nodes.
13000 //
13001 // peephole %{
13002 //   peepmatch ( incI_eReg movI );
13003 //   peepconstraint ( 0.dst == 1.dst );
13004 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13005 // %}
13006 //
13007 // peephole %{
13008 //   peepmatch ( decI_eReg movI );
13009 //   peepconstraint ( 0.dst == 1.dst );
13010 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13011 // %}
13012 //
13013 // peephole %{
13014 //   peepmatch ( addI_eReg_imm movI );
13015 //   peepconstraint ( 0.dst == 1.dst );
13016 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13017 // %}
13018 //
13019 // peephole %{
13020 //   peepmatch ( addP_eReg_imm movP );
13021 //   peepconstraint ( 0.dst == 1.dst );
13022 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13023 // %}
13024 
13025 // // Change load of spilled value to only a spill
13026 // instruct storeI(memory mem, rRegI src) %{
13027 //   match(Set mem (StoreI mem src));
13028 // %}
13029 //
13030 // instruct loadI(rRegI dst, memory mem) %{
13031 //   match(Set dst (LoadI mem));
13032 // %}
13033 //
13034 peephole %{
13035   peepmatch ( loadI storeI );
13036   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13037   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13038 %}
13039 
13040 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.