1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emission of assembly for a machnode.  During emission the FPU stack is
// pushed, making FPR1 == st(1) temporarily.  However, at any safepoint the
// stack will not have this element, so FPR1 == st(0) from the oopMap
// viewpoint.  This same weirdness with numbering forces the instruction
// encoding to play games with the register encode to correct for the 0/1
// issue.  See MachSpillCopyNode::implementation, where it does flt->flt
// moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
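// With the register encodings defined above this maps EAX(0)->EDX(2),
// ECX(1)->EBX(3) and EBP(5)->EDI(7), i.e. the long pairs EDX:EAX, EBX:ECX
// and EDI:EBP noted in the register block.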
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
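// The pool itself is only guaranteed jlong (8-byte) alignment, so the extra
// 128 bits of slack let double_quadword() round each slot down to a 16-byte
// boundary without leaving the buffer or overlapping a neighboring mask.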
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls.
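// When the method runs in 24-bit FP mode an FLDCW is emitted before each call,
// and compiled code using wide vectors emits a VZEROUPPER; both shift the
// return-address offsets computed below.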
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     if(UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
// !!!!! Special hack to get all types of calls to specify the byte offset
 302 //       from the start of the call to the point where the return address
 303 //       will point.
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
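// The dynamic call site is a 5-byte MOV (loading the inline-cache oop) followed
// by a 5-byte CALL, hence 10 bytes to the return address; see also
// CallDynamicJavaDirectNode::compute_padding() below.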
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
 345 
 346 // EMIT_RM()
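// Emit a ModRM-style byte: f1 is the 2-bit mod field, f2 the 3-bit reg/opcode
// field, f3 the 3-bit r/m field.  The same bit layout is reused for SIB bytes
// (scale/index/base) by the callers below.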
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 414   }
 415 }
 416 
 417    // rRegI ereg, memory mem) %{    // emit_reg_mem
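// Encode the ModRM (and SIB, if needed) bytes plus displacement for a memory
// operand.  index == 0x4 means "no index register"; base == -1 flags an
// absolute 32-bit address.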
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // There is no index & no scale, use form without SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
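// Materialize the three-way result of an FP compare into 'dst':
// -1 if less-than or unordered (NaN), 0 if equal, +1 if greater.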
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
 584     }
 585   }
 586 
 587   if (VerifyStackAtCalls) {
 588     st->print("\n\t");
 589     framesize -= wordSize;
 590     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 591   }
 592 
 593   if( C->in_24_bit_fp_mode() ) {
 594     st->print("\n\t");
 595     st->print("FLDCW  \t# load 24 bit fpu control word");
 596   }
 597   if (UseSSE >= 2 && VerifyFPU) {
 598     st->print("\n\t");
 599     st->print("# verify FPU stack (must be clean on entry)");
 600   }
 601 
 602 #ifdef ASSERT
 603   if (VerifyStackAtCalls) {
 604     st->print("\n\t");
 605     st->print("# stack alignment check");
 606   }
 607 #endif
 608   st->cr();
 609 }
 610 #endif
 611 
 612 
 613 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 614   Compile* C = ra_->C;
 615   MacroAssembler _masm(&cbuf);
 616 
 617   int framesize = C->frame_size_in_bytes();
 618   int bangsize = C->bang_size_in_bytes();
 619 
 620   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 621 
 622   C->set_frame_complete(cbuf.insts_size());
 623 
 624   if (C->has_mach_constant_base_node()) {
 625     // NOTE: We set the table base offset here because users might be
 626     // emitted before MachConstantBaseNode.
 627     Compile::ConstantTable& constant_table = C->constant_table();
 628     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 629   }
 630 }
 631 
 632 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 633   return MachNode::size(ra_); // too many variables; just compute it the hard way
 634 }
 635 
 636 int MachPrologNode::reloc() const {
 637   return 0; // a large enough number
 638 }
 639 
 640 //=============================================================================
 641 #ifndef PRODUCT
 642 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 643   Compile *C = ra_->C;
 644   int framesize = C->frame_size_in_bytes();
 645   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 646   // Remove two words for return addr and rbp,
 647   framesize -= 2*wordSize;
 648 
 649   if (C->max_vector_size() > 16) {
 650     st->print("VZEROUPPER");
 651     st->cr(); st->print("\t");
 652   }
 653   if (C->in_24_bit_fp_mode()) {
 654     st->print("FLDCW  standard control word");
 655     st->cr(); st->print("\t");
 656   }
 657   if (framesize) {
 658     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 659     st->cr(); st->print("\t");
 660   }
 661   st->print_cr("POPL   EBP"); st->print("\t");
 662   if (do_polling() && C->is_method_compilation()) {
 663     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 664     st->cr(); st->print("\t");
 665   }
 666 }
 667 #endif
 668 
 669 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 670   Compile *C = ra_->C;
 671 
 672   if (C->max_vector_size() > 16) {
 673     // Clear upper bits of YMM registers when current compiled code uses
 674     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 675     MacroAssembler masm(&cbuf);
 676     masm.vzeroupper();
 677   }
 678   // If method set FPU control word, restore to standard control word
 679   if (C->in_24_bit_fp_mode()) {
 680     MacroAssembler masm(&cbuf);
 681     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 682   }
 683 
 684   int framesize = C->frame_size_in_bytes();
 685   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 686   // Remove two words for return addr and rbp,
 687   framesize -= 2*wordSize;
 688 
 689   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 690 
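  // ADD ESP,#framesize: opcode 0x81 takes a 32-bit immediate while 0x83 takes
  // a sign-extended 8-bit immediate, so the short form only covers framesize < 128.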
 691   if (framesize >= 128) {
 692     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 693     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 694     emit_d32(cbuf, framesize);
 695   } else if (framesize) {
 696     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 698     emit_d8(cbuf, framesize);
 699   }
 700 
 701   emit_opcode(cbuf, 0x58 | EBP_enc);
 702 
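  // Safepoint poll: TEST EAX,[polling_page].  The load itself is harmless; the
  // page is protection-trapped when a safepoint is pending.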
 703   if (do_polling() && C->is_method_compilation()) {
 704     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 705     emit_opcode(cbuf,0x85);
 706     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 707     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 708   }
 709 }
 710 
 711 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 712   Compile *C = ra_->C;
 713   // If method set FPU control word, restore to standard control word
 714   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 715   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 716   if (do_polling() && C->is_method_compilation()) size += 6;
 717 
 718   int framesize = C->frame_size_in_bytes();
 719   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 720   // Remove two words for return addr and rbp,
 721   framesize -= 2*wordSize;
 722 
 723   size++; // popl rbp,
 724 
 725   if (framesize >= 128) {
 726     size += 6;
 727   } else {
 728     size += framesize ? 3 : 0;
 729   }
 730   return size;
 731 }
 732 
 733 int MachEpilogNode::reloc() const {
 734   return 0; // a large enough number
 735 }
 736 
 737 const Pipeline * MachEpilogNode::pipeline() const {
 738   return MachNode::pipeline_class();
 739 }
 740 
 741 int MachEpilogNode::safepoint_offset() const { return 0; }
 742 
 743 //=============================================================================
 744 
 745 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 746 static enum RC rc_class( OptoReg::Name reg ) {
 747 
 748   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 749   if (OptoReg::is_stack(reg)) return rc_stack;
 750 
 751   VMReg r = OptoReg::as_VMReg(reg);
 752   if (r->is_Register()) return rc_int;
 753   if (r->is_FloatRegister()) {
 754     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 755     return rc_float;
 756   }
 757   assert(r->is_XMMRegister(), "must be");
 758   return rc_xmm;
 759 }
 760 
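// Emit (or just format/size) a single MOV/FLD/FST/PUSH/POP with an [ESP+offset]
// operand.  The encoding is opcode + ModRM + SIB plus a 1- or 4-byte
// displacement, which is where the size+3+offset_size result comes from.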
 761 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 762                         int opcode, const char *op_str, int size, outputStream* st ) {
 763   if( cbuf ) {
 764     emit_opcode  (*cbuf, opcode );
 765     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 766 #ifndef PRODUCT
 767   } else if( !do_size ) {
 768     if( size != 0 ) st->print("\n\t");
 769     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 770       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 771       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 772     } else { // FLD, FST, PUSH, POP
 773       st->print("%s [ESP + #%d]",op_str,offset);
 774     }
 775 #endif
 776   }
 777   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 778   return size+3+offset_size;
 779 }
 780 
 781 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 782 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 783                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 784   int in_size_in_bits = Assembler::EVEX_32bit;
 785   int evex_encoding = 0;
 786   if (reg_lo+1 == reg_hi) {
 787     in_size_in_bits = Assembler::EVEX_64bit;
 788     evex_encoding = Assembler::VEX_W;
 789   }
 790   if (cbuf) {
 791     MacroAssembler _masm(cbuf);
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 843       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 844                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 845     } else {
 846       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 847                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 848     }
 849 #ifndef PRODUCT
 850   } else if (!do_size) {
 851     if (size != 0) st->print("\n\t");
 852     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 853       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 855       } else {
 856         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       }
 858     } else {
 859       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 860         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     }
 865 #endif
 866   }
 867   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 868   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 869   int sz = (UseAVX > 2) ? 6 : 4;
 870   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 871       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 872   return size + sz;
 873 }
 874 
 875 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 876                             int src_hi, int dst_hi, int size, outputStream* st ) {
 877   // 32-bit
 878   if (cbuf) {
 879     MacroAssembler _masm(cbuf);
 880     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 881              as_Register(Matcher::_regEncode[src_lo]));
 882 #ifndef PRODUCT
 883   } else if (!do_size) {
 884     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 885 #endif
 886   }
 887   return (UseAVX> 2) ? 6 : 4;
 888 }
 889 
 890 
 891 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 892                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 893   // 32-bit
 894   if (cbuf) {
 895     MacroAssembler _masm(cbuf);
 896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 897              as_XMMRegister(Matcher::_regEncode[src_lo]));
 898 #ifndef PRODUCT
 899   } else if (!do_size) {
 900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 901 #endif
 902   }
 903   return (UseAVX> 2) ? 6 : 4;
 904 }
 905 
 906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 907   if( cbuf ) {
 908     emit_opcode(*cbuf, 0x8B );
 909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 910 #ifndef PRODUCT
 911   } else if( !do_size ) {
 912     if( size != 0 ) st->print("\n\t");
 913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 914 #endif
 915   }
 916   return size+2;
 917 }
 918 
 919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 920                                  int offset, int size, outputStream* st ) {
 921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 922     if( cbuf ) {
 923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 925 #ifndef PRODUCT
 926     } else if( !do_size ) {
 927       if( size != 0 ) st->print("\n\t");
 928       st->print("FLD    %s",Matcher::regName[src_lo]);
 929 #endif
 930     }
 931     size += 2;
 932   }
 933 
 934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 935   const char *op_str;
 936   int op;
 937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 939     op = 0xDD;
 940   } else {                   // 32-bit store
 941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 942     op = 0xD9;
 943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 944   }
 945 
 946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 947 }
 948 
 949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 950 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 952 
 953 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 954                             int stack_offset, int reg, uint ireg, outputStream* st);
 955 
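// Copy a vector spill slot to another stack slot.  VecS/VecD go through
// PUSH/POP pairs; the wider vectors park xmm0 in the scratch area just below
// ESP, shuttle the data through it, and then restore xmm0.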
 956 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 957                                      int dst_offset, uint ireg, outputStream* st) {
 958   int calc_size = 0;
 959   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 960   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 961   switch (ireg) {
 962   case Op_VecS:
 963     calc_size = 3+src_offset_size + 3+dst_offset_size;
 964     break;
 965   case Op_VecD:
 966     calc_size = 3+src_offset_size + 3+dst_offset_size;
 967     src_offset += 4;
 968     dst_offset += 4;
 969     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 970     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 971     calc_size += 3+src_offset_size + 3+dst_offset_size;
 972     break;
 973   case Op_VecX:
 974   case Op_VecY:
 975   case Op_VecZ:
 976     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 977     break;
 978   default:
 979     ShouldNotReachHere();
 980   }
 981   if (cbuf) {
 982     MacroAssembler _masm(cbuf);
 983     int offset = __ offset();
 984     switch (ireg) {
 985     case Op_VecS:
 986       __ pushl(Address(rsp, src_offset));
 987       __ popl (Address(rsp, dst_offset));
 988       break;
 989     case Op_VecD:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       __ pushl(Address(rsp, src_offset+4));
 993       __ popl (Address(rsp, dst_offset+4));
 994       break;
 995     case Op_VecX:
 996       __ movdqu(Address(rsp, -16), xmm0);
 997       __ movdqu(xmm0, Address(rsp, src_offset));
 998       __ movdqu(Address(rsp, dst_offset), xmm0);
 999       __ movdqu(xmm0, Address(rsp, -16));
1000       break;
1001     case Op_VecY:
1002       __ vmovdqu(Address(rsp, -32), xmm0);
1003       __ vmovdqu(xmm0, Address(rsp, src_offset));
1004       __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
1007       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1008       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1009       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1010       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1011       break;
1012     default:
1013       ShouldNotReachHere();
1014     }
1015     int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
1017     return size;
1018 #ifndef PRODUCT
1019   } else if (!do_size) {
1020     switch (ireg) {
1021     case Op_VecS:
1022       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1023                 "popl    [rsp + #%d]",
1024                 src_offset, dst_offset);
1025       break;
1026     case Op_VecD:
1027       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1028                 "popq    [rsp + #%d]\n\t"
1029                 "pushl   [rsp + #%d]\n\t"
1030                 "popq    [rsp + #%d]",
1031                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1032       break;
1033      case Op_VecX:
1034       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1035                 "movdqu  xmm0, [rsp + #%d]\n\t"
1036                 "movdqu  [rsp + #%d], xmm0\n\t"
1037                 "movdqu  xmm0, [rsp - #16]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecY:
1041       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
1047       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1048                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1049                 "vmovdqu [rsp + #%d], xmm0\n\t"
1050                 "vmovdqu xmm0, [rsp - #64]",
1051                 src_offset, dst_offset);
1052       break;
1053     default:
1054       ShouldNotReachHere();
1055     }
1056 #endif
1057   }
1058   return calc_size;
1059 }
1060 
1061 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1062   // Get registers to move
1063   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1064   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1065   OptoReg::Name dst_second = ra_->get_reg_second(this );
1066   OptoReg::Name dst_first = ra_->get_reg_first(this );
1067 
1068   enum RC src_second_rc = rc_class(src_second);
1069   enum RC src_first_rc = rc_class(src_first);
1070   enum RC dst_second_rc = rc_class(dst_second);
1071   enum RC dst_first_rc = rc_class(dst_first);
1072 
1073   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1074 
1075   // Generate spill code!
1076   int size = 0;
1077 
1078   if( src_first == dst_first && src_second == dst_second )
1079     return size;            // Self copy, no move
1080 
1081   if (bottom_type()->isa_vect() != NULL) {
1082     uint ireg = ideal_reg();
1083     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1084     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1085     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1086     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1087       // mem -> mem
1088       int src_offset = ra_->reg2offset(src_first);
1089       int dst_offset = ra_->reg2offset(dst_first);
1090       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1091     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1092       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1093     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1094       int stack_offset = ra_->reg2offset(dst_first);
1095       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1096     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1097       int stack_offset = ra_->reg2offset(src_first);
1098       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1099     } else {
1100       ShouldNotReachHere();
1101     }
1102   }
1103 
1104   // --------------------------------------
1105   // Check for mem-mem move.  push/pop to move.
1106   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1107     if( src_second == dst_first ) { // overlapping stack copy ranges
1108       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1109       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1110       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1111       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1112     }
1113     // move low bits
1114     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1115     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1116     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1117       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1118       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1119     }
1120     return size;
1121   }
1122 
1123   // --------------------------------------
1124   // Check for integer reg-reg copy
1125   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1126     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1127 
1128   // Check for integer store
1129   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1130     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1131 
1132   // Check for integer load
1133   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1134     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1135 
1136   // Check for integer reg-xmm reg copy
1137   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1138     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1139             "no 64 bit integer-float reg moves" );
1140     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1141   }
1142   // --------------------------------------
1143   // Check for float reg-reg copy
1144   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1145     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1146             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1147     if( cbuf ) {
1148 
1149       // Note the mucking with the register encode to compensate for the 0/1
1150       // indexing issue mentioned in a comment in the reg_def sections
1151       // for FPR registers many lines above here.
1152 
1153       if( src_first != FPR1L_num ) {
1154         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1155         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1156         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1157         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1158      } else {
1159         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1160         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1161      }
1162 #ifndef PRODUCT
1163     } else if( !do_size ) {
1164       if( size != 0 ) st->print("\n\t");
1165       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1166       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1167 #endif
1168     }
1169     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1170   }
1171 
1172   // Check for float store
1173   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1174     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1175   }
1176 
1177   // Check for float load
1178   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1179     int offset = ra_->reg2offset(src_first);
1180     const char *op_str;
1181     int op;
1182     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1183       op_str = "FLD_D";
1184       op = 0xDD;
1185     } else {                   // 32-bit load
1186       op_str = "FLD_S";
1187       op = 0xD9;
1188       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1189     }
1190     if( cbuf ) {
1191       emit_opcode  (*cbuf, op );
1192       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1193       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1194       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1195 #ifndef PRODUCT
1196     } else if( !do_size ) {
1197       if( size != 0 ) st->print("\n\t");
1198       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1199 #endif
1200     }
1201     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1202     return size + 3+offset_size+2;
1203   }
1204 
1205   // Check for xmm reg-reg copy
1206   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1207     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1208             (src_first+1 == src_second && dst_first+1 == dst_second),
1209             "no non-adjacent float-moves" );
1210     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1211   }
1212 
1213   // Check for xmm reg-integer reg copy
1214   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1215     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1216             "no 64 bit float-integer reg moves" );
1217     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1218   }
1219 
1220   // Check for xmm store
1221   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1222     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1223   }
1224 
1225   // Check for float xmm load
1226   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1227     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1228   }
1229 
1230   // Copy from float reg to xmm reg
1231   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1232     // copy to the top of stack from floating point reg
1233     // and use LEA to preserve flags
1234     if( cbuf ) {
1235       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1236       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1237       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1238       emit_d8(*cbuf,0xF8);
1239 #ifndef PRODUCT
1240     } else if( !do_size ) {
1241       if( size != 0 ) st->print("\n\t");
1242       st->print("LEA    ESP,[ESP-8]");
1243 #endif
1244     }
1245     size += 4;
1246 
1247     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1248 
1249     // Copy from the temp memory to the xmm reg.
1250     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1251 
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0x08);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP+8]");
1261 #endif
1262     }
1263     size += 4;
1264     return size;
1265   }
1266 
1267   assert( size > 0, "missed a case" );
1268 
1269   // --------------------------------------------------------------------
1270   // Check for second bits still needing moving.
1271   if( src_second == dst_second )
1272     return size;               // Self copy; no move
1273   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1274 
1275   // Check for second word int-int move
1276   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1277     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1278 
1279   // Check for second word integer store
1280   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1281     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1282 
1283   // Check for second word integer load
1284   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1285     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1286 
1287 
1288   Unimplemented();
1289   return 0; // Mute compiler
1290 }
1291 
1292 #ifndef PRODUCT
1293 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1294   implementation( NULL, ra_, false, st );
1295 }
1296 #endif
1297 
1298 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1299   implementation( &cbuf, ra_, false, NULL );
1300 }
1301 
1302 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1303   return implementation( NULL, ra_, true, NULL );
1304 }
1305 
1306 
1307 //=============================================================================
1308 #ifndef PRODUCT
1309 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1310   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1311   int reg = ra_->get_reg_first(this);
1312   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1313 }
1314 #endif
1315 
1316 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1318   int reg = ra_->get_encode(this);
1319   if( offset >= 128 ) {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x2, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d32(cbuf, offset);
1324   }
1325   else {
1326     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327     emit_rm(cbuf, 0x1, reg, 0x04);
1328     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329     emit_d8(cbuf, offset);
1330   }
1331 }
1332 
1333 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
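  // The LEA forms emitted above are 8D + ModRM + SIB + disp32 = 7 bytes,
  // or 8D + ModRM + SIB + disp8 = 4 bytes.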
1335   if( offset >= 128 ) {
1336     return 7;
1337   }
1338   else {
1339     return 4;
1340   }
1341 }
1342 
1343 //=============================================================================
1344 #ifndef PRODUCT
1345 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1346   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1347   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1348   st->print_cr("\tNOP");
1349   st->print_cr("\tNOP");
1350   if( !OptoBreakpoint )
1351     st->print_cr("\tNOP");
1352 }
1353 #endif
1354 
1355 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1356   MacroAssembler masm(&cbuf);
1357 #ifdef ASSERT
1358   uint insts_size = cbuf.insts_size();
1359 #endif
1360   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1361   masm.jump_cc(Assembler::notEqual,
1362                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1363   /* WARNING these NOPs are critical so that verified entry point is properly
1364      aligned for patching by NativeJump::patch_verified_entry() */
1365   int nops_cnt = 2;
1366   if( !OptoBreakpoint ) // Leave space for int3
1367      nops_cnt += 1;
1368   masm.nop(nops_cnt);
1369 
1370   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1371 }
1372 
1373 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
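  // CMP EAX,[ECX+4] is 3 bytes (3B /r + disp8) and JNE rel32 is 6 bytes
  // (0F 85 + imm32); the remaining 2 or 3 bytes are the NOPs emitted above.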
1374   return OptoBreakpoint ? 11 : 12;
1375 }
1376 
1377 
1378 //=============================================================================
1379 
1380 int Matcher::regnum_to_fpu_offset(int regnum) {
1381   return regnum - 32; // The FP registers are in the second chunk
1382 }
1383 
// This query is UltraSparc specific; true just means we have fast l2f conversion
1385 const bool Matcher::convL2FSupported(void) {
1386   return true;
1387 }
1388 
1389 // Is this branch offset short enough that a short branch can be used?
1390 //
1391 // NOTE: If the platform does not provide any short branch variants, then
1392 //       this method should return false for offset 0.
1393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1397   offset -= br_size;
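  // For example, a 2-byte short jump whose target is its own first byte is
  // passed offset == 0 and, after this adjustment, encodes displacement -2.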
1398 
1399   // the short version of jmpConUCF2 contains multiple branches,
1400   // making the reach slightly less
1401   if (rule == jmpConUCF2_rule)
1402     return (-126 <= offset && offset <= 125);
1403   return (-128 <= offset && offset <= 127);
1404 }
1405 
1406 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1408   return false;
1409 }
1410 
1411 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1412 const bool Matcher::init_array_count_is_in_bytes = false;
1413 
1414 // Threshold size for cleararray.
1415 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1416 
1417 // Needs 2 CMOV's for longs.
1418 const int Matcher::long_cmove_cost() { return 1; }
1419 
1420 // No CMOVF/CMOVD with SSE/SSE2
1421 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1422 
1423 // Does the CPU require late expand (see block.cpp for description of late expand)?
1424 const bool Matcher::require_postalloc_expand = false;
1425 
1426 // Should the Matcher clone shifts on addressing modes, expecting them to
1427 // be subsumed into complex addressing expressions or compute them into
1428 // registers?  True for Intel but false for most RISCs
1429 const bool Matcher::clone_shift_expressions = true;
1430 
1431 // Do we need to mask the count passed to shift instructions or does
1432 // the cpu only look at the lower 5/6 bits anyway?
1433 const bool Matcher::need_masked_shift_count = false;
1434 
1435 bool Matcher::narrow_oop_use_complex_address() {
1436   ShouldNotCallThis();
1437   return true;
1438 }
1439 
1440 bool Matcher::narrow_klass_use_complex_address() {
1441   ShouldNotCallThis();
1442   return true;
1443 }
1444 
1445 
1446 // Is it better to copy float constants, or load them directly from memory?
1447 // Intel can load a float constant from a direct address, requiring no
1448 // extra registers.  Most RISCs will have to materialize an address into a
1449 // register first, so they would do better to copy the constant from stack.
1450 const bool Matcher::rematerialize_float_constants = true;
1451 
1452 // If CPU can load and store mis-aligned doubles directly then no fixup is
1453 // needed.  Else we split the double into 2 integer pieces and move it
1454 // piece-by-piece.  Only happens when passing doubles into C code as the
1455 // Java calling convention forces doubles to be aligned.
1456 const bool Matcher::misaligned_doubles_ok = true;
1457 
1458 
1459 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1460   // Get the memory operand from the node
1461   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1462   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1463   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1464   uint opcnt     = 1;                 // First operand
1465   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1466   while( idx >= skipped+num_edges ) {
1467     skipped += num_edges;
1468     opcnt++;                          // Bump operand count
1469     assert( opcnt < numopnds, "Accessing non-existent operand" );
1470     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1471   }
1472 
1473   MachOper *memory = node->_opnds[opcnt];
1474   MachOper *new_memory = NULL;
1475   switch (memory->opcode()) {
1476   case DIRECT:
1477   case INDOFFSET32X:
1478     // No transformation necessary.
1479     return;
1480   case INDIRECT:
1481     new_memory = new indirect_win95_safeOper( );
1482     break;
1483   case INDOFFSET8:
1484     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1485     break;
1486   case INDOFFSET32:
1487     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1488     break;
1489   case INDINDEXOFFSET:
1490     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1491     break;
1492   case INDINDEXSCALE:
1493     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1494     break;
1495   case INDINDEXSCALEOFFSET:
1496     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1497     break;
1498   case LOAD_LONG_INDIRECT:
1499   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses { EDX, EBX, EDI, ESI }
1501     return;
1502   default:
1503     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1504     return;
1505   }
1506   node->_opnds[opcnt] = new_memory;
1507 }
1508 
1509 // Advertise here if the CPU requires explicit rounding operations
1510 // to implement the UseStrictFP mode.
1511 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1512 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1515 bool Matcher::float_in_double() { return (UseSSE == 0); }
1516 
1517 // Do ints take an entire long register or just half?
1518 const bool Matcher::int_in_long = false;
1519 
1520 // Return whether or not this register is ever used as an argument.  This
1521 // function is used on startup to build the trampoline stubs in generateOptoStub.
1522 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1524 bool Matcher::can_be_java_arg( int reg ) {
1525   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1526   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1527   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1528   return false;
1529 }
1530 
1531 bool Matcher::is_spillable_arg( int reg ) {
1532   return can_be_java_arg(reg);
1533 }
1534 
1535 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply,
  // and only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating the divisor
  // always yields a correct positive 32-bit value).
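  // For example, divisors such as 10 or -10 qualify, a divisor of 0x100000000
  // does not fit in 32 bits, and min_jint itself is rejected explicitly.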
1541   return VM_Version::has_fast_idiv() &&
1542          (divisor == (int)divisor && divisor != min_jint);
1543 }
1544 
1545 // Register for DIVI projection of divmodI
1546 RegMask Matcher::divI_proj_mask() {
1547   return EAX_REG_mask();
1548 }
1549 
1550 // Register for MODI projection of divmodI
1551 RegMask Matcher::modI_proj_mask() {
1552   return EDX_REG_mask();
1553 }
1554 
1555 // Register for DIVL projection of divmodL
1556 RegMask Matcher::divL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 // Register for MODL projection of divmodL
1562 RegMask Matcher::modL_proj_mask() {
1563   ShouldNotReachHere();
1564   return RegMask();
1565 }
1566 
1567 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1568   return NO_REG_mask();
1569 }
1570 
// Returns true if the high 32 bits of the value are known to be zero.
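// For example, (AndL x (ConL 0xFF)) and (ConL 42) both qualify; a long value
// whose high bits are not provably zero does not.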
1572 bool is_operand_hi32_zero(Node* n) {
1573   int opc = n->Opcode();
1574   if (opc == Op_AndL) {
1575     Node* o2 = n->in(2);
1576     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1577       return true;
1578     }
1579   }
1580   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1581     return true;
1582   }
1583   return false;
1584 }
1585 
1586 %}
1587 
1588 //----------ENCODING BLOCK-----------------------------------------------------
1589 // This block specifies the encoding classes used by the compiler to output
1590 // byte streams.  Encoding classes generate functions which are called by
1591 // Machine Instruction Nodes in order to generate the bit encoding of the
1592 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces,
1594 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1595 // operand to generate a function which returns its register number when
1596 // queried.   CONST_INTER causes an operand to generate a function which
1597 // returns the value of the constant when queried.  MEMORY_INTER causes an
1598 // operand to generate four functions which return the Base Register, the
1599 // Index Register, the Scale Value, and the Offset Value of the operand when
1600 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1602 // associated with each basic boolean condition for a conditional instruction.
1603 // Instructions specify two basic values for encoding.  They use the
1604 // ins_encode keyword to specify their encoding class (which must be one of
1605 // the class names specified in the encoding block), and they use the
1606 // opcode keyword to specify, in order, their primary, secondary, and
1607 // tertiary opcode.  Only the opcode sections which a particular instruction
1608 // needs for encoding need to be specified.
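//
// For example, a register-register ADD later in this file can be encoded with
// the classes below roughly as follows (illustrative sketch only):
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                          // primary: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst, src) );  // emit opcode, then ModRM byte
//   %}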
1609 encode %{
1610   // Build emit functions for each basic byte or larger field in the intel
1611   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1612   // code in the enc_class source block.  Emit functions will live in the
1613   // main source block for now.  In future, we can generalize this by
1614   // adding a syntax that specifies the sizes of fields in an order,
1615   // so that the adlc can build the emit functions automagically
1616 
1617   // Emit primary opcode
1618   enc_class OpcP %{
1619     emit_opcode(cbuf, $primary);
1620   %}
1621 
1622   // Emit secondary opcode
1623   enc_class OpcS %{
1624     emit_opcode(cbuf, $secondary);
1625   %}
1626 
1627   // Emit opcode directly
1628   enc_class Opcode(immI d8) %{
1629     emit_opcode(cbuf, $d8$$constant);
1630   %}
1631 
1632   enc_class SizePrefix %{
1633     emit_opcode(cbuf,0x66);
1634   %}
1635 
1636   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1637     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1638   %}
1639 
1640   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1641     emit_opcode(cbuf,$opcode$$constant);
1642     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1643   %}
1644 
1645   enc_class mov_r32_imm0( rRegI dst ) %{
1646     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1647     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1648   %}
1649 
1650   enc_class cdq_enc %{
1651     // Full implementation of Java idiv and irem; checks for
1652     // special case as described in JVM spec., p.243 & p.271.
1653     //
1654     //         normal case                           special case
1655     //
1656     // input : rax,: dividend                         min_int
1657     //         reg: divisor                          -1
1658     //
1659     // output: rax,: quotient  (= rax, idiv reg)       min_int
1660     //         rdx: remainder (= rax, irem reg)       0
1661     //
    //  Code sequence:
1663     //
1664     //  81 F8 00 00 00 80    cmp         rax,80000000h
1665     //  0F 85 0B 00 00 00    jne         normal_case
1666     //  33 D2                xor         rdx,edx
1667     //  83 F9 FF             cmp         rcx,0FFh
1668     //  0F 84 03 00 00 00    je          done
1669     //                  normal_case:
1670     //  99                   cdq
1671     //  F7 F9                idiv        rax,ecx
1672     //                  done:
1673     //
1674     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1675     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1676     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1677     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1678     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1679     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1680     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1681     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1682     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1683     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1685     // normal_case:
1686     emit_opcode(cbuf,0x99);                                         // cdq
1687     // idiv (note: must be emitted by the user of this rule)
1688     // normal:
1689   %}
1690 
1691   // Dense encoding for older common ops
1692   enc_class Opc_plus(immI opcode, rRegI reg) %{
1693     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1694   %}
1695 
1696 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1698   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1699     // Check for 8-bit immediate, and set sign extend bit in opcode
1700     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1701       emit_opcode(cbuf, $primary | 0x02);
1702     }
1703     else {                          // If 32-bit immediate
1704       emit_opcode(cbuf, $primary);
1705     }
1706   %}
1707 
1708   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1709     // Emit primary opcode and set sign-extend bit
1710     // Check for 8-bit immediate, and set sign extend bit in opcode
1711     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1712       emit_opcode(cbuf, $primary | 0x02);    }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716     // Emit r/m byte with secondary opcode, after primary opcode.
1717     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1718   %}
1719 
1720   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1721     // Check for 8-bit immediate, and set sign extend bit in opcode
1722     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1723       $$$emit8$imm$$constant;
1724     }
1725     else {                          // If 32-bit immediate
1726       // Output immediate
1727       $$$emit32$imm$$constant;
1728     }
1729   %}
1730 
1731   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1732     // Emit primary opcode and set sign-extend bit
1733     // Check for 8-bit immediate, and set sign extend bit in opcode
1734     int con = (int)$imm$$constant; // Throw away top bits
1735     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1736     // Emit r/m byte with secondary opcode, after primary opcode.
1737     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1738     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1739     else                               emit_d32(cbuf,con);
1740   %}
1741 
1742   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1743     // Emit primary opcode and set sign-extend bit
1744     // Check for 8-bit immediate, and set sign extend bit in opcode
1745     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1746     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1747     // Emit r/m byte with tertiary opcode, after primary opcode.
1748     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1749     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1750     else                               emit_d32(cbuf,con);
1751   %}
1752 
1753   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1754     emit_cc(cbuf, $secondary, $dst$$reg );
1755   %}
1756 
1757   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1758     int destlo = $dst$$reg;
1759     int desthi = HIGH_FROM_LOW(destlo);
1760     // bswap lo
1761     emit_opcode(cbuf, 0x0F);
1762     emit_cc(cbuf, 0xC8, destlo);
1763     // bswap hi
1764     emit_opcode(cbuf, 0x0F);
1765     emit_cc(cbuf, 0xC8, desthi);
1766     // xchg lo and hi
1767     emit_opcode(cbuf, 0x87);
1768     emit_rm(cbuf, 0x3, destlo, desthi);
1769   %}
1770 
1771   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1772     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1773   %}
1774 
1775   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1776     $$$emit8$primary;
1777     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1778   %}
1779 
1780   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1781     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1782     emit_d8(cbuf, op >> 8 );
1783     emit_d8(cbuf, op & 255);
1784   %}
1785 
1786   // emulate a CMOV with a conditional branch around a MOV
1787   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1788     // Invert sense of branch from sense of CMOV
1789     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1790     emit_d8( cbuf, $brOffs$$constant );
1791   %}
1792 
1793   enc_class enc_PartialSubtypeCheck( ) %{
1794     Register Redi = as_Register(EDI_enc); // result register
1795     Register Reax = as_Register(EAX_enc); // super class
1796     Register Recx = as_Register(ECX_enc); // killed
1797     Register Resi = as_Register(ESI_enc); // sub class
1798     Label miss;
1799 
1800     MacroAssembler _masm(&cbuf);
1801     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1802                                      NULL, &miss,
1803                                      /*set_cond_codes:*/ true);
1804     if ($primary) {
1805       __ xorptr(Redi, Redi);
1806     }
1807     __ bind(miss);
1808   %}
1809 
1810   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1811     MacroAssembler masm(&cbuf);
1812     int start = masm.offset();
1813     if (UseSSE >= 2) {
1814       if (VerifyFPU) {
1815         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1816       }
1817     } else {
1818       // External c_calling_convention expects the FPU stack to be 'clean'.
1819       // Compiled code leaves it dirty.  Do cleanup now.
1820       masm.empty_FPU_stack();
1821     }
1822     if (sizeof_FFree_Float_Stack_All == -1) {
1823       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1824     } else {
1825       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1826     }
1827   %}
1828 
1829   enc_class Verify_FPU_For_Leaf %{
1830     if( VerifyFPU ) {
1831       MacroAssembler masm(&cbuf);
1832       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1833     }
1834   %}
1835 
1836   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1837     // This is the instruction starting address for relocation info.
1838     cbuf.set_insts_mark();
1839     $$$emit8$primary;
1840     // CALL directly to the runtime
1841     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1842                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1843 
1844     if (UseSSE >= 2) {
1845       MacroAssembler _masm(&cbuf);
1846       BasicType rt = tf()->return_type();
1847 
1848       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1849         // A C runtime call where the return value is unused.  In SSE2+
1850         // mode the result needs to be removed from the FPU stack.  It's
1851         // likely that this function call could be removed by the
1852         // optimizer if the C function is a pure function.
1853         __ ffree(0);
1854       } else if (rt == T_FLOAT) {
1855         __ lea(rsp, Address(rsp, -4));
1856         __ fstp_s(Address(rsp, 0));
1857         __ movflt(xmm0, Address(rsp, 0));
1858         __ lea(rsp, Address(rsp,  4));
1859       } else if (rt == T_DOUBLE) {
1860         __ lea(rsp, Address(rsp, -8));
1861         __ fstp_d(Address(rsp, 0));
1862         __ movdbl(xmm0, Address(rsp, 0));
1863         __ lea(rsp, Address(rsp,  8));
1864       }
1865     }
1866   %}
1867 
1868 
1869   enc_class pre_call_resets %{
1870     // If method sets FPU control word restore it here
1871     debug_only(int off0 = cbuf.insts_size());
1872     if (ra_->C->in_24_bit_fp_mode()) {
1873       MacroAssembler _masm(&cbuf);
1874       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1875     }
1876     if (ra_->C->max_vector_size() > 16) {
1877       // Clear upper bits of YMM registers when current compiled code uses
1878       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1879       MacroAssembler _masm(&cbuf);
1880       __ vzeroupper();
1881     }
1882     debug_only(int off1 = cbuf.insts_size());
1883     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1884   %}
1885 
1886   enc_class post_call_FPU %{
1887     // If method sets FPU control word do it here also
1888     if (Compile::current()->in_24_bit_fp_mode()) {
1889       MacroAssembler masm(&cbuf);
1890       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891     }
1892   %}
1893 
1894   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1895     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1896     // who we intended to call.
1897     cbuf.set_insts_mark();
1898     $$$emit8$primary;
1899     if (!_method) {
1900       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1901                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1902     } else if (_optimized_virtual) {
1903       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1904                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1905     } else {
1906       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1907                      static_call_Relocation::spec(), RELOC_IMM32 );
1908     }
1909     if (_method) {  // Emit stub for static call.
1910       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1911       if (stub == NULL) {
1912         ciEnv::current()->record_failure("CodeCache is full");
1913         return;
1914       } 
1915     }
1916   %}
1917 
1918   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1919     MacroAssembler _masm(&cbuf);
1920     __ ic_call((address)$meth$$method);
1921   %}
1922 
1923   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1924     int disp = in_bytes(Method::from_compiled_offset());
1925     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1926 
1927     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1928     cbuf.set_insts_mark();
1929     $$$emit8$primary;
1930     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1931     emit_d8(cbuf, disp);             // Displacement
1932 
1933   %}
1934 
1935 //   Following encoding is no longer used, but may be restored if calling
1936 //   convention changes significantly.
1937 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1938 //
1939 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1940 //     // int ic_reg     = Matcher::inline_cache_reg();
1941 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1942 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1943 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1944 //
1945 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1946 //     // // so we load it immediately before the call
1947 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1948 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1949 //
1950 //     // xor rbp,ebp
1951 //     emit_opcode(cbuf, 0x33);
1952 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1953 //
1954 //     // CALL to interpreter.
1955 //     cbuf.set_insts_mark();
1956 //     $$$emit8$primary;
1957 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1958 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1959 //   %}
1960 
1961   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1962     $$$emit8$primary;
1963     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1964     $$$emit8$shift$$constant;
1965   %}
1966 
1967   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1968     // Load immediate does not have a zero or sign extended version
1969     // for 8-bit immediates
1970     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1971     $$$emit32$src$$constant;
1972   %}
1973 
1974   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1975     // Load immediate does not have a zero or sign extended version
1976     // for 8-bit immediates
1977     emit_opcode(cbuf, $primary + $dst$$reg);
1978     $$$emit32$src$$constant;
1979   %}
1980 
1981   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1982     // Load immediate does not have a zero or sign extended version
1983     // for 8-bit immediates
1984     int dst_enc = $dst$$reg;
1985     int src_con = $src$$constant & 0x0FFFFFFFFL;
1986     if (src_con == 0) {
1987       // xor dst, dst
1988       emit_opcode(cbuf, 0x33);
1989       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1990     } else {
1991       emit_opcode(cbuf, $primary + dst_enc);
1992       emit_d32(cbuf, src_con);
1993     }
1994   %}
1995 
1996   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1997     // Load immediate does not have a zero or sign extended version
1998     // for 8-bit immediates
1999     int dst_enc = $dst$$reg + 2;
2000     int src_con = ((julong)($src$$constant)) >> 32;
2001     if (src_con == 0) {
2002       // xor dst, dst
2003       emit_opcode(cbuf, 0x33);
2004       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2005     } else {
2006       emit_opcode(cbuf, $primary + dst_enc);
2007       emit_d32(cbuf, src_con);
2008     }
2009   %}
2010 
2011 
2012   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2013   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2014     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2015   %}
2016 
2017   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2018     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2019   %}
2020 
2021   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2022     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2023   %}
2024 
2025   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2026     $$$emit8$primary;
2027     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2028   %}
2029 
2030   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2031     $$$emit8$secondary;
2032     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2033   %}
2034 
2035   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2037   %}
2038 
2039   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2044     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2045   %}
2046 
2047   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2048     // Output immediate
2049     $$$emit32$src$$constant;
2050   %}
2051 
2052   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2053     // Output Float immediate bits
2054     jfloat jf = $src$$constant;
2055     int    jf_as_bits = jint_cast( jf );
2056     emit_d32(cbuf, jf_as_bits);
2057   %}
2058 
2059   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2060     // Output Float immediate bits
2061     jfloat jf = $src$$constant;
2062     int    jf_as_bits = jint_cast( jf );
2063     emit_d32(cbuf, jf_as_bits);
2064   %}
2065 
2066   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2067     // Output immediate
2068     $$$emit16$src$$constant;
2069   %}
2070 
2071   enc_class Con_d32(immI src) %{
2072     emit_d32(cbuf,$src$$constant);
2073   %}
2074 
2075   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2076     // Output immediate memory reference
2077     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2078     emit_d32(cbuf, 0x00);
2079   %}
2080 
2081   enc_class lock_prefix( ) %{
2082     if( os::is_MP() )
2083       emit_opcode(cbuf,0xF0);         // [Lock]
2084   %}
2085 
2086   // Cmp-xchg long value.
2087   // Note: we need to swap rbx, and rcx before and after the
2088   //       cmpxchg8 instruction because the instruction uses
2089   //       rcx as the high order word of the new value to store but
2090   //       our register encoding uses rbx,.
2091   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2092 
2093     // XCHG  rbx,ecx
2094     emit_opcode(cbuf,0x87);
2095     emit_opcode(cbuf,0xD9);
2096     // [Lock]
2097     if( os::is_MP() )
2098       emit_opcode(cbuf,0xF0);
2099     // CMPXCHG8 [Eptr]
2100     emit_opcode(cbuf,0x0F);
2101     emit_opcode(cbuf,0xC7);
2102     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2103     // XCHG  rbx,ecx
2104     emit_opcode(cbuf,0x87);
2105     emit_opcode(cbuf,0xD9);
2106   %}
2107 
2108   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2109     // [Lock]
2110     if( os::is_MP() )
2111       emit_opcode(cbuf,0xF0);
2112 
2113     // CMPXCHG [Eptr]
2114     emit_opcode(cbuf,0x0F);
2115     emit_opcode(cbuf,0xB1);
2116     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2117   %}
2118 
2119   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2120     int res_encoding = $res$$reg;
2121 
2122     // MOV  res,0
2123     emit_opcode( cbuf, 0xB8 + res_encoding);
2124     emit_d32( cbuf, 0 );
2125     // JNE,s  fail
2126     emit_opcode(cbuf,0x75);
2127     emit_d8(cbuf, 5 );
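    //   (the 5-byte displacement skips the MOV res,1 below: B8+r plus a 32-bit immediate)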
2128     // MOV  res,1
2129     emit_opcode( cbuf, 0xB8 + res_encoding);
2130     emit_d32( cbuf, 1 );
2131     // fail:
2132   %}
2133 
2134   enc_class set_instruction_start( ) %{
2135     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2136   %}
2137 
2138   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2139     int reg_encoding = $ereg$$reg;
2140     int base  = $mem$$base;
2141     int index = $mem$$index;
2142     int scale = $mem$$scale;
2143     int displace = $mem$$disp;
2144     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2145     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2146   %}
2147 
2148   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2149     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2150     int base  = $mem$$base;
2151     int index = $mem$$index;
2152     int scale = $mem$$scale;
2153     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2154     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2155     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2156   %}
2157 
2158   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
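    // $tertiary selects the double-shift opcode: 0xA4 (SHLD) for left shifts,
    // 0xAC (SHRD) for right shifts.  r1 is the register that also receives the
    // plain shift afterwards: the low word for SHLD, the high word for SHRD.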
2159     int r1, r2;
2160     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2161     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2162     emit_opcode(cbuf,0x0F);
2163     emit_opcode(cbuf,$tertiary);
2164     emit_rm(cbuf, 0x3, r1, r2);
2165     emit_d8(cbuf,$cnt$$constant);
2166     emit_d8(cbuf,$primary);
2167     emit_rm(cbuf, 0x3, $secondary, r1);
2168     emit_d8(cbuf,$cnt$$constant);
2169   %}
2170 
2171   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2172     emit_opcode( cbuf, 0x8B ); // Move
2173     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2174     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2175       emit_d8(cbuf,$primary);
2176       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2177       emit_d8(cbuf,$cnt$$constant-32);
2178     }
2179     emit_d8(cbuf,$primary);
2180     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2181     emit_d8(cbuf,31);
2182   %}
2183 
2184   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2185     int r1, r2;
2186     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2187     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2188 
2189     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2190     emit_rm(cbuf, 0x3, r1, r2);
2191     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2192       emit_opcode(cbuf,$primary);
2193       emit_rm(cbuf, 0x3, $secondary, r1);
2194       emit_d8(cbuf,$cnt$$constant-32);
2195     }
2196     emit_opcode(cbuf,0x33);  // XOR r2,r2
2197     emit_rm(cbuf, 0x3, r2, r2);
2198   %}
2199 
2200   // Clone of RegMem but accepts an extra parameter to access each
2201   // half of a double in memory; it never needs relocation info.
2202   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2203     emit_opcode(cbuf,$opcode$$constant);
2204     int reg_encoding = $rm_reg$$reg;
2205     int base     = $mem$$base;
2206     int index    = $mem$$index;
2207     int scale    = $mem$$scale;
2208     int displace = $mem$$disp + $disp_for_half$$constant;
2209     relocInfo::relocType disp_reloc = relocInfo::none;
2210     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2211   %}
2212 
2213   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2214   //
2215   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2216   // and it never needs relocation information.
2217   // Frequently used to move data between FPU's Stack Top and memory.
2218   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2219     int rm_byte_opcode = $rm_opcode$$constant;
2220     int base     = $mem$$base;
2221     int index    = $mem$$index;
2222     int scale    = $mem$$scale;
2223     int displace = $mem$$disp;
2224     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2225     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2226   %}
2227 
2228   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2229     int rm_byte_opcode = $rm_opcode$$constant;
2230     int base     = $mem$$base;
2231     int index    = $mem$$index;
2232     int scale    = $mem$$scale;
2233     int displace = $mem$$disp;
2234     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2235     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2236   %}
2237 
2238   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2239     int reg_encoding = $dst$$reg;
2240     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2241     int index        = 0x04;            // 0x04 indicates no index
2242     int scale        = 0x00;            // 0x00 indicates no scale
2243     int displace     = $src1$$constant; // 0x00 indicates no displacement
2244     relocInfo::relocType disp_reloc = relocInfo::none;
2245     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2246   %}
2247 
2248   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2249     // Compare dst,src
2250     emit_opcode(cbuf,0x3B);
2251     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2252     // jmp dst < src around move
2253     emit_opcode(cbuf,0x7C);
2254     emit_d8(cbuf,2);
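    //   (the 2-byte displacement skips the 2-byte MOV below; max_enc uses the same trick)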
2255     // move dst,src
2256     emit_opcode(cbuf,0x8B);
2257     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2258   %}
2259 
2260   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2261     // Compare dst,src
2262     emit_opcode(cbuf,0x3B);
2263     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2264     // jmp dst > src around move
2265     emit_opcode(cbuf,0x7F);
2266     emit_d8(cbuf,2);
2267     // move dst,src
2268     emit_opcode(cbuf,0x8B);
2269     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2270   %}
2271 
2272   enc_class enc_FPR_store(memory mem, regDPR src) %{
2273     // If src is FPR1, we can just FST to store it.
2274     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2275     int reg_encoding = 0x2; // Just store
2276     int base  = $mem$$base;
2277     int index = $mem$$index;
2278     int scale = $mem$$scale;
2279     int displace = $mem$$disp;
2280     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2281     if( $src$$reg != FPR1L_enc ) {
2282       reg_encoding = 0x3;  // Store & pop
2283       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2284       emit_d8( cbuf, 0xC0-1+$src$$reg );
2285     }
2286     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2287     emit_opcode(cbuf,$primary);
2288     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2289   %}
2290 
2291   enc_class neg_reg(rRegI dst) %{
2292     // NEG $dst
2293     emit_opcode(cbuf,0xF7);
2294     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2295   %}
2296 
2297   enc_class setLT_reg(eCXRegI dst) %{
2298     // SETLT $dst
2299     emit_opcode(cbuf,0x0F);
2300     emit_opcode(cbuf,0x9C);
2301     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2302   %}
2303 
2304   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2305     int tmpReg = $tmp$$reg;
2306 
2307     // SUB $p,$q
2308     emit_opcode(cbuf,0x2B);
2309     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2310     // SBB $tmp,$tmp
2311     emit_opcode(cbuf,0x1B);
2312     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2313     // AND $tmp,$y
2314     emit_opcode(cbuf,0x23);
2315     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2316     // ADD $p,$tmp
2317     emit_opcode(cbuf,0x03);
2318     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2319   %}
2320 
2321   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2322     // TEST shift,32
2323     emit_opcode(cbuf,0xF7);
2324     emit_rm(cbuf, 0x3, 0, ECX_enc);
2325     emit_d32(cbuf,0x20);
2326     // JEQ,s small
2327     emit_opcode(cbuf, 0x74);
2328     emit_d8(cbuf, 0x04);
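    //   (skips the MOV dst.hi,dst.lo and CLR dst.lo below, 2 bytes each)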
2329     // MOV    $dst.hi,$dst.lo
2330     emit_opcode( cbuf, 0x8B );
2331     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2332     // CLR    $dst.lo
2333     emit_opcode(cbuf, 0x33);
2334     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2335 // small:
2336     // SHLD   $dst.hi,$dst.lo,$shift
2337     emit_opcode(cbuf,0x0F);
2338     emit_opcode(cbuf,0xA5);
2339     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2340     // SHL    $dst.lo,$shift"
2341     emit_opcode(cbuf,0xD3);
2342     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2343   %}
2344 
2345   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2346     // TEST shift,32
2347     emit_opcode(cbuf,0xF7);
2348     emit_rm(cbuf, 0x3, 0, ECX_enc);
2349     emit_d32(cbuf,0x20);
2350     // JEQ,s small
2351     emit_opcode(cbuf, 0x74);
2352     emit_d8(cbuf, 0x04);
2353     // MOV    $dst.lo,$dst.hi
2354     emit_opcode( cbuf, 0x8B );
2355     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2356     // CLR    $dst.hi
2357     emit_opcode(cbuf, 0x33);
2358     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2359 // small:
2360     // SHRD   $dst.lo,$dst.hi,$shift
2361     emit_opcode(cbuf,0x0F);
2362     emit_opcode(cbuf,0xAD);
2363     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2364     // SHR    $dst.hi,$shift"
2365     emit_opcode(cbuf,0xD3);
2366     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2367   %}
2368 
2369   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2370     // TEST shift,32
2371     emit_opcode(cbuf,0xF7);
2372     emit_rm(cbuf, 0x3, 0, ECX_enc);
2373     emit_d32(cbuf,0x20);
2374     // JEQ,s small
2375     emit_opcode(cbuf, 0x74);
2376     emit_d8(cbuf, 0x05);
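    //   (skips the 2-byte MOV dst.lo,dst.hi and the 3-byte SAR dst.hi,31 below)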
2377     // MOV    $dst.lo,$dst.hi
2378     emit_opcode( cbuf, 0x8B );
2379     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2380     // SAR    $dst.hi,31
2381     emit_opcode(cbuf, 0xC1);
2382     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2383     emit_d8(cbuf, 0x1F );
2384 // small:
2385     // SHRD   $dst.lo,$dst.hi,$shift
2386     emit_opcode(cbuf,0x0F);
2387     emit_opcode(cbuf,0xAD);
2388     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2389     // SAR    $dst.hi,$shift"
2390     emit_opcode(cbuf,0xD3);
2391     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2392   %}
2393 
2394 
2395   // ----------------- Encodings for floating point unit -----------------
2396   // May leave result in FPU-TOS or FPU reg depending on opcodes
2397   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2398     $$$emit8$primary;
2399     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2400   %}
2401 
2402   // Pop argument in FPR0 with FSTP ST(0)
2403   enc_class PopFPU() %{
2404     emit_opcode( cbuf, 0xDD );
2405     emit_d8( cbuf, 0xD8 );
2406   %}
2407 
2408   // !!!!! equivalent to Pop_Reg_F
2409   enc_class Pop_Reg_DPR( regDPR dst ) %{
2410     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2411     emit_d8( cbuf, 0xD8+$dst$$reg );
2412   %}
2413 
2414   enc_class Push_Reg_DPR( regDPR dst ) %{
2415     emit_opcode( cbuf, 0xD9 );
2416     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2417   %}
2418 
2419   enc_class strictfp_bias1( regDPR dst ) %{
2420     emit_opcode( cbuf, 0xDB );           // FLD m80real
2421     emit_opcode( cbuf, 0x2D );
2422     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2423     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2424     emit_opcode( cbuf, 0xC8+$dst$$reg );
2425   %}
2426 
2427   enc_class strictfp_bias2( regDPR dst ) %{
2428     emit_opcode( cbuf, 0xDB );           // FLD m80real
2429     emit_opcode( cbuf, 0x2D );
2430     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2431     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2432     emit_opcode( cbuf, 0xC8+$dst$$reg );
2433   %}
2434 
2435   // Special case for moving an integer register to a stack slot.
2436   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2437     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2438   %}
2439 
2440   // Special case for moving a register to a stack slot.
2441   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2442     // Opcode already emitted
2443     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2444     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2445     emit_d32(cbuf, $dst$$disp);   // Displacement
2446   %}
2447 
2448   // Push the integer in stackSlot 'src' onto FP-stack
2449   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2450     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2451   %}
2452 
2453   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2454   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2455     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2456   %}
2457 
2458   // Same as Pop_Mem_F except for opcode
2459   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2460   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2461     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2462   %}
2463 
2464   enc_class Pop_Reg_FPR( regFPR dst ) %{
2465     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2466     emit_d8( cbuf, 0xD8+$dst$$reg );
2467   %}
2468 
2469   enc_class Push_Reg_FPR( regFPR dst ) %{
2470     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2471     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2472   %}
2473 
2474   // Push FPU's float to a stack-slot, and pop FPU-stack
2475   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2476     int pop = 0x02;
2477     if ($src$$reg != FPR1L_enc) {
2478       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2479       emit_d8( cbuf, 0xC0-1+$src$$reg );
2480       pop = 0x03;
2481     }
2482     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2483   %}
2484 
2485   // Push FPU's double to a stack-slot, and pop FPU-stack
2486   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2487     int pop = 0x02;
2488     if ($src$$reg != FPR1L_enc) {
2489       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2490       emit_d8( cbuf, 0xC0-1+$src$$reg );
2491       pop = 0x03;
2492     }
2493     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2494   %}
2495 
2496   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2497   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2498     int pop = 0xD0 - 1; // -1 since we skip FLD
2499     if ($src$$reg != FPR1L_enc) {
2500       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2501       emit_d8( cbuf, 0xC0-1+$src$$reg );
2502       pop = 0xD8;
2503     }
2504     emit_opcode( cbuf, 0xDD );
2505     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2506   %}
2507 
2508 
2509   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2510     // load dst in FPR0
2511     emit_opcode( cbuf, 0xD9 );
2512     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2513     if ($src$$reg != FPR1L_enc) {
2514       // fincstp
2515       emit_opcode (cbuf, 0xD9);
2516       emit_opcode (cbuf, 0xF7);
2517       // swap src with FPR1:
2518       // FXCH FPR1 with src
2519       emit_opcode(cbuf, 0xD9);
2520       emit_d8(cbuf, 0xC8-1+$src$$reg );
2521       // fdecstp
2522       emit_opcode (cbuf, 0xD9);
2523       emit_opcode (cbuf, 0xF6);
2524     }
2525   %}
2526 
2527   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2528     MacroAssembler _masm(&cbuf);
2529     __ subptr(rsp, 8);
2530     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2531     __ fld_d(Address(rsp, 0));
2532     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2533     __ fld_d(Address(rsp, 0));
2534   %}
2535 
2536   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2537     MacroAssembler _masm(&cbuf);
2538     __ subptr(rsp, 4);
2539     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2540     __ fld_s(Address(rsp, 0));
2541     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2542     __ fld_s(Address(rsp, 0));
2543   %}
2544 
2545   enc_class Push_ResultD(regD dst) %{
2546     MacroAssembler _masm(&cbuf);
2547     __ fstp_d(Address(rsp, 0));
2548     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2549     __ addptr(rsp, 8);
2550   %}
2551 
2552   enc_class Push_ResultF(regF dst, immI d8) %{
2553     MacroAssembler _masm(&cbuf);
2554     __ fstp_s(Address(rsp, 0));
2555     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2556     __ addptr(rsp, $d8$$constant);
2557   %}
2558 
2559   enc_class Push_SrcD(regD src) %{
2560     MacroAssembler _masm(&cbuf);
2561     __ subptr(rsp, 8);
2562     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2563     __ fld_d(Address(rsp, 0));
2564   %}
2565 
2566   enc_class push_stack_temp_qword() %{
2567     MacroAssembler _masm(&cbuf);
2568     __ subptr(rsp, 8);
2569   %}
2570 
2571   enc_class pop_stack_temp_qword() %{
2572     MacroAssembler _masm(&cbuf);
2573     __ addptr(rsp, 8);
2574   %}
2575 
2576   enc_class push_xmm_to_fpr1(regD src) %{
2577     MacroAssembler _masm(&cbuf);
2578     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2579     __ fld_d(Address(rsp, 0));
2580   %}
2581 
2582   enc_class Push_Result_Mod_DPR( regDPR src) %{
2583     if ($src$$reg != FPR1L_enc) {
2584       // fincstp
2585       emit_opcode (cbuf, 0xD9);
2586       emit_opcode (cbuf, 0xF7);
2587       // FXCH FPR1 with src
2588       emit_opcode(cbuf, 0xD9);
2589       emit_d8(cbuf, 0xC8-1+$src$$reg );
2590       // fdecstp
2591       emit_opcode (cbuf, 0xD9);
2592       emit_opcode (cbuf, 0xF6);
2593     }
2594     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2595     // // FSTP   FPR$dst$$reg
2596     // emit_opcode( cbuf, 0xDD );
2597     // emit_d8( cbuf, 0xD8+$dst$$reg );
2598   %}
2599 
2600   enc_class fnstsw_sahf_skip_parity() %{
2601     // fnstsw ax
2602     emit_opcode( cbuf, 0xDF );
2603     emit_opcode( cbuf, 0xE0 );
2604     // sahf
2605     emit_opcode( cbuf, 0x9E );
2606     // jnp  ::skip
2607     emit_opcode( cbuf, 0x7B );
2608     emit_opcode( cbuf, 0x05 );
2609   %}
2610 
2611   enc_class emitModDPR() %{
2612     // fprem must be iterative
2613     // :: loop
2614     // fprem
2615     emit_opcode( cbuf, 0xD9 );
2616     emit_opcode( cbuf, 0xF8 );
2617     // wait
2618     emit_opcode( cbuf, 0x9b );
2619     // fnstsw ax
2620     emit_opcode( cbuf, 0xDF );
2621     emit_opcode( cbuf, 0xE0 );
2622     // sahf
2623     emit_opcode( cbuf, 0x9E );
2624     // jp  ::loop
2625     emit_opcode( cbuf, 0x0F );
2626     emit_opcode( cbuf, 0x8A );
2627     emit_opcode( cbuf, 0xF4 );
2628     emit_opcode( cbuf, 0xFF );
2629     emit_opcode( cbuf, 0xFF );
2630     emit_opcode( cbuf, 0xFF );
2631   %}
2632 
2633   enc_class fpu_flags() %{
2634     // fnstsw_ax
2635     emit_opcode( cbuf, 0xDF);
2636     emit_opcode( cbuf, 0xE0);
2637     // test ax,0x0400
2638     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2639     emit_opcode( cbuf, 0xA9 );
2640     emit_d16   ( cbuf, 0x0400 );
2641     // // // This sequence works, but stalls for 12-16 cycles on PPro
2642     // // test rax,0x0400
2643     // emit_opcode( cbuf, 0xA9 );
2644     // emit_d32   ( cbuf, 0x00000400 );
2645     //
2646     // jz exit (no unordered comparison)
2647     emit_opcode( cbuf, 0x74 );
2648     emit_d8    ( cbuf, 0x02 );
2649     // mov ah,1 - treat as LT case (set carry flag)
2650     emit_opcode( cbuf, 0xB4 );
2651     emit_d8    ( cbuf, 0x01 );
2652     // sahf
2653     emit_opcode( cbuf, 0x9E);
2654   %}
2655 
2656   enc_class cmpF_P6_fixup() %{
2657     // Fixup the integer flags in case comparison involved a NaN
2658     //
2659     // JNP exit (no unordered comparison, P-flag is set by NaN)
2660     emit_opcode( cbuf, 0x7B );
2661     emit_d8    ( cbuf, 0x03 );
2662     // MOV AH,1 - treat as LT case (set carry flag)
2663     emit_opcode( cbuf, 0xB4 );
2664     emit_d8    ( cbuf, 0x01 );
2665     // SAHF
2666     emit_opcode( cbuf, 0x9E);
2667     // NOP     // target for branch to avoid branch to branch
2668     emit_opcode( cbuf, 0x90);
2669   %}
2670 
2671 //     fnstsw_ax();
2672 //     sahf();
2673 //     movl(dst, nan_result);
2674 //     jcc(Assembler::parity, exit);
2675 //     movl(dst, less_result);
2676 //     jcc(Assembler::below, exit);
2677 //     movl(dst, equal_result);
2678 //     jcc(Assembler::equal, exit);
2679 //     movl(dst, greater_result);
2680 
2681 // less_result     =  1;
2682 // greater_result  = -1;
2683 // equal_result    = 0;
2684 // nan_result      = -1;
2685 
2686   enc_class CmpF_Result(rRegI dst) %{
2687     // fnstsw_ax();
2688     emit_opcode( cbuf, 0xDF);
2689     emit_opcode( cbuf, 0xE0);
2690     // sahf
2691     emit_opcode( cbuf, 0x9E);
2692     // movl(dst, nan_result);
2693     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2694     emit_d32( cbuf, -1 );
2695     // jcc(Assembler::parity, exit);
2696     emit_opcode( cbuf, 0x7A );
2697     emit_d8    ( cbuf, 0x13 );
2698     // movl(dst, less_result);
2699     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2700     emit_d32( cbuf, -1 );
2701     // jcc(Assembler::below, exit);
2702     emit_opcode( cbuf, 0x72 );
2703     emit_d8    ( cbuf, 0x0C );
2704     // movl(dst, equal_result);
2705     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2706     emit_d32( cbuf, 0 );
2707     // jcc(Assembler::equal, exit);
2708     emit_opcode( cbuf, 0x74 );
2709     emit_d8    ( cbuf, 0x05 );
2710     // movl(dst, greater_result);
2711     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2712     emit_d32( cbuf, 1 );
2713   %}
2714 
2715 
2716   // Compare the longs and set flags
2717   // BROKEN!  Do Not use as-is
2718   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2719     // CMP    $src1.hi,$src2.hi
2720     emit_opcode( cbuf, 0x3B );
2721     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2722     // JNE,s  done
2723     emit_opcode(cbuf,0x75);
2724     emit_d8(cbuf, 2 );
2725     // CMP    $src1.lo,$src2.lo
2726     emit_opcode( cbuf, 0x3B );
2727     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2728 // done:
2729   %}
2730 
2731   enc_class convert_int_long( regL dst, rRegI src ) %{
2732     // mov $dst.lo,$src
2733     int dst_encoding = $dst$$reg;
2734     int src_encoding = $src$$reg;
2735     encode_Copy( cbuf, dst_encoding  , src_encoding );
2736     // mov $dst.hi,$src
2737     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2738     // sar $dst.hi,31
2739     emit_opcode( cbuf, 0xC1 );
2740     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2741     emit_d8(cbuf, 0x1F );
2742   %}
2743 
2744   enc_class convert_long_double( eRegL src ) %{
2745     // push $src.hi
2746     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2747     // push $src.lo
2748     emit_opcode(cbuf, 0x50+$src$$reg  );
2749     // fild 64-bits at [SP]
2750     emit_opcode(cbuf,0xdf);
2751     emit_d8(cbuf, 0x6C);
2752     emit_d8(cbuf, 0x24);
2753     emit_d8(cbuf, 0x00);
2754     // pop stack
2755     emit_opcode(cbuf, 0x83); // add  SP, #8
2756     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2757     emit_d8(cbuf, 0x8);
2758   %}
2759 
2760   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2761     // IMUL   EDX:EAX,$src1
2762     emit_opcode( cbuf, 0xF7 );
2763     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2764     // SAR    EDX,$cnt-32
2765     int shift_count = ((int)$cnt$$constant) - 32;
2766     if (shift_count > 0) {
2767       emit_opcode(cbuf, 0xC1);
2768       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2769       emit_d8(cbuf, shift_count);
2770     }
2771   %}
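
  // Illustrative sketch (not generated code): for shift counts in [32,63] the result
  // lives entirely in the high half of the 64-bit product, so only EDX needs shifting.
  // In C, assuming 32-bit int, 64-bit long long, and arithmetic right shift:
  //
  //   int mul_shift_high(int x, int y, int cnt /* 32..63 */) {
  //     long long p = (long long)x * y;    // IMUL EDX:EAX,$src1
  //     return (int)(p >> cnt);            // take EDX, then SAR EDX,cnt-32 when cnt > 32
  //   }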
2772 
2773   // This version doesn't have the trailing add SP, 8
2774   enc_class convert_long_double2( eRegL src ) %{
2775     // push $src.hi
2776     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2777     // push $src.lo
2778     emit_opcode(cbuf, 0x50+$src$$reg  );
2779     // fild 64-bits at [SP]
2780     emit_opcode(cbuf,0xdf);
2781     emit_d8(cbuf, 0x6C);
2782     emit_d8(cbuf, 0x24);
2783     emit_d8(cbuf, 0x00);
2784   %}
2785 
2786   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2787     // Basic idea: long = (long)int * (long)int
2788     // IMUL EDX:EAX, src
2789     emit_opcode( cbuf, 0xF7 );
2790     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2791   %}
2792 
2793   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2794     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2795     // MUL EDX:EAX, src
2796     emit_opcode( cbuf, 0xF7 );
2797     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2798   %}
2799 
2800   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2801     // Basic idea: lo(result) = lo(x_lo * y_lo)
2802     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2803     // MOV    $tmp,$src.lo
2804     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2805     // IMUL   $tmp,EDX
2806     emit_opcode( cbuf, 0x0F );
2807     emit_opcode( cbuf, 0xAF );
2808     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2809     // MOV    EDX,$src.hi
2810     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2811     // IMUL   EDX,EAX
2812     emit_opcode( cbuf, 0x0F );
2813     emit_opcode( cbuf, 0xAF );
2814     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2815     // ADD    $tmp,EDX
2816     emit_opcode( cbuf, 0x03 );
2817     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2818     // MUL   EDX:EAX,$src.lo
2819     emit_opcode( cbuf, 0xF7 );
2820     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2821     // ADD    EDX,ESI
2822     emit_opcode( cbuf, 0x03 );
2823     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2824   %}
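
  // Illustrative sketch (not generated code) of the decomposition used above.  Only the
  // low 64 bits of the product are kept, so the x_hi*y_hi term drops out.  In C, with
  // 32-bit unsigned halves:
  //
  //   unsigned long long mul64(unsigned long long x, unsigned long long y) {
  //     unsigned x_lo = (unsigned)x, x_hi = (unsigned)(x >> 32);
  //     unsigned y_lo = (unsigned)y, y_hi = (unsigned)(y >> 32);
  //     unsigned long long p = (unsigned long long)x_lo * y_lo;     // MUL EDX:EAX,$src.lo
  //     unsigned hi = (unsigned)(p >> 32) + x_hi*y_lo + x_lo*y_hi;  // two IMULs + ADDs
  //     return ((unsigned long long)hi << 32) | (unsigned)p;
  //   }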
2825 
2826   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2827     // Basic idea: lo(result) = lo(src * y_lo)
2828     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2829     // IMUL   $tmp,EDX,$src
2830     emit_opcode( cbuf, 0x6B );
2831     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2832     emit_d8( cbuf, (int)$src$$constant );
2833     // MOV    EDX,$src
2834     emit_opcode(cbuf, 0xB8 + EDX_enc);
2835     emit_d32( cbuf, (int)$src$$constant );
2836     // MUL   EDX:EAX,EDX
2837     emit_opcode( cbuf, 0xF7 );
2838     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2839     // ADD    EDX,ESI
2840     emit_opcode( cbuf, 0x03 );
2841     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2842   %}
2843 
2844   enc_class long_div( eRegL src1, eRegL src2 ) %{
2845     // PUSH src1.hi
2846     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2847     // PUSH src1.lo
2848     emit_opcode(cbuf,               0x50+$src1$$reg  );
2849     // PUSH src2.hi
2850     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2851     // PUSH src2.lo
2852     emit_opcode(cbuf,               0x50+$src2$$reg  );
2853     // CALL directly to the runtime
2854     cbuf.set_insts_mark();
2855     emit_opcode(cbuf,0xE8);       // Call into runtime
2856     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2857     // Restore stack
2858     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2859     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2860     emit_d8(cbuf, 4*4);
2861   %}
2862 
2863   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2864     // PUSH src1.hi
2865     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2866     // PUSH src1.lo
2867     emit_opcode(cbuf,               0x50+$src1$$reg  );
2868     // PUSH src2.hi
2869     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2870     // PUSH src2.lo
2871     emit_opcode(cbuf,               0x50+$src2$$reg  );
2872     // CALL directly to the runtime
2873     cbuf.set_insts_mark();
2874     emit_opcode(cbuf,0xE8);       // Call into runtime
2875     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2876     // Restore stack
2877     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2878     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2879     emit_d8(cbuf, 4*4);
2880   %}
2881 
2882   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2883     // MOV   $tmp,$src.lo
2884     emit_opcode(cbuf, 0x8B);
2885     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2886     // OR    $tmp,$src.hi
2887     emit_opcode(cbuf, 0x0B);
2888     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2889   %}
2890 
2891   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2892     // CMP    $src1.lo,$src2.lo
2893     emit_opcode( cbuf, 0x3B );
2894     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2895     // JNE,s  skip
2896     emit_cc(cbuf, 0x70, 0x5);
2897     emit_d8(cbuf,2);
2898     // CMP    $src1.hi,$src2.hi
2899     emit_opcode( cbuf, 0x3B );
2900     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2901   %}
2902 
2903   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2904     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2905     emit_opcode( cbuf, 0x3B );
2906     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2907     // MOV    $tmp,$src1.hi
2908     emit_opcode( cbuf, 0x8B );
2909     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2910     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2911     emit_opcode( cbuf, 0x1B );
2912     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2913   %}
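
  // Illustrative sketch (not generated code): the CMP/SBB pair above produces the flags
  // of the full 64-bit subtraction src1 - src2 without keeping the result.  The borrow
  // from the low-word compare feeds the high-word subtract-with-borrow:
  //
  //   int signed_less(unsigned a_lo, int a_hi, unsigned b_lo, int b_hi) {
  //     int borrow = a_lo < b_lo;                        // CMP src1.lo,src2.lo
  //     long long hi = (long long)a_hi - b_hi - borrow;  // SBB tmp,src2.hi
  //     return hi < 0;                                    // "<" is what the LT flags encode
  //   }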
2914 
2915   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2916     // XOR    $tmp,$tmp
2917     emit_opcode(cbuf,0x33);  // XOR
2918     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2919     // CMP    $tmp,$src.lo
2920     emit_opcode( cbuf, 0x3B );
2921     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2922     // SBB    $tmp,$src.hi
2923     emit_opcode( cbuf, 0x1B );
2924     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2925   %}
2926 
2927  // Sniff, sniff... smells like Gnu Superoptimizer
2928   enc_class neg_long( eRegL dst ) %{
2929     emit_opcode(cbuf,0xF7);    // NEG hi
2930     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2931     emit_opcode(cbuf,0xF7);    // NEG lo
2932     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2933     emit_opcode(cbuf,0x83);    // SBB hi,0
2934     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2935     emit_d8    (cbuf,0 );
2936   %}
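
  // Illustrative sketch (not generated code): NEG hi / NEG lo / SBB hi,0 negates a 64-bit
  // value held in two 32-bit halves.  NEG of the low word sets carry exactly when the low
  // word was non-zero, and the SBB propagates that borrow into the high word:
  //
  //   void neg64(unsigned* lo, unsigned* hi) {
  //     *hi = 0u - *hi;                 // NEG hi
  //     unsigned borrow = (*lo != 0);
  //     *lo = 0u - *lo;                 // NEG lo (carry set iff old lo was non-zero)
  //     *hi -= borrow;                  // SBB hi,0
  //   }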
2937 
2938   enc_class enc_pop_rdx() %{
2939     emit_opcode(cbuf,0x5A);
2940   %}
2941 
2942   enc_class enc_rethrow() %{
2943     cbuf.set_insts_mark();
2944     emit_opcode(cbuf, 0xE9);        // jmp    entry
2945     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2946                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2947   %}
2948 
2949 
2950   // Convert a double to an int.  Java semantics require we do complex
2951   // manglelations in the corner cases.  So we set the rounding mode to
2952   // 'zero', store the darned double down as an int, and reset the
2953   // rounding mode to 'nearest'.  The hardware throws an exception which
2954   // patches up the correct value directly to the stack.
2955   enc_class DPR2I_encoding( regDPR src ) %{
2956     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2957     // exceptions here, so that a NAN or other corner-case value will
2958     // throw an exception (but normal values get converted at full speed).
2959     // However, I2C adapters and other float-stack manglers leave pending
2960     // invalid-op exceptions hanging.  We would have to clear them before
2961     // enabling them and that is more expensive than just testing for the
2962     // invalid value Intel stores down in the corner cases.
2963     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2964     emit_opcode(cbuf,0x2D);
2965     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2966     // Allocate a word
2967     emit_opcode(cbuf,0x83);            // SUB ESP,4
2968     emit_opcode(cbuf,0xEC);
2969     emit_d8(cbuf,0x04);
2970     // Encoding assumes a double has been pushed into FPR0.
2971     // Store down the double as an int, popping the FPU stack
2972     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2973     emit_opcode(cbuf,0x1C);
2974     emit_d8(cbuf,0x24);
2975     // Restore the rounding mode; mask the exception
2976     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2977     emit_opcode(cbuf,0x2D);
2978     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2979         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2980         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2981 
2982     // Load the converted int; adjust CPU stack
2983     emit_opcode(cbuf,0x58);       // POP EAX
2984     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2985     emit_d32   (cbuf,0x80000000); //         0x80000000
2986     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2987     emit_d8    (cbuf,0x07);       // Size of slow_call
2988     // Push src onto stack slow-path
2989     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2990     emit_d8    (cbuf,0xC0-1+$src$$reg );
2991     // CALL directly to the runtime
2992     cbuf.set_insts_mark();
2993     emit_opcode(cbuf,0xE8);       // Call into runtime
2994     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2995     // Carry on here...
2996   %}
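
  // Illustrative sketch (not generated code) of the fast/slow split above: FISTP in
  // round-to-zero mode stores the x87 "integer indefinite" value 0x80000000 for NaN and
  // out-of-range inputs, so only that result is routed to the runtime wrapper.  The
  // helper names below are hypothetical:
  //
  //   int d2i(double d) {
  //     int v = fistp_trunc(d);               // FISTP with the truncating control word
  //     if (v != (int)0x80000000) return v;   // fast path: hardware result is correct
  //     return d2i_wrapper_stub(d);           // slow path: Java corner-case semantics
  //   }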
2997 
2998   enc_class DPR2L_encoding( regDPR src ) %{
2999     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3000     emit_opcode(cbuf,0x2D);
3001     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3002     // Allocate a word
3003     emit_opcode(cbuf,0x83);            // SUB ESP,8
3004     emit_opcode(cbuf,0xEC);
3005     emit_d8(cbuf,0x08);
3006     // Encoding assumes a double has been pushed into FPR0.
3007     // Store down the double as a long, popping the FPU stack
3008     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3009     emit_opcode(cbuf,0x3C);
3010     emit_d8(cbuf,0x24);
3011     // Restore the rounding mode; mask the exception
3012     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3013     emit_opcode(cbuf,0x2D);
3014     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3015         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3016         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3017 
3018     // Load the converted int; adjust CPU stack
3019     emit_opcode(cbuf,0x58);       // POP EAX
3020     emit_opcode(cbuf,0x5A);       // POP EDX
3021     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3022     emit_d8    (cbuf,0xFA);       // rdx
3023     emit_d32   (cbuf,0x80000000); //         0x80000000
3024     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3025     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3026     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3027     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3028     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3029     emit_d8    (cbuf,0x07);       // Size of slow_call
3030     // Push src onto stack slow-path
3031     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3032     emit_d8    (cbuf,0xC0-1+$src$$reg );
3033     // CALL directly to the runtime
3034     cbuf.set_insts_mark();
3035     emit_opcode(cbuf,0xE8);       // Call into runtime
3036     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3037     // Carry on here...
3038   %}
3039 
3040   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3041     // Operand was loaded from memory into fp ST (stack top)
3042     // FMUL   ST,$src  /* D8 C8+i */
3043     emit_opcode(cbuf, 0xD8);
3044     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3045   %}
3046 
3047   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3048     // FADD   ST,src2  /* D8 C0+i */
3049     emit_opcode(cbuf, 0xD8);
3050     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3051     // could use FADDP  src2,fpST  /* DE C0+i */
3052   %}
3053 
3054   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3055     // FADDP  src2,ST  /* DE C0+i */
3056     emit_opcode(cbuf, 0xDE);
3057     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3058   %}
3059 
3060   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3061     // Operand has been loaded into fp ST (stack top)
3062       // FSUB   ST,$src1
3063       emit_opcode(cbuf, 0xD8);
3064       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3065 
3066       // FDIV
3067       emit_opcode(cbuf, 0xD8);
3068       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3069   %}
3070 
3071   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3072     // Operand was loaded from memory into fp ST (stack top)
3073     // FADD   ST,$src  /* D8 C0+i */
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3076 
3077     // FMUL  ST,src2  /* D8 C8+i */
3078     emit_opcode(cbuf, 0xD8);
3079     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3080   %}
3081 
3082 
3083   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3084     // Operand was loaded from memory into fp ST (stack top)
3085     // FADD   ST,$src  /* D8 C0+i */
3086     emit_opcode(cbuf, 0xD8);
3087     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3088 
3089     // FMULP  src2,ST  /* DE C8+i */
3090     emit_opcode(cbuf, 0xDE);
3091     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3092   %}
3093 
3094   // Atomically load the volatile long
3095   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3096     emit_opcode(cbuf,0xDF);
3097     int rm_byte_opcode = 0x05;
3098     int base     = $mem$$base;
3099     int index    = $mem$$index;
3100     int scale    = $mem$$scale;
3101     int displace = $mem$$disp;
3102     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3103     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3104     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3105   %}
3106 
3107   // Volatile Store Long.  Must be atomic, so move it into
3108   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3109   // target address before the store (for null-ptr checks)
3110   // so the memory operand is used twice in the encoding.
3111   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3112     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3113     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3114     emit_opcode(cbuf,0xDF);
3115     int rm_byte_opcode = 0x07;
3116     int base     = $mem$$base;
3117     int index    = $mem$$index;
3118     int scale    = $mem$$scale;
3119     int displace = $mem$$disp;
3120     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3121     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3122   %}
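
  // Illustrative sketch (not generated code): both volatile-long encodings above go
  // through the x87 unit because a single 8-byte FILD/FISTP access is atomic on IA-32
  // (for suitably aligned addresses), while a pair of 32-bit integer moves is not:
  //
  //   load:   FILD  [mem]        ; read all 64 bits in one access
  //           FISTP [ESP+slot]   ; spill to the stack slot used by the integer registers
  //   store:  FILD  [ESP+slot]   ; pick the value up from its stack slot
  //           FISTP [mem]        ; write all 64 bits in one access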
3123 
3124   // Safepoint Poll.  This polls the safepoint page, and causes an
3125   // exception if it is not readable. Unfortunately, it kills the condition code
3126   // in the process.
3127   // We currently use TESTL [spp],EDI
3128   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3129 
3130   enc_class Safepoint_Poll() %{
3131     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3132     emit_opcode(cbuf,0x85);
3133     emit_rm (cbuf, 0x0, 0x7, 0x5);
3134     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3135   %}
3136 %}
3137 
3138 
3139 //----------FRAME--------------------------------------------------------------
3140 // Definition of frame structure and management information.
3141 //
3142 //  S T A C K   L A Y O U T    Allocators stack-slot number
3143 //                             |   (to get allocators register number
3144 //  G  Owned by    |        |  v    add OptoReg::stack0())
3145 //  r   CALLER     |        |
3146 //  o     |        +--------+      pad to even-align allocators stack-slot
3147 //  w     V        |  pad0  |        numbers; owned by CALLER
3148 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3149 //  h     ^        |   in   |  5
3150 //        |        |  args  |  4   Holes in incoming args owned by SELF
3151 //  |     |        |        |  3
3152 //  |     |        +--------+
3153 //  V     |        | old out|      Empty on Intel, window on Sparc
3154 //        |    old |preserve|      Must be even aligned.
3155 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3156 //        |        |   in   |  3   area for Intel ret address
3157 //     Owned by    |preserve|      Empty on Sparc.
3158 //       SELF      +--------+
3159 //        |        |  pad2  |  2   pad to align old SP
3160 //        |        +--------+  1
3161 //        |        | locks  |  0
3162 //        |        +--------+----> OptoReg::stack0(), even aligned
3163 //        |        |  pad1  | 11   pad to align new SP
3164 //        |        +--------+
3165 //        |        |        | 10
3166 //        |        | spills |  9   spills
3167 //        V        |        |  8   (pad0 slot for callee)
3168 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3169 //        ^        |  out   |  7
3170 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3171 //     Owned by    +--------+
3172 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3173 //        |    new |preserve|      Must be even-aligned.
3174 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3175 //        |        |        |
3176 //
3177 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3178 //         known from SELF's arguments and the Java calling convention.
3179 //         Region 6-7 is determined per call site.
3180 // Note 2: If the calling convention leaves holes in the incoming argument
3181 //         area, those holes are owned by SELF.  Holes in the outgoing area
3182 //         are owned by the CALLEE.  Holes should not be necessary in the
3183 //         incoming area, as the Java calling convention is completely under
3184 //         the control of the AD file.  Doubles can be sorted and packed to
3185 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3186 //         varargs C calling conventions.
3187 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3188 //         even aligned with pad0 as needed.
3189 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3190 //         region 6-11 is even aligned; it may be padded out more so that
3191 //         the region from SP to FP meets the minimum stack alignment.
3192 
3193 frame %{
3194   // What direction does stack grow in (assumed to be same for C & Java)
3195   stack_direction(TOWARDS_LOW);
3196 
3197   // These three registers define part of the calling convention
3198   // between compiled code and the interpreter.
3199   inline_cache_reg(EAX);                // Inline Cache Register
3200   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3201 
3202   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3203   cisc_spilling_operand_name(indOffset32);
3204 
3205   // Number of stack slots consumed by locking an object
3206   sync_stack_slots(1);
3207 
3208   // Compiled code's Frame Pointer
3209   frame_pointer(ESP);
3210   // The interpreter stores its frame pointer in a register which is
3211   // stored to the stack by I2C adapters.
3212   // I2C adapters convert from interpreted Java to compiled Java.
3213   interpreter_frame_pointer(EBP);
3214 
3215   // Stack alignment requirement
3216   // Alignment size in bytes (128-bit -> 16 bytes)
3217   stack_alignment(StackAlignmentInBytes);
3218 
3219   // Number of stack slots between incoming argument block and the start of
3220   // a new frame.  The PROLOG must add this many slots to the stack.  The
3221   // EPILOG must remove this many slots.  Intel needs one slot for the
3222   // return address and one for rbp (rbp must be saved).
3223   in_preserve_stack_slots(2+VerifyStackAtCalls);
3224 
3225   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3226   // for calls to C.  Supports the var-args backing area for register parms.
3227   varargs_C_out_slots_killed(0);
3228 
3229   // The after-PROLOG location of the return address.  Location of
3230   // return address specifies a type (REG or STACK) and a number
3231   // representing the register number (i.e. - use a register name) or
3232   // stack slot.
3233   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3234   // Otherwise, it is above the locks and verification slot and alignment word
3235   return_addr(STACK - 1 +
3236               round_to((Compile::current()->in_preserve_stack_slots() +
3237                         Compile::current()->fixed_slots()),
3238                        stack_alignment_in_slots()));
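
  // Worked example (illustrative only): with 2 preserve slots, no fixed slots, and a
  // 4-slot alignment unit (16-byte stack alignment, 4-byte slots), round_to(2, 4) = 4,
  // so the return address lands at stack slot 4 - 1 = 3 of the new frame.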
3239 
3240   // Body of function which returns an integer array locating
3241   // arguments either in registers or in stack slots.  Passed an array
3242   // of ideal registers called "sig" and a "length" count.  Stack-slot
3243   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3244   // arguments for a CALLEE.  Incoming stack arguments are
3245   // automatically biased by the preserve_stack_slots field above.
3246   calling_convention %{
3247     // No difference between incoming/outgoing, so just pass false
3248     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3249   %}
3250 
3251 
3252   // Body of function which returns an integer array locating
3253   // arguments either in registers or in stack slots.  Passed an array
3254   // of ideal registers called "sig" and a "length" count.  Stack-slot
3255   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3256   // arguments for a CALLEE.  Incoming stack arguments are
3257   // automatically biased by the preserve_stack_slots field above.
3258   c_calling_convention %{
3259     // This is obviously always outgoing
3260     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3261   %}
3262 
3263   // Location of C & interpreter return values
3264   c_return_value %{
3265     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3266     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3267     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3268 
3269     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3270     // that C functions return float and double results in XMM0.
3271     if( ideal_reg == Op_RegD && UseSSE>=2 )
3272       return OptoRegPair(XMM0b_num,XMM0_num);
3273     if( ideal_reg == Op_RegF && UseSSE>=2 )
3274       return OptoRegPair(OptoReg::Bad,XMM0_num);
3275 
3276     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3277   %}
3278 
3279   // Location of return values
3280   return_value %{
3281     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3282     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3283     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3284     if( ideal_reg == Op_RegD && UseSSE>=2 )
3285       return OptoRegPair(XMM0b_num,XMM0_num);
3286     if( ideal_reg == Op_RegF && UseSSE>=1 )
3287       return OptoRegPair(OptoReg::Bad,XMM0_num);
3288     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3289   %}
3290 
3291 %}
3292 
3293 //----------ATTRIBUTES---------------------------------------------------------
3294 //----------Operand Attributes-------------------------------------------------
3295 op_attrib op_cost(0);        // Required cost attribute
3296 
3297 //----------Instruction Attributes---------------------------------------------
3298 ins_attrib ins_cost(100);       // Required cost attribute
3299 ins_attrib ins_size(8);         // Required size attribute (in bits)
3300 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3301                                 // non-matching short branch variant of some
3302                                 // long branch?
3303 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3304                                 // specifies the alignment that some part of the instruction (not
3305                                 // necessarily the start) requires.  If > 1, a compute_padding()
3306                                 // function must be provided for the instruction
3307 
3308 //----------OPERANDS-----------------------------------------------------------
3309 // Operand definitions must precede instruction definitions for correct parsing
3310 // in the ADLC because operands constitute user defined types which are used in
3311 // instruction definitions.
3312 
3313 //----------Simple Operands----------------------------------------------------
3314 // Immediate Operands
3315 // Integer Immediate
3316 operand immI() %{
3317   match(ConI);
3318 
3319   op_cost(10);
3320   format %{ %}
3321   interface(CONST_INTER);
3322 %}
3323 
3324 // Constant for test vs zero
3325 operand immI0() %{
3326   predicate(n->get_int() == 0);
3327   match(ConI);
3328 
3329   op_cost(0);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for increment
3335 operand immI1() %{
3336   predicate(n->get_int() == 1);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for decrement
3345 operand immI_M1() %{
3346   predicate(n->get_int() == -1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Valid scale values for addressing modes
3355 operand immI2() %{
3356   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3357   match(ConI);
3358 
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 operand immI8() %{
3364   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3365   match(ConI);
3366 
3367   op_cost(5);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 operand immI16() %{
3373   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3374   match(ConI);
3375 
3376   op_cost(10);
3377   format %{ %}
3378   interface(CONST_INTER);
3379 %}
3380 
3381 // Int Immediate non-negative
3382 operand immU31()
3383 %{
3384   predicate(n->get_int() >= 0);
3385   match(ConI);
3386 
3387   op_cost(0);
3388   format %{ %}
3389   interface(CONST_INTER);
3390 %}
3391 
3392 // Constant for long shifts
3393 operand immI_32() %{
3394   predicate( n->get_int() == 32 );
3395   match(ConI);
3396 
3397   op_cost(0);
3398   format %{ %}
3399   interface(CONST_INTER);
3400 %}
3401 
3402 operand immI_1_31() %{
3403   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3404   match(ConI);
3405 
3406   op_cost(0);
3407   format %{ %}
3408   interface(CONST_INTER);
3409 %}
3410 
3411 operand immI_32_63() %{
3412   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3413   match(ConI);
3414   op_cost(0);
3415 
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 operand immI_1() %{
3421   predicate( n->get_int() == 1 );
3422   match(ConI);
3423 
3424   op_cost(0);
3425   format %{ %}
3426   interface(CONST_INTER);
3427 %}
3428 
3429 operand immI_2() %{
3430   predicate( n->get_int() == 2 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_3() %{
3439   predicate( n->get_int() == 3 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 // Pointer Immediate
3448 operand immP() %{
3449   match(ConP);
3450 
3451   op_cost(10);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 // NULL Pointer Immediate
3457 operand immP0() %{
3458   predicate( n->get_ptr() == 0 );
3459   match(ConP);
3460   op_cost(0);
3461 
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 // Long Immediate
3467 operand immL() %{
3468   match(ConL);
3469 
3470   op_cost(20);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Long Immediate zero
3476 operand immL0() %{
3477   predicate( n->get_long() == 0L );
3478   match(ConL);
3479   op_cost(0);
3480 
3481   format %{ %}
3482   interface(CONST_INTER);
3483 %}
3484 
3485 // Long Immediate minus one
3486 operand immL_M1() %{
3487   predicate( n->get_long() == -1L );
3488   match(ConL);
3489   op_cost(0);
3490 
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
3495 // Long immediate from 0 to 127.
3496 // Used for a shorter form of long mul by 10.
3497 operand immL_127() %{
3498   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3499   match(ConL);
3500   op_cost(0);
3501 
3502   format %{ %}
3503   interface(CONST_INTER);
3504 %}
3505 
3506 // Long Immediate: low 32-bit mask
3507 operand immL_32bits() %{
3508   predicate(n->get_long() == 0xFFFFFFFFL);
3509   match(ConL);
3510   op_cost(0);
3511 
3512   format %{ %}
3513   interface(CONST_INTER);
3514 %}
3515 
3516 // Long Immediate: value fits in a signed 32-bit int
3517 operand immL32() %{
3518   predicate(n->get_long() == (int)(n->get_long()));
3519   match(ConL);
3520   op_cost(20);
3521 
3522   format %{ %}
3523   interface(CONST_INTER);
3524 %}
3525 
3526 // Double Immediate zero
3527 operand immDPR0() %{
3528   // Do additional (and counter-intuitive) test against NaN to work around VC++
3529   // bug that generates code such that NaNs compare equal to 0.0
3530   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3531   match(ConD);
3532 
3533   op_cost(5);
3534   format %{ %}
3535   interface(CONST_INTER);
3536 %}
3537 
3538 // Double Immediate one
3539 operand immDPR1() %{
3540   predicate( UseSSE<=1 && n->getd() == 1.0 );
3541   match(ConD);
3542 
3543   op_cost(5);
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Double Immediate
3549 operand immDPR() %{
3550   predicate(UseSSE<=1);
3551   match(ConD);
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 operand immD() %{
3559   predicate(UseSSE>=2);
3560   match(ConD);
3561 
3562   op_cost(5);
3563   format %{ %}
3564   interface(CONST_INTER);
3565 %}
3566 
3567 // Double Immediate zero
3568 operand immD0() %{
3569   // Do additional (and counter-intuitive) test against NaN to work around VC++
3570   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3571   // compare equal to -0.0.
3572   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3573   match(ConD);
3574 
3575   format %{ %}
3576   interface(CONST_INTER);
3577 %}
3578 
3579 // Float Immediate zero
3580 operand immFPR0() %{
3581   predicate(UseSSE == 0 && n->getf() == 0.0F);
3582   match(ConF);
3583 
3584   op_cost(5);
3585   format %{ %}
3586   interface(CONST_INTER);
3587 %}
3588 
3589 // Float Immediate one
3590 operand immFPR1() %{
3591   predicate(UseSSE == 0 && n->getf() == 1.0F);
3592   match(ConF);
3593 
3594   op_cost(5);
3595   format %{ %}
3596   interface(CONST_INTER);
3597 %}
3598 
3599 // Float Immediate
3600 operand immFPR() %{
3601   predicate( UseSSE == 0 );
3602   match(ConF);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Float Immediate
3610 operand immF() %{
3611   predicate(UseSSE >= 1);
3612   match(ConF);
3613 
3614   op_cost(5);
3615   format %{ %}
3616   interface(CONST_INTER);
3617 %}
3618 
3619 // Float Immediate zero.  Zero and not -0.0
3620 operand immF0() %{
3621   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3622   match(ConF);
3623 
3624   op_cost(5);
3625   format %{ %}
3626   interface(CONST_INTER);
3627 %}
3628 
3629 // Immediates for special shifts (sign extend)
3630 
3631 // Constants for increment
3632 operand immI_16() %{
3633   predicate( n->get_int() == 16 );
3634   match(ConI);
3635 
3636   format %{ %}
3637   interface(CONST_INTER);
3638 %}
3639 
3640 operand immI_24() %{
3641   predicate( n->get_int() == 24 );
3642   match(ConI);
3643 
3644   format %{ %}
3645   interface(CONST_INTER);
3646 %}
3647 
3648 // Constant for byte-wide masking
3649 operand immI_255() %{
3650   predicate( n->get_int() == 255 );
3651   match(ConI);
3652 
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Constant for short-wide masking
3658 operand immI_65535() %{
3659   predicate(n->get_int() == 65535);
3660   match(ConI);
3661 
3662   format %{ %}
3663   interface(CONST_INTER);
3664 %}
3665 
3666 // Register Operands
3667 // Integer Register
3668 operand rRegI() %{
3669   constraint(ALLOC_IN_RC(int_reg));
3670   match(RegI);
3671   match(xRegI);
3672   match(eAXRegI);
3673   match(eBXRegI);
3674   match(eCXRegI);
3675   match(eDXRegI);
3676   match(eDIRegI);
3677   match(eSIRegI);
3678 
3679   format %{ %}
3680   interface(REG_INTER);
3681 %}
3682 
3683 // Subset of Integer Register
3684 operand xRegI(rRegI reg) %{
3685   constraint(ALLOC_IN_RC(int_x_reg));
3686   match(reg);
3687   match(eAXRegI);
3688   match(eBXRegI);
3689   match(eCXRegI);
3690   match(eDXRegI);
3691 
3692   format %{ %}
3693   interface(REG_INTER);
3694 %}
3695 
3696 // Special Registers
3697 operand eAXRegI(xRegI reg) %{
3698   constraint(ALLOC_IN_RC(eax_reg));
3699   match(reg);
3700   match(rRegI);
3701 
3702   format %{ "EAX" %}
3703   interface(REG_INTER);
3704 %}
3705 
3706 // Special Registers
3707 operand eBXRegI(xRegI reg) %{
3708   constraint(ALLOC_IN_RC(ebx_reg));
3709   match(reg);
3710   match(rRegI);
3711 
3712   format %{ "EBX" %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 operand eCXRegI(xRegI reg) %{
3717   constraint(ALLOC_IN_RC(ecx_reg));
3718   match(reg);
3719   match(rRegI);
3720 
3721   format %{ "ECX" %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 operand eDXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(edx_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EDX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 operand eDIRegI(xRegI reg) %{
3735   constraint(ALLOC_IN_RC(edi_reg));
3736   match(reg);
3737   match(rRegI);
3738 
3739   format %{ "EDI" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 operand naxRegI() %{
3744   constraint(ALLOC_IN_RC(nax_reg));
3745   match(RegI);
3746   match(eCXRegI);
3747   match(eDXRegI);
3748   match(eSIRegI);
3749   match(eDIRegI);
3750 
3751   format %{ %}
3752   interface(REG_INTER);
3753 %}
3754 
3755 operand nadxRegI() %{
3756   constraint(ALLOC_IN_RC(nadx_reg));
3757   match(RegI);
3758   match(eBXRegI);
3759   match(eCXRegI);
3760   match(eSIRegI);
3761   match(eDIRegI);
3762 
3763   format %{ %}
3764   interface(REG_INTER);
3765 %}
3766 
3767 operand ncxRegI() %{
3768   constraint(ALLOC_IN_RC(ncx_reg));
3769   match(RegI);
3770   match(eAXRegI);
3771   match(eDXRegI);
3772   match(eSIRegI);
3773   match(eDIRegI);
3774 
3775   format %{ %}
3776   interface(REG_INTER);
3777 %}
3778 
3779 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3780 // //
3781 operand eSIRegI(xRegI reg) %{
3782    constraint(ALLOC_IN_RC(esi_reg));
3783    match(reg);
3784    match(rRegI);
3785 
3786    format %{ "ESI" %}
3787    interface(REG_INTER);
3788 %}
3789 
3790 // Pointer Register
3791 operand anyRegP() %{
3792   constraint(ALLOC_IN_RC(any_reg));
3793   match(RegP);
3794   match(eAXRegP);
3795   match(eBXRegP);
3796   match(eCXRegP);
3797   match(eDIRegP);
3798   match(eRegP);
3799 
3800   format %{ %}
3801   interface(REG_INTER);
3802 %}
3803 
3804 operand eRegP() %{
3805   constraint(ALLOC_IN_RC(int_reg));
3806   match(RegP);
3807   match(eAXRegP);
3808   match(eBXRegP);
3809   match(eCXRegP);
3810   match(eDIRegP);
3811 
3812   format %{ %}
3813   interface(REG_INTER);
3814 %}
3815 
3816 // On Windows 95, EBP is not safe to use for implicit null tests.
3817 operand eRegP_no_EBP() %{
3818   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3819   match(RegP);
3820   match(eAXRegP);
3821   match(eBXRegP);
3822   match(eCXRegP);
3823   match(eDIRegP);
3824 
3825   op_cost(100);
3826   format %{ %}
3827   interface(REG_INTER);
3828 %}
3829 
3830 operand naxRegP() %{
3831   constraint(ALLOC_IN_RC(nax_reg));
3832   match(RegP);
3833   match(eBXRegP);
3834   match(eDXRegP);
3835   match(eCXRegP);
3836   match(eSIRegP);
3837   match(eDIRegP);
3838 
3839   format %{ %}
3840   interface(REG_INTER);
3841 %}
3842 
3843 operand nabxRegP() %{
3844   constraint(ALLOC_IN_RC(nabx_reg));
3845   match(RegP);
3846   match(eCXRegP);
3847   match(eDXRegP);
3848   match(eSIRegP);
3849   match(eDIRegP);
3850 
3851   format %{ %}
3852   interface(REG_INTER);
3853 %}
3854 
3855 operand pRegP() %{
3856   constraint(ALLOC_IN_RC(p_reg));
3857   match(RegP);
3858   match(eBXRegP);
3859   match(eDXRegP);
3860   match(eSIRegP);
3861   match(eDIRegP);
3862 
3863   format %{ %}
3864   interface(REG_INTER);
3865 %}
3866 
3867 // Special Registers
3868 // Return a pointer value
3869 operand eAXRegP(eRegP reg) %{
3870   constraint(ALLOC_IN_RC(eax_reg));
3871   match(reg);
3872   format %{ "EAX" %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 // Used in AtomicAdd
3877 operand eBXRegP(eRegP reg) %{
3878   constraint(ALLOC_IN_RC(ebx_reg));
3879   match(reg);
3880   format %{ "EBX" %}
3881   interface(REG_INTER);
3882 %}
3883 
3884 // Tail-call (interprocedural jump) to interpreter
3885 operand eCXRegP(eRegP reg) %{
3886   constraint(ALLOC_IN_RC(ecx_reg));
3887   match(reg);
3888   format %{ "ECX" %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 operand eSIRegP(eRegP reg) %{
3893   constraint(ALLOC_IN_RC(esi_reg));
3894   match(reg);
3895   format %{ "ESI" %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 // Used in rep stosw
3900 operand eDIRegP(eRegP reg) %{
3901   constraint(ALLOC_IN_RC(edi_reg));
3902   match(reg);
3903   format %{ "EDI" %}
3904   interface(REG_INTER);
3905 %}
3906 
3907 operand eRegL() %{
3908   constraint(ALLOC_IN_RC(long_reg));
3909   match(RegL);
3910   match(eADXRegL);
3911 
3912   format %{ %}
3913   interface(REG_INTER);
3914 %}
3915 
3916 operand eADXRegL( eRegL reg ) %{
3917   constraint(ALLOC_IN_RC(eadx_reg));
3918   match(reg);
3919 
3920   format %{ "EDX:EAX" %}
3921   interface(REG_INTER);
3922 %}
3923 
3924 operand eBCXRegL( eRegL reg ) %{
3925   constraint(ALLOC_IN_RC(ebcx_reg));
3926   match(reg);
3927 
3928   format %{ "EBX:ECX" %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 // Special case for integer high multiply
3933 operand eADXRegL_low_only() %{
3934   constraint(ALLOC_IN_RC(eadx_reg));
3935   match(RegL);
3936 
3937   format %{ "EAX" %}
3938   interface(REG_INTER);
3939 %}
3940 
3941 // Flags register, used as output of compare instructions
3942 operand eFlagsReg() %{
3943   constraint(ALLOC_IN_RC(int_flags));
3944   match(RegFlags);
3945 
3946   format %{ "EFLAGS" %}
3947   interface(REG_INTER);
3948 %}
3949 
3950 // Flags register, used as output of FLOATING POINT compare instructions
3951 operand eFlagsRegU() %{
3952   constraint(ALLOC_IN_RC(int_flags));
3953   match(RegFlags);
3954 
3955   format %{ "EFLAGS_U" %}
3956   interface(REG_INTER);
3957 %}
3958 
3959 operand eFlagsRegUCF() %{
3960   constraint(ALLOC_IN_RC(int_flags));
3961   match(RegFlags);
3962   predicate(false);
3963 
3964   format %{ "EFLAGS_U_CF" %}
3965   interface(REG_INTER);
3966 %}
3967 
3968 // Condition Code Register used by long compare
3969 operand flagsReg_long_LTGE() %{
3970   constraint(ALLOC_IN_RC(int_flags));
3971   match(RegFlags);
3972   format %{ "FLAGS_LTGE" %}
3973   interface(REG_INTER);
3974 %}
3975 operand flagsReg_long_EQNE() %{
3976   constraint(ALLOC_IN_RC(int_flags));
3977   match(RegFlags);
3978   format %{ "FLAGS_EQNE" %}
3979   interface(REG_INTER);
3980 %}
3981 operand flagsReg_long_LEGT() %{
3982   constraint(ALLOC_IN_RC(int_flags));
3983   match(RegFlags);
3984   format %{ "FLAGS_LEGT" %}
3985   interface(REG_INTER);
3986 %}
3987 
3988 // Float register operands
3989 operand regDPR() %{
3990   predicate( UseSSE < 2 );
3991   constraint(ALLOC_IN_RC(fp_dbl_reg));
3992   match(RegD);
3993   match(regDPR1);
3994   match(regDPR2);
3995   format %{ %}
3996   interface(REG_INTER);
3997 %}
3998 
3999 operand regDPR1(regDPR reg) %{
4000   predicate( UseSSE < 2 );
4001   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4002   match(reg);
4003   format %{ "FPR1" %}
4004   interface(REG_INTER);
4005 %}
4006 
4007 operand regDPR2(regDPR reg) %{
4008   predicate( UseSSE < 2 );
4009   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4010   match(reg);
4011   format %{ "FPR2" %}
4012   interface(REG_INTER);
4013 %}
4014 
4015 operand regnotDPR1(regDPR reg) %{
4016   predicate( UseSSE < 2 );
4017   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4018   match(reg);
4019   format %{ %}
4020   interface(REG_INTER);
4021 %}
4022 
4023 // Float register operands
4024 operand regFPR() %{
4025   predicate( UseSSE < 2 );
4026   constraint(ALLOC_IN_RC(fp_flt_reg));
4027   match(RegF);
4028   match(regFPR1);
4029   format %{ %}
4030   interface(REG_INTER);
4031 %}
4032 
4033 // Float register operands
4034 operand regFPR1(regFPR reg) %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_flt_reg0));
4037   match(reg);
4038   format %{ "FPR1" %}
4039   interface(REG_INTER);
4040 %}
4041 
4042 // XMM Float register operands
4043 operand regF() %{
4044   predicate( UseSSE>=1 );
4045   constraint(ALLOC_IN_RC(float_reg_legacy));
4046   match(RegF);
4047   format %{ %}
4048   interface(REG_INTER);
4049 %}
4050 
4051 // XMM Double register operands
4052 operand regD() %{
4053   predicate( UseSSE>=2 );
4054   constraint(ALLOC_IN_RC(double_reg_legacy));
4055   match(RegD);
4056   format %{ %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 // Vectors: note that we use legacy registers to avoid the extra (unneeded in the
4061 // 32-bit VM) runtime code generation that reg_class_dynamic would require.
4062 operand vecS() %{
4063   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4064   match(VecS);
4065 
4066   format %{ %}
4067   interface(REG_INTER);
4068 %}
4069 
4070 operand vecD() %{
4071   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4072   match(VecD);
4073 
4074   format %{ %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 operand vecX() %{
4079   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4080   match(VecX);
4081 
4082   format %{ %}
4083   interface(REG_INTER);
4084 %}
4085 
4086 operand vecY() %{
4087   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4088   match(VecY);
4089 
4090   format %{ %}
4091   interface(REG_INTER);
4092 %}
4093 
4094 //----------Memory Operands----------------------------------------------------
4095 // Direct Memory Operand
4096 operand direct(immP addr) %{
4097   match(addr);
4098 
4099   format %{ "[$addr]" %}
4100   interface(MEMORY_INTER) %{
4101     base(0xFFFFFFFF);
4102     index(0x4);
4103     scale(0x0);
4104     disp($addr);
4105   %}
4106 %}
4107 
4108 // Indirect Memory Operand
4109 operand indirect(eRegP reg) %{
4110   constraint(ALLOC_IN_RC(int_reg));
4111   match(reg);
4112 
4113   format %{ "[$reg]" %}
4114   interface(MEMORY_INTER) %{
4115     base($reg);
4116     index(0x4);
4117     scale(0x0);
4118     disp(0x0);
4119   %}
4120 %}
4121 
4122 // Indirect Memory Plus Short Offset Operand
4123 operand indOffset8(eRegP reg, immI8 off) %{
4124   match(AddP reg off);
4125 
4126   format %{ "[$reg + $off]" %}
4127   interface(MEMORY_INTER) %{
4128     base($reg);
4129     index(0x4);
4130     scale(0x0);
4131     disp($off);
4132   %}
4133 %}
4134 
4135 // Indirect Memory Plus Long Offset Operand
4136 operand indOffset32(eRegP reg, immI off) %{
4137   match(AddP reg off);
4138 
4139   format %{ "[$reg + $off]" %}
4140   interface(MEMORY_INTER) %{
4141     base($reg);
4142     index(0x4);
4143     scale(0x0);
4144     disp($off);
4145   %}
4146 %}
4147 
4148 // Indirect Memory Plus Long Offset Operand
4149 operand indOffset32X(rRegI reg, immP off) %{
4150   match(AddP off reg);
4151 
4152   format %{ "[$reg + $off]" %}
4153   interface(MEMORY_INTER) %{
4154     base($reg);
4155     index(0x4);
4156     scale(0x0);
4157     disp($off);
4158   %}
4159 %}
4160 
4161 // Indirect Memory Plus Index Register Plus Offset Operand
4162 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4163   match(AddP (AddP reg ireg) off);
4164 
4165   op_cost(10);
4166   format %{"[$reg + $off + $ireg]" %}
4167   interface(MEMORY_INTER) %{
4168     base($reg);
4169     index($ireg);
4170     scale(0x0);
4171     disp($off);
4172   %}
4173 %}
4174 
4175 // Indirect Memory Plus Index Register Plus Offset Operand
4176 operand indIndex(eRegP reg, rRegI ireg) %{
4177   match(AddP reg ireg);
4178 
4179   op_cost(10);
4180   format %{"[$reg + $ireg]" %}
4181   interface(MEMORY_INTER) %{
4182     base($reg);
4183     index($ireg);
4184     scale(0x0);
4185     disp(0x0);
4186   %}
4187 %}
4188 
4189 // // -------------------------------------------------------------------------
4190 // // 486 architecture doesn't support "scale * index + offset" without a base
4191 // // -------------------------------------------------------------------------
4192 // // Scaled Memory Operands
4193 // // Indirect Memory Times Scale Plus Offset Operand
4194 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4195 //   match(AddP off (LShiftI ireg scale));
4196 //
4197 //   op_cost(10);
4198 //   format %{"[$off + $ireg << $scale]" %}
4199 //   interface(MEMORY_INTER) %{
4200 //     base(0x4);
4201 //     index($ireg);
4202 //     scale($scale);
4203 //     disp($off);
4204 //   %}
4205 // %}
4206 
4207 // Indirect Memory Times Scale Plus Index Register
4208 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4209   match(AddP reg (LShiftI ireg scale));
4210 
4211   op_cost(10);
4212   format %{"[$reg + $ireg << $scale]" %}
4213   interface(MEMORY_INTER) %{
4214     base($reg);
4215     index($ireg);
4216     scale($scale);
4217     disp(0x0);
4218   %}
4219 %}
4220 
4221 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4222 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4223   match(AddP (AddP reg (LShiftI ireg scale)) off);
4224 
4225   op_cost(10);
4226   format %{"[$reg + $off + $ireg << $scale]" %}
4227   interface(MEMORY_INTER) %{
4228     base($reg);
4229     index($ireg);
4230     scale($scale);
4231     disp($off);
4232   %}
4233 %}
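
// Illustrative example (not an operand definition): with $reg = ESI, $ireg = ECX,
// $scale = 2 and $off = 16, the operand above denotes the address ESI + (ECX << 2) + 16,
// e.g. a dword element selected by an index register plus a constant offset.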
4234 
4235 //----------Load Long Memory Operands------------------------------------------
4236 // The load-long idiom will use its address expression again after loading
4237 // the first word of the long.  If the load-long destination overlaps with
4238 // registers used in the addressing expression, the 2nd half will be loaded
4239 // from a clobbered address.  Fix this by requiring that load-long use
4240 // address registers that do not overlap with the load-long target.
4241 
4242 // load-long support
4243 operand load_long_RegP() %{
4244   constraint(ALLOC_IN_RC(esi_reg));
4245   match(RegP);
4246   match(eSIRegP);
4247   op_cost(100);
4248   format %{  %}
4249   interface(REG_INTER);
4250 %}
4251 
4252 // Indirect Memory Operand Long
4253 operand load_long_indirect(load_long_RegP reg) %{
4254   constraint(ALLOC_IN_RC(esi_reg));
4255   match(reg);
4256 
4257   format %{ "[$reg]" %}
4258   interface(MEMORY_INTER) %{
4259     base($reg);
4260     index(0x4);
4261     scale(0x0);
4262     disp(0x0);
4263   %}
4264 %}
4265 
4266 // Indirect Memory Plus Long Offset Operand
4267 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4268   match(AddP reg off);
4269 
4270   format %{ "[$reg + $off]" %}
4271   interface(MEMORY_INTER) %{
4272     base($reg);
4273     index(0x4);
4274     scale(0x0);
4275     disp($off);
4276   %}
4277 %}
4278 
4279 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4280 
4281 
4282 //----------Special Memory Operands--------------------------------------------
4283 // Stack Slot Operand - This operand is used for loading and storing temporary
4284 //                      values on the stack where a match requires a value to
4285 //                      flow through memory.
4286 operand stackSlotP(sRegP reg) %{
4287   constraint(ALLOC_IN_RC(stack_slots));
4288   // No match rule because this operand is only generated in matching
4289   format %{ "[$reg]" %}
4290   interface(MEMORY_INTER) %{
4291     base(0x4);   // ESP
4292     index(0x4);  // No Index
4293     scale(0x0);  // No Scale
4294     disp($reg);  // Stack Offset
4295   %}
4296 %}
4297 
4298 operand stackSlotI(sRegI reg) %{
4299   constraint(ALLOC_IN_RC(stack_slots));
4300   // No match rule because this operand is only generated in matching
4301   format %{ "[$reg]" %}
4302   interface(MEMORY_INTER) %{
4303     base(0x4);   // ESP
4304     index(0x4);  // No Index
4305     scale(0x0);  // No Scale
4306     disp($reg);  // Stack Offset
4307   %}
4308 %}
4309 
4310 operand stackSlotF(sRegF reg) %{
4311   constraint(ALLOC_IN_RC(stack_slots));
4312   // No match rule because this operand is only generated in matching
4313   format %{ "[$reg]" %}
4314   interface(MEMORY_INTER) %{
4315     base(0x4);   // ESP
4316     index(0x4);  // No Index
4317     scale(0x0);  // No Scale
4318     disp($reg);  // Stack Offset
4319   %}
4320 %}
4321 
4322 operand stackSlotD(sRegD reg) %{
4323   constraint(ALLOC_IN_RC(stack_slots));
4324   // No match rule because this operand is only generated in matching
4325   format %{ "[$reg]" %}
4326   interface(MEMORY_INTER) %{
4327     base(0x4);   // ESP
4328     index(0x4);  // No Index
4329     scale(0x0);  // No Scale
4330     disp($reg);  // Stack Offset
4331   %}
4332 %}
4333 
4334 operand stackSlotL(sRegL reg) %{
4335   constraint(ALLOC_IN_RC(stack_slots));
4336   // No match rule because this operand is only generated in matching
4337   format %{ "[$reg]" %}
4338   interface(MEMORY_INTER) %{
4339     base(0x4);   // ESP
4340     index(0x4);  // No Index
4341     scale(0x0);  // No Scale
4342     disp($reg);  // Stack Offset
4343   %}
4344 %}
4345 
4346 //----------Memory Operands - Win95 Implicit Null Variants----------------
4347 // Indirect Memory Operand
4348 operand indirect_win95_safe(eRegP_no_EBP reg)
4349 %{
4350   constraint(ALLOC_IN_RC(int_reg));
4351   match(reg);
4352 
4353   op_cost(100);
4354   format %{ "[$reg]" %}
4355   interface(MEMORY_INTER) %{
4356     base($reg);
4357     index(0x4);
4358     scale(0x0);
4359     disp(0x0);
4360   %}
4361 %}
4362 
4363 // Indirect Memory Plus Short Offset Operand
4364 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4365 %{
4366   match(AddP reg off);
4367 
4368   op_cost(100);
4369   format %{ "[$reg + $off]" %}
4370   interface(MEMORY_INTER) %{
4371     base($reg);
4372     index(0x4);
4373     scale(0x0);
4374     disp($off);
4375   %}
4376 %}
4377 
4378 // Indirect Memory Plus Long Offset Operand
4379 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4380 %{
4381   match(AddP reg off);
4382 
4383   op_cost(100);
4384   format %{ "[$reg + $off]" %}
4385   interface(MEMORY_INTER) %{
4386     base($reg);
4387     index(0x4);
4388     scale(0x0);
4389     disp($off);
4390   %}
4391 %}
4392 
4393 // Indirect Memory Plus Index Register Plus Offset Operand
4394 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4395 %{
4396   match(AddP (AddP reg ireg) off);
4397 
4398   op_cost(100);
4399   format %{"[$reg + $off + $ireg]" %}
4400   interface(MEMORY_INTER) %{
4401     base($reg);
4402     index($ireg);
4403     scale(0x0);
4404     disp($off);
4405   %}
4406 %}
4407 
4408 // Indirect Memory Times Scale Plus Index Register
4409 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4410 %{
4411   match(AddP reg (LShiftI ireg scale));
4412 
4413   op_cost(100);
4414   format %{"[$reg + $ireg << $scale]" %}
4415   interface(MEMORY_INTER) %{
4416     base($reg);
4417     index($ireg);
4418     scale($scale);
4419     disp(0x0);
4420   %}
4421 %}
4422 
4423 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4424 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4425 %{
4426   match(AddP (AddP reg (LShiftI ireg scale)) off);
4427 
4428   op_cost(100);
4429   format %{"[$reg + $off + $ireg << $scale]" %}
4430   interface(MEMORY_INTER) %{
4431     base($reg);
4432     index($ireg);
4433     scale($scale);
4434     disp($off);
4435   %}
4436 %}
4437 
4438 //----------Conditional Branch Operands----------------------------------------
4439 // Comparison Op  - This is the operation of the comparison, and is limited to
4440 //                  the following set of codes:
4441 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4442 //
4443 // Other attributes of the comparison, such as unsignedness, are specified
4444 // by the comparison instruction that sets a condition code flags register.
4445 // That result is represented by a flags operand whose subtype is appropriate
4446 // to the unsignedness (etc.) of the comparison.
4447 //
4448 // Later, the instruction which matches both the Comparison Op (a Bool) and
4449 // the flags (produced by the Cmp) specifies the coding of the comparison op
4450 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4451 
4452 // Comparison Code
4453 operand cmpOp() %{
4454   match(Bool);
4455 
4456   format %{ "" %}
4457   interface(COND_INTER) %{
4458     equal(0x4, "e");
4459     not_equal(0x5, "ne");
4460     less(0xC, "l");
4461     greater_equal(0xD, "ge");
4462     less_equal(0xE, "le");
4463     greater(0xF, "g");
4464     overflow(0x0, "o");
4465     no_overflow(0x1, "no");
4466   %}
4467 %}
4468 
4469 // Comparison Code, unsigned compare.  Used by FP also, with
4470 // C2 (unordered) turned into GT or LT already.  The other bits
4471 // C0 and C3 are turned into Carry & Zero flags.
4472 operand cmpOpU() %{
4473   match(Bool);
4474 
4475   format %{ "" %}
4476   interface(COND_INTER) %{
4477     equal(0x4, "e");
4478     not_equal(0x5, "ne");
4479     less(0x2, "b");
4480     greater_equal(0x3, "nb");
4481     less_equal(0x6, "be");
4482     greater(0x7, "nbe");
4483     overflow(0x0, "o");
4484     no_overflow(0x1, "no");
4485   %}
4486 %}
4487 
4488 // Floating comparisons that don't require any fixup for the unordered case
4489 operand cmpOpUCF() %{
4490   match(Bool);
4491   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4492             n->as_Bool()->_test._test == BoolTest::ge ||
4493             n->as_Bool()->_test._test == BoolTest::le ||
4494             n->as_Bool()->_test._test == BoolTest::gt);
4495   format %{ "" %}
4496   interface(COND_INTER) %{
4497     equal(0x4, "e");
4498     not_equal(0x5, "ne");
4499     less(0x2, "b");
4500     greater_equal(0x3, "nb");
4501     less_equal(0x6, "be");
4502     greater(0x7, "nbe");
4503     overflow(0x0, "o");
4504     no_overflow(0x1, "no");
4505   %}
4506 %}
4507 
4508 
4509 // Floating comparisons that can be fixed up with extra conditional jumps
4510 operand cmpOpUCF2() %{
4511   match(Bool);
4512   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4513             n->as_Bool()->_test._test == BoolTest::eq);
4514   format %{ "" %}
4515   interface(COND_INTER) %{
4516     equal(0x4, "e");
4517     not_equal(0x5, "ne");
4518     less(0x2, "b");
4519     greater_equal(0x3, "nb");
4520     less_equal(0x6, "be");
4521     greater(0x7, "nbe");
4522     overflow(0x0, "o");
4523     no_overflow(0x1, "no");
4524   %}
4525 %}
4526 
4527 // Comparison Code for FP conditional move
4528 operand cmpOp_fcmov() %{
4529   match(Bool);
4530 
4531   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4532             n->as_Bool()->_test._test != BoolTest::no_overflow);
4533   format %{ "" %}
4534   interface(COND_INTER) %{
4535     equal        (0x0C8);
4536     not_equal    (0x1C8);
4537     less         (0x0C0);
4538     greater_equal(0x1C0);
4539     less_equal   (0x0D0);
4540     greater      (0x1D0);
4541     overflow(0x0, "o"); // not really supported by the instruction
4542     no_overflow(0x1, "no"); // not really supported by the instruction
4543   %}
4544 %}
4545 
// Comparison Code used in long compares
4547 operand cmpOp_commute() %{
4548   match(Bool);
4549 
4550   format %{ "" %}
4551   interface(COND_INTER) %{
4552     equal(0x4, "e");
4553     not_equal(0x5, "ne");
4554     less(0xF, "g");
4555     greater_equal(0xE, "le");
4556     less_equal(0xD, "ge");
4557     greater(0xC, "l");
4558     overflow(0x0, "o");
4559     no_overflow(0x1, "no");
4560   %}
4561 %}
4562 
4563 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4565 // instruction definitions by not requiring the AD writer to specify separate
4566 // instructions for every form of operand when the instruction accepts
4567 // multiple operand types with the same basic encoding and format.  The classic
4568 // case of this is memory operands.
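//
// As an illustrative example of the mechanism (the loadI instruct further
// below uses exactly this shape), one definition written against the
// 'memory' class,
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// matches a load through any of the addressing-mode operands grouped into
// 'memory' here, instead of requiring one instruct per addressing form.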
4569 
4570 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4571                indIndex, indIndexScale, indIndexScaleOffset);
4572 
// Long memory operations are encoded as two instructions, the second using a
// +4 offset for the high word.  This means some kind of offset is always
// required, and you cannot use an oop as the offset (as is done when working
// on static globals).
4576 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4577                     indIndex, indIndexScale, indIndexScaleOffset);
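
// A minimal sketch of how that +4 offset shows up in practice (this mirrors
// the loadL encoding later in this file): the low and high words of the long
// are addressed separately as $disp and $disp + 4,
//
//   Address lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
//   Address hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
//
// which only works when the displacement is a plain integer offset.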
4578 
4579 
4580 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4582 pipeline %{
4583 
4584 //----------ATTRIBUTES---------------------------------------------------------
4585 attributes %{
  variable_size_instructions;        // Variable-size instructions
4587   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction sizes are measured in units of 1 byte
4589   instruction_fetch_unit_size = 16;  // The processor fetches one line
4590   instruction_fetch_units = 1;       // of 16 bytes
4591 
4592   // List of nop instructions
4593   nops( MachNop );
4594 %}
4595 
4596 //----------RESOURCES----------------------------------------------------------
4597 // Resources are the functional units available to the machine
4598 
4599 // Generic P2/P3 pipeline
4600 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4601 // 3 instructions decoded per cycle.
4602 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops per cycle; only ALU0 handles mul/div instructions.
4604 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4605            MS0, MS1, MEM = MS0 | MS1,
4606            BR, FPU,
4607            ALU0, ALU1, ALU = ALU0 | ALU1 );
4608 
4609 //----------PIPELINE DESCRIPTION-----------------------------------------------
4610 // Pipeline Description specifies the stages in the machine's pipeline
4611 
4612 // Generic P2/P3 pipeline
4613 pipe_desc(S0, S1, S2, S3, S4, S5);
4614 
4615 //----------PIPELINE CLASSES---------------------------------------------------
4616 // Pipeline Classes describe the stages in which input and output are
4617 // referenced by the hardware pipeline.
4618 
4619 // Naming convention: ialu or fpu
4620 // Then: _reg
4621 // Then: _reg if there is a 2nd register
4622 // Then: _long if it's a pair of instructions implementing a long
4623 // Then: _fat if it requires the big decoder
4624 //   Or: _mem if it requires the big decoder and a memory unit.
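//
// For example, reading a name by this convention: ialu_reg_mem is an
// integer ALU operation whose destination is a register and whose source is
// memory, so it needs the big decoder (D0) and a memory unit, while
// fpu_reg_reg_reg is an FPU operation over three registers.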
4625 
4626 // Integer ALU reg operation
4627 pipe_class ialu_reg(rRegI dst) %{
4628     single_instruction;
4629     dst    : S4(write);
4630     dst    : S3(read);
4631     DECODE : S0;        // any decoder
4632     ALU    : S3;        // any alu
4633 %}
4634 
4635 // Long ALU reg operation
4636 pipe_class ialu_reg_long(eRegL dst) %{
4637     instruction_count(2);
4638     dst    : S4(write);
4639     dst    : S3(read);
4640     DECODE : S0(2);     // any 2 decoders
4641     ALU    : S3(2);     // both alus
4642 %}
4643 
4644 // Integer ALU reg operation using big decoder
4645 pipe_class ialu_reg_fat(rRegI dst) %{
4646     single_instruction;
4647     dst    : S4(write);
4648     dst    : S3(read);
4649     D0     : S0;        // big decoder only
4650     ALU    : S3;        // any alu
4651 %}
4652 
4653 // Long ALU reg operation using big decoder
4654 pipe_class ialu_reg_long_fat(eRegL dst) %{
4655     instruction_count(2);
4656     dst    : S4(write);
4657     dst    : S3(read);
4658     D0     : S0(2);     // big decoder only; twice
4659     ALU    : S3(2);     // any 2 alus
4660 %}
4661 
4662 // Integer ALU reg-reg operation
4663 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4664     single_instruction;
4665     dst    : S4(write);
4666     src    : S3(read);
4667     DECODE : S0;        // any decoder
4668     ALU    : S3;        // any alu
4669 %}
4670 
4671 // Long ALU reg-reg operation
4672 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4673     instruction_count(2);
4674     dst    : S4(write);
4675     src    : S3(read);
4676     DECODE : S0(2);     // any 2 decoders
4677     ALU    : S3(2);     // both alus
4678 %}
4679 
// Integer ALU reg-reg operation using big decoder
4681 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4682     single_instruction;
4683     dst    : S4(write);
4684     src    : S3(read);
4685     D0     : S0;        // big decoder only
4686     ALU    : S3;        // any alu
4687 %}
4688 
// Long ALU reg-reg operation using big decoder
4690 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4691     instruction_count(2);
4692     dst    : S4(write);
4693     src    : S3(read);
4694     D0     : S0(2);     // big decoder only; twice
4695     ALU    : S3(2);     // both alus
4696 %}
4697 
4698 // Integer ALU reg-mem operation
4699 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4700     single_instruction;
4701     dst    : S5(write);
4702     mem    : S3(read);
4703     D0     : S0;        // big decoder only
4704     ALU    : S4;        // any alu
4705     MEM    : S3;        // any mem
4706 %}
4707 
4708 // Long ALU reg-mem operation
4709 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4710     instruction_count(2);
4711     dst    : S5(write);
4712     mem    : S3(read);
4713     D0     : S0(2);     // big decoder only; twice
4714     ALU    : S4(2);     // any 2 alus
4715     MEM    : S3(2);     // both mems
4716 %}
4717 
4718 // Integer mem operation (prefetch)
4719 pipe_class ialu_mem(memory mem)
4720 %{
4721     single_instruction;
4722     mem    : S3(read);
4723     D0     : S0;        // big decoder only
4724     MEM    : S3;        // any mem
4725 %}
4726 
4727 // Integer Store to Memory
4728 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4729     single_instruction;
4730     mem    : S3(read);
4731     src    : S5(read);
4732     D0     : S0;        // big decoder only
4733     ALU    : S4;        // any alu
4734     MEM    : S3;
4735 %}
4736 
4737 // Long Store to Memory
4738 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4739     instruction_count(2);
4740     mem    : S3(read);
4741     src    : S5(read);
4742     D0     : S0(2);     // big decoder only; twice
4743     ALU    : S4(2);     // any 2 alus
4744     MEM    : S3(2);     // Both mems
4745 %}
4746 
// Integer Store of Immediate to Memory
4748 pipe_class ialu_mem_imm(memory mem) %{
4749     single_instruction;
4750     mem    : S3(read);
4751     D0     : S0;        // big decoder only
4752     ALU    : S4;        // any alu
4753     MEM    : S3;
4754 %}
4755 
4756 // Integer ALU0 reg-reg operation
4757 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4758     single_instruction;
4759     dst    : S4(write);
4760     src    : S3(read);
4761     D0     : S0;        // Big decoder only
4762     ALU0   : S3;        // only alu0
4763 %}
4764 
4765 // Integer ALU0 reg-mem operation
4766 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4767     single_instruction;
4768     dst    : S5(write);
4769     mem    : S3(read);
4770     D0     : S0;        // big decoder only
4771     ALU0   : S4;        // ALU0 only
4772     MEM    : S3;        // any mem
4773 %}
4774 
4775 // Integer ALU reg-reg operation
4776 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4777     single_instruction;
4778     cr     : S4(write);
4779     src1   : S3(read);
4780     src2   : S3(read);
4781     DECODE : S0;        // any decoder
4782     ALU    : S3;        // any alu
4783 %}
4784 
4785 // Integer ALU reg-imm operation
4786 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4787     single_instruction;
4788     cr     : S4(write);
4789     src1   : S3(read);
4790     DECODE : S0;        // any decoder
4791     ALU    : S3;        // any alu
4792 %}
4793 
4794 // Integer ALU reg-mem operation
4795 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4796     single_instruction;
4797     cr     : S4(write);
4798     src1   : S3(read);
4799     src2   : S3(read);
4800     D0     : S0;        // big decoder only
4801     ALU    : S4;        // any alu
4802     MEM    : S3;
4803 %}
4804 
4805 // Conditional move reg-reg
4806 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4807     instruction_count(4);
4808     y      : S4(read);
4809     q      : S3(read);
4810     p      : S3(read);
4811     DECODE : S0(4);     // any decoder
4812 %}
4813 
4814 // Conditional move reg-reg
4815 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4816     single_instruction;
4817     dst    : S4(write);
4818     src    : S3(read);
4819     cr     : S3(read);
4820     DECODE : S0;        // any decoder
4821 %}
4822 
4823 // Conditional move reg-mem
4824 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4825     single_instruction;
4826     dst    : S4(write);
4827     src    : S3(read);
4828     cr     : S3(read);
4829     DECODE : S0;        // any decoder
4830     MEM    : S3;
4831 %}
4832 
4833 // Conditional move reg-reg long
4834 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4835     single_instruction;
4836     dst    : S4(write);
4837     src    : S3(read);
4838     cr     : S3(read);
4839     DECODE : S0(2);     // any 2 decoders
4840 %}
4841 
4842 // Conditional move double reg-reg
4843 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4844     single_instruction;
4845     dst    : S4(write);
4846     src    : S3(read);
4847     cr     : S3(read);
4848     DECODE : S0;        // any decoder
4849 %}
4850 
4851 // Float reg-reg operation
4852 pipe_class fpu_reg(regDPR dst) %{
4853     instruction_count(2);
4854     dst    : S3(read);
4855     DECODE : S0(2);     // any 2 decoders
4856     FPU    : S3;
4857 %}
4858 
4859 // Float reg-reg operation
4860 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4861     instruction_count(2);
4862     dst    : S4(write);
4863     src    : S3(read);
4864     DECODE : S0(2);     // any 2 decoders
4865     FPU    : S3;
4866 %}
4867 
4868 // Float reg-reg operation
4869 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4870     instruction_count(3);
4871     dst    : S4(write);
4872     src1   : S3(read);
4873     src2   : S3(read);
4874     DECODE : S0(3);     // any 3 decoders
4875     FPU    : S3(2);
4876 %}
4877 
4878 // Float reg-reg operation
4879 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4880     instruction_count(4);
4881     dst    : S4(write);
4882     src1   : S3(read);
4883     src2   : S3(read);
4884     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoder slots
4886     FPU    : S3(2);
4887 %}
4888 
4889 // Float reg-reg operation
4890 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4891     instruction_count(4);
4892     dst    : S4(write);
4893     src1   : S3(read);
4894     src2   : S3(read);
4895     src3   : S3(read);
4896     DECODE : S1(3);     // any 3 decoders
4897     D0     : S0;        // Big decoder only
4898     FPU    : S3(2);
4899     MEM    : S3;
4900 %}
4901 
4902 // Float reg-mem operation
4903 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4904     instruction_count(2);
4905     dst    : S5(write);
4906     mem    : S3(read);
4907     D0     : S0;        // big decoder only
4908     DECODE : S1;        // any decoder for FPU POP
4909     FPU    : S4;
4910     MEM    : S3;        // any mem
4911 %}
4912 
4913 // Float reg-mem operation
4914 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4915     instruction_count(3);
4916     dst    : S5(write);
4917     src1   : S3(read);
4918     mem    : S3(read);
4919     D0     : S0;        // big decoder only
4920     DECODE : S1(2);     // any decoder for FPU POP
4921     FPU    : S4;
4922     MEM    : S3;        // any mem
4923 %}
4924 
4925 // Float mem-reg operation
4926 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4927     instruction_count(2);
4928     src    : S5(read);
4929     mem    : S3(read);
4930     DECODE : S0;        // any decoder for FPU PUSH
4931     D0     : S1;        // big decoder only
4932     FPU    : S4;
4933     MEM    : S3;        // any mem
4934 %}
4935 
4936 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4937     instruction_count(3);
4938     src1   : S3(read);
4939     src2   : S3(read);
4940     mem    : S3(read);
4941     DECODE : S0(2);     // any decoder for FPU PUSH
4942     D0     : S1;        // big decoder only
4943     FPU    : S4;
4944     MEM    : S3;        // any mem
4945 %}
4946 
4947 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4948     instruction_count(3);
4949     src1   : S3(read);
4950     src2   : S3(read);
4951     mem    : S4(read);
4952     DECODE : S0;        // any decoder for FPU PUSH
4953     D0     : S0(2);     // big decoder only
4954     FPU    : S4;
4955     MEM    : S3(2);     // any mem
4956 %}
4957 
4958 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4959     instruction_count(2);
4960     src1   : S3(read);
4961     dst    : S4(read);
4962     D0     : S0(2);     // big decoder only
4963     MEM    : S3(2);     // any mem
4964 %}
4965 
4966 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4967     instruction_count(3);
4968     src1   : S3(read);
4969     src2   : S3(read);
4970     dst    : S4(read);
4971     D0     : S0(3);     // big decoder only
4972     FPU    : S4;
4973     MEM    : S3(3);     // any mem
4974 %}
4975 
4976 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4977     instruction_count(3);
4978     src1   : S4(read);
4979     mem    : S4(read);
4980     DECODE : S0;        // any decoder for FPU PUSH
4981     D0     : S0(2);     // big decoder only
4982     FPU    : S4;
4983     MEM    : S3(2);     // any mem
4984 %}
4985 
4986 // Float load constant
4987 pipe_class fpu_reg_con(regDPR dst) %{
4988     instruction_count(2);
4989     dst    : S5(write);
4990     D0     : S0;        // big decoder only for the load
4991     DECODE : S1;        // any decoder for FPU POP
4992     FPU    : S4;
4993     MEM    : S3;        // any mem
4994 %}
4995 
4996 // Float load constant
4997 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4998     instruction_count(3);
4999     dst    : S5(write);
5000     src    : S3(read);
5001     D0     : S0;        // big decoder only for the load
5002     DECODE : S1(2);     // any decoder for FPU POP
5003     FPU    : S4;
5004     MEM    : S3;        // any mem
5005 %}
5006 
5007 // UnConditional branch
5008 pipe_class pipe_jmp( label labl ) %{
5009     single_instruction;
5010     BR   : S3;
5011 %}
5012 
5013 // Conditional branch
5014 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5015     single_instruction;
5016     cr    : S1(read);
5017     BR    : S3;
5018 %}
5019 
5020 // Allocation idiom
5021 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5022     instruction_count(1); force_serialization;
5023     fixed_latency(6);
5024     heap_ptr : S3(read);
5025     DECODE   : S0(3);
5026     D0       : S2;
5027     MEM      : S3;
5028     ALU      : S3(2);
5029     dst      : S5(write);
5030     BR       : S5;
5031 %}
5032 
5033 // Generic big/slow expanded idiom
5034 pipe_class pipe_slow(  ) %{
5035     instruction_count(10); multiple_bundles; force_serialization;
5036     fixed_latency(100);
5037     D0  : S0(2);
5038     MEM : S3(2);
5039 %}
5040 
5041 // The real do-nothing guy
5042 pipe_class empty( ) %{
5043     instruction_count(0);
5044 %}
5045 
5046 // Define the class for the Nop node
5047 define %{
5048    MachNop = empty;
5049 %}
5050 
5051 %}
5052 
5053 //----------INSTRUCTIONS-------------------------------------------------------
5054 //
5055 // match      -- States which machine-independent subtree may be replaced
5056 //               by this instruction.
5057 // ins_cost   -- The estimated cost of this instruction is used by instruction
5058 //               selection to identify a minimum cost tree of machine
5059 //               instructions that matches a tree of machine-independent
5060 //               instructions.
5061 // format     -- A string providing the disassembly for this instruction.
5062 //               The value of an instruction's operand may be inserted
5063 //               by referring to it with a '$' prefix.
5064 // opcode     -- Three instruction opcodes may be provided.  These are referred
5065 //               to within an encode class as $primary, $secondary, and $tertiary
5066 //               respectively.  The primary opcode is commonly used to
5067 //               indicate the type of machine instruction, while secondary
5068 //               and tertiary are often used for prefix options or addressing
5069 //               modes.
5070 // ins_encode -- A list of encode classes with parameters. The encode class
5071 //               name must have been defined in an 'enc_class' specification
5072 //               in the encode section of the architecture description.
5073 
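// As an illustrative sketch only (abbreviated, and not intended as an exact
// copy of any definition elsewhere in this file), these pieces combine
// roughly like this for a register-register add:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);
//     ins_encode( OpcP, RegReg(dst, src) );
//     ins_pipe( ialu_reg_reg );
//   %}
//
// Here 0x03 is the $primary opcode, and the encode classes OpcP and RegReg
// must already be defined in the encode section of this file.
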
5074 //----------BSWAP-Instruction--------------------------------------------------
5075 instruct bytes_reverse_int(rRegI dst) %{
5076   match(Set dst (ReverseBytesI dst));
5077 
5078   format %{ "BSWAP  $dst" %}
5079   opcode(0x0F, 0xC8);
5080   ins_encode( OpcP, OpcSReg(dst) );
5081   ins_pipe( ialu_reg );
5082 %}
5083 
5084 instruct bytes_reverse_long(eRegL dst) %{
5085   match(Set dst (ReverseBytesL dst));
5086 
5087   format %{ "BSWAP  $dst.lo\n\t"
5088             "BSWAP  $dst.hi\n\t"
5089             "XCHG   $dst.lo $dst.hi" %}
5090 
5091   ins_cost(125);
5092   ins_encode( bswap_long_bytes(dst) );
5093   ins_pipe( ialu_reg_reg);
5094 %}
5095 
5096 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5097   match(Set dst (ReverseBytesUS dst));
5098   effect(KILL cr);
5099 
5100   format %{ "BSWAP  $dst\n\t"
5101             "SHR    $dst,16\n\t" %}
5102   ins_encode %{
5103     __ bswapl($dst$$Register);
5104     __ shrl($dst$$Register, 16);
5105   %}
5106   ins_pipe( ialu_reg );
5107 %}
5108 
5109 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5110   match(Set dst (ReverseBytesS dst));
5111   effect(KILL cr);
5112 
5113   format %{ "BSWAP  $dst\n\t"
5114             "SAR    $dst,16\n\t" %}
5115   ins_encode %{
5116     __ bswapl($dst$$Register);
5117     __ sarl($dst$$Register, 16);
5118   %}
5119   ins_pipe( ialu_reg );
5120 %}
5121 
5122 
5123 //---------- Zeros Count Instructions ------------------------------------------
5124 
5125 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5126   predicate(UseCountLeadingZerosInstruction);
5127   match(Set dst (CountLeadingZerosI src));
5128   effect(KILL cr);
5129 
5130   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5131   ins_encode %{
5132     __ lzcntl($dst$$Register, $src$$Register);
5133   %}
5134   ins_pipe(ialu_reg);
5135 %}
5136 
5137 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5138   predicate(!UseCountLeadingZerosInstruction);
5139   match(Set dst (CountLeadingZerosI src));
5140   effect(KILL cr);
5141 
5142   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5143             "JNZ    skip\n\t"
5144             "MOV    $dst, -1\n"
5145       "skip:\n\t"
5146             "NEG    $dst\n\t"
5147             "ADD    $dst, 31" %}
5148   ins_encode %{
5149     Register Rdst = $dst$$Register;
5150     Register Rsrc = $src$$Register;
5151     Label skip;
5152     __ bsrl(Rdst, Rsrc);
5153     __ jccb(Assembler::notZero, skip);
5154     __ movl(Rdst, -1);
5155     __ bind(skip);
5156     __ negl(Rdst);
5157     __ addl(Rdst, BitsPerInt - 1);
5158   %}
5159   ins_pipe(ialu_reg);
5160 %}
5161 
5162 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5163   predicate(UseCountLeadingZerosInstruction);
5164   match(Set dst (CountLeadingZerosL src));
5165   effect(TEMP dst, KILL cr);
5166 
5167   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5168             "JNC    done\n\t"
5169             "LZCNT  $dst, $src.lo\n\t"
5170             "ADD    $dst, 32\n"
5171       "done:" %}
5172   ins_encode %{
5173     Register Rdst = $dst$$Register;
5174     Register Rsrc = $src$$Register;
5175     Label done;
5176     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5177     __ jccb(Assembler::carryClear, done);
5178     __ lzcntl(Rdst, Rsrc);
5179     __ addl(Rdst, BitsPerInt);
5180     __ bind(done);
5181   %}
5182   ins_pipe(ialu_reg);
5183 %}
5184 
5185 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5186   predicate(!UseCountLeadingZerosInstruction);
5187   match(Set dst (CountLeadingZerosL src));
5188   effect(TEMP dst, KILL cr);
5189 
5190   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5191             "JZ     msw_is_zero\n\t"
5192             "ADD    $dst, 32\n\t"
5193             "JMP    not_zero\n"
5194       "msw_is_zero:\n\t"
5195             "BSR    $dst, $src.lo\n\t"
5196             "JNZ    not_zero\n\t"
5197             "MOV    $dst, -1\n"
5198       "not_zero:\n\t"
5199             "NEG    $dst\n\t"
5200             "ADD    $dst, 63\n" %}
5201  ins_encode %{
5202     Register Rdst = $dst$$Register;
5203     Register Rsrc = $src$$Register;
5204     Label msw_is_zero;
5205     Label not_zero;
5206     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5207     __ jccb(Assembler::zero, msw_is_zero);
5208     __ addl(Rdst, BitsPerInt);
5209     __ jmpb(not_zero);
5210     __ bind(msw_is_zero);
5211     __ bsrl(Rdst, Rsrc);
5212     __ jccb(Assembler::notZero, not_zero);
5213     __ movl(Rdst, -1);
5214     __ bind(not_zero);
5215     __ negl(Rdst);
5216     __ addl(Rdst, BitsPerLong - 1);
5217   %}
5218   ins_pipe(ialu_reg);
5219 %}
5220 
5221 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5222   predicate(UseCountTrailingZerosInstruction);
5223   match(Set dst (CountTrailingZerosI src));
5224   effect(KILL cr);
5225 
5226   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5227   ins_encode %{
5228     __ tzcntl($dst$$Register, $src$$Register);
5229   %}
5230   ins_pipe(ialu_reg);
5231 %}
5232 
5233 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5234   predicate(!UseCountTrailingZerosInstruction);
5235   match(Set dst (CountTrailingZerosI src));
5236   effect(KILL cr);
5237 
5238   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5239             "JNZ    done\n\t"
5240             "MOV    $dst, 32\n"
5241       "done:" %}
5242   ins_encode %{
5243     Register Rdst = $dst$$Register;
5244     Label done;
5245     __ bsfl(Rdst, $src$$Register);
5246     __ jccb(Assembler::notZero, done);
5247     __ movl(Rdst, BitsPerInt);
5248     __ bind(done);
5249   %}
5250   ins_pipe(ialu_reg);
5251 %}
5252 
5253 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5254   predicate(UseCountTrailingZerosInstruction);
5255   match(Set dst (CountTrailingZerosL src));
5256   effect(TEMP dst, KILL cr);
5257 
5258   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5259             "JNC    done\n\t"
5260             "TZCNT  $dst, $src.hi\n\t"
5261             "ADD    $dst, 32\n"
5262             "done:" %}
5263   ins_encode %{
5264     Register Rdst = $dst$$Register;
5265     Register Rsrc = $src$$Register;
5266     Label done;
5267     __ tzcntl(Rdst, Rsrc);
5268     __ jccb(Assembler::carryClear, done);
5269     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5270     __ addl(Rdst, BitsPerInt);
5271     __ bind(done);
5272   %}
5273   ins_pipe(ialu_reg);
5274 %}
5275 
5276 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5277   predicate(!UseCountTrailingZerosInstruction);
5278   match(Set dst (CountTrailingZerosL src));
5279   effect(TEMP dst, KILL cr);
5280 
5281   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5282             "JNZ    done\n\t"
5283             "BSF    $dst, $src.hi\n\t"
5284             "JNZ    msw_not_zero\n\t"
5285             "MOV    $dst, 32\n"
5286       "msw_not_zero:\n\t"
5287             "ADD    $dst, 32\n"
5288       "done:" %}
5289   ins_encode %{
5290     Register Rdst = $dst$$Register;
5291     Register Rsrc = $src$$Register;
5292     Label msw_not_zero;
5293     Label done;
5294     __ bsfl(Rdst, Rsrc);
5295     __ jccb(Assembler::notZero, done);
5296     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5297     __ jccb(Assembler::notZero, msw_not_zero);
5298     __ movl(Rdst, BitsPerInt);
5299     __ bind(msw_not_zero);
5300     __ addl(Rdst, BitsPerInt);
5301     __ bind(done);
5302   %}
5303   ins_pipe(ialu_reg);
5304 %}
5305 
5306 
5307 //---------- Population Count Instructions -------------------------------------
5308 
5309 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5310   predicate(UsePopCountInstruction);
5311   match(Set dst (PopCountI src));
5312   effect(KILL cr);
5313 
5314   format %{ "POPCNT $dst, $src" %}
5315   ins_encode %{
5316     __ popcntl($dst$$Register, $src$$Register);
5317   %}
5318   ins_pipe(ialu_reg);
5319 %}
5320 
5321 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5322   predicate(UsePopCountInstruction);
5323   match(Set dst (PopCountI (LoadI mem)));
5324   effect(KILL cr);
5325 
5326   format %{ "POPCNT $dst, $mem" %}
5327   ins_encode %{
5328     __ popcntl($dst$$Register, $mem$$Address);
5329   %}
5330   ins_pipe(ialu_reg);
5331 %}
5332 
5333 // Note: Long.bitCount(long) returns an int.
5334 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5335   predicate(UsePopCountInstruction);
5336   match(Set dst (PopCountL src));
5337   effect(KILL cr, TEMP tmp, TEMP dst);
5338 
5339   format %{ "POPCNT $dst, $src.lo\n\t"
5340             "POPCNT $tmp, $src.hi\n\t"
5341             "ADD    $dst, $tmp" %}
5342   ins_encode %{
5343     __ popcntl($dst$$Register, $src$$Register);
5344     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5345     __ addl($dst$$Register, $tmp$$Register);
5346   %}
5347   ins_pipe(ialu_reg);
5348 %}
5349 
5350 // Note: Long.bitCount(long) returns an int.
5351 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5352   predicate(UsePopCountInstruction);
5353   match(Set dst (PopCountL (LoadL mem)));
5354   effect(KILL cr, TEMP tmp, TEMP dst);
5355 
5356   format %{ "POPCNT $dst, $mem\n\t"
5357             "POPCNT $tmp, $mem+4\n\t"
5358             "ADD    $dst, $tmp" %}
5359   ins_encode %{
5360     //__ popcntl($dst$$Register, $mem$$Address$$first);
5361     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5362     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5363     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5364     __ addl($dst$$Register, $tmp$$Register);
5365   %}
5366   ins_pipe(ialu_reg);
5367 %}
5368 
5369 
5370 //----------Load/Store/Move Instructions---------------------------------------
5371 //----------Load Instructions--------------------------------------------------
5372 // Load Byte (8bit signed)
5373 instruct loadB(xRegI dst, memory mem) %{
5374   match(Set dst (LoadB mem));
5375 
5376   ins_cost(125);
5377   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5378 
5379   ins_encode %{
5380     __ movsbl($dst$$Register, $mem$$Address);
5381   %}
5382 
5383   ins_pipe(ialu_reg_mem);
5384 %}
5385 
5386 // Load Byte (8bit signed) into Long Register
5387 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5388   match(Set dst (ConvI2L (LoadB mem)));
5389   effect(KILL cr);
5390 
5391   ins_cost(375);
5392   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5393             "MOV    $dst.hi,$dst.lo\n\t"
5394             "SAR    $dst.hi,7" %}
5395 
5396   ins_encode %{
5397     __ movsbl($dst$$Register, $mem$$Address);
5398     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The upper 25 bits are already sign-extended, so shifting by 7 still leaves only sign bits.
5400   %}
5401 
5402   ins_pipe(ialu_reg_mem);
5403 %}
5404 
5405 // Load Unsigned Byte (8bit UNsigned)
5406 instruct loadUB(xRegI dst, memory mem) %{
5407   match(Set dst (LoadUB mem));
5408 
5409   ins_cost(125);
5410   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5411 
5412   ins_encode %{
5413     __ movzbl($dst$$Register, $mem$$Address);
5414   %}
5415 
5416   ins_pipe(ialu_reg_mem);
5417 %}
5418 
5419 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5420 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5421   match(Set dst (ConvI2L (LoadUB mem)));
5422   effect(KILL cr);
5423 
5424   ins_cost(250);
5425   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5426             "XOR    $dst.hi,$dst.hi" %}
5427 
5428   ins_encode %{
5429     Register Rdst = $dst$$Register;
5430     __ movzbl(Rdst, $mem$$Address);
5431     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5432   %}
5433 
5434   ins_pipe(ialu_reg_mem);
5435 %}
5436 
5437 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5438 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5439   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5440   effect(KILL cr);
5441 
5442   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5443             "XOR    $dst.hi,$dst.hi\n\t"
5444             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5445   ins_encode %{
5446     Register Rdst = $dst$$Register;
5447     __ movzbl(Rdst, $mem$$Address);
5448     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5449     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5450   %}
5451   ins_pipe(ialu_reg_mem);
5452 %}
5453 
5454 // Load Short (16bit signed)
5455 instruct loadS(rRegI dst, memory mem) %{
5456   match(Set dst (LoadS mem));
5457 
5458   ins_cost(125);
5459   format %{ "MOVSX  $dst,$mem\t# short" %}
5460 
5461   ins_encode %{
5462     __ movswl($dst$$Register, $mem$$Address);
5463   %}
5464 
5465   ins_pipe(ialu_reg_mem);
5466 %}
5467 
5468 // Load Short (16 bit signed) to Byte (8 bit signed)
5469 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5470   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5471 
5472   ins_cost(125);
5473   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5474   ins_encode %{
5475     __ movsbl($dst$$Register, $mem$$Address);
5476   %}
5477   ins_pipe(ialu_reg_mem);
5478 %}
5479 
5480 // Load Short (16bit signed) into Long Register
5481 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5482   match(Set dst (ConvI2L (LoadS mem)));
5483   effect(KILL cr);
5484 
5485   ins_cost(375);
5486   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5487             "MOV    $dst.hi,$dst.lo\n\t"
5488             "SAR    $dst.hi,15" %}
5489 
5490   ins_encode %{
5491     __ movswl($dst$$Register, $mem$$Address);
5492     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The upper 17 bits are already sign-extended, so shifting by 15 still leaves only sign bits.
5494   %}
5495 
5496   ins_pipe(ialu_reg_mem);
5497 %}
5498 
5499 // Load Unsigned Short/Char (16bit unsigned)
5500 instruct loadUS(rRegI dst, memory mem) %{
5501   match(Set dst (LoadUS mem));
5502 
5503   ins_cost(125);
5504   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5505 
5506   ins_encode %{
5507     __ movzwl($dst$$Register, $mem$$Address);
5508   %}
5509 
5510   ins_pipe(ialu_reg_mem);
5511 %}
5512 
5513 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5514 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5515   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5516 
5517   ins_cost(125);
5518   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5519   ins_encode %{
5520     __ movsbl($dst$$Register, $mem$$Address);
5521   %}
5522   ins_pipe(ialu_reg_mem);
5523 %}
5524 
5525 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5526 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5527   match(Set dst (ConvI2L (LoadUS mem)));
5528   effect(KILL cr);
5529 
5530   ins_cost(250);
5531   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5532             "XOR    $dst.hi,$dst.hi" %}
5533 
5534   ins_encode %{
5535     __ movzwl($dst$$Register, $mem$$Address);
5536     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5537   %}
5538 
5539   ins_pipe(ialu_reg_mem);
5540 %}
5541 
5542 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5543 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5544   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5545   effect(KILL cr);
5546 
5547   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5548             "XOR    $dst.hi,$dst.hi" %}
5549   ins_encode %{
5550     Register Rdst = $dst$$Register;
5551     __ movzbl(Rdst, $mem$$Address);
5552     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5553   %}
5554   ins_pipe(ialu_reg_mem);
5555 %}
5556 
5557 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5558 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5559   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5560   effect(KILL cr);
5561 
5562   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5563             "XOR    $dst.hi,$dst.hi\n\t"
5564             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5565   ins_encode %{
5566     Register Rdst = $dst$$Register;
5567     __ movzwl(Rdst, $mem$$Address);
5568     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5569     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5570   %}
5571   ins_pipe(ialu_reg_mem);
5572 %}
5573 
5574 // Load Integer
5575 instruct loadI(rRegI dst, memory mem) %{
5576   match(Set dst (LoadI mem));
5577 
5578   ins_cost(125);
5579   format %{ "MOV    $dst,$mem\t# int" %}
5580 
5581   ins_encode %{
5582     __ movl($dst$$Register, $mem$$Address);
5583   %}
5584 
5585   ins_pipe(ialu_reg_mem);
5586 %}
5587 
5588 // Load Integer (32 bit signed) to Byte (8 bit signed)
5589 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5590   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5591 
5592   ins_cost(125);
5593   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5594   ins_encode %{
5595     __ movsbl($dst$$Register, $mem$$Address);
5596   %}
5597   ins_pipe(ialu_reg_mem);
5598 %}
5599 
5600 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5601 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5602   match(Set dst (AndI (LoadI mem) mask));
5603 
5604   ins_cost(125);
5605   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5606   ins_encode %{
5607     __ movzbl($dst$$Register, $mem$$Address);
5608   %}
5609   ins_pipe(ialu_reg_mem);
5610 %}
5611 
5612 // Load Integer (32 bit signed) to Short (16 bit signed)
5613 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5614   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5615 
5616   ins_cost(125);
5617   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5618   ins_encode %{
5619     __ movswl($dst$$Register, $mem$$Address);
5620   %}
5621   ins_pipe(ialu_reg_mem);
5622 %}
5623 
5624 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5625 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5626   match(Set dst (AndI (LoadI mem) mask));
5627 
5628   ins_cost(125);
5629   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5630   ins_encode %{
5631     __ movzwl($dst$$Register, $mem$$Address);
5632   %}
5633   ins_pipe(ialu_reg_mem);
5634 %}
5635 
5636 // Load Integer into Long Register
5637 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5638   match(Set dst (ConvI2L (LoadI mem)));
5639   effect(KILL cr);
5640 
5641   ins_cost(375);
5642   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5643             "MOV    $dst.hi,$dst.lo\n\t"
5644             "SAR    $dst.hi,31" %}
5645 
5646   ins_encode %{
5647     __ movl($dst$$Register, $mem$$Address);
5648     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5649     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5650   %}
5651 
5652   ins_pipe(ialu_reg_mem);
5653 %}
5654 
5655 // Load Integer with mask 0xFF into Long Register
5656 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5657   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5658   effect(KILL cr);
5659 
5660   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5661             "XOR    $dst.hi,$dst.hi" %}
5662   ins_encode %{
5663     Register Rdst = $dst$$Register;
5664     __ movzbl(Rdst, $mem$$Address);
5665     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5666   %}
5667   ins_pipe(ialu_reg_mem);
5668 %}
5669 
5670 // Load Integer with mask 0xFFFF into Long Register
5671 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5672   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5673   effect(KILL cr);
5674 
5675   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5676             "XOR    $dst.hi,$dst.hi" %}
5677   ins_encode %{
5678     Register Rdst = $dst$$Register;
5679     __ movzwl(Rdst, $mem$$Address);
5680     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5681   %}
5682   ins_pipe(ialu_reg_mem);
5683 %}
5684 
5685 // Load Integer with 31-bit mask into Long Register
5686 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5687   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5688   effect(KILL cr);
5689 
5690   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5691             "XOR    $dst.hi,$dst.hi\n\t"
5692             "AND    $dst.lo,$mask" %}
5693   ins_encode %{
5694     Register Rdst = $dst$$Register;
5695     __ movl(Rdst, $mem$$Address);
5696     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5697     __ andl(Rdst, $mask$$constant);
5698   %}
5699   ins_pipe(ialu_reg_mem);
5700 %}
5701 
5702 // Load Unsigned Integer into Long Register
5703 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5704   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5705   effect(KILL cr);
5706 
5707   ins_cost(250);
5708   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5709             "XOR    $dst.hi,$dst.hi" %}
5710 
5711   ins_encode %{
5712     __ movl($dst$$Register, $mem$$Address);
5713     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5714   %}
5715 
5716   ins_pipe(ialu_reg_mem);
5717 %}
5718 
5719 // Load Long.  Cannot clobber address while loading, so restrict address
5720 // register to ESI
5721 instruct loadL(eRegL dst, load_long_memory mem) %{
5722   predicate(!((LoadLNode*)n)->require_atomic_access());
5723   match(Set dst (LoadL mem));
5724 
5725   ins_cost(250);
5726   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5727             "MOV    $dst.hi,$mem+4" %}
5728 
5729   ins_encode %{
5730     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5731     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5732     __ movl($dst$$Register, Amemlo);
5733     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5734   %}
5735 
5736   ins_pipe(ialu_reg_long_mem);
5737 %}
5738 
5739 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5740 // then store it down to the stack and reload on the int
5741 // side.
5742 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5743   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5744   match(Set dst (LoadL mem));
5745 
5746   ins_cost(200);
5747   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5748             "FISTp  $dst" %}
5749   ins_encode(enc_loadL_volatile(mem,dst));
5750   ins_pipe( fpu_reg_mem );
5751 %}
5752 
5753 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5754   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5755   match(Set dst (LoadL mem));
5756   effect(TEMP tmp);
5757   ins_cost(180);
5758   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5759             "MOVSD  $dst,$tmp" %}
5760   ins_encode %{
5761     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5762     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5763   %}
5764   ins_pipe( pipe_slow );
5765 %}
5766 
5767 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5768   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5769   match(Set dst (LoadL mem));
5770   effect(TEMP tmp);
5771   ins_cost(160);
5772   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5773             "MOVD   $dst.lo,$tmp\n\t"
5774             "PSRLQ  $tmp,32\n\t"
5775             "MOVD   $dst.hi,$tmp" %}
5776   ins_encode %{
5777     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5778     __ movdl($dst$$Register, $tmp$$XMMRegister);
5779     __ psrlq($tmp$$XMMRegister, 32);
5780     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5781   %}
5782   ins_pipe( pipe_slow );
5783 %}
5784 
5785 // Load Range
5786 instruct loadRange(rRegI dst, memory mem) %{
5787   match(Set dst (LoadRange mem));
5788 
5789   ins_cost(125);
5790   format %{ "MOV    $dst,$mem" %}
5791   opcode(0x8B);
5792   ins_encode( OpcP, RegMem(dst,mem));
5793   ins_pipe( ialu_reg_mem );
5794 %}
5795 
5796 
5797 // Load Pointer
5798 instruct loadP(eRegP dst, memory mem) %{
5799   match(Set dst (LoadP mem));
5800 
5801   ins_cost(125);
5802   format %{ "MOV    $dst,$mem" %}
5803   opcode(0x8B);
5804   ins_encode( OpcP, RegMem(dst,mem));
5805   ins_pipe( ialu_reg_mem );
5806 %}
5807 
5808 // Load Klass Pointer
5809 instruct loadKlass(eRegP dst, memory mem) %{
5810   match(Set dst (LoadKlass mem));
5811 
5812   ins_cost(125);
5813   format %{ "MOV    $dst,$mem" %}
5814   opcode(0x8B);
5815   ins_encode( OpcP, RegMem(dst,mem));
5816   ins_pipe( ialu_reg_mem );
5817 %}
5818 
5819 // Load Double
5820 instruct loadDPR(regDPR dst, memory mem) %{
5821   predicate(UseSSE<=1);
5822   match(Set dst (LoadD mem));
5823 
5824   ins_cost(150);
5825   format %{ "FLD_D  ST,$mem\n\t"
5826             "FSTP   $dst" %}
5827   opcode(0xDD);               /* DD /0 */
5828   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5829               Pop_Reg_DPR(dst) );
5830   ins_pipe( fpu_reg_mem );
5831 %}
5832 
5833 // Load Double to XMM
5834 instruct loadD(regD dst, memory mem) %{
5835   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5836   match(Set dst (LoadD mem));
5837   ins_cost(145);
5838   format %{ "MOVSD  $dst,$mem" %}
5839   ins_encode %{
5840     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5841   %}
5842   ins_pipe( pipe_slow );
5843 %}
5844 
5845 instruct loadD_partial(regD dst, memory mem) %{
5846   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5847   match(Set dst (LoadD mem));
5848   ins_cost(145);
5849   format %{ "MOVLPD $dst,$mem" %}
5850   ins_encode %{
5851     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5852   %}
5853   ins_pipe( pipe_slow );
5854 %}
5855 
5856 // Load to XMM register (single-precision floating point)
5857 // MOVSS instruction
5858 instruct loadF(regF dst, memory mem) %{
5859   predicate(UseSSE>=1);
5860   match(Set dst (LoadF mem));
5861   ins_cost(145);
5862   format %{ "MOVSS  $dst,$mem" %}
5863   ins_encode %{
5864     __ movflt ($dst$$XMMRegister, $mem$$Address);
5865   %}
5866   ins_pipe( pipe_slow );
5867 %}
5868 
5869 // Load Float
5870 instruct loadFPR(regFPR dst, memory mem) %{
5871   predicate(UseSSE==0);
5872   match(Set dst (LoadF mem));
5873 
5874   ins_cost(150);
5875   format %{ "FLD_S  ST,$mem\n\t"
5876             "FSTP   $dst" %}
5877   opcode(0xD9);               /* D9 /0 */
5878   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5879               Pop_Reg_FPR(dst) );
5880   ins_pipe( fpu_reg_mem );
5881 %}
5882 
5883 // Load Effective Address
5884 instruct leaP8(eRegP dst, indOffset8 mem) %{
5885   match(Set dst mem);
5886 
5887   ins_cost(110);
5888   format %{ "LEA    $dst,$mem" %}
5889   opcode(0x8D);
5890   ins_encode( OpcP, RegMem(dst,mem));
5891   ins_pipe( ialu_reg_reg_fat );
5892 %}
5893 
5894 instruct leaP32(eRegP dst, indOffset32 mem) %{
5895   match(Set dst mem);
5896 
5897   ins_cost(110);
5898   format %{ "LEA    $dst,$mem" %}
5899   opcode(0x8D);
5900   ins_encode( OpcP, RegMem(dst,mem));
5901   ins_pipe( ialu_reg_reg_fat );
5902 %}
5903 
5904 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5905   match(Set dst mem);
5906 
5907   ins_cost(110);
5908   format %{ "LEA    $dst,$mem" %}
5909   opcode(0x8D);
5910   ins_encode( OpcP, RegMem(dst,mem));
5911   ins_pipe( ialu_reg_reg_fat );
5912 %}
5913 
5914 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5915   match(Set dst mem);
5916 
5917   ins_cost(110);
5918   format %{ "LEA    $dst,$mem" %}
5919   opcode(0x8D);
5920   ins_encode( OpcP, RegMem(dst,mem));
5921   ins_pipe( ialu_reg_reg_fat );
5922 %}
5923 
5924 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5925   match(Set dst mem);
5926 
5927   ins_cost(110);
5928   format %{ "LEA    $dst,$mem" %}
5929   opcode(0x8D);
5930   ins_encode( OpcP, RegMem(dst,mem));
5931   ins_pipe( ialu_reg_reg_fat );
5932 %}
5933 
5934 // Load Constant
5935 instruct loadConI(rRegI dst, immI src) %{
5936   match(Set dst src);
5937 
5938   format %{ "MOV    $dst,$src" %}
5939   ins_encode( LdImmI(dst, src) );
5940   ins_pipe( ialu_reg_fat );
5941 %}
5942 
5943 // Load Constant zero
5944 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5945   match(Set dst src);
5946   effect(KILL cr);
5947 
5948   ins_cost(50);
5949   format %{ "XOR    $dst,$dst" %}
5950   opcode(0x33);  /* + rd */
5951   ins_encode( OpcP, RegReg( dst, dst ) );
5952   ins_pipe( ialu_reg );
5953 %}
5954 
5955 instruct loadConP(eRegP dst, immP src) %{
5956   match(Set dst src);
5957 
5958   format %{ "MOV    $dst,$src" %}
5959   opcode(0xB8);  /* + rd */
5960   ins_encode( LdImmP(dst, src) );
5961   ins_pipe( ialu_reg_fat );
5962 %}
5963 
5964 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5965   match(Set dst src);
5966   effect(KILL cr);
5967   ins_cost(200);
5968   format %{ "MOV    $dst.lo,$src.lo\n\t"
5969             "MOV    $dst.hi,$src.hi" %}
5970   opcode(0xB8);
5971   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5972   ins_pipe( ialu_reg_long_fat );
5973 %}
5974 
5975 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5976   match(Set dst src);
5977   effect(KILL cr);
5978   ins_cost(150);
5979   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5980             "XOR    $dst.hi,$dst.hi" %}
5981   opcode(0x33,0x33);
5982   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5983   ins_pipe( ialu_reg_long );
5984 %}
5985 
5986 // The instruction usage is guarded by predicate in operand immFPR().
5987 instruct loadConFPR(regFPR dst, immFPR con) %{
5988   match(Set dst con);
5989   ins_cost(125);
5990   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5991             "FSTP   $dst" %}
5992   ins_encode %{
5993     __ fld_s($constantaddress($con));
5994     __ fstp_d($dst$$reg);
5995   %}
5996   ins_pipe(fpu_reg_con);
5997 %}
5998 
5999 // The instruction usage is guarded by predicate in operand immFPR0().
6000 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6001   match(Set dst con);
6002   ins_cost(125);
6003   format %{ "FLDZ   ST\n\t"
6004             "FSTP   $dst" %}
6005   ins_encode %{
6006     __ fldz();
6007     __ fstp_d($dst$$reg);
6008   %}
6009   ins_pipe(fpu_reg_con);
6010 %}
6011 
6012 // The instruction usage is guarded by predicate in operand immFPR1().
6013 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6014   match(Set dst con);
6015   ins_cost(125);
6016   format %{ "FLD1   ST\n\t"
6017             "FSTP   $dst" %}
6018   ins_encode %{
6019     __ fld1();
6020     __ fstp_d($dst$$reg);
6021   %}
6022   ins_pipe(fpu_reg_con);
6023 %}
6024 
6025 // The instruction usage is guarded by predicate in operand immF().
6026 instruct loadConF(regF dst, immF con) %{
6027   match(Set dst con);
6028   ins_cost(125);
6029   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6030   ins_encode %{
6031     __ movflt($dst$$XMMRegister, $constantaddress($con));
6032   %}
6033   ins_pipe(pipe_slow);
6034 %}
6035 
6036 // The instruction usage is guarded by predicate in operand immF0().
6037 instruct loadConF0(regF dst, immF0 src) %{
6038   match(Set dst src);
6039   ins_cost(100);
6040   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6041   ins_encode %{
6042     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6043   %}
6044   ins_pipe(pipe_slow);
6045 %}
6046 
6047 // The instruction usage is guarded by predicate in operand immDPR().
6048 instruct loadConDPR(regDPR dst, immDPR con) %{
6049   match(Set dst con);
6050   ins_cost(125);
6051 
6052   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6053             "FSTP   $dst" %}
6054   ins_encode %{
6055     __ fld_d($constantaddress($con));
6056     __ fstp_d($dst$$reg);
6057   %}
6058   ins_pipe(fpu_reg_con);
6059 %}
6060 
6061 // The instruction usage is guarded by predicate in operand immDPR0().
6062 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6063   match(Set dst con);
6064   ins_cost(125);
6065 
6066   format %{ "FLDZ   ST\n\t"
6067             "FSTP   $dst" %}
6068   ins_encode %{
6069     __ fldz();
6070     __ fstp_d($dst$$reg);
6071   %}
6072   ins_pipe(fpu_reg_con);
6073 %}
6074 
6075 // The instruction usage is guarded by predicate in operand immDPR1().
6076 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6077   match(Set dst con);
6078   ins_cost(125);
6079 
6080   format %{ "FLD1   ST\n\t"
6081             "FSTP   $dst" %}
6082   ins_encode %{
6083     __ fld1();
6084     __ fstp_d($dst$$reg);
6085   %}
6086   ins_pipe(fpu_reg_con);
6087 %}
6088 
6089 // The instruction usage is guarded by predicate in operand immD().
6090 instruct loadConD(regD dst, immD con) %{
6091   match(Set dst con);
6092   ins_cost(125);
6093   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6094   ins_encode %{
6095     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6096   %}
6097   ins_pipe(pipe_slow);
6098 %}
6099 
6100 // The instruction usage is guarded by predicate in operand immD0().
6101 instruct loadConD0(regD dst, immD0 src) %{
6102   match(Set dst src);
6103   ins_cost(100);
6104   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6105   ins_encode %{
6106     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6107   %}
6108   ins_pipe( pipe_slow );
6109 %}
6110 
6111 // Load Stack Slot
6112 instruct loadSSI(rRegI dst, stackSlotI src) %{
6113   match(Set dst src);
6114   ins_cost(125);
6115 
6116   format %{ "MOV    $dst,$src" %}
6117   opcode(0x8B);
6118   ins_encode( OpcP, RegMem(dst,src));
6119   ins_pipe( ialu_reg_mem );
6120 %}
6121 
6122 instruct loadSSL(eRegL dst, stackSlotL src) %{
6123   match(Set dst src);
6124 
6125   ins_cost(200);
6126   format %{ "MOV    $dst,$src.lo\n\t"
6127             "MOV    $dst+4,$src.hi" %}
6128   opcode(0x8B, 0x8B);
6129   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6130   ins_pipe( ialu_mem_long_reg );
6131 %}
6132 
6133 // Load Stack Slot
6134 instruct loadSSP(eRegP dst, stackSlotP src) %{
6135   match(Set dst src);
6136   ins_cost(125);
6137 
6138   format %{ "MOV    $dst,$src" %}
6139   opcode(0x8B);
6140   ins_encode( OpcP, RegMem(dst,src));
6141   ins_pipe( ialu_reg_mem );
6142 %}
6143 
6144 // Load Stack Slot
6145 instruct loadSSF(regFPR dst, stackSlotF src) %{
6146   match(Set dst src);
6147   ins_cost(125);
6148 
6149   format %{ "FLD_S  $src\n\t"
6150             "FSTP   $dst" %}
6151   opcode(0xD9);               /* D9 /0, FLD m32real */
6152   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6153               Pop_Reg_FPR(dst) );
6154   ins_pipe( fpu_reg_mem );
6155 %}
6156 
6157 // Load Stack Slot
6158 instruct loadSSD(regDPR dst, stackSlotD src) %{
6159   match(Set dst src);
6160   ins_cost(125);
6161 
6162   format %{ "FLD_D  $src\n\t"
6163             "FSTP   $dst" %}
6164   opcode(0xDD);               /* DD /0, FLD m64real */
6165   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6166               Pop_Reg_DPR(dst) );
6167   ins_pipe( fpu_reg_mem );
6168 %}
6169 
6170 // Prefetch instructions for allocation.
6171 // Must be safe to execute with invalid address (cannot fault).
6172 
6173 instruct prefetchAlloc0( memory mem ) %{
6174   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6175   match(PrefetchAllocation mem);
6176   ins_cost(0);
6177   size(0);
6178   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6179   ins_encode();
6180   ins_pipe(empty);
6181 %}
6182 
6183 instruct prefetchAlloc( memory mem ) %{
6184   predicate(AllocatePrefetchInstr==3);
6185   match( PrefetchAllocation mem );
6186   ins_cost(100);
6187 
6188   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6189   ins_encode %{
6190     __ prefetchw($mem$$Address);
6191   %}
6192   ins_pipe(ialu_mem);
6193 %}
6194 
6195 instruct prefetchAllocNTA( memory mem ) %{
6196   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6197   match(PrefetchAllocation mem);
6198   ins_cost(100);
6199 
6200   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6201   ins_encode %{
6202     __ prefetchnta($mem$$Address);
6203   %}
6204   ins_pipe(ialu_mem);
6205 %}
6206 
6207 instruct prefetchAllocT0( memory mem ) %{
6208   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6209   match(PrefetchAllocation mem);
6210   ins_cost(100);
6211 
6212   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6213   ins_encode %{
6214     __ prefetcht0($mem$$Address);
6215   %}
6216   ins_pipe(ialu_mem);
6217 %}
6218 
6219 instruct prefetchAllocT2( memory mem ) %{
6220   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6221   match(PrefetchAllocation mem);
6222   ins_cost(100);
6223 
6224   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6225   ins_encode %{
6226     __ prefetcht2($mem$$Address);
6227   %}
6228   ins_pipe(ialu_mem);
6229 %}
6230 
6231 //----------Store Instructions-------------------------------------------------
6232 
6233 // Store Byte
6234 instruct storeB(memory mem, xRegI src) %{
6235   match(Set mem (StoreB mem src));
6236 
6237   ins_cost(125);
6238   format %{ "MOV8   $mem,$src" %}
6239   opcode(0x88);
6240   ins_encode( OpcP, RegMem( src, mem ) );
6241   ins_pipe( ialu_mem_reg );
6242 %}
6243 
6244 // Store Char/Short
6245 instruct storeC(memory mem, rRegI src) %{
6246   match(Set mem (StoreC mem src));
6247 
6248   ins_cost(125);
6249   format %{ "MOV16  $mem,$src" %}
6250   opcode(0x89, 0x66);
6251   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6252   ins_pipe( ialu_mem_reg );
6253 %}
6254 
6255 // Store Integer
6256 instruct storeI(memory mem, rRegI src) %{
6257   match(Set mem (StoreI mem src));
6258 
6259   ins_cost(125);
6260   format %{ "MOV    $mem,$src" %}
6261   opcode(0x89);
6262   ins_encode( OpcP, RegMem( src, mem ) );
6263   ins_pipe( ialu_mem_reg );
6264 %}
6265 
6266 // Store Long
6267 instruct storeL(long_memory mem, eRegL src) %{
6268   predicate(!((StoreLNode*)n)->require_atomic_access());
6269   match(Set mem (StoreL mem src));
6270 
6271   ins_cost(200);
6272   format %{ "MOV    $mem,$src.lo\n\t"
6273             "MOV    $mem+4,$src.hi" %}
6274   opcode(0x89, 0x89);
6275   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6276   ins_pipe( ialu_mem_long_reg );
6277 %}
6278 
6279 // Store Long to Integer
6280 instruct storeL2I(memory mem, eRegL src) %{
6281   match(Set mem (StoreI mem (ConvL2I src)));
6282 
6283   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6284   ins_encode %{
6285     __ movl($mem$$Address, $src$$Register);
6286   %}
6287   ins_pipe(ialu_mem_reg);
6288 %}
6289 
6290 // Volatile Store Long.  Must be atomic, so move it into
6291 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6292 // target address before the store (for null-ptr checks)
6293 // so the memory operand is used twice in the encoding.
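     // With UseSSE>=2 the storeLX variants further below get the same
     // atomicity from a single 64-bit MOVSD through an XMM temporary instead
     // of the x87 FILD/FISTP pair.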
6294 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6295   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6296   match(Set mem (StoreL mem src));
6297   effect( KILL cr );
6298   ins_cost(400);
6299   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6300             "FILD   $src\n\t"
6301             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6302   opcode(0x3B);
6303   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6304   ins_pipe( fpu_reg_mem );
6305 %}
6306 
6307 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6308   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6309   match(Set mem (StoreL mem src));
6310   effect( TEMP tmp, KILL cr );
6311   ins_cost(380);
6312   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6313             "MOVSD  $tmp,$src\n\t"
6314             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6315   ins_encode %{
6316     __ cmpl(rax, $mem$$Address);
6317     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6318     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6319   %}
6320   ins_pipe( pipe_slow );
6321 %}
6322 
6323 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6324   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6325   match(Set mem (StoreL mem src));
6326   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6327   ins_cost(360);
6328   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6329             "MOVD   $tmp,$src.lo\n\t"
6330             "MOVD   $tmp2,$src.hi\n\t"
6331             "PUNPCKLDQ $tmp,$tmp2\n\t"
6332             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6333   ins_encode %{
6334     __ cmpl(rax, $mem$$Address);
6335     __ movdl($tmp$$XMMRegister, $src$$Register);
6336     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6337     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6338     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6339   %}
6340   ins_pipe( pipe_slow );
6341 %}
6342 
6343 // Store Pointer; for storing unknown oops and raw pointers
6344 instruct storeP(memory mem, anyRegP src) %{
6345   match(Set mem (StoreP mem src));
6346 
6347   ins_cost(125);
6348   format %{ "MOV    $mem,$src" %}
6349   opcode(0x89);
6350   ins_encode( OpcP, RegMem( src, mem ) );
6351   ins_pipe( ialu_mem_reg );
6352 %}
6353 
6354 // Store Integer Immediate
6355 instruct storeImmI(memory mem, immI src) %{
6356   match(Set mem (StoreI mem src));
6357 
6358   ins_cost(150);
6359   format %{ "MOV    $mem,$src" %}
6360   opcode(0xC7);               /* C7 /0 */
6361   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6362   ins_pipe( ialu_mem_imm );
6363 %}
6364 
6365 // Store Short/Char Immediate
6366 instruct storeImmI16(memory mem, immI16 src) %{
6367   predicate(UseStoreImmI16);
6368   match(Set mem (StoreC mem src));
6369 
6370   ins_cost(150);
6371   format %{ "MOV16  $mem,$src" %}
6372   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6373   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6374   ins_pipe( ialu_mem_imm );
6375 %}
6376 
6377 // Store Pointer Immediate; null pointers or constant oops that do not
6378 // need card-mark barriers.
6379 instruct storeImmP(memory mem, immP src) %{
6380   match(Set mem (StoreP mem src));
6381 
6382   ins_cost(150);
6383   format %{ "MOV    $mem,$src" %}
6384   opcode(0xC7);               /* C7 /0 */
6385   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6386   ins_pipe( ialu_mem_imm );
6387 %}
6388 
6389 // Store Byte Immediate
6390 instruct storeImmB(memory mem, immI8 src) %{
6391   match(Set mem (StoreB mem src));
6392 
6393   ins_cost(150);
6394   format %{ "MOV8   $mem,$src" %}
6395   opcode(0xC6);               /* C6 /0 */
6396   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6397   ins_pipe( ialu_mem_imm );
6398 %}
6399 
6400 // Store CMS card-mark Immediate
6401 instruct storeImmCM(memory mem, immI8 src) %{
6402   match(Set mem (StoreCM mem src));
6403 
6404   ins_cost(150);
6405   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6406   opcode(0xC6);               /* C6 /0 */
6407   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6408   ins_pipe( ialu_mem_imm );
6409 %}
6410 
6411 // Store Double
6412 instruct storeDPR( memory mem, regDPR1 src) %{
6413   predicate(UseSSE<=1);
6414   match(Set mem (StoreD mem src));
6415 
6416   ins_cost(100);
6417   format %{ "FST_D  $mem,$src" %}
6418   opcode(0xDD);       /* DD /2 */
6419   ins_encode( enc_FPR_store(mem,src) );
6420   ins_pipe( fpu_mem_reg );
6421 %}
6422 
6423 // Store double does rounding on x86
6424 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6425   predicate(UseSSE<=1);
6426   match(Set mem (StoreD mem (RoundDouble src)));
6427 
6428   ins_cost(100);
6429   format %{ "FST_D  $mem,$src\t# round" %}
6430   opcode(0xDD);       /* DD /2 */
6431   ins_encode( enc_FPR_store(mem,src) );
6432   ins_pipe( fpu_mem_reg );
6433 %}
6434 
6435 // Store XMM register to memory (double-precision floating point)
6436 // MOVSD instruction
6437 instruct storeD(memory mem, regD src) %{
6438   predicate(UseSSE>=2);
6439   match(Set mem (StoreD mem src));
6440   ins_cost(95);
6441   format %{ "MOVSD  $mem,$src" %}
6442   ins_encode %{
6443     __ movdbl($mem$$Address, $src$$XMMRegister);
6444   %}
6445   ins_pipe( pipe_slow );
6446 %}
6447 
6448 // Store XMM register to memory (single-precision floating point)
6449 // MOVSS instruction
6450 instruct storeF(memory mem, regF src) %{
6451   predicate(UseSSE>=1);
6452   match(Set mem (StoreF mem src));
6453   ins_cost(95);
6454   format %{ "MOVSS  $mem,$src" %}
6455   ins_encode %{
6456     __ movflt($mem$$Address, $src$$XMMRegister);
6457   %}
6458   ins_pipe( pipe_slow );
6459 %}
6460 
6461 // Store Float
6462 instruct storeFPR( memory mem, regFPR1 src) %{
6463   predicate(UseSSE==0);
6464   match(Set mem (StoreF mem src));
6465 
6466   ins_cost(100);
6467   format %{ "FST_S  $mem,$src" %}
6468   opcode(0xD9);       /* D9 /2 */
6469   ins_encode( enc_FPR_store(mem,src) );
6470   ins_pipe( fpu_mem_reg );
6471 %}
6472 
6473 // Store Float does rounding on x86
6474 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6475   predicate(UseSSE==0);
6476   match(Set mem (StoreF mem (RoundFloat src)));
6477 
6478   ins_cost(100);
6479   format %{ "FST_S  $mem,$src\t# round" %}
6480   opcode(0xD9);       /* D9 /2 */
6481   ins_encode( enc_FPR_store(mem,src) );
6482   ins_pipe( fpu_mem_reg );
6483 %}
6484 
6485 // Storing a Double as a Float does rounding on x86
6486 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6487   predicate(UseSSE<=1);
6488   match(Set mem (StoreF mem (ConvD2F src)));
6489 
6490   ins_cost(100);
6491   format %{ "FST_S  $mem,$src\t# D-round" %}
6492   opcode(0xD9);       /* D9 /2 */
6493   ins_encode( enc_FPR_store(mem,src) );
6494   ins_pipe( fpu_mem_reg );
6495 %}
6496 
6497 // Store immediate Float value (faster than storing from an FPU register)
6498 // The instruction usage is guarded by predicate in operand immFPR().
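     // The constant is emitted as its raw 32-bit bit pattern by an integer
     // MOV (Con32FPR_as_bits), so no FPU register is involved in the store.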
6499 instruct storeFPR_imm( memory mem, immFPR src) %{
6500   match(Set mem (StoreF mem src));
6501 
6502   ins_cost(50);
6503   format %{ "MOV    $mem,$src\t# store float" %}
6504   opcode(0xC7);               /* C7 /0 */
6505   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6506   ins_pipe( ialu_mem_imm );
6507 %}
6508 
6509 // Store immediate Float value (faster than storing from an XMM register)
6510 // The instruction usage is guarded by predicate in operand immF().
6511 instruct storeF_imm( memory mem, immF src) %{
6512   match(Set mem (StoreF mem src));
6513 
6514   ins_cost(50);
6515   format %{ "MOV    $mem,$src\t# store float" %}
6516   opcode(0xC7);               /* C7 /0 */
6517   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6518   ins_pipe( ialu_mem_imm );
6519 %}
6520 
6521 // Store Integer to stack slot
6522 instruct storeSSI(stackSlotI dst, rRegI src) %{
6523   match(Set dst src);
6524 
6525   ins_cost(100);
6526   format %{ "MOV    $dst,$src" %}
6527   opcode(0x89);
6528   ins_encode( OpcPRegSS( dst, src ) );
6529   ins_pipe( ialu_mem_reg );
6530 %}
6531 
6532 // Store Pointer to stack slot
6533 instruct storeSSP(stackSlotP dst, eRegP src) %{
6534   match(Set dst src);
6535 
6536   ins_cost(100);
6537   format %{ "MOV    $dst,$src" %}
6538   opcode(0x89);
6539   ins_encode( OpcPRegSS( dst, src ) );
6540   ins_pipe( ialu_mem_reg );
6541 %}
6542 
6543 // Store Long to stack slot
6544 instruct storeSSL(stackSlotL dst, eRegL src) %{
6545   match(Set dst src);
6546 
6547   ins_cost(200);
6548   format %{ "MOV    $dst,$src.lo\n\t"
6549             "MOV    $dst+4,$src.hi" %}
6550   opcode(0x89, 0x89);
6551   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6552   ins_pipe( ialu_mem_long_reg );
6553 %}
6554 
6555 //----------MemBar Instructions-----------------------------------------------
6556 // Memory barrier flavors
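     // On x86 the hardware memory model already orders load-load, store-store
     // and load-store, so acquire, release and storestore barriers are empty
     // encodings here.  Only the store-load case (membar_volatile below) emits
     // code, as a locked ADD to the top of the stack.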
6557 
6558 instruct membar_acquire() %{
6559   match(MemBarAcquire);
6560   match(LoadFence);
6561   ins_cost(400);
6562 
6563   size(0);
6564   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6565   ins_encode();
6566   ins_pipe(empty);
6567 %}
6568 
6569 instruct membar_acquire_lock() %{
6570   match(MemBarAcquireLock);
6571   ins_cost(0);
6572 
6573   size(0);
6574   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6575   ins_encode( );
6576   ins_pipe(empty);
6577 %}
6578 
6579 instruct membar_release() %{
6580   match(MemBarRelease);
6581   match(StoreFence);
6582   ins_cost(400);
6583 
6584   size(0);
6585   format %{ "MEMBAR-release ! (empty encoding)" %}
6586   ins_encode( );
6587   ins_pipe(empty);
6588 %}
6589 
6590 instruct membar_release_lock() %{
6591   match(MemBarReleaseLock);
6592   ins_cost(0);
6593 
6594   size(0);
6595   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6596   ins_encode( );
6597   ins_pipe(empty);
6598 %}
6599 
6600 instruct membar_volatile(eFlagsReg cr) %{
6601   match(MemBarVolatile);
6602   effect(KILL cr);
6603   ins_cost(400);
6604 
6605   format %{
6606     $$template
6607     if (os::is_MP()) {
6608       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6609     } else {
6610       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6611     }
6612   %}
6613   ins_encode %{
6614     __ membar(Assembler::StoreLoad);
6615   %}
6616   ins_pipe(pipe_slow);
6617 %}
6618 
6619 instruct unnecessary_membar_volatile() %{
6620   match(MemBarVolatile);
6621   predicate(Matcher::post_store_load_barrier(n));
6622   ins_cost(0);
6623 
6624   size(0);
6625   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6626   ins_encode( );
6627   ins_pipe(empty);
6628 %}
6629 
6630 instruct membar_storestore() %{
6631   match(MemBarStoreStore);
6632   ins_cost(0);
6633 
6634   size(0);
6635   format %{ "MEMBAR-storestore (empty encoding)" %}
6636   ins_encode( );
6637   ins_pipe(empty);
6638 %}
6639 
6640 //----------Move Instructions--------------------------------------------------
6641 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6642   match(Set dst (CastX2P src));
6643   format %{ "# X2P  $dst, $src" %}
6644   ins_encode( /*empty encoding*/ );
6645   ins_cost(0);
6646   ins_pipe(empty);
6647 %}
6648 
6649 instruct castP2X(rRegI dst, eRegP src ) %{
6650   match(Set dst (CastP2X src));
6651   ins_cost(50);
6652   format %{ "MOV    $dst, $src\t# CastP2X" %}
6653   ins_encode( enc_Copy( dst, src) );
6654   ins_pipe( ialu_reg_reg );
6655 %}
6656 
6657 //----------Conditional Move---------------------------------------------------
6658 // Conditional move
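     // On CPUs without CMOV (predicate !VM_Version::supports_cmov()) the two
     // jmov forms below emulate the conditional move with a short branch whose
     // condition is the inverse of the cmove condition, jumping around a MOV.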
6659 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6660   predicate(!VM_Version::supports_cmov() );
6661   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6662   ins_cost(200);
6663   format %{ "J$cop,us skip\t# signed cmove\n\t"
6664             "MOV    $dst,$src\n"
6665       "skip:" %}
6666   ins_encode %{
6667     Label Lskip;
6668     // Invert sense of branch from sense of CMOV
6669     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6670     __ movl($dst$$Register, $src$$Register);
6671     __ bind(Lskip);
6672   %}
6673   ins_pipe( pipe_cmov_reg );
6674 %}
6675 
6676 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6677   predicate(!VM_Version::supports_cmov() );
6678   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6679   ins_cost(200);
6680   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6681             "MOV    $dst,$src\n"
6682       "skip:" %}
6683   ins_encode %{
6684     Label Lskip;
6685     // Invert sense of branch from sense of CMOV
6686     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6687     __ movl($dst$$Register, $src$$Register);
6688     __ bind(Lskip);
6689   %}
6690   ins_pipe( pipe_cmov_reg );
6691 %}
6692 
6693 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6694   predicate(VM_Version::supports_cmov() );
6695   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6696   ins_cost(200);
6697   format %{ "CMOV$cop $dst,$src" %}
6698   opcode(0x0F,0x40);
6699   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6700   ins_pipe( pipe_cmov_reg );
6701 %}
6702 
6703 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6704   predicate(VM_Version::supports_cmov() );
6705   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6706   ins_cost(200);
6707   format %{ "CMOV$cop $dst,$src" %}
6708   opcode(0x0F,0x40);
6709   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6710   ins_pipe( pipe_cmov_reg );
6711 %}
6712 
6713 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6714   predicate(VM_Version::supports_cmov() );
6715   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6716   ins_cost(200);
6717   expand %{
6718     cmovI_regU(cop, cr, dst, src);
6719   %}
6720 %}
6721 
6722 // Conditional move
6723 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6724   predicate(VM_Version::supports_cmov() );
6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6726   ins_cost(250);
6727   format %{ "CMOV$cop $dst,$src" %}
6728   opcode(0x0F,0x40);
6729   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6730   ins_pipe( pipe_cmov_mem );
6731 %}
6732 
6733 // Conditional move
6734 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6735   predicate(VM_Version::supports_cmov() );
6736   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6737   ins_cost(250);
6738   format %{ "CMOV$cop $dst,$src" %}
6739   opcode(0x0F,0x40);
6740   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6741   ins_pipe( pipe_cmov_mem );
6742 %}
6743 
6744 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6745   predicate(VM_Version::supports_cmov() );
6746   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6747   ins_cost(250);
6748   expand %{
6749     cmovI_memU(cop, cr, dst, src);
6750   %}
6751 %}
6752 
6753 // Conditional move
6754 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6755   predicate(VM_Version::supports_cmov() );
6756   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6757   ins_cost(200);
6758   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6759   opcode(0x0F,0x40);
6760   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6761   ins_pipe( pipe_cmov_reg );
6762 %}
6763 
6764 // Conditional move (non-P6 version)
6765 // Note:  a CMoveP is generated for  stubs and native wrappers
6766 //        regardless of whether we are on a P6, so we
6767 //        emulate a cmov here
6768 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6769   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6770   ins_cost(300);
6771   format %{ "Jn$cop   skip\n\t"
6772           "MOV    $dst,$src\t# pointer\n"
6773       "skip:" %}
6774   opcode(0x8b);
6775   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6776   ins_pipe( pipe_cmov_reg );
6777 %}
6778 
6779 // Conditional move
6780 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6781   predicate(VM_Version::supports_cmov() );
6782   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6783   ins_cost(200);
6784   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6785   opcode(0x0F,0x40);
6786   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6787   ins_pipe( pipe_cmov_reg );
6788 %}
6789 
6790 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6791   predicate(VM_Version::supports_cmov() );
6792   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6793   ins_cost(200);
6794   expand %{
6795     cmovP_regU(cop, cr, dst, src);
6796   %}
6797 %}
6798 
6799 // DISABLED: Requires the ADLC to emit a bottom_type call that
6800 // correctly meets the two pointer arguments; one is an incoming
6801 // register but the other is a memory operand.  ALSO appears to
6802 // be buggy with implicit null checks.
6803 //
6804 //// Conditional move
6805 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6806 //  predicate(VM_Version::supports_cmov() );
6807 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6808 //  ins_cost(250);
6809 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6810 //  opcode(0x0F,0x40);
6811 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6812 //  ins_pipe( pipe_cmov_mem );
6813 //%}
6814 //
6815 //// Conditional move
6816 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6817 //  predicate(VM_Version::supports_cmov() );
6818 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6819 //  ins_cost(250);
6820 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6821 //  opcode(0x0F,0x40);
6822 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6823 //  ins_pipe( pipe_cmov_mem );
6824 //%}
6825 
6826 // Conditional move
6827 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6828   predicate(UseSSE<=1);
6829   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6830   ins_cost(200);
6831   format %{ "FCMOV$cop $dst,$src\t# double" %}
6832   opcode(0xDA);
6833   ins_encode( enc_cmov_dpr(cop,src) );
6834   ins_pipe( pipe_cmovDPR_reg );
6835 %}
6836 
6837 // Conditional move
6838 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6839   predicate(UseSSE==0);
6840   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6841   ins_cost(200);
6842   format %{ "FCMOV$cop $dst,$src\t# float" %}
6843   opcode(0xDA);
6844   ins_encode( enc_cmov_dpr(cop,src) );
6845   ins_pipe( pipe_cmovDPR_reg );
6846 %}
6847 
6848 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
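     // The instructs below therefore emulate the signed case with an inverted
     // conditional branch around an FPU register-to-register copy.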
6849 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6850   predicate(UseSSE<=1);
6851   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6852   ins_cost(200);
6853   format %{ "Jn$cop   skip\n\t"
6854             "MOV    $dst,$src\t# double\n"
6855       "skip:" %}
6856   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6857   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6858   ins_pipe( pipe_cmovDPR_reg );
6859 %}
6860 
6861 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6862 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6863   predicate(UseSSE==0);
6864   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6865   ins_cost(200);
6866   format %{ "Jn$cop    skip\n\t"
6867             "MOV    $dst,$src\t# float\n"
6868       "skip:" %}
6869   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6870   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6871   ins_pipe( pipe_cmovDPR_reg );
6872 %}
6873 
6874 // No CMOV instruction for XMM registers with SSE/SSE2; emulate with a branch
6875 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6876   predicate (UseSSE>=1);
6877   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6878   ins_cost(200);
6879   format %{ "Jn$cop   skip\n\t"
6880             "MOVSS  $dst,$src\t# float\n"
6881       "skip:" %}
6882   ins_encode %{
6883     Label skip;
6884     // Invert sense of branch from sense of CMOV
6885     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6886     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6887     __ bind(skip);
6888   %}
6889   ins_pipe( pipe_slow );
6890 %}
6891 
6892 // No CMOV instruction for XMM registers with SSE/SSE2; emulate with a branch
6893 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6894   predicate (UseSSE>=2);
6895   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6896   ins_cost(200);
6897   format %{ "Jn$cop   skip\n\t"
6898             "MOVSD  $dst,$src\t# double\n"
6899       "skip:" %}
6900   ins_encode %{
6901     Label skip;
6902     // Invert sense of branch from sense of CMOV
6903     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6904     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6905     __ bind(skip);
6906   %}
6907   ins_pipe( pipe_slow );
6908 %}
6909 
6910 // unsigned version
6911 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6912   predicate (UseSSE>=1);
6913   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6914   ins_cost(200);
6915   format %{ "Jn$cop   skip\n\t"
6916             "MOVSS  $dst,$src\t# float\n"
6917       "skip:" %}
6918   ins_encode %{
6919     Label skip;
6920     // Invert sense of branch from sense of CMOV
6921     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6922     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6923     __ bind(skip);
6924   %}
6925   ins_pipe( pipe_slow );
6926 %}
6927 
6928 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6929   predicate (UseSSE>=1);
6930   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6931   ins_cost(200);
6932   expand %{
6933     fcmovF_regU(cop, cr, dst, src);
6934   %}
6935 %}
6936 
6937 // unsigned version
6938 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6939   predicate (UseSSE>=2);
6940   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6941   ins_cost(200);
6942   format %{ "Jn$cop   skip\n\t"
6943             "MOVSD  $dst,$src\t# double\n"
6944       "skip:" %}
6945   ins_encode %{
6946     Label skip;
6947     // Invert sense of branch from sense of CMOV
6948     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6949     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6950     __ bind(skip);
6951   %}
6952   ins_pipe( pipe_slow );
6953 %}
6954 
6955 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6956   predicate (UseSSE>=2);
6957   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6958   ins_cost(200);
6959   expand %{
6960     fcmovD_regU(cop, cr, dst, src);
6961   %}
6962 %}
6963 
6964 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6965   predicate(VM_Version::supports_cmov() );
6966   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6967   ins_cost(200);
6968   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6969             "CMOV$cop $dst.hi,$src.hi" %}
6970   opcode(0x0F,0x40);
6971   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6972   ins_pipe( pipe_cmov_reg_long );
6973 %}
6974 
6975 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6976   predicate(VM_Version::supports_cmov() );
6977   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6978   ins_cost(200);
6979   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6980             "CMOV$cop $dst.hi,$src.hi" %}
6981   opcode(0x0F,0x40);
6982   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6983   ins_pipe( pipe_cmov_reg_long );
6984 %}
6985 
6986 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6987   predicate(VM_Version::supports_cmov() );
6988   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6989   ins_cost(200);
6990   expand %{
6991     cmovL_regU(cop, cr, dst, src);
6992   %}
6993 %}
6994 
6995 //----------Arithmetic Instructions--------------------------------------------
6996 //----------Addition Instructions----------------------------------------------
6997 
6998 // Integer Addition Instructions
6999 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7000   match(Set dst (AddI dst src));
7001   effect(KILL cr);
7002 
7003   size(2);
7004   format %{ "ADD    $dst,$src" %}
7005   opcode(0x03);
7006   ins_encode( OpcP, RegReg( dst, src) );
7007   ins_pipe( ialu_reg_reg );
7008 %}
7009 
7010 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7011   match(Set dst (AddI dst src));
7012   effect(KILL cr);
7013 
7014   format %{ "ADD    $dst,$src" %}
7015   opcode(0x81, 0x00); /* /0 id */
7016   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7017   ins_pipe( ialu_reg );
7018 %}
7019 
7020 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7021   predicate(UseIncDec);
7022   match(Set dst (AddI dst src));
7023   effect(KILL cr);
7024 
7025   size(1);
7026   format %{ "INC    $dst" %}
7027   opcode(0x40); /*  */
7028   ins_encode( Opc_plus( primary, dst ) );
7029   ins_pipe( ialu_reg );
7030 %}
7031 
7032 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7033   match(Set dst (AddI src0 src1));
7034   ins_cost(110);
7035 
7036   format %{ "LEA    $dst,[$src0 + $src1]" %}
7037   opcode(0x8D); /* 0x8D /r */
7038   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7039   ins_pipe( ialu_reg_reg );
7040 %}
7041 
7042 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7043   match(Set dst (AddP src0 src1));
7044   ins_cost(110);
7045 
7046   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7047   opcode(0x8D); /* 0x8D /r */
7048   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7049   ins_pipe( ialu_reg_reg );
7050 %}
7051 
7052 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7053   predicate(UseIncDec);
7054   match(Set dst (AddI dst src));
7055   effect(KILL cr);
7056 
7057   size(1);
7058   format %{ "DEC    $dst" %}
7059   opcode(0x48); /*  */
7060   ins_encode( Opc_plus( primary, dst ) );
7061   ins_pipe( ialu_reg );
7062 %}
7063 
7064 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7065   match(Set dst (AddP dst src));
7066   effect(KILL cr);
7067 
7068   size(2);
7069   format %{ "ADD    $dst,$src" %}
7070   opcode(0x03);
7071   ins_encode( OpcP, RegReg( dst, src) );
7072   ins_pipe( ialu_reg_reg );
7073 %}
7074 
7075 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7076   match(Set dst (AddP dst src));
7077   effect(KILL cr);
7078 
7079   format %{ "ADD    $dst,$src" %}
7080   opcode(0x81,0x00); /* Opcode 81 /0 id */
7081   // ins_encode( RegImm( dst, src) );
7082   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7083   ins_pipe( ialu_reg );
7084 %}
7085 
7086 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7087   match(Set dst (AddI dst (LoadI src)));
7088   effect(KILL cr);
7089 
7090   ins_cost(125);
7091   format %{ "ADD    $dst,$src" %}
7092   opcode(0x03);
7093   ins_encode( OpcP, RegMem( dst, src) );
7094   ins_pipe( ialu_reg_mem );
7095 %}
7096 
7097 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7098   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7099   effect(KILL cr);
7100 
7101   ins_cost(150);
7102   format %{ "ADD    $dst,$src" %}
7103   opcode(0x01);  /* Opcode 01 /r */
7104   ins_encode( OpcP, RegMem( src, dst ) );
7105   ins_pipe( ialu_mem_reg );
7106 %}
7107 
7108 // Add Memory with Immediate
7109 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7110   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7111   effect(KILL cr);
7112 
7113   ins_cost(125);
7114   format %{ "ADD    $dst,$src" %}
7115   opcode(0x81);               /* Opcode 81 /0 id */
7116   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7117   ins_pipe( ialu_mem_imm );
7118 %}
7119 
7120 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7121   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7122   effect(KILL cr);
7123 
7124   ins_cost(125);
7125   format %{ "INC    $dst" %}
7126   opcode(0xFF);               /* Opcode FF /0 */
7127   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7128   ins_pipe( ialu_mem_imm );
7129 %}
7130 
7131 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7132   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7133   effect(KILL cr);
7134 
7135   ins_cost(125);
7136   format %{ "DEC    $dst" %}
7137   opcode(0xFF);               /* Opcode FF /1 */
7138   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7139   ins_pipe( ialu_mem_imm );
7140 %}
7141 
7142 
7143 instruct checkCastPP( eRegP dst ) %{
7144   match(Set dst (CheckCastPP dst));
7145 
7146   size(0);
7147   format %{ "#checkcastPP of $dst" %}
7148   ins_encode( /*empty encoding*/ );
7149   ins_pipe( empty );
7150 %}
7151 
7152 instruct castPP( eRegP dst ) %{
7153   match(Set dst (CastPP dst));
7154   format %{ "#castPP of $dst" %}
7155   ins_encode( /*empty encoding*/ );
7156   ins_pipe( empty );
7157 %}
7158 
7159 instruct castII( rRegI dst ) %{
7160   match(Set dst (CastII dst));
7161   format %{ "#castII of $dst" %}
7162   ins_encode( /*empty encoding*/ );
7163   ins_cost(0);
7164   ins_pipe( empty );
7165 %}
7166 
7167 
7168 // Load-locked - same as a regular pointer load when used with compare-swap
7169 instruct loadPLocked(eRegP dst, memory mem) %{
7170   match(Set dst (LoadPLocked mem));
7171 
7172   ins_cost(125);
7173   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7174   opcode(0x8B);
7175   ins_encode( OpcP, RegMem(dst,mem));
7176   ins_pipe( ialu_reg_mem );
7177 %}
7178 
7179 // Conditional-store of the updated heap-top.
7180 // Used during allocation of the shared heap.
7181 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
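     // CMPXCHG compares EAX (the expected heap top in $oldval) against the
     // memory operand and stores $newval only on a match, setting ZF; that
     // flags result is what this instruct produces.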
7182 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7183   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7184   // EAX is killed if there is contention, but then it's also unused.
7185   // In the common case of no contention, EAX holds the new oop address.
7186   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7187   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7188   ins_pipe( pipe_cmpxchg );
7189 %}
7190 
7191 // Conditional-store of an int value.
7192 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7193 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7194   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7195   effect(KILL oldval);
7196   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7197   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7198   ins_pipe( pipe_cmpxchg );
7199 %}
7200 
7201 // Conditional-store of a long value.
7202 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7203 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7204   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7205   effect(KILL oldval);
7206   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7207             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7208             "XCHG   EBX,ECX"
7209   %}
7210   ins_encode %{
7211     // Note: we need to swap rbx and rcx before and after the
7212     //       cmpxchg8 instruction because the instruction uses
7213     //       rcx as the high order word of the new value to store but
7214     //       our register encoding uses rbx.
7215     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7216     if( os::is_MP() )
7217       __ lock();
7218     __ cmpxchg8($mem$$Address);
7219     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7220   %}
7221   ins_pipe( pipe_cmpxchg );
7222 %}
7223 
7224 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
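     // The boolean result is instead materialized from the flags after the
     // CMPXCHG (enc_flags_ne_to_boolean: MOV res,0 / JNE / MOV res,1).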
7225 
7226 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7227   predicate(VM_Version::supports_cx8());
7228   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7229   effect(KILL cr, KILL oldval);
7230   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7231             "MOV    $res,0\n\t"
7232             "JNE,s  fail\n\t"
7233             "MOV    $res,1\n"
7234           "fail:" %}
7235   ins_encode( enc_cmpxchg8(mem_ptr),
7236               enc_flags_ne_to_boolean(res) );
7237   ins_pipe( pipe_cmpxchg );
7238 %}
7239 
7240 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7241   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7242   effect(KILL cr, KILL oldval);
7243   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7244             "MOV    $res,0\n\t"
7245             "JNE,s  fail\n\t"
7246             "MOV    $res,1\n"
7247           "fail:" %}
7248   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7249   ins_pipe( pipe_cmpxchg );
7250 %}
7251 
7252 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7253   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7254   effect(KILL cr, KILL oldval);
7255   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7256             "MOV    $res,0\n\t"
7257             "JNE,s  fail\n\t"
7258             "MOV    $res,1\n"
7259           "fail:" %}
7260   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7261   ins_pipe( pipe_cmpxchg );
7262 %}
7263 
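     // When the result of GetAndAddI is not used, a locked ADD is sufficient;
     // otherwise XADD returns the previous value in the register operand.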
7264 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7265   predicate(n->as_LoadStore()->result_not_used());
7266   match(Set dummy (GetAndAddI mem add));
7267   effect(KILL cr);
7268   format %{ "ADDL  [$mem],$add" %}
7269   ins_encode %{
7270     if (os::is_MP()) { __ lock(); }
7271     __ addl($mem$$Address, $add$$constant);
7272   %}
7273   ins_pipe( pipe_cmpxchg );
7274 %}
7275 
7276 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7277   match(Set newval (GetAndAddI mem newval));
7278   effect(KILL cr);
7279   format %{ "XADDL  [$mem],$newval" %}
7280   ins_encode %{
7281     if (os::is_MP()) { __ lock(); }
7282     __ xaddl($mem$$Address, $newval$$Register);
7283   %}
7284   ins_pipe( pipe_cmpxchg );
7285 %}
7286 
7287 instruct xchgI( memory mem, rRegI newval) %{
7288   match(Set newval (GetAndSetI mem newval));
7289   format %{ "XCHGL  $newval,[$mem]" %}
7290   ins_encode %{
7291     __ xchgl($newval$$Register, $mem$$Address);
7292   %}
7293   ins_pipe( pipe_cmpxchg );
7294 %}
7295 
7296 instruct xchgP( memory mem, pRegP newval) %{
7297   match(Set newval (GetAndSetP mem newval));
7298   format %{ "XCHGL  $newval,[$mem]" %}
7299   ins_encode %{
7300     __ xchgl($newval$$Register, $mem$$Address);
7301   %}
7302   ins_pipe( pipe_cmpxchg );
7303 %}
7304 
7305 //----------Subtraction Instructions-------------------------------------------
7306 
7307 // Integer Subtraction Instructions
7308 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7309   match(Set dst (SubI dst src));
7310   effect(KILL cr);
7311 
7312   size(2);
7313   format %{ "SUB    $dst,$src" %}
7314   opcode(0x2B);
7315   ins_encode( OpcP, RegReg( dst, src) );
7316   ins_pipe( ialu_reg_reg );
7317 %}
7318 
7319 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7320   match(Set dst (SubI dst src));
7321   effect(KILL cr);
7322 
7323   format %{ "SUB    $dst,$src" %}
7324   opcode(0x81,0x05);  /* Opcode 81 /5 */
7325   // ins_encode( RegImm( dst, src) );
7326   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7327   ins_pipe( ialu_reg );
7328 %}
7329 
7330 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7331   match(Set dst (SubI dst (LoadI src)));
7332   effect(KILL cr);
7333 
7334   ins_cost(125);
7335   format %{ "SUB    $dst,$src" %}
7336   opcode(0x2B);
7337   ins_encode( OpcP, RegMem( dst, src) );
7338   ins_pipe( ialu_reg_mem );
7339 %}
7340 
7341 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7342   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7343   effect(KILL cr);
7344 
7345   ins_cost(150);
7346   format %{ "SUB    $dst,$src" %}
7347   opcode(0x29);  /* Opcode 29 /r */
7348   ins_encode( OpcP, RegMem( src, dst ) );
7349   ins_pipe( ialu_mem_reg );
7350 %}
7351 
7352 // Subtract from a pointer
7353 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7354   match(Set dst (AddP dst (SubI zero src)));
7355   effect(KILL cr);
7356 
7357   size(2);
7358   format %{ "SUB    $dst,$src" %}
7359   opcode(0x2B);
7360   ins_encode( OpcP, RegReg( dst, src) );
7361   ins_pipe( ialu_reg_reg );
7362 %}
7363 
7364 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7365   match(Set dst (SubI zero dst));
7366   effect(KILL cr);
7367 
7368   size(2);
7369   format %{ "NEG    $dst" %}
7370   opcode(0xF7,0x03);  // Opcode F7 /3
7371   ins_encode( OpcP, RegOpc( dst ) );
7372   ins_pipe( ialu_reg );
7373 %}
7374 
7375 //----------Multiplication/Division Instructions-------------------------------
7376 // Integer Multiplication Instructions
7377 // Multiply Register
7378 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7379   match(Set dst (MulI dst src));
7380   effect(KILL cr);
7381 
7382   size(3);
7383   ins_cost(300);
7384   format %{ "IMUL   $dst,$src" %}
7385   opcode(0xAF, 0x0F);
7386   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7387   ins_pipe( ialu_reg_reg_alu0 );
7388 %}
7389 
7390 // Multiply 32-bit Immediate
7391 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7392   match(Set dst (MulI src imm));
7393   effect(KILL cr);
7394 
7395   ins_cost(300);
7396   format %{ "IMUL   $dst,$src,$imm" %}
7397   opcode(0x69);  /* 69 /r id */
7398   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7399   ins_pipe( ialu_reg_reg_alu0 );
7400 %}
7401 
7402 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7403   match(Set dst src);
7404   effect(KILL cr);
7405 
7406   // Note that this is artificially increased to make it more expensive than loadConL
7407   ins_cost(250);
7408   format %{ "MOV    EAX,$src\t// low word only" %}
7409   opcode(0xB8);
7410   ins_encode( LdImmL_Lo(dst, src) );
7411   ins_pipe( ialu_reg_fat );
7412 %}
7413 
7414 // Multiply by 32-bit Immediate, taking the shifted high order results
7415 //  (special case for shift by 32)
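     // The one-operand IMUL leaves the 64-bit product in EDX:EAX, so for a
     // shift of exactly 32 the high word in EDX is already the result; the
     // next instruct adds a SAR EDX,$cnt-32 to handle larger shift counts.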
7416 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7417   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7418   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7419              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7420              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7421   effect(USE src1, KILL cr);
7422 
7423   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7424   ins_cost(0*100 + 1*400 - 150);
7425   format %{ "IMUL   EDX:EAX,$src1" %}
7426   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7427   ins_pipe( pipe_slow );
7428 %}
7429 
7430 // Multiply by 32-bit Immediate, taking the shifted high order results
7431 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7432   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7433   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7434              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7435              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7436   effect(USE src1, KILL cr);
7437 
7438   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7439   ins_cost(1*100 + 1*400 - 150);
7440   format %{ "IMUL   EDX:EAX,$src1\n\t"
7441             "SAR    EDX,$cnt-32" %}
7442   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7443   ins_pipe( pipe_slow );
7444 %}
7445 
7446 // Multiply Memory 32-bit Immediate
7447 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7448   match(Set dst (MulI (LoadI src) imm));
7449   effect(KILL cr);
7450 
7451   ins_cost(300);
7452   format %{ "IMUL   $dst,$src,$imm" %}
7453   opcode(0x69);  /* 69 /r id */
7454   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7455   ins_pipe( ialu_reg_mem_alu0 );
7456 %}
7457 
7458 // Multiply Memory
7459 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7460   match(Set dst (MulI dst (LoadI src)));
7461   effect(KILL cr);
7462 
7463   ins_cost(350);
7464   format %{ "IMUL   $dst,$src" %}
7465   opcode(0xAF, 0x0F);
7466   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7467   ins_pipe( ialu_reg_mem_alu0 );
7468 %}
7469 
7470 // Multiply Register Int to Long
7471 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7472   // Basic Idea: long = (long)int * (long)int
7473   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7474   effect(DEF dst, USE src, USE src1, KILL flags);
7475 
7476   ins_cost(300);
7477   format %{ "IMUL   $dst,$src1" %}
7478 
7479   ins_encode( long_int_multiply( dst, src1 ) );
7480   ins_pipe( ialu_reg_reg_alu0 );
7481 %}
7482 
7483 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7484   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7485   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7486   effect(KILL flags);
7487 
7488   ins_cost(300);
7489   format %{ "MUL    $dst,$src1" %}
7490 
7491   ins_encode( long_uint_multiply(dst, src1) );
7492   ins_pipe( ialu_reg_reg_alu0 );
7493 %}
7494 
7495 // Multiply Register Long
7496 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7497   match(Set dst (MulL dst src));
7498   effect(KILL cr, TEMP tmp);
7499   ins_cost(4*100+3*400);
7500 // Basic idea: lo(result) = lo(x_lo * y_lo)
7501 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7502   format %{ "MOV    $tmp,$src.lo\n\t"
7503             "IMUL   $tmp,EDX\n\t"
7504             "MOV    EDX,$src.hi\n\t"
7505             "IMUL   EDX,EAX\n\t"
7506             "ADD    $tmp,EDX\n\t"
7507             "MUL    EDX:EAX,$src.lo\n\t"
7508             "ADD    EDX,$tmp" %}
7509   ins_encode( long_multiply( dst, src, tmp ) );
7510   ins_pipe( pipe_slow );
7511 %}
7512 
7513 // Multiply Register Long where the left operand's high 32 bits are zero
7514 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7515   predicate(is_operand_hi32_zero(n->in(1)));
7516   match(Set dst (MulL dst src));
7517   effect(KILL cr, TEMP tmp);
7518   ins_cost(2*100+2*400);
7519 // Basic idea: lo(result) = lo(x_lo * y_lo)
7520 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7521   format %{ "MOV    $tmp,$src.hi\n\t"
7522             "IMUL   $tmp,EAX\n\t"
7523             "MUL    EDX:EAX,$src.lo\n\t"
7524             "ADD    EDX,$tmp" %}
7525   ins_encode %{
7526     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7527     __ imull($tmp$$Register, rax);
7528     __ mull($src$$Register);
7529     __ addl(rdx, $tmp$$Register);
7530   %}
7531   ins_pipe( pipe_slow );
7532 %}
7533 
7534 // Multiply Register Long where the right operand's high 32 bits are zero
7535 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7536   predicate(is_operand_hi32_zero(n->in(2)));
7537   match(Set dst (MulL dst src));
7538   effect(KILL cr, TEMP tmp);
7539   ins_cost(2*100+2*400);
7540 // Basic idea: lo(result) = lo(x_lo * y_lo)
7541 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7542   format %{ "MOV    $tmp,$src.lo\n\t"
7543             "IMUL   $tmp,EDX\n\t"
7544             "MUL    EDX:EAX,$src.lo\n\t"
7545             "ADD    EDX,$tmp" %}
7546   ins_encode %{
7547     __ movl($tmp$$Register, $src$$Register);
7548     __ imull($tmp$$Register, rdx);
7549     __ mull($src$$Register);
7550     __ addl(rdx, $tmp$$Register);
7551   %}
7552   ins_pipe( pipe_slow );
7553 %}
7554 
7555 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7556 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7557   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7558   match(Set dst (MulL dst src));
7559   effect(KILL cr);
7560   ins_cost(1*400);
7561 // Basic idea: lo(result) = lo(x_lo * y_lo)
7562 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7563   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7564   ins_encode %{
7565     __ mull($src$$Register);
7566   %}
7567   ins_pipe( pipe_slow );
7568 %}
7569 
7570 // Multiply Register Long by small constant
7571 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7572   match(Set dst (MulL dst src));
7573   effect(KILL cr, TEMP tmp);
7574   ins_cost(2*100+2*400);
7575   size(12);
7576 // Basic idea: lo(result) = lo(src * EAX)
7577 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7578   format %{ "IMUL   $tmp,EDX,$src\n\t"
7579             "MOV    EDX,$src\n\t"
7580             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7581             "ADD    EDX,$tmp" %}
7582   ins_encode( long_multiply_con( dst, src, tmp ) );
7583   ins_pipe( pipe_slow );
7584 %}
7585 
7586 // Integer DIV with Register
7587 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7588   match(Set rax (DivI rax div));
7589   effect(KILL rdx, KILL cr);
7590   size(26);
7591   ins_cost(30*100+10*100);
7592   format %{ "CMP    EAX,0x80000000\n\t"
7593             "JNE,s  normal\n\t"
7594             "XOR    EDX,EDX\n\t"
7595             "CMP    ECX,-1\n\t"
7596             "JE,s   done\n"
7597     "normal: CDQ\n\t"
7598             "IDIV   $div\n\t"
7599     "done:"        %}
7600   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7601   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7602   ins_pipe( ialu_reg_reg_alu0 );
7603 %}
7604 
7605 // Divide Register Long
7606 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7607   match(Set dst (DivL src1 src2));
7608   effect( KILL cr, KILL cx, KILL bx );
7609   ins_cost(10000);
7610   format %{ "PUSH   $src1.hi\n\t"
7611             "PUSH   $src1.lo\n\t"
7612             "PUSH   $src2.hi\n\t"
7613             "PUSH   $src2.lo\n\t"
7614             "CALL   SharedRuntime::ldiv\n\t"
7615             "ADD    ESP,16" %}
7616   ins_encode( long_div(src1,src2) );
7617   ins_pipe( pipe_slow );
7618 %}
7619 
7620 // Integer DIVMOD with Register, both quotient and mod results
7621 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7622   match(DivModI rax div);
7623   effect(KILL cr);
7624   size(26);
7625   ins_cost(30*100+10*100);
7626   format %{ "CMP    EAX,0x80000000\n\t"
7627             "JNE,s  normal\n\t"
7628             "XOR    EDX,EDX\n\t"
7629             "CMP    ECX,-1\n\t"
7630             "JE,s   done\n"
7631     "normal: CDQ\n\t"
7632             "IDIV   $div\n\t"
7633     "done:"        %}
7634   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7635   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7636   ins_pipe( pipe_slow );
7637 %}
7638 
7639 // Integer MOD with Register
7640 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7641   match(Set rdx (ModI rax div));
7642   effect(KILL rax, KILL cr);
7643 
7644   size(26);
7645   ins_cost(300);
7646   format %{ "CDQ\n\t"
7647             "IDIV   $div" %}
7648   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7649   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7650   ins_pipe( ialu_reg_reg_alu0 );
7651 %}
7652 
7653 // Remainder Register Long
7654 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7655   match(Set dst (ModL src1 src2));
7656   effect( KILL cr, KILL cx, KILL bx );
7657   ins_cost(10000);
7658   format %{ "PUSH   $src1.hi\n\t"
7659             "PUSH   $src1.lo\n\t"
7660             "PUSH   $src2.hi\n\t"
7661             "PUSH   $src2.lo\n\t"
7662             "CALL   SharedRuntime::lrem\n\t"
7663             "ADD    ESP,16" %}
7664   ins_encode( long_mod(src1,src2) );
7665   ins_pipe( pipe_slow );
7666 %}
7667 
7668 // Divide Register Long (no special case since divisor != -1)
7669 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7670   match(Set dst (DivL dst imm));
7671   effect( TEMP tmp, TEMP tmp2, KILL cr );
7672   ins_cost(1000);
7673   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7674             "XOR    $tmp2,$tmp2\n\t"
7675             "CMP    $tmp,EDX\n\t"
7676             "JA,s   fast\n\t"
7677             "MOV    $tmp2,EAX\n\t"
7678             "MOV    EAX,EDX\n\t"
7679             "MOV    EDX,0\n\t"
7680             "JLE,s  pos\n\t"
7681             "LNEG   EAX : $tmp2\n\t"
7682             "DIV    $tmp # unsigned division\n\t"
7683             "XCHG   EAX,$tmp2\n\t"
7684             "DIV    $tmp\n\t"
7685             "LNEG   $tmp2 : EAX\n\t"
7686             "JMP,s  done\n"
7687     "pos:\n\t"
7688             "DIV    $tmp\n\t"
7689             "XCHG   EAX,$tmp2\n"
7690     "fast:\n\t"
7691             "DIV    $tmp\n"
7692     "done:\n\t"
7693             "MOV    EDX,$tmp2\n\t"
7694             "NEG    EDX:EAX # if $imm < 0" %}
7695   ins_encode %{
7696     int con = (int)$imm$$constant;
7697     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7698     int pcon = (con > 0) ? con : -con;
7699     Label Lfast, Lpos, Ldone;
7700 
7701     __ movl($tmp$$Register, pcon);
7702     __ xorl($tmp2$$Register,$tmp2$$Register);
7703     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7704     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7705 
7706     __ movl($tmp2$$Register, $dst$$Register); // save
7707     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7708     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7709     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7710 
7711     // Negative dividend.
7712     // convert value to positive to use unsigned division
7713     __ lneg($dst$$Register, $tmp2$$Register);
7714     __ divl($tmp$$Register);
7715     __ xchgl($dst$$Register, $tmp2$$Register);
7716     __ divl($tmp$$Register);
7717     // revert result back to negative
7718     __ lneg($tmp2$$Register, $dst$$Register);
7719     __ jmpb(Ldone);
7720 
7721     __ bind(Lpos);
7722     __ divl($tmp$$Register); // Use unsigned division
7723     __ xchgl($dst$$Register, $tmp2$$Register);
7724     // Fall through for the final divide, tmp2 has the 32-bit hi result
7725 
7726     __ bind(Lfast);
7727     // fast path: src is positive
7728     __ divl($tmp$$Register); // Use unsigned division
7729 
7730     __ bind(Ldone);
7731     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7732     if (con < 0) {
7733       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7734     }
7735   %}
7736   ins_pipe( pipe_slow );
7737 %}
7738 
7739 // Remainder Register Long (remainder fit into 32 bits)
7740 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7741   match(Set dst (ModL dst imm));
7742   effect( TEMP tmp, TEMP tmp2, KILL cr );
7743   ins_cost(1000);
7744   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7745             "CMP    $tmp,EDX\n\t"
7746             "JA,s   fast\n\t"
7747             "MOV    $tmp2,EAX\n\t"
7748             "MOV    EAX,EDX\n\t"
7749             "MOV    EDX,0\n\t"
7750             "JLE,s  pos\n\t"
7751             "LNEG   EAX : $tmp2\n\t"
7752             "DIV    $tmp # unsigned division\n\t"
7753             "MOV    EAX,$tmp2\n\t"
7754             "DIV    $tmp\n\t"
7755             "NEG    EDX\n\t"
7756             "JMP,s  done\n"
7757     "pos:\n\t"
7758             "DIV    $tmp\n\t"
7759             "MOV    EAX,$tmp2\n"
7760     "fast:\n\t"
7761             "DIV    $tmp\n"
7762     "done:\n\t"
7763             "MOV    EAX,EDX\n\t"
7764             "SAR    EDX,31\n\t" %}
7765   ins_encode %{
7766     int con = (int)$imm$$constant;
7767     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7768     int pcon = (con > 0) ? con : -con;
7769     Label  Lfast, Lpos, Ldone;
7770 
7771     __ movl($tmp$$Register, pcon);
7772     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7773     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7774 
7775     __ movl($tmp2$$Register, $dst$$Register); // save
7776     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7777     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7778     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7779 
7780     // Negative dividend.
7781     // convert value to positive to use unsigned division
7782     __ lneg($dst$$Register, $tmp2$$Register);
7783     __ divl($tmp$$Register);
7784     __ movl($dst$$Register, $tmp2$$Register);
7785     __ divl($tmp$$Register);
7786     // revert remainder back to negative
7787     __ negl(HIGH_FROM_LOW($dst$$Register));
7788     __ jmpb(Ldone);
7789 
7790     __ bind(Lpos);
7791     __ divl($tmp$$Register);
7792     __ movl($dst$$Register, $tmp2$$Register);
7793 
7794     __ bind(Lfast);
7795     // fast path: src is positive
7796     __ divl($tmp$$Register);
7797 
7798     __ bind(Ldone);
7799     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7800     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7801 
7802   %}
7803   ins_pipe( pipe_slow );
7804 %}
7805 
7806 // Integer Shift Instructions
7807 // Shift Left by one
7808 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7809   match(Set dst (LShiftI dst shift));
7810   effect(KILL cr);
7811 
7812   size(2);
7813   format %{ "SHL    $dst,$shift" %}
7814   opcode(0xD1, 0x4);  /* D1 /4 */
7815   ins_encode( OpcP, RegOpc( dst ) );
7816   ins_pipe( ialu_reg );
7817 %}
7818 
7819 // Shift Left by 8-bit immediate
7820 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7821   match(Set dst (LShiftI dst shift));
7822   effect(KILL cr);
7823 
7824   size(3);
7825   format %{ "SHL    $dst,$shift" %}
7826   opcode(0xC1, 0x4);  /* C1 /4 ib */
7827   ins_encode( RegOpcImm( dst, shift) );
7828   ins_pipe( ialu_reg );
7829 %}
7830 
7831 // Shift Left by variable
7832 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7833   match(Set dst (LShiftI dst shift));
7834   effect(KILL cr);
7835 
7836   size(2);
7837   format %{ "SHL    $dst,$shift" %}
7838   opcode(0xD3, 0x4);  /* D3 /4 */
7839   ins_encode( OpcP, RegOpc( dst ) );
7840   ins_pipe( ialu_reg_reg );
7841 %}
7842 
7843 // Arithmetic shift right by one
7844 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7845   match(Set dst (RShiftI dst shift));
7846   effect(KILL cr);
7847 
7848   size(2);
7849   format %{ "SAR    $dst,$shift" %}
7850   opcode(0xD1, 0x7);  /* D1 /7 */
7851   ins_encode( OpcP, RegOpc( dst ) );
7852   ins_pipe( ialu_reg );
7853 %}
7854 
7855 // Arithmetic shift right by one (memory operand)
7856 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7857   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7858   effect(KILL cr);
7859   format %{ "SAR    $dst,$shift" %}
7860   opcode(0xD1, 0x7);  /* D1 /7 */
7861   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7862   ins_pipe( ialu_mem_imm );
7863 %}
7864 
7865 // Arithmetic Shift Right by 8-bit immediate
7866 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7867   match(Set dst (RShiftI dst shift));
7868   effect(KILL cr);
7869 
7870   size(3);
7871   format %{ "SAR    $dst,$shift" %}
7872   opcode(0xC1, 0x7);  /* C1 /7 ib */
7873   ins_encode( RegOpcImm( dst, shift ) );
7874   ins_pipe( ialu_mem_imm );
7875 %}
7876 
7877 // Arithmetic Shift Right by 8-bit immediate (memory operand)
7878 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7879   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7880   effect(KILL cr);
7881 
7882   format %{ "SAR    $dst,$shift" %}
7883   opcode(0xC1, 0x7);  /* C1 /7 ib */
7884   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7885   ins_pipe( ialu_mem_imm );
7886 %}
7887 
7888 // Arithmetic Shift Right by variable
7889 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7890   match(Set dst (RShiftI dst shift));
7891   effect(KILL cr);
7892 
7893   size(2);
7894   format %{ "SAR    $dst,$shift" %}
7895   opcode(0xD3, 0x7);  /* D3 /7 */
7896   ins_encode( OpcP, RegOpc( dst ) );
7897   ins_pipe( ialu_reg_reg );
7898 %}
7899 
7900 // Logical shift right by one
7901 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7902   match(Set dst (URShiftI dst shift));
7903   effect(KILL cr);
7904 
7905   size(2);
7906   format %{ "SHR    $dst,$shift" %}
7907   opcode(0xD1, 0x5);  /* D1 /5 */
7908   ins_encode( OpcP, RegOpc( dst ) );
7909   ins_pipe( ialu_reg );
7910 %}
7911 
7912 // Logical Shift Right by 8-bit immediate
7913 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7914   match(Set dst (URShiftI dst shift));
7915   effect(KILL cr);
7916 
7917   size(3);
7918   format %{ "SHR    $dst,$shift" %}
7919   opcode(0xC1, 0x5);  /* C1 /5 ib */
7920   ins_encode( RegOpcImm( dst, shift) );
7921   ins_pipe( ialu_reg );
7922 %}
7923 
7924 
7925 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7926 // This idiom is used by the compiler for the i2b bytecode.
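// For example (illustrative only), a Java cast "b = (byte) x" reaches the matcher as
// (x << 24) >> 24 and is collapsed into a single sign-extending byte move:
//   movsbl dst, src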
7927 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7928   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7929 
7930   size(3);
7931   format %{ "MOVSX  $dst,$src :8" %}
7932   ins_encode %{
7933     __ movsbl($dst$$Register, $src$$Register);
7934   %}
7935   ins_pipe(ialu_reg_reg);
7936 %}
7937 
7938 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
7939 // This idiom is used by the compiler for the i2s bytecode.
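// Likewise (illustrative only), "s = (short) x" arrives as (x << 16) >> 16 and is
// emitted as a single sign-extending word move:  movswl dst, src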
7940 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7941   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7942 
7943   size(3);
7944   format %{ "MOVSX  $dst,$src :16" %}
7945   ins_encode %{
7946     __ movswl($dst$$Register, $src$$Register);
7947   %}
7948   ins_pipe(ialu_reg_reg);
7949 %}
7950 
7951 
7952 // Logical Shift Right by variable
7953 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7954   match(Set dst (URShiftI dst shift));
7955   effect(KILL cr);
7956 
7957   size(2);
7958   format %{ "SHR    $dst,$shift" %}
7959   opcode(0xD3, 0x5);  /* D3 /5 */
7960   ins_encode( OpcP, RegOpc( dst ) );
7961   ins_pipe( ialu_reg_reg );
7962 %}
7963 
7964 
7965 //----------Logical Instructions-----------------------------------------------
7966 //----------Integer Logical Instructions---------------------------------------
7967 // And Instructions
7968 // And Register with Register
7969 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7970   match(Set dst (AndI dst src));
7971   effect(KILL cr);
7972 
7973   size(2);
7974   format %{ "AND    $dst,$src" %}
7975   opcode(0x23);
7976   ins_encode( OpcP, RegReg( dst, src) );
7977   ins_pipe( ialu_reg_reg );
7978 %}
7979 
7980 // And Register with Immediate
7981 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7982   match(Set dst (AndI dst src));
7983   effect(KILL cr);
7984 
7985   format %{ "AND    $dst,$src" %}
7986   opcode(0x81,0x04);  /* Opcode 81 /4 */
7987   // ins_encode( RegImm( dst, src) );
7988   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7989   ins_pipe( ialu_reg );
7990 %}
7991 
7992 // And Register with Memory
7993 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7994   match(Set dst (AndI dst (LoadI src)));
7995   effect(KILL cr);
7996 
7997   ins_cost(125);
7998   format %{ "AND    $dst,$src" %}
7999   opcode(0x23);
8000   ins_encode( OpcP, RegMem( dst, src) );
8001   ins_pipe( ialu_reg_mem );
8002 %}
8003 
8004 // And Memory with Register
8005 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8006   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8007   effect(KILL cr);
8008 
8009   ins_cost(150);
8010   format %{ "AND    $dst,$src" %}
8011   opcode(0x21);  /* Opcode 21 /r */
8012   ins_encode( OpcP, RegMem( src, dst ) );
8013   ins_pipe( ialu_mem_reg );
8014 %}
8015 
8016 // And Memory with Immediate
8017 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8018   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8019   effect(KILL cr);
8020 
8021   ins_cost(125);
8022   format %{ "AND    $dst,$src" %}
8023   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8024   // ins_encode( MemImm( dst, src) );
8025   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8026   ins_pipe( ialu_mem_imm );
8027 %}
8028 
8029 // BMI1 instructions
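// The match rules below recognize the bit-trick shapes that the BMI1 instructions
// implement directly (a sketch of the identities, no new behavior):
//   ANDN   dst = ~src1 & src2        matched from (src1 ^ -1) & src2
//   BLSI   dst = src & -src          isolate lowest set bit
//   BLSMSK dst = src ^ (src - 1)     mask up to and including lowest set bit
//   BLSR   dst = src & (src - 1)     clear lowest set bit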
8030 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8031   match(Set dst (AndI (XorI src1 minus_1) src2));
8032   predicate(UseBMI1Instructions);
8033   effect(KILL cr);
8034 
8035   format %{ "ANDNL  $dst, $src1, $src2" %}
8036 
8037   ins_encode %{
8038     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8039   %}
8040   ins_pipe(ialu_reg);
8041 %}
8042 
8043 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8044   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8045   predicate(UseBMI1Instructions);
8046   effect(KILL cr);
8047 
8048   ins_cost(125);
8049   format %{ "ANDNL  $dst, $src1, $src2" %}
8050 
8051   ins_encode %{
8052     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8053   %}
8054   ins_pipe(ialu_reg_mem);
8055 %}
8056 
8057 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8058   match(Set dst (AndI (SubI imm_zero src) src));
8059   predicate(UseBMI1Instructions);
8060   effect(KILL cr);
8061 
8062   format %{ "BLSIL  $dst, $src" %}
8063 
8064   ins_encode %{
8065     __ blsil($dst$$Register, $src$$Register);
8066   %}
8067   ins_pipe(ialu_reg);
8068 %}
8069 
8070 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8071   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8072   predicate(UseBMI1Instructions);
8073   effect(KILL cr);
8074 
8075   ins_cost(125);
8076   format %{ "BLSIL  $dst, $src" %}
8077 
8078   ins_encode %{
8079     __ blsil($dst$$Register, $src$$Address);
8080   %}
8081   ins_pipe(ialu_reg_mem);
8082 %}
8083 
8084 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8085 %{
8086   match(Set dst (XorI (AddI src minus_1) src));
8087   predicate(UseBMI1Instructions);
8088   effect(KILL cr);
8089 
8090   format %{ "BLSMSKL $dst, $src" %}
8091 
8092   ins_encode %{
8093     __ blsmskl($dst$$Register, $src$$Register);
8094   %}
8095 
8096   ins_pipe(ialu_reg);
8097 %}
8098 
8099 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8100 %{
8101   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8102   predicate(UseBMI1Instructions);
8103   effect(KILL cr);
8104 
8105   ins_cost(125);
8106   format %{ "BLSMSKL $dst, $src" %}
8107 
8108   ins_encode %{
8109     __ blsmskl($dst$$Register, $src$$Address);
8110   %}
8111 
8112   ins_pipe(ialu_reg_mem);
8113 %}
8114 
8115 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8116 %{
8117   match(Set dst (AndI (AddI src minus_1) src) );
8118   predicate(UseBMI1Instructions);
8119   effect(KILL cr);
8120 
8121   format %{ "BLSRL  $dst, $src" %}
8122 
8123   ins_encode %{
8124     __ blsrl($dst$$Register, $src$$Register);
8125   %}
8126 
8127   ins_pipe(ialu_reg);
8128 %}
8129 
8130 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8131 %{
8132   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8133   predicate(UseBMI1Instructions);
8134   effect(KILL cr);
8135 
8136   ins_cost(125);
8137   format %{ "BLSRL  $dst, $src" %}
8138 
8139   ins_encode %{
8140     __ blsrl($dst$$Register, $src$$Address);
8141   %}
8142 
8143   ins_pipe(ialu_reg_mem);
8144 %}
8145 
8146 // Or Instructions
8147 // Or Register with Register
8148 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8149   match(Set dst (OrI dst src));
8150   effect(KILL cr);
8151 
8152   size(2);
8153   format %{ "OR     $dst,$src" %}
8154   opcode(0x0B);
8155   ins_encode( OpcP, RegReg( dst, src) );
8156   ins_pipe( ialu_reg_reg );
8157 %}
8158 
8159 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8160   match(Set dst (OrI dst (CastP2X src)));
8161   effect(KILL cr);
8162 
8163   size(2);
8164   format %{ "OR     $dst,$src" %}
8165   opcode(0x0B);
8166   ins_encode( OpcP, RegReg( dst, src) );
8167   ins_pipe( ialu_reg_reg );
8168 %}
8169 
8170 
8171 // Or Register with Immediate
8172 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8173   match(Set dst (OrI dst src));
8174   effect(KILL cr);
8175 
8176   format %{ "OR     $dst,$src" %}
8177   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8178   // ins_encode( RegImm( dst, src) );
8179   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8180   ins_pipe( ialu_reg );
8181 %}
8182 
8183 // Or Register with Memory
8184 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8185   match(Set dst (OrI dst (LoadI src)));
8186   effect(KILL cr);
8187 
8188   ins_cost(125);
8189   format %{ "OR     $dst,$src" %}
8190   opcode(0x0B);
8191   ins_encode( OpcP, RegMem( dst, src) );
8192   ins_pipe( ialu_reg_mem );
8193 %}
8194 
8195 // Or Memory with Register
8196 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8197   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8198   effect(KILL cr);
8199 
8200   ins_cost(150);
8201   format %{ "OR     $dst,$src" %}
8202   opcode(0x09);  /* Opcode 09 /r */
8203   ins_encode( OpcP, RegMem( src, dst ) );
8204   ins_pipe( ialu_mem_reg );
8205 %}
8206 
8207 // Or Memory with Immediate
8208 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8209   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8210   effect(KILL cr);
8211 
8212   ins_cost(125);
8213   format %{ "OR     $dst,$src" %}
8214   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8215   // ins_encode( MemImm( dst, src) );
8216   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8217   ins_pipe( ialu_mem_imm );
8218 %}
8219 
8220 // ROL/ROR
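// Rotates are not separate ideal nodes here; they are matched from the OR of two
// opposite shifts, e.g. (x << n) | (x >>> (32 - n)) for a left rotate by n, and then
// expanded into the ROL/ROR forms defined below.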
8221 // ROL expand
8222 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8223   effect(USE_DEF dst, USE shift, KILL cr);
8224 
8225   format %{ "ROL    $dst, $shift" %}
8226   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8227   ins_encode( OpcP, RegOpc( dst ));
8228   ins_pipe( ialu_reg );
8229 %}
8230 
8231 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8232   effect(USE_DEF dst, USE shift, KILL cr);
8233 
8234   format %{ "ROL    $dst, $shift" %}
8235   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8236   ins_encode( RegOpcImm(dst, shift) );
8237   ins_pipe(ialu_reg);
8238 %}
8239 
8240 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8241   effect(USE_DEF dst, USE shift, KILL cr);
8242 
8243   format %{ "ROL    $dst, $shift" %}
8244   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8245   ins_encode(OpcP, RegOpc(dst));
8246   ins_pipe( ialu_reg_reg );
8247 %}
8248 // end of ROL expand
8249 
8250 // ROL 32bit by one once
8251 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8252   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8253 
8254   expand %{
8255     rolI_eReg_imm1(dst, lshift, cr);
8256   %}
8257 %}
8258 
8259 // ROL 32bit var by imm8 once
8260 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8261   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8262   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8263 
8264   expand %{
8265     rolI_eReg_imm8(dst, lshift, cr);
8266   %}
8267 %}
8268 
8269 // ROL 32bit var by var once
8270 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8271   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8272 
8273   expand %{
8274     rolI_eReg_CL(dst, shift, cr);
8275   %}
8276 %}
8277 
8278 // ROL 32bit var by var once
8279 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8280   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8281 
8282   expand %{
8283     rolI_eReg_CL(dst, shift, cr);
8284   %}
8285 %}
8286 
8287 // ROR expand
8288 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8289   effect(USE_DEF dst, USE shift, KILL cr);
8290 
8291   format %{ "ROR    $dst, $shift" %}
8292   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8293   ins_encode( OpcP, RegOpc( dst ) );
8294   ins_pipe( ialu_reg );
8295 %}
8296 
8297 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8298   effect (USE_DEF dst, USE shift, KILL cr);
8299 
8300   format %{ "ROR    $dst, $shift" %}
8301   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8302   ins_encode( RegOpcImm(dst, shift) );
8303   ins_pipe( ialu_reg );
8304 %}
8305 
8306 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8307   effect(USE_DEF dst, USE shift, KILL cr);
8308 
8309   format %{ "ROR    $dst, $shift" %}
8310   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8311   ins_encode(OpcP, RegOpc(dst));
8312   ins_pipe( ialu_reg_reg );
8313 %}
8314 // end of ROR expand
8315 
8316 // ROR right once
8317 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8318   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8319 
8320   expand %{
8321     rorI_eReg_imm1(dst, rshift, cr);
8322   %}
8323 %}
8324 
8325 // ROR 32bit by immI8 once
8326 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8327   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8328   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8329 
8330   expand %{
8331     rorI_eReg_imm8(dst, rshift, cr);
8332   %}
8333 %}
8334 
8335 // ROR 32bit var by var once
8336 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8337   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8338 
8339   expand %{
8340     rorI_eReg_CL(dst, shift, cr);
8341   %}
8342 %}
8343 
8344 // ROR 32bit var by var once
8345 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8346   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8347 
8348   expand %{
8349     rorI_eReg_CL(dst, shift, cr);
8350   %}
8351 %}
8352 
8353 // Xor Instructions
8354 // Xor Register with Register
8355 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8356   match(Set dst (XorI dst src));
8357   effect(KILL cr);
8358 
8359   size(2);
8360   format %{ "XOR    $dst,$src" %}
8361   opcode(0x33);
8362   ins_encode( OpcP, RegReg( dst, src) );
8363   ins_pipe( ialu_reg_reg );
8364 %}
8365 
8366 // Xor Register with Immediate -1
8367 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8368   match(Set dst (XorI dst imm));
8369 
8370   size(2);
8371   format %{ "NOT    $dst" %}
8372   ins_encode %{
8373      __ notl($dst$$Register);
8374   %}
8375   ins_pipe( ialu_reg );
8376 %}
8377 
8378 // Xor Register with Immediate
8379 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8380   match(Set dst (XorI dst src));
8381   effect(KILL cr);
8382 
8383   format %{ "XOR    $dst,$src" %}
8384   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8385   // ins_encode( RegImm( dst, src) );
8386   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8387   ins_pipe( ialu_reg );
8388 %}
8389 
8390 // Xor Register with Memory
8391 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8392   match(Set dst (XorI dst (LoadI src)));
8393   effect(KILL cr);
8394 
8395   ins_cost(125);
8396   format %{ "XOR    $dst,$src" %}
8397   opcode(0x33);
8398   ins_encode( OpcP, RegMem(dst, src) );
8399   ins_pipe( ialu_reg_mem );
8400 %}
8401 
8402 // Xor Memory with Register
8403 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8404   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8405   effect(KILL cr);
8406 
8407   ins_cost(150);
8408   format %{ "XOR    $dst,$src" %}
8409   opcode(0x31);  /* Opcode 31 /r */
8410   ins_encode( OpcP, RegMem( src, dst ) );
8411   ins_pipe( ialu_mem_reg );
8412 %}
8413 
8414 // Xor Memory with Immediate
8415 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8416   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8417   effect(KILL cr);
8418 
8419   ins_cost(125);
8420   format %{ "XOR    $dst,$src" %}
8421   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8422   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8423   ins_pipe( ialu_mem_imm );
8424 %}
8425 
8426 //----------Convert Int to Boolean---------------------------------------------
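// Conv2B turns a nonzero int (or pointer) into 1 and zero into 0.  The expansions
// below copy src and then use NEG/ADC: NEG dst sets CF exactly when src != 0, and
// ADC dst,src then computes (-src + src + CF) = CF, i.e. the 0/1 result, without a
// branch.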
8427 
8428 instruct movI_nocopy(rRegI dst, rRegI src) %{
8429   effect( DEF dst, USE src );
8430   format %{ "MOV    $dst,$src" %}
8431   ins_encode( enc_Copy( dst, src) );
8432   ins_pipe( ialu_reg_reg );
8433 %}
8434 
8435 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8436   effect( USE_DEF dst, USE src, KILL cr );
8437 
8438   size(4);
8439   format %{ "NEG    $dst\n\t"
8440             "ADC    $dst,$src" %}
8441   ins_encode( neg_reg(dst),
8442               OpcRegReg(0x13,dst,src) );
8443   ins_pipe( ialu_reg_reg_long );
8444 %}
8445 
8446 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8447   match(Set dst (Conv2B src));
8448 
8449   expand %{
8450     movI_nocopy(dst,src);
8451     ci2b(dst,src,cr);
8452   %}
8453 %}
8454 
8455 instruct movP_nocopy(rRegI dst, eRegP src) %{
8456   effect( DEF dst, USE src );
8457   format %{ "MOV    $dst,$src" %}
8458   ins_encode( enc_Copy( dst, src) );
8459   ins_pipe( ialu_reg_reg );
8460 %}
8461 
8462 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8463   effect( USE_DEF dst, USE src, KILL cr );
8464   format %{ "NEG    $dst\n\t"
8465             "ADC    $dst,$src" %}
8466   ins_encode( neg_reg(dst),
8467               OpcRegReg(0x13,dst,src) );
8468   ins_pipe( ialu_reg_reg_long );
8469 %}
8470 
8471 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8472   match(Set dst (Conv2B src));
8473 
8474   expand %{
8475     movP_nocopy(dst,src);
8476     cp2b(dst,src,cr);
8477   %}
8478 %}
8479 
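// CmpLTMask produces all ones (-1) when p < q and zero otherwise; the XOR/CMP/SETlt/NEG
// sequence below builds that mask without a branch.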
8480 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8481   match(Set dst (CmpLTMask p q));
8482   effect(KILL cr);
8483   ins_cost(400);
8484 
8485   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
8486   format %{ "XOR    $dst,$dst\n\t"
8487             "CMP    $p,$q\n\t"
8488             "SETlt  $dst\n\t"
8489             "NEG    $dst" %}
8490   ins_encode %{
8491     Register Rp = $p$$Register;
8492     Register Rq = $q$$Register;
8493     Register Rd = $dst$$Register;
8494     Label done;
8495     __ xorl(Rd, Rd);
8496     __ cmpl(Rp, Rq);
8497     __ setb(Assembler::less, Rd);
8498     __ negl(Rd);
8499   %}
8500 
8501   ins_pipe(pipe_slow);
8502 %}
8503 
8504 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8505   match(Set dst (CmpLTMask dst zero));
8506   effect(DEF dst, KILL cr);
8507   ins_cost(100);
8508 
8509   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8510   ins_encode %{
8511     __ sarl($dst$$Register, 31);
8512   %}
8513   ins_pipe(ialu_reg);
8514 %}
8515 
8516 /* better to save a register than avoid a branch */
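// In effect this computes p -= q and then adds y back only when the subtraction went
// negative, i.e. p = (p < q) ? p - q + y : p - q, folding the CmpLTMask/AndI/AddI
// pattern into a single compare-and-branch.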
8517 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8518   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8519   effect(KILL cr);
8520   ins_cost(400);
8521   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8522             "JGE    done\n\t"
8523             "ADD    $p,$y\n"
8524             "done:  " %}
8525   ins_encode %{
8526     Register Rp = $p$$Register;
8527     Register Rq = $q$$Register;
8528     Register Ry = $y$$Register;
8529     Label done;
8530     __ subl(Rp, Rq);
8531     __ jccb(Assembler::greaterEqual, done);
8532     __ addl(Rp, Ry);
8533     __ bind(done);
8534   %}
8535 
8536   ins_pipe(pipe_cmplt);
8537 %}
8538 
8539 /* better to save a register than avoid a branch */
8540 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8541   match(Set y (AndI (CmpLTMask p q) y));
8542   effect(KILL cr);
8543 
8544   ins_cost(300);
8545 
8546   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8547             "JLT      done\n\t"
8548             "XORL     $y, $y\n"
8549             "done:  " %}
8550   ins_encode %{
8551     Register Rp = $p$$Register;
8552     Register Rq = $q$$Register;
8553     Register Ry = $y$$Register;
8554     Label done;
8555     __ cmpl(Rp, Rq);
8556     __ jccb(Assembler::less, done);
8557     __ xorl(Ry, Ry);
8558     __ bind(done);
8559   %}
8560 
8561   ins_pipe(pipe_cmplt);
8562 %}
8563 
8564 /* If I enable this, I encourage spilling in the inner loop of compress.
8565 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8566   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8567 */
8568 //----------Overflow Math Instructions-----------------------------------------
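// These instructions only produce condition codes; the matcher pairs them with a
// jump-on-overflow, which is how the overflow-checking math intrinsics (Math.addExact
// and friends) detect overflow.  Forms that really execute the operation (ADD, NEG,
// IMUL) must USE_KILL the register they overwrite; the subtract checks use CMP and
// leave both operands untouched.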
8569 
8570 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8571 %{
8572   match(Set cr (OverflowAddI op1 op2));
8573   effect(DEF cr, USE_KILL op1, USE op2);
8574 
8575   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8576 
8577   ins_encode %{
8578     __ addl($op1$$Register, $op2$$Register);
8579   %}
8580   ins_pipe(ialu_reg_reg);
8581 %}
8582 
8583 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8584 %{
8585   match(Set cr (OverflowAddI op1 op2));
8586   effect(DEF cr, USE_KILL op1, USE op2);
8587 
8588   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8589 
8590   ins_encode %{
8591     __ addl($op1$$Register, $op2$$constant);
8592   %}
8593   ins_pipe(ialu_reg_reg);
8594 %}
8595 
8596 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8597 %{
8598   match(Set cr (OverflowSubI op1 op2));
8599 
8600   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8601   ins_encode %{
8602     __ cmpl($op1$$Register, $op2$$Register);
8603   %}
8604   ins_pipe(ialu_reg_reg);
8605 %}
8606 
8607 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8608 %{
8609   match(Set cr (OverflowSubI op1 op2));
8610 
8611   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8612   ins_encode %{
8613     __ cmpl($op1$$Register, $op2$$constant);
8614   %}
8615   ins_pipe(ialu_reg_reg);
8616 %}
8617 
8618 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8619 %{
8620   match(Set cr (OverflowSubI zero op2));
8621   effect(DEF cr, USE_KILL op2);
8622 
8623   format %{ "NEG    $op2\t# overflow check int" %}
8624   ins_encode %{
8625     __ negl($op2$$Register);
8626   %}
8627   ins_pipe(ialu_reg_reg);
8628 %}
8629 
8630 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8631 %{
8632   match(Set cr (OverflowMulI op1 op2));
8633   effect(DEF cr, USE_KILL op1, USE op2);
8634 
8635   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8636   ins_encode %{
8637     __ imull($op1$$Register, $op2$$Register);
8638   %}
8639   ins_pipe(ialu_reg_reg_alu0);
8640 %}
8641 
8642 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8643 %{
8644   match(Set cr (OverflowMulI op1 op2));
8645   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8646 
8647   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8648   ins_encode %{
8649     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8650   %}
8651   ins_pipe(ialu_reg_reg_alu0);
8652 %}
8653 
8654 //----------Long Instructions------------------------------------------------
8655 // Add Long Register with Register
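// A long lives in a 32-bit register pair (lo/hi), so 64-bit arithmetic is done in two
// steps: the low halves with the plain instruction and the high halves with the
// carry/borrow-propagating form (ADD/ADC, SUB/SBB).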
8656 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8657   match(Set dst (AddL dst src));
8658   effect(KILL cr);
8659   ins_cost(200);
8660   format %{ "ADD    $dst.lo,$src.lo\n\t"
8661             "ADC    $dst.hi,$src.hi" %}
8662   opcode(0x03, 0x13);
8663   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8664   ins_pipe( ialu_reg_reg_long );
8665 %}
8666 
8667 // Add Long Register with Immediate
8668 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8669   match(Set dst (AddL dst src));
8670   effect(KILL cr);
8671   format %{ "ADD    $dst.lo,$src.lo\n\t"
8672             "ADC    $dst.hi,$src.hi" %}
8673   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8674   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8675   ins_pipe( ialu_reg_long );
8676 %}
8677 
8678 // Add Long Register with Memory
8679 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8680   match(Set dst (AddL dst (LoadL mem)));
8681   effect(KILL cr);
8682   ins_cost(125);
8683   format %{ "ADD    $dst.lo,$mem\n\t"
8684             "ADC    $dst.hi,$mem+4" %}
8685   opcode(0x03, 0x13);
8686   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8687   ins_pipe( ialu_reg_long_mem );
8688 %}
8689 
8690 // Subtract Long Register with Register.
8691 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8692   match(Set dst (SubL dst src));
8693   effect(KILL cr);
8694   ins_cost(200);
8695   format %{ "SUB    $dst.lo,$src.lo\n\t"
8696             "SBB    $dst.hi,$src.hi" %}
8697   opcode(0x2B, 0x1B);
8698   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8699   ins_pipe( ialu_reg_reg_long );
8700 %}
8701 
8702 // Subtract Long Register with Immediate
8703 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8704   match(Set dst (SubL dst src));
8705   effect(KILL cr);
8706   format %{ "SUB    $dst.lo,$src.lo\n\t"
8707             "SBB    $dst.hi,$src.hi" %}
8708   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8709   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8710   ins_pipe( ialu_reg_long );
8711 %}
8712 
8713 // Subtract Long Register with Memory
8714 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8715   match(Set dst (SubL dst (LoadL mem)));
8716   effect(KILL cr);
8717   ins_cost(125);
8718   format %{ "SUB    $dst.lo,$mem\n\t"
8719             "SBB    $dst.hi,$mem+4" %}
8720   opcode(0x2B, 0x1B);
8721   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8722   ins_pipe( ialu_reg_long_mem );
8723 %}
8724 
8725 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8726   match(Set dst (SubL zero dst));
8727   effect(KILL cr);
8728   ins_cost(300);
8729   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8730   ins_encode( neg_long(dst) );
8731   ins_pipe( ialu_reg_reg_long );
8732 %}
8733 
8734 // And Long Register with Register
8735 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8736   match(Set dst (AndL dst src));
8737   effect(KILL cr);
8738   format %{ "AND    $dst.lo,$src.lo\n\t"
8739             "AND    $dst.hi,$src.hi" %}
8740   opcode(0x23,0x23);
8741   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8742   ins_pipe( ialu_reg_reg_long );
8743 %}
8744 
8745 // And Long Register with Immediate
8746 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8747   match(Set dst (AndL dst src));
8748   effect(KILL cr);
8749   format %{ "AND    $dst.lo,$src.lo\n\t"
8750             "AND    $dst.hi,$src.hi" %}
8751   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8752   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8753   ins_pipe( ialu_reg_long );
8754 %}
8755 
8756 // And Long Register with Memory
8757 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8758   match(Set dst (AndL dst (LoadL mem)));
8759   effect(KILL cr);
8760   ins_cost(125);
8761   format %{ "AND    $dst.lo,$mem\n\t"
8762             "AND    $dst.hi,$mem+4" %}
8763   opcode(0x23, 0x23);
8764   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8765   ins_pipe( ialu_reg_long_mem );
8766 %}
8767 
8768 // BMI1 instructions
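// 64-bit versions of the BMI1 idioms on a 32-bit register pair.  The low word is
// processed first; the flags it produces (ZF for BLSI, CF for BLSMSK/BLSR) say whether
// the interesting bit was already found there, and the BMI1 op is applied to the high
// word only when it was not.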
8769 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8770   match(Set dst (AndL (XorL src1 minus_1) src2));
8771   predicate(UseBMI1Instructions);
8772   effect(KILL cr, TEMP dst);
8773 
8774   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8775             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8776          %}
8777 
8778   ins_encode %{
8779     Register Rdst = $dst$$Register;
8780     Register Rsrc1 = $src1$$Register;
8781     Register Rsrc2 = $src2$$Register;
8782     __ andnl(Rdst, Rsrc1, Rsrc2);
8783     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8784   %}
8785   ins_pipe(ialu_reg_reg_long);
8786 %}
8787 
8788 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8789   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8790   predicate(UseBMI1Instructions);
8791   effect(KILL cr, TEMP dst);
8792 
8793   ins_cost(125);
8794   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8795             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8796          %}
8797 
8798   ins_encode %{
8799     Register Rdst = $dst$$Register;
8800     Register Rsrc1 = $src1$$Register;
8801     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8802 
8803     __ andnl(Rdst, Rsrc1, $src2$$Address);
8804     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8805   %}
8806   ins_pipe(ialu_reg_mem);
8807 %}
8808 
8809 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8810   match(Set dst (AndL (SubL imm_zero src) src));
8811   predicate(UseBMI1Instructions);
8812   effect(KILL cr, TEMP dst);
8813 
8814   format %{ "MOVL   $dst.hi, 0\n\t"
8815             "BLSIL  $dst.lo, $src.lo\n\t"
8816             "JNZ    done\n\t"
8817             "BLSIL  $dst.hi, $src.hi\n"
8818             "done:"
8819          %}
8820 
8821   ins_encode %{
8822     Label done;
8823     Register Rdst = $dst$$Register;
8824     Register Rsrc = $src$$Register;
8825     __ movl(HIGH_FROM_LOW(Rdst), 0);
8826     __ blsil(Rdst, Rsrc);
8827     __ jccb(Assembler::notZero, done);
8828     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8829     __ bind(done);
8830   %}
8831   ins_pipe(ialu_reg);
8832 %}
8833 
8834 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8835   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8836   predicate(UseBMI1Instructions);
8837   effect(KILL cr, TEMP dst);
8838 
8839   ins_cost(125);
8840   format %{ "MOVL   $dst.hi, 0\n\t"
8841             "BLSIL  $dst.lo, $src\n\t"
8842             "JNZ    done\n\t"
8843             "BLSIL  $dst.hi, $src+4\n"
8844             "done:"
8845          %}
8846 
8847   ins_encode %{
8848     Label done;
8849     Register Rdst = $dst$$Register;
8850     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8851 
8852     __ movl(HIGH_FROM_LOW(Rdst), 0);
8853     __ blsil(Rdst, $src$$Address);
8854     __ jccb(Assembler::notZero, done);
8855     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8856     __ bind(done);
8857   %}
8858   ins_pipe(ialu_reg_mem);
8859 %}
8860 
8861 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8862 %{
8863   match(Set dst (XorL (AddL src minus_1) src));
8864   predicate(UseBMI1Instructions);
8865   effect(KILL cr, TEMP dst);
8866 
8867   format %{ "MOVL    $dst.hi, 0\n\t"
8868             "BLSMSKL $dst.lo, $src.lo\n\t"
8869             "JNC     done\n\t"
8870             "BLSMSKL $dst.hi, $src.hi\n"
8871             "done:"
8872          %}
8873 
8874   ins_encode %{
8875     Label done;
8876     Register Rdst = $dst$$Register;
8877     Register Rsrc = $src$$Register;
8878     __ movl(HIGH_FROM_LOW(Rdst), 0);
8879     __ blsmskl(Rdst, Rsrc);
8880     __ jccb(Assembler::carryClear, done);
8881     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8882     __ bind(done);
8883   %}
8884 
8885   ins_pipe(ialu_reg);
8886 %}
8887 
8888 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8889 %{
8890   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8891   predicate(UseBMI1Instructions);
8892   effect(KILL cr, TEMP dst);
8893 
8894   ins_cost(125);
8895   format %{ "MOVL    $dst.hi, 0\n\t"
8896             "BLSMSKL $dst.lo, $src\n\t"
8897             "JNC     done\n\t"
8898             "BLSMSKL $dst.hi, $src+4\n"
8899             "done:"
8900          %}
8901 
8902   ins_encode %{
8903     Label done;
8904     Register Rdst = $dst$$Register;
8905     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8906 
8907     __ movl(HIGH_FROM_LOW(Rdst), 0);
8908     __ blsmskl(Rdst, $src$$Address);
8909     __ jccb(Assembler::carryClear, done);
8910     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8911     __ bind(done);
8912   %}
8913 
8914   ins_pipe(ialu_reg_mem);
8915 %}
8916 
8917 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8918 %{
8919   match(Set dst (AndL (AddL src minus_1) src) );
8920   predicate(UseBMI1Instructions);
8921   effect(KILL cr, TEMP dst);
8922 
8923   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8924             "BLSRL  $dst.lo, $src.lo\n\t"
8925             "JNC    done\n\t"
8926             "BLSRL  $dst.hi, $src.hi\n"
8927             "done:"
8928   %}
8929 
8930   ins_encode %{
8931     Label done;
8932     Register Rdst = $dst$$Register;
8933     Register Rsrc = $src$$Register;
8934     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8935     __ blsrl(Rdst, Rsrc);
8936     __ jccb(Assembler::carryClear, done);
8937     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8938     __ bind(done);
8939   %}
8940 
8941   ins_pipe(ialu_reg);
8942 %}
8943 
8944 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8945 %{
8946   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8947   predicate(UseBMI1Instructions);
8948   effect(KILL cr, TEMP dst);
8949 
8950   ins_cost(125);
8951   format %{ "MOVL   $dst.hi, $src+4\n\t"
8952             "BLSRL  $dst.lo, $src\n\t"
8953             "JNC    done\n\t"
8954             "BLSRL  $dst.hi, $src+4\n"
8955             "done:"
8956   %}
8957 
8958   ins_encode %{
8959     Label done;
8960     Register Rdst = $dst$$Register;
8961     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8962     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8963     __ blsrl(Rdst, $src$$Address);
8964     __ jccb(Assembler::carryClear, done);
8965     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8966     __ bind(done);
8967   %}
8968 
8969   ins_pipe(ialu_reg_mem);
8970 %}
8971 
8972 // Or Long Register with Register
8973 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8974   match(Set dst (OrL dst src));
8975   effect(KILL cr);
8976   format %{ "OR     $dst.lo,$src.lo\n\t"
8977             "OR     $dst.hi,$src.hi" %}
8978   opcode(0x0B,0x0B);
8979   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8980   ins_pipe( ialu_reg_reg_long );
8981 %}
8982 
8983 // Or Long Register with Immediate
8984 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8985   match(Set dst (OrL dst src));
8986   effect(KILL cr);
8987   format %{ "OR     $dst.lo,$src.lo\n\t"
8988             "OR     $dst.hi,$src.hi" %}
8989   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8990   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8991   ins_pipe( ialu_reg_long );
8992 %}
8993 
8994 // Or Long Register with Memory
8995 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8996   match(Set dst (OrL dst (LoadL mem)));
8997   effect(KILL cr);
8998   ins_cost(125);
8999   format %{ "OR     $dst.lo,$mem\n\t"
9000             "OR     $dst.hi,$mem+4" %}
9001   opcode(0x0B,0x0B);
9002   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9003   ins_pipe( ialu_reg_long_mem );
9004 %}
9005 
9006 // Xor Long Register with Register
9007 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9008   match(Set dst (XorL dst src));
9009   effect(KILL cr);
9010   format %{ "XOR    $dst.lo,$src.lo\n\t"
9011             "XOR    $dst.hi,$src.hi" %}
9012   opcode(0x33,0x33);
9013   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9014   ins_pipe( ialu_reg_reg_long );
9015 %}
9016 
9017 // Xor Long Register with Immediate -1
9018 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9019   match(Set dst (XorL dst imm));
9020   format %{ "NOT    $dst.lo\n\t"
9021             "NOT    $dst.hi" %}
9022   ins_encode %{
9023      __ notl($dst$$Register);
9024      __ notl(HIGH_FROM_LOW($dst$$Register));
9025   %}
9026   ins_pipe( ialu_reg_long );
9027 %}
9028 
9029 // Xor Long Register with Immediate
9030 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9031   match(Set dst (XorL dst src));
9032   effect(KILL cr);
9033   format %{ "XOR    $dst.lo,$src.lo\n\t"
9034             "XOR    $dst.hi,$src.hi" %}
9035   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9036   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9037   ins_pipe( ialu_reg_long );
9038 %}
9039 
9040 // Xor Long Register with Memory
9041 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9042   match(Set dst (XorL dst (LoadL mem)));
9043   effect(KILL cr);
9044   ins_cost(125);
9045   format %{ "XOR    $dst.lo,$mem\n\t"
9046             "XOR    $dst.hi,$mem+4" %}
9047   opcode(0x33,0x33);
9048   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9049   ins_pipe( ialu_reg_long_mem );
9050 %}
9051 
9052 // Shift Left Long by 1
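// Shifting a register pair left by one is just doubling: ADD lo,lo doubles the low
// word and ADC hi,hi doubles the high word while pulling in the carried-out bit.
// The by-2 and by-3 forms below simply repeat the pair.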
9053 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9054   predicate(UseNewLongLShift);
9055   match(Set dst (LShiftL dst cnt));
9056   effect(KILL cr);
9057   ins_cost(100);
9058   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9059             "ADC    $dst.hi,$dst.hi" %}
9060   ins_encode %{
9061     __ addl($dst$$Register,$dst$$Register);
9062     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9063   %}
9064   ins_pipe( ialu_reg_long );
9065 %}
9066 
9067 // Shift Left Long by 2
9068 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9069   predicate(UseNewLongLShift);
9070   match(Set dst (LShiftL dst cnt));
9071   effect(KILL cr);
9072   ins_cost(100);
9073   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9074             "ADC    $dst.hi,$dst.hi\n\t"
9075             "ADD    $dst.lo,$dst.lo\n\t"
9076             "ADC    $dst.hi,$dst.hi" %}
9077   ins_encode %{
9078     __ addl($dst$$Register,$dst$$Register);
9079     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9080     __ addl($dst$$Register,$dst$$Register);
9081     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9082   %}
9083   ins_pipe( ialu_reg_long );
9084 %}
9085 
9086 // Shift Left Long by 3
9087 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9088   predicate(UseNewLongLShift);
9089   match(Set dst (LShiftL dst cnt));
9090   effect(KILL cr);
9091   ins_cost(100);
9092   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9093             "ADC    $dst.hi,$dst.hi\n\t"
9094             "ADD    $dst.lo,$dst.lo\n\t"
9095             "ADC    $dst.hi,$dst.hi\n\t"
9096             "ADD    $dst.lo,$dst.lo\n\t"
9097             "ADC    $dst.hi,$dst.hi" %}
9098   ins_encode %{
9099     __ addl($dst$$Register,$dst$$Register);
9100     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9101     __ addl($dst$$Register,$dst$$Register);
9102     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9103     __ addl($dst$$Register,$dst$$Register);
9104     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9105   %}
9106   ins_pipe( ialu_reg_long );
9107 %}
9108 
9109 // Shift Left Long by 1-31
9110 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9111   match(Set dst (LShiftL dst cnt));
9112   effect(KILL cr);
9113   ins_cost(200);
9114   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9115             "SHL    $dst.lo,$cnt" %}
9116   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9117   ins_encode( move_long_small_shift(dst,cnt) );
9118   ins_pipe( ialu_reg_long );
9119 %}
9120 
9121 // Shift Left Long by 32-63
9122 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9123   match(Set dst (LShiftL dst cnt));
9124   effect(KILL cr);
9125   ins_cost(300);
9126   format %{ "MOV    $dst.hi,$dst.lo\n"
9127           "\tSHL    $dst.hi,$cnt-32\n"
9128           "\tXOR    $dst.lo,$dst.lo" %}
9129   opcode(0xC1, 0x4);  /* C1 /4 ib */
9130   ins_encode( move_long_big_shift_clr(dst,cnt) );
9131   ins_pipe( ialu_reg_long );
9132 %}
9133 
9134 // Shift Left Long by variable
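// The hardware shift count is taken mod 32, so a variable 64-bit shift first tests
// bit 5 of ECX: for counts >= 32 the low word is moved into the high word (and the
// low word cleared) before the SHLD/SHL pair handles the remaining 0-31 bits.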
9135 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9136   match(Set dst (LShiftL dst shift));
9137   effect(KILL cr);
9138   ins_cost(500+200);
9139   size(17);
9140   format %{ "TEST   $shift,32\n\t"
9141             "JEQ,s  small\n\t"
9142             "MOV    $dst.hi,$dst.lo\n\t"
9143             "XOR    $dst.lo,$dst.lo\n"
9144     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9145             "SHL    $dst.lo,$shift" %}
9146   ins_encode( shift_left_long( dst, shift ) );
9147   ins_pipe( pipe_slow );
9148 %}
9149 
9150 // Shift Right Long by 1-31
9151 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9152   match(Set dst (URShiftL dst cnt));
9153   effect(KILL cr);
9154   ins_cost(200);
9155   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9156             "SHR    $dst.hi,$cnt" %}
9157   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9158   ins_encode( move_long_small_shift(dst,cnt) );
9159   ins_pipe( ialu_reg_long );
9160 %}
9161 
9162 // Shift Right Long by 32-63
9163 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9164   match(Set dst (URShiftL dst cnt));
9165   effect(KILL cr);
9166   ins_cost(300);
9167   format %{ "MOV    $dst.lo,$dst.hi\n"
9168           "\tSHR    $dst.lo,$cnt-32\n"
9169           "\tXOR    $dst.hi,$dst.hi" %}
9170   opcode(0xC1, 0x5);  /* C1 /5 ib */
9171   ins_encode( move_long_big_shift_clr(dst,cnt) );
9172   ins_pipe( ialu_reg_long );
9173 %}
9174 
9175 // Shift Right Long by variable
9176 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9177   match(Set dst (URShiftL dst shift));
9178   effect(KILL cr);
9179   ins_cost(600);
9180   size(17);
9181   format %{ "TEST   $shift,32\n\t"
9182             "JEQ,s  small\n\t"
9183             "MOV    $dst.lo,$dst.hi\n\t"
9184             "XOR    $dst.hi,$dst.hi\n"
9185     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9186             "SHR    $dst.hi,$shift" %}
9187   ins_encode( shift_right_long( dst, shift ) );
9188   ins_pipe( pipe_slow );
9189 %}
9190 
9191 // Shift Right Long by 1-31
9192 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9193   match(Set dst (RShiftL dst cnt));
9194   effect(KILL cr);
9195   ins_cost(200);
9196   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9197             "SAR    $dst.hi,$cnt" %}
9198   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9199   ins_encode( move_long_small_shift(dst,cnt) );
9200   ins_pipe( ialu_reg_long );
9201 %}
9202 
9203 // Shift Right Long by 32-63
9204 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9205   match(Set dst (RShiftL dst cnt));
9206   effect(KILL cr);
9207   ins_cost(300);
9208   format %{ "MOV    $dst.lo,$dst.hi\n"
9209           "\tSAR    $dst.lo,$cnt-32\n"
9210           "\tSAR    $dst.hi,31" %}
9211   opcode(0xC1, 0x7);  /* C1 /7 ib */
9212   ins_encode( move_long_big_shift_sign(dst,cnt) );
9213   ins_pipe( ialu_reg_long );
9214 %}
9215 
9216 // Shift Right arithmetic Long by variable
9217 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9218   match(Set dst (RShiftL dst shift));
9219   effect(KILL cr);
9220   ins_cost(600);
9221   size(18);
9222   format %{ "TEST   $shift,32\n\t"
9223             "JEQ,s  small\n\t"
9224             "MOV    $dst.lo,$dst.hi\n\t"
9225             "SAR    $dst.hi,31\n"
9226     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9227             "SAR    $dst.hi,$shift" %}
9228   ins_encode( shift_right_arith_long( dst, shift ) );
9229   ins_pipe( pipe_slow );
9230 %}
9231 
9232 
9233 //----------Double Instructions------------------------------------------------
9234 // Double Math
9235 
9236 // Compare & branch
9237 
9238 // P6 version of float compare, sets condition codes in EFLAGS
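// FUCOMIP reports an unordered operand (a NaN) by setting PF.  The fixup below skips
// ahead on JNP in the ordered case; otherwise AH=1/SAHF forces CF, so an unordered
// compare behaves like "less than" for the flag users (the pre-P6 version below makes
// the same choice: "unordered treat as LT").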
9239 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9240   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9241   match(Set cr (CmpD src1 src2));
9242   effect(KILL rax);
9243   ins_cost(150);
9244   format %{ "FLD    $src1\n\t"
9245             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9246             "JNP    exit\n\t"
9247             "MOV    ah,1       // saw a NaN, set CF\n\t"
9248             "SAHF\n"
9249      "exit:\tNOP               // avoid branch to branch" %}
9250   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9251   ins_encode( Push_Reg_DPR(src1),
9252               OpcP, RegOpc(src2),
9253               cmpF_P6_fixup );
9254   ins_pipe( pipe_slow );
9255 %}
9256 
9257 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9258   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9259   match(Set cr (CmpD src1 src2));
9260   ins_cost(150);
9261   format %{ "FLD    $src1\n\t"
9262             "FUCOMIP ST,$src2  // P6 instruction" %}
9263   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9264   ins_encode( Push_Reg_DPR(src1),
9265               OpcP, RegOpc(src2));
9266   ins_pipe( pipe_slow );
9267 %}
9268 
9269 // Compare & branch
9270 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9271   predicate(UseSSE<=1);
9272   match(Set cr (CmpD src1 src2));
9273   effect(KILL rax);
9274   ins_cost(200);
9275   format %{ "FLD    $src1\n\t"
9276             "FCOMp  $src2\n\t"
9277             "FNSTSW AX\n\t"
9278             "TEST   AX,0x400\n\t"
9279             "JZ,s   flags\n\t"
9280             "MOV    AH,1\t# unordered treat as LT\n"
9281     "flags:\tSAHF" %}
9282   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9283   ins_encode( Push_Reg_DPR(src1),
9284               OpcP, RegOpc(src2),
9285               fpu_flags);
9286   ins_pipe( pipe_slow );
9287 %}
9288 
9289 // Compare vs zero into -1,0,1
9290 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9291   predicate(UseSSE<=1);
9292   match(Set dst (CmpD3 src1 zero));
9293   effect(KILL cr, KILL rax);
9294   ins_cost(280);
9295   format %{ "FTSTD  $dst,$src1" %}
9296   opcode(0xE4, 0xD9);
9297   ins_encode( Push_Reg_DPR(src1),
9298               OpcS, OpcP, PopFPU,
9299               CmpF_Result(dst));
9300   ins_pipe( pipe_slow );
9301 %}
9302 
9303 // Compare into -1,0,1
9304 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9305   predicate(UseSSE<=1);
9306   match(Set dst (CmpD3 src1 src2));
9307   effect(KILL cr, KILL rax);
9308   ins_cost(300);
9309   format %{ "FCMPD  $dst,$src1,$src2" %}
9310   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9311   ins_encode( Push_Reg_DPR(src1),
9312               OpcP, RegOpc(src2),
9313               CmpF_Result(dst));
9314   ins_pipe( pipe_slow );
9315 %}
9316 
9317 // float compare and set condition codes in EFLAGS by XMM regs
9318 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9319   predicate(UseSSE>=2);
9320   match(Set cr (CmpD src1 src2));
9321   ins_cost(145);
9322   format %{ "UCOMISD $src1,$src2\n\t"
9323             "JNP,s   exit\n\t"
9324             "PUSHF\t# saw NaN, set CF\n\t"
9325             "AND     [rsp], #0xffffff2b\n\t"
9326             "POPF\n"
9327     "exit:" %}
9328   ins_encode %{
9329     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9330     emit_cmpfp_fixup(_masm);
9331   %}
9332   ins_pipe( pipe_slow );
9333 %}
9334 
9335 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9336   predicate(UseSSE>=2);
9337   match(Set cr (CmpD src1 src2));
9338   ins_cost(100);
9339   format %{ "UCOMISD $src1,$src2" %}
9340   ins_encode %{
9341     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9342   %}
9343   ins_pipe( pipe_slow );
9344 %}
9345 
9346 // float compare and set condition codes in EFLAGS by XMM regs
9347 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9348   predicate(UseSSE>=2);
9349   match(Set cr (CmpD src1 (LoadD src2)));
9350   ins_cost(145);
9351   format %{ "UCOMISD $src1,$src2\n\t"
9352             "JNP,s   exit\n\t"
9353             "PUSHF\t# saw NaN, set CF\n\t"
9354             "AND     [rsp], #0xffffff2b\n\t"
9355             "POPF\n"
9356     "exit:" %}
9357   ins_encode %{
9358     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9359     emit_cmpfp_fixup(_masm);
9360   %}
9361   ins_pipe( pipe_slow );
9362 %}
9363 
9364 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9365   predicate(UseSSE>=2);
9366   match(Set cr (CmpD src1 (LoadD src2)));
9367   ins_cost(100);
9368   format %{ "UCOMISD $src1,$src2" %}
9369   ins_encode %{
9370     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9371   %}
9372   ins_pipe( pipe_slow );
9373 %}
9374 
9375 // Compare into -1,0,1 in XMM
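// CmpD3 is the three-way compare producing -1, 0 or +1; a NaN operand falls through
// as -1 via the early JP exit in the sequence below.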
9376 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9377   predicate(UseSSE>=2);
9378   match(Set dst (CmpD3 src1 src2));
9379   effect(KILL cr);
9380   ins_cost(255);
9381   format %{ "UCOMISD $src1, $src2\n\t"
9382             "MOV     $dst, #-1\n\t"
9383             "JP,s    done\n\t"
9384             "JB,s    done\n\t"
9385             "SETNE   $dst\n\t"
9386             "MOVZB   $dst, $dst\n"
9387     "done:" %}
9388   ins_encode %{
9389     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9390     emit_cmpfp3(_masm, $dst$$Register);
9391   %}
9392   ins_pipe( pipe_slow );
9393 %}
9394 
9395 // Compare into -1,0,1 in XMM and memory
9396 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9397   predicate(UseSSE>=2);
9398   match(Set dst (CmpD3 src1 (LoadD src2)));
9399   effect(KILL cr);
9400   ins_cost(275);
9401   format %{ "UCOMISD $src1, $src2\n\t"
9402             "MOV     $dst, #-1\n\t"
9403             "JP,s    done\n\t"
9404             "JB,s    done\n\t"
9405             "SETNE   $dst\n\t"
9406             "MOVZB   $dst, $dst\n"
9407     "done:" %}
9408   ins_encode %{
9409     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9410     emit_cmpfp3(_masm, $dst$$Register);
9411   %}
9412   ins_pipe( pipe_slow );
9413 %}
9414 
9415 
9416 instruct subDPR_reg(regDPR dst, regDPR src) %{
9417   predicate (UseSSE <=1);
9418   match(Set dst (SubD dst src));
9419 
9420   format %{ "FLD    $src\n\t"
9421             "DSUBp  $dst,ST" %}
9422   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9423   ins_cost(150);
9424   ins_encode( Push_Reg_DPR(src),
9425               OpcP, RegOpc(dst) );
9426   ins_pipe( fpu_reg_reg );
9427 %}
9428 
9429 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9430   predicate (UseSSE <=1);
9431   match(Set dst (RoundDouble (SubD src1 src2)));
9432   ins_cost(250);
9433 
9434   format %{ "FLD    $src2\n\t"
9435             "DSUB   ST,$src1\n\t"
9436             "FSTP_D $dst\t# D-round" %}
9437   opcode(0xD8, 0x5);
9438   ins_encode( Push_Reg_DPR(src2),
9439               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9440   ins_pipe( fpu_mem_reg_reg );
9441 %}
9442 
9443 
9444 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9445   predicate (UseSSE <=1);
9446   match(Set dst (SubD dst (LoadD src)));
9447   ins_cost(150);
9448 
9449   format %{ "FLD    $src\n\t"
9450             "DSUBp  $dst,ST" %}
9451   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9452   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9453               OpcP, RegOpc(dst) );
9454   ins_pipe( fpu_reg_mem );
9455 %}
9456 
9457 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9458   predicate (UseSSE<=1);
9459   match(Set dst (AbsD src));
9460   ins_cost(100);
9461   format %{ "FABS" %}
9462   opcode(0xE1, 0xD9);
9463   ins_encode( OpcS, OpcP );
9464   ins_pipe( fpu_reg_reg );
9465 %}
9466 
9467 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9468   predicate(UseSSE<=1);
9469   match(Set dst (NegD src));
9470   ins_cost(100);
9471   format %{ "FCHS" %}
9472   opcode(0xE0, 0xD9);
9473   ins_encode( OpcS, OpcP );
9474   ins_pipe( fpu_reg_reg );
9475 %}
9476 
9477 instruct addDPR_reg(regDPR dst, regDPR src) %{
9478   predicate(UseSSE<=1);
9479   match(Set dst (AddD dst src));
9480   format %{ "FLD    $src\n\t"
9481             "DADD   $dst,ST" %}
9482   size(4);
9483   ins_cost(150);
9484   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9485   ins_encode( Push_Reg_DPR(src),
9486               OpcP, RegOpc(dst) );
9487   ins_pipe( fpu_reg_reg );
9488 %}
9489 
9490 
9491 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9492   predicate(UseSSE<=1);
9493   match(Set dst (RoundDouble (AddD src1 src2)));
9494   ins_cost(250);
9495 
9496   format %{ "FLD    $src2\n\t"
9497             "DADD   ST,$src1\n\t"
9498             "FSTP_D $dst\t# D-round" %}
9499   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9500   ins_encode( Push_Reg_DPR(src2),
9501               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9502   ins_pipe( fpu_mem_reg_reg );
9503 %}
9504 
9505 
9506 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9507   predicate(UseSSE<=1);
9508   match(Set dst (AddD dst (LoadD src)));
9509   ins_cost(150);
9510 
9511   format %{ "FLD    $src\n\t"
9512             "DADDp  $dst,ST" %}
9513   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9514   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9515               OpcP, RegOpc(dst) );
9516   ins_pipe( fpu_reg_mem );
9517 %}
9518 
9519 // add-to-memory
9520 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9521   predicate(UseSSE<=1);
9522   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9523   ins_cost(150);
9524 
9525   format %{ "FLD_D  $dst\n\t"
9526             "DADD   ST,$src\n\t"
9527             "FST_D  $dst" %}
9528   opcode(0xDD, 0x0);
9529   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9530               Opcode(0xD8), RegOpc(src),
9531               set_instruction_start,
9532               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9533   ins_pipe( fpu_reg_mem );
9534 %}
9535 
9536 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9537   predicate(UseSSE<=1);
9538   match(Set dst (AddD dst con));
9539   ins_cost(125);
9540   format %{ "FLD1\n\t"
9541             "DADDp  $dst,ST" %}
9542   ins_encode %{
9543     __ fld1();
9544     __ faddp($dst$$reg);
9545   %}
9546   ins_pipe(fpu_reg);
9547 %}
9548 
9549 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9550   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9551   match(Set dst (AddD dst con));
9552   ins_cost(200);
9553   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9554             "DADDp  $dst,ST" %}
9555   ins_encode %{
9556     __ fld_d($constantaddress($con));
9557     __ faddp($dst$$reg);
9558   %}
9559   ins_pipe(fpu_reg_mem);
9560 %}
9561 
9562 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9563   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9564   match(Set dst (RoundDouble (AddD src con)));
9565   ins_cost(200);
9566   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9567             "DADD   ST,$src\n\t"
9568             "FSTP_D $dst\t# D-round" %}
9569   ins_encode %{
9570     __ fld_d($constantaddress($con));
9571     __ fadd($src$$reg);
9572     __ fstp_d(Address(rsp, $dst$$disp));
9573   %}
9574   ins_pipe(fpu_mem_reg_con);
9575 %}
9576 
9577 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9578   predicate(UseSSE<=1);
9579   match(Set dst (MulD dst src));
9580   format %{ "FLD    $src\n\t"
9581             "DMULp  $dst,ST" %}
9582   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9583   ins_cost(150);
9584   ins_encode( Push_Reg_DPR(src),
9585               OpcP, RegOpc(dst) );
9586   ins_pipe( fpu_reg_reg );
9587 %}
9588 
9589 // Strict FP instruction biases argument before multiply then
9590 // biases result to avoid double rounding of subnormals.
9591 //
9592 // scale arg1 by multiplying arg1 by 2^(-15360)
9593 // load arg2
9594 // multiply scaled arg1 by arg2
9595 // rescale product by 2^(15360)
9596 //
9597 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9598   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9599   match(Set dst (MulD dst src));
9600   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9601 
9602   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9603             "DMULp  $dst,ST\n\t"
9604             "FLD    $src\n\t"
9605             "DMULp  $dst,ST\n\t"
9606             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9607             "DMULp  $dst,ST\n\t" %}
9608   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9609   ins_encode( strictfp_bias1(dst),
9610               Push_Reg_DPR(src),
9611               OpcP, RegOpc(dst),
9612               strictfp_bias2(dst) );
9613   ins_pipe( fpu_reg_reg );
9614 %}
9615 
9616 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9617   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9618   match(Set dst (MulD dst con));
9619   ins_cost(200);
9620   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9621             "DMULp  $dst,ST" %}
9622   ins_encode %{
9623     __ fld_d($constantaddress($con));
9624     __ fmulp($dst$$reg);
9625   %}
9626   ins_pipe(fpu_reg_mem);
9627 %}
9628 
9629 
9630 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9631   predicate( UseSSE<=1 );
9632   match(Set dst (MulD dst (LoadD src)));
9633   ins_cost(200);
9634   format %{ "FLD_D  $src\n\t"
9635             "DMULp  $dst,ST" %}
9636   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9637   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9638               OpcP, RegOpc(dst) );
9639   ins_pipe( fpu_reg_mem );
9640 %}
9641 
9642 //
9643 // Cisc-alternate to reg-reg multiply
9644 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9645   predicate( UseSSE<=1 );
9646   match(Set dst (MulD src (LoadD mem)));
9647   ins_cost(250);
9648   format %{ "FLD_D  $mem\n\t"
9649             "DMUL   ST,$src\n\t"
9650             "FSTP_D $dst" %}
9651   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9652   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9653               OpcReg_FPR(src),
9654               Pop_Reg_DPR(dst) );
9655   ins_pipe( fpu_reg_reg_mem );
9656 %}
9657 
9658 
9659 // MACRO3 -- addDPR a mulDPR
9660 // This instruction is a '2-address' instruction in that the result goes
9661 // back to src2.  This eliminates a move from the macro; possibly the
9662 // register allocator will have to add it back (and maybe not).
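// For example, the ideal subtree  src2 = AddD(MulD(src0, src1), src2)
// is matched here and emitted roughly as:
//   FLD    src0        ; push src0
//   FMUL   ST,src1     ; ST(0) = src0 * src1
//   FADDP  src2,ST     ; src2 += ST(0), pop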
9663 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9664   predicate( UseSSE<=1 );
9665   match(Set src2 (AddD (MulD src0 src1) src2));
9666   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9667             "DMUL   ST,$src1\n\t"
9668             "DADDp  $src2,ST" %}
9669   ins_cost(250);
9670   opcode(0xDD); /* LoadD DD /0 */
9671   ins_encode( Push_Reg_FPR(src0),
9672               FMul_ST_reg(src1),
9673               FAddP_reg_ST(src2) );
9674   ins_pipe( fpu_reg_reg_reg );
9675 %}
9676 
9677 
9678 // MACRO3 -- subDPR a mulDPR
9679 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9680   predicate( UseSSE<=1 );
9681   match(Set src2 (SubD (MulD src0 src1) src2));
9682   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9683             "DMUL   ST,$src1\n\t"
9684             "DSUBRp $src2,ST" %}
9685   ins_cost(250);
9686   ins_encode( Push_Reg_FPR(src0),
9687               FMul_ST_reg(src1),
9688               Opcode(0xDE), Opc_plus(0xE0,src2));
9689   ins_pipe( fpu_reg_reg_reg );
9690 %}
9691 
9692 
9693 instruct divDPR_reg(regDPR dst, regDPR src) %{
9694   predicate( UseSSE<=1 );
9695   match(Set dst (DivD dst src));
9696 
9697   format %{ "FLD    $src\n\t"
9698             "FDIVp  $dst,ST" %}
9699   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9700   ins_cost(150);
9701   ins_encode( Push_Reg_DPR(src),
9702               OpcP, RegOpc(dst) );
9703   ins_pipe( fpu_reg_reg );
9704 %}
9705 
9706 // Strict FP instruction biases argument before division then
9707 // biases result, to avoid double rounding of subnormals.
9708 //
9709 // scale dividend by multiplying dividend by 2^(-15360)
9710 // load divisor
9711 // divide scaled dividend by divisor
9712 // rescale quotient by 2^(15360)
9713 //
9714 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9719 
9720   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9721             "DMULp  $dst,ST\n\t"
9722             "FLD    $src\n\t"
9723             "FDIVp  $dst,ST\n\t"
9724             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9725             "DMULp  $dst,ST\n\t" %}
9726   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9727   ins_encode( strictfp_bias1(dst),
9728               Push_Reg_DPR(src),
9729               OpcP, RegOpc(dst),
9730               strictfp_bias2(dst) );
9731   ins_pipe( fpu_reg_reg );
9732 %}
9733 
9734 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9735   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9736   match(Set dst (RoundDouble (DivD src1 src2)));
9737 
9738   format %{ "FLD    $src1\n\t"
9739             "FDIV   ST,$src2\n\t"
9740             "FSTP_D $dst\t# D-round" %}
9741   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9742   ins_encode( Push_Reg_DPR(src1),
9743               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9744   ins_pipe( fpu_mem_reg_reg );
9745 %}
9746 
9747 
9748 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9749   predicate(UseSSE<=1);
9750   match(Set dst (ModD dst src));
9751   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9752 
9753   format %{ "DMOD   $dst,$src" %}
9754   ins_cost(250);
9755   ins_encode(Push_Reg_Mod_DPR(dst, src),
9756               emitModDPR(),
9757               Push_Result_Mod_DPR(src),
9758               Pop_Reg_DPR(dst));
9759   ins_pipe( pipe_slow );
9760 %}
9761 
9762 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9763   predicate(UseSSE>=2);
9764   match(Set dst (ModD src0 src1));
9765   effect(KILL rax, KILL cr);
9766 
9767   format %{ "SUB    ESP,8\t # DMOD\n"
9768           "\tMOVSD  [ESP+0],$src1\n"
9769           "\tFLD_D  [ESP+0]\n"
9770           "\tMOVSD  [ESP+0],$src0\n"
9771           "\tFLD_D  [ESP+0]\n"
9772      "loop:\tFPREM\n"
9773           "\tFWAIT\n"
9774           "\tFNSTSW AX\n"
9775           "\tSAHF\n"
9776           "\tJP     loop\n"
9777           "\tFSTP_D [ESP+0]\n"
9778           "\tMOVSD  $dst,[ESP+0]\n"
9779           "\tADD    ESP,8\n"
9780           "\tFSTP   ST0\t # Restore FPU Stack"
9781     %}
9782   ins_cost(250);
9783   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9784   ins_pipe( pipe_slow );
9785 %}
9786 
9787 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9788   predicate (UseSSE<=1);
9789   match(Set dst (SinD src));
9790   ins_cost(1800);
9791   format %{ "DSIN   $dst" %}
9792   opcode(0xD9, 0xFE);
9793   ins_encode( OpcP, OpcS );
9794   ins_pipe( pipe_slow );
9795 %}
9796 
9797 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9798   predicate (UseSSE>=2);
9799   match(Set dst (SinD dst));
9800   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9801   ins_cost(1800);
9802   format %{ "DSIN   $dst" %}
9803   opcode(0xD9, 0xFE);
9804   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9805   ins_pipe( pipe_slow );
9806 %}
9807 
9808 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9809   predicate (UseSSE<=1);
9810   match(Set dst (CosD src));
9811   ins_cost(1800);
9812   format %{ "DCOS   $dst" %}
9813   opcode(0xD9, 0xFF);
9814   ins_encode( OpcP, OpcS );
9815   ins_pipe( pipe_slow );
9816 %}
9817 
9818 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9819   predicate (UseSSE>=2);
9820   match(Set dst (CosD dst));
9821   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9822   ins_cost(1800);
9823   format %{ "DCOS   $dst" %}
9824   opcode(0xD9, 0xFF);
9825   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9826   ins_pipe( pipe_slow );
9827 %}
9828 
9829 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9830   predicate (UseSSE<=1);
9831   match(Set dst(TanD src));
9832   format %{ "DTAN   $dst" %}
9833   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9834               Opcode(0xDD), Opcode(0xD8));   // fstp st
9835   ins_pipe( pipe_slow );
9836 %}
9837 
9838 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9839   predicate (UseSSE>=2);
9840   match(Set dst(TanD dst));
9841   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9842   format %{ "DTAN   $dst" %}
9843   ins_encode( Push_SrcD(dst),
9844               Opcode(0xD9), Opcode(0xF2),    // fptan
9845               Opcode(0xDD), Opcode(0xD8),   // fstp st
9846               Push_ResultD(dst) );
9847   ins_pipe( pipe_slow );
9848 %}
9849 
9850 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9851   predicate (UseSSE<=1);
9852   match(Set dst(AtanD dst src));
9853   format %{ "DATA   $dst,$src" %}
9854   opcode(0xD9, 0xF3);
9855   ins_encode( Push_Reg_DPR(src),
9856               OpcP, OpcS, RegOpc(dst) );
9857   ins_pipe( pipe_slow );
9858 %}
9859 
9860 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9861   predicate (UseSSE>=2);
9862   match(Set dst(AtanD dst src));
9863   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9864   format %{ "DATA   $dst,$src" %}
9865   opcode(0xD9, 0xF3);
9866   ins_encode( Push_SrcD(src),
9867               OpcP, OpcS, Push_ResultD(dst) );
9868   ins_pipe( pipe_slow );
9869 %}
9870 
9871 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9872   predicate (UseSSE<=1);
9873   match(Set dst (SqrtD src));
9874   format %{ "DSQRT  $dst,$src" %}
9875   opcode(0xFA, 0xD9);
9876   ins_encode( Push_Reg_DPR(src),
9877               OpcS, OpcP, Pop_Reg_DPR(dst) );
9878   ins_pipe( pipe_slow );
9879 %}
9880 
9881 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9882   predicate (UseSSE<=1);
9883   match(Set Y (PowD X Y));  // Raise X to the Yth power
9884   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9885   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9886   ins_encode %{
9887     __ subptr(rsp, 8);
9888     __ fld_s($X$$reg - 1);
9889     __ fast_pow();
9890     __ addptr(rsp, 8);
9891   %}
9892   ins_pipe( pipe_slow );
9893 %}
9894 
9895 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9896   predicate (UseSSE>=2);
9897   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9898   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9899   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9900   ins_encode %{
9901     __ subptr(rsp, 8);
9902     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9903     __ fld_d(Address(rsp, 0));
9904     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9905     __ fld_d(Address(rsp, 0));
9906     __ fast_pow();
9907     __ fstp_d(Address(rsp, 0));
9908     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9909     __ addptr(rsp, 8);
9910   %}
9911   ins_pipe( pipe_slow );
9912 %}
9913 
9914 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9915   predicate (UseSSE<=1);
9916   // The source Double operand on FPU stack
9917   match(Set dst (Log10D src));
9918   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9919   // fxch         ; swap ST(0) with ST(1)
9920   // fyl2x        ; compute log_10(2) * log_2(x)
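  // e.g. for x = 8: log_10(2) * log_2(8) = 0.30103 * 3 = 0.90309 = log_10(8)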
9921   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9922             "FXCH   \n\t"
9923             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9924          %}
9925   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9926               Opcode(0xD9), Opcode(0xC9),   // fxch
9927               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9928 
9929   ins_pipe( pipe_slow );
9930 %}
9931 
9932 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9933   predicate (UseSSE>=2);
9934   effect(KILL cr);
9935   match(Set dst (Log10D src));
9936   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9937   // fyl2x        ; compute log_10(2) * log_2(x)
9938   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9939             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9940          %}
9941   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9942               Push_SrcD(src),
9943               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9944               Push_ResultD(dst));
9945 
9946   ins_pipe( pipe_slow );
9947 %}
9948 
9949 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9950   predicate (UseSSE<=1);
9951   // The source Double operand on FPU stack
9952   match(Set dst (LogD src));
9953   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9954   // fxch         ; swap ST(0) with ST(1)
9955   // fyl2x        ; compute log_e(2) * log_2(x)
9956   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9957             "FXCH   \n\t"
9958             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9959          %}
9960   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9961               Opcode(0xD9), Opcode(0xC9),   // fxch
9962               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9963 
9964   ins_pipe( pipe_slow );
9965 %}
9966 
9967 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9968   predicate (UseSSE>=2);
9969   effect(KILL cr);
9970   // The source and result Double operands in XMM registers
9971   match(Set dst (LogD src));
9972   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9973   // fyl2x        ; compute log_e(2) * log_2(x)
9974   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9975             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9976          %}
9977   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9978               Push_SrcD(src),
9979               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9980               Push_ResultD(dst));
9981   ins_pipe( pipe_slow );
9982 %}
9983 
9984 //-------------Float Instructions-------------------------------
9985 // Float Math
9986 
9987 // Code for float compare:
9988 //     fcompp();
9989 //     fwait(); fnstsw_ax();
9990 //     sahf();
9991 //     movl(dst, unordered_result);
9992 //     jcc(Assembler::parity, exit);
9993 //     movl(dst, less_result);
9994 //     jcc(Assembler::below, exit);
9995 //     movl(dst, equal_result);
9996 //     jcc(Assembler::equal, exit);
9997 //     movl(dst, greater_result);
9998 //   exit:
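//
// After SAHF the x87 condition bits land in EFLAGS as C0->CF, C2->PF and
// C3->ZF, which is why the sketch above tests parity for an unordered (NaN)
// compare, below for less-than, and equal for equality.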
9999 
10000 // P6 version of float compare, sets condition codes in EFLAGS
10001 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10002   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10003   match(Set cr (CmpF src1 src2));
10004   effect(KILL rax);
10005   ins_cost(150);
10006   format %{ "FLD    $src1\n\t"
10007             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10008             "JNP    exit\n\t"
10009             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10010             "SAHF\n"
10011      "exit:\tNOP               // avoid branch to branch" %}
10012   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10013   ins_encode( Push_Reg_DPR(src1),
10014               OpcP, RegOpc(src2),
10015               cmpF_P6_fixup );
10016   ins_pipe( pipe_slow );
10017 %}
10018 
10019 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10020   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10021   match(Set cr (CmpF src1 src2));
10022   ins_cost(100);
10023   format %{ "FLD    $src1\n\t"
10024             "FUCOMIP ST,$src2  // P6 instruction" %}
10025   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10026   ins_encode( Push_Reg_DPR(src1),
10027               OpcP, RegOpc(src2));
10028   ins_pipe( pipe_slow );
10029 %}
10030 
10031 
10032 // Compare & branch
10033 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10034   predicate(UseSSE == 0);
10035   match(Set cr (CmpF src1 src2));
10036   effect(KILL rax);
10037   ins_cost(200);
10038   format %{ "FLD    $src1\n\t"
10039             "FCOMp  $src2\n\t"
10040             "FNSTSW AX\n\t"
10041             "TEST   AX,0x400\n\t"
10042             "JZ,s   flags\n\t"
10043             "MOV    AH,1\t# unordered treat as LT\n"
10044     "flags:\tSAHF" %}
10045   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10046   ins_encode( Push_Reg_DPR(src1),
10047               OpcP, RegOpc(src2),
10048               fpu_flags);
10049   ins_pipe( pipe_slow );
10050 %}
10051 
10052 // Compare vs zero into -1,0,1
10053 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10054   predicate(UseSSE == 0);
10055   match(Set dst (CmpF3 src1 zero));
10056   effect(KILL cr, KILL rax);
10057   ins_cost(280);
10058   format %{ "FTSTF  $dst,$src1" %}
10059   opcode(0xE4, 0xD9);
10060   ins_encode( Push_Reg_DPR(src1),
10061               OpcS, OpcP, PopFPU,
10062               CmpF_Result(dst));
10063   ins_pipe( pipe_slow );
10064 %}
10065 
10066 // Compare into -1,0,1
10067 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10068   predicate(UseSSE == 0);
10069   match(Set dst (CmpF3 src1 src2));
10070   effect(KILL cr, KILL rax);
10071   ins_cost(300);
10072   format %{ "FCMPF  $dst,$src1,$src2" %}
10073   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10074   ins_encode( Push_Reg_DPR(src1),
10075               OpcP, RegOpc(src2),
10076               CmpF_Result(dst));
10077   ins_pipe( pipe_slow );
10078 %}
10079 
10080 // float compare and set condition codes in EFLAGS by XMM regs
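// For an unordered operand UCOMISS sets ZF=PF=CF=1; the fixup shown in the
// format below (JNP/PUSHF/AND/POPF) clears ZF and PF but keeps CF, so a NaN
// compare reads as "below", i.e. unordered is treated as less-than just like
// the x87 paths above.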
10081 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10082   predicate(UseSSE>=1);
10083   match(Set cr (CmpF src1 src2));
10084   ins_cost(145);
10085   format %{ "UCOMISS $src1,$src2\n\t"
10086             "JNP,s   exit\n\t"
10087             "PUSHF\t# saw NaN, set CF\n\t"
10088             "AND     [rsp], #0xffffff2b\n\t"
10089             "POPF\n"
10090     "exit:" %}
10091   ins_encode %{
10092     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10093     emit_cmpfp_fixup(_masm);
10094   %}
10095   ins_pipe( pipe_slow );
10096 %}
10097 
10098 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10099   predicate(UseSSE>=1);
10100   match(Set cr (CmpF src1 src2));
10101   ins_cost(100);
10102   format %{ "UCOMISS $src1,$src2" %}
10103   ins_encode %{
10104     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10105   %}
10106   ins_pipe( pipe_slow );
10107 %}
10108 
10109 // float compare and set condition codes in EFLAGS by XMM regs
10110 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10111   predicate(UseSSE>=1);
10112   match(Set cr (CmpF src1 (LoadF src2)));
10113   ins_cost(165);
10114   format %{ "UCOMISS $src1,$src2\n\t"
10115             "JNP,s   exit\n\t"
10116             "PUSHF\t# saw NaN, set CF\n\t"
10117             "AND     [rsp], #0xffffff2b\n\t"
10118             "POPF\n"
10119     "exit:" %}
10120   ins_encode %{
10121     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10122     emit_cmpfp_fixup(_masm);
10123   %}
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10128   predicate(UseSSE>=1);
10129   match(Set cr (CmpF src1 (LoadF src2)));
10130   ins_cost(100);
10131   format %{ "UCOMISS $src1,$src2" %}
10132   ins_encode %{
10133     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10134   %}
10135   ins_pipe( pipe_slow );
10136 %}
10137 
10138 // Compare into -1,0,1 in XMM
10139 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10140   predicate(UseSSE>=1);
10141   match(Set dst (CmpF3 src1 src2));
10142   effect(KILL cr);
10143   ins_cost(255);
10144   format %{ "UCOMISS $src1, $src2\n\t"
10145             "MOV     $dst, #-1\n\t"
10146             "JP,s    done\n\t"
10147             "JB,s    done\n\t"
10148             "SETNE   $dst\n\t"
10149             "MOVZB   $dst, $dst\n"
10150     "done:" %}
10151   ins_encode %{
10152     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10153     emit_cmpfp3(_masm, $dst$$Register);
10154   %}
10155   ins_pipe( pipe_slow );
10156 %}
10157 
10158 // Compare into -1,0,1 in XMM and memory
10159 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10160   predicate(UseSSE>=1);
10161   match(Set dst (CmpF3 src1 (LoadF src2)));
10162   effect(KILL cr);
10163   ins_cost(275);
10164   format %{ "UCOMISS $src1, $src2\n\t"
10165             "MOV     $dst, #-1\n\t"
10166             "JP,s    done\n\t"
10167             "JB,s    done\n\t"
10168             "SETNE   $dst\n\t"
10169             "MOVZB   $dst, $dst\n"
10170     "done:" %}
10171   ins_encode %{
10172     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10173     emit_cmpfp3(_masm, $dst$$Register);
10174   %}
10175   ins_pipe( pipe_slow );
10176 %}
10177 
10178 // Spill to obtain 24-bit precision
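// (With the FPU control word set to 24-bit precision only the significand is
// rounded; the register still carries an extended-range exponent.  Storing
// through a 32-bit stack slot with FSTP_S also narrows the exponent, so the
// result matches a true single-precision operation.)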
10179 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10180   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181   match(Set dst (SubF src1 src2));
10182 
10183   format %{ "FSUB   $dst,$src1 - $src2" %}
10184   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10185   ins_encode( Push_Reg_FPR(src1),
10186               OpcReg_FPR(src2),
10187               Pop_Mem_FPR(dst) );
10188   ins_pipe( fpu_mem_reg_reg );
10189 %}
10190 //
10191 // This instruction does not round to 24-bits
10192 instruct subFPR_reg(regFPR dst, regFPR src) %{
10193   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10194   match(Set dst (SubF dst src));
10195 
10196   format %{ "FSUB   $dst,$src" %}
10197   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10198   ins_encode( Push_Reg_FPR(src),
10199               OpcP, RegOpc(dst) );
10200   ins_pipe( fpu_reg_reg );
10201 %}
10202 
10203 // Spill to obtain 24-bit precision
10204 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10205   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10206   match(Set dst (AddF src1 src2));
10207 
10208   format %{ "FADD   $dst,$src1,$src2" %}
10209   opcode(0xD8, 0x0); /* D8 C0+i */
10210   ins_encode( Push_Reg_FPR(src2),
10211               OpcReg_FPR(src1),
10212               Pop_Mem_FPR(dst) );
10213   ins_pipe( fpu_mem_reg_reg );
10214 %}
10215 //
10216 // This instruction does not round to 24-bits
10217 instruct addFPR_reg(regFPR dst, regFPR src) %{
10218   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10219   match(Set dst (AddF dst src));
10220 
10221   format %{ "FLD    $src\n\t"
10222             "FADDp  $dst,ST" %}
10223   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10224   ins_encode( Push_Reg_FPR(src),
10225               OpcP, RegOpc(dst) );
10226   ins_pipe( fpu_reg_reg );
10227 %}
10228 
10229 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10230   predicate(UseSSE==0);
10231   match(Set dst (AbsF src));
10232   ins_cost(100);
10233   format %{ "FABS" %}
10234   opcode(0xE1, 0xD9);
10235   ins_encode( OpcS, OpcP );
10236   ins_pipe( fpu_reg_reg );
10237 %}
10238 
10239 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10240   predicate(UseSSE==0);
10241   match(Set dst (NegF src));
10242   ins_cost(100);
10243   format %{ "FCHS" %}
10244   opcode(0xE0, 0xD9);
10245   ins_encode( OpcS, OpcP );
10246   ins_pipe( fpu_reg_reg );
10247 %}
10248 
10249 // Cisc-alternate to addFPR_reg
10250 // Spill to obtain 24-bit precision
10251 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10252   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10253   match(Set dst (AddF src1 (LoadF src2)));
10254 
10255   format %{ "FLD    $src2\n\t"
10256             "FADD   ST,$src1\n\t"
10257             "FSTP_S $dst" %}
10258   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10259   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10260               OpcReg_FPR(src1),
10261               Pop_Mem_FPR(dst) );
10262   ins_pipe( fpu_mem_reg_mem );
10263 %}
10264 //
10265 // Cisc-alternate to addFPR_reg
10266 // This instruction does not round to 24-bits
10267 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (AddF dst (LoadF src)));
10270 
10271   format %{ "FADD   $dst,$src" %}
10272   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10273   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10274               OpcP, RegOpc(dst) );
10275   ins_pipe( fpu_reg_mem );
10276 %}
10277 
// Following two instructions for _222_mpegaudio
10279 // Spill to obtain 24-bit precision
10280 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10281   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10282   match(Set dst (AddF src1 src2));
10283 
10284   format %{ "FADD   $dst,$src1,$src2" %}
10285   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10286   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10287               OpcReg_FPR(src2),
10288               Pop_Mem_FPR(dst) );
10289   ins_pipe( fpu_mem_reg_mem );
10290 %}
10291 
10292 // Cisc-spill variant
10293 // Spill to obtain 24-bit precision
10294 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10295   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10296   match(Set dst (AddF src1 (LoadF src2)));
10297 
10298   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10299   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10300   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10301               set_instruction_start,
10302               OpcP, RMopc_Mem(secondary,src1),
10303               Pop_Mem_FPR(dst) );
10304   ins_pipe( fpu_mem_mem_mem );
10305 %}
10306 
10307 // Spill to obtain 24-bit precision
10308 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10309   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10310   match(Set dst (AddF src1 src2));
10311 
10312   format %{ "FADD   $dst,$src1,$src2" %}
10313   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10314   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10315               set_instruction_start,
10316               OpcP, RMopc_Mem(secondary,src1),
10317               Pop_Mem_FPR(dst) );
10318   ins_pipe( fpu_mem_mem_mem );
10319 %}
10320 
10321 
10322 // Spill to obtain 24-bit precision
10323 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10324   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10325   match(Set dst (AddF src con));
10326   format %{ "FLD    $src\n\t"
10327             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10328             "FSTP_S $dst"  %}
10329   ins_encode %{
10330     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10331     __ fadd_s($constantaddress($con));
10332     __ fstp_s(Address(rsp, $dst$$disp));
10333   %}
10334   ins_pipe(fpu_mem_reg_con);
10335 %}
10336 //
10337 // This instruction does not round to 24-bits
10338 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10339   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10340   match(Set dst (AddF src con));
10341   format %{ "FLD    $src\n\t"
10342             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10343             "FSTP   $dst"  %}
10344   ins_encode %{
10345     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10346     __ fadd_s($constantaddress($con));
10347     __ fstp_d($dst$$reg);
10348   %}
10349   ins_pipe(fpu_reg_reg_con);
10350 %}
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (MulF src1 src2));
10356 
10357   format %{ "FLD    $src1\n\t"
10358             "FMUL   $src2\n\t"
10359             "FSTP_S $dst"  %}
10360   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10361   ins_encode( Push_Reg_FPR(src1),
10362               OpcReg_FPR(src2),
10363               Pop_Mem_FPR(dst) );
10364   ins_pipe( fpu_mem_reg_reg );
10365 %}
10366 //
10367 // This instruction does not round to 24-bits
10368 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10369   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10370   match(Set dst (MulF src1 src2));
10371 
10372   format %{ "FLD    $src1\n\t"
10373             "FMUL   $src2\n\t"
10374             "FSTP_S $dst"  %}
10375   opcode(0xD8, 0x1); /* D8 C8+i */
10376   ins_encode( Push_Reg_FPR(src2),
10377               OpcReg_FPR(src1),
10378               Pop_Reg_FPR(dst) );
10379   ins_pipe( fpu_reg_reg_reg );
10380 %}
10381 
10382 
10383 // Spill to obtain 24-bit precision
10384 // Cisc-alternate to reg-reg multiply
10385 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10386   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10387   match(Set dst (MulF src1 (LoadF src2)));
10388 
10389   format %{ "FLD_S  $src2\n\t"
10390             "FMUL   $src1\n\t"
10391             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10393   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10394               OpcReg_FPR(src1),
10395               Pop_Mem_FPR(dst) );
10396   ins_pipe( fpu_mem_reg_mem );
10397 %}
10398 //
10399 // This instruction does not round to 24-bits
10400 // Cisc-alternate to reg-reg multiply
10401 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10402   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10403   match(Set dst (MulF src1 (LoadF src2)));
10404 
10405   format %{ "FMUL   $dst,$src1,$src2" %}
10406   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10407   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10408               OpcReg_FPR(src1),
10409               Pop_Reg_FPR(dst) );
10410   ins_pipe( fpu_reg_reg_mem );
10411 %}
10412 
10413 // Spill to obtain 24-bit precision
10414 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10415   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10416   match(Set dst (MulF src1 src2));
10417 
10418   format %{ "FMUL   $dst,$src1,$src2" %}
10419   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10420   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10421               set_instruction_start,
10422               OpcP, RMopc_Mem(secondary,src1),
10423               Pop_Mem_FPR(dst) );
10424   ins_pipe( fpu_mem_mem_mem );
10425 %}
10426 
10427 // Spill to obtain 24-bit precision
10428 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10429   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10430   match(Set dst (MulF src con));
10431 
10432   format %{ "FLD    $src\n\t"
10433             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10434             "FSTP_S $dst"  %}
10435   ins_encode %{
10436     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10437     __ fmul_s($constantaddress($con));
10438     __ fstp_s(Address(rsp, $dst$$disp));
10439   %}
10440   ins_pipe(fpu_mem_reg_con);
10441 %}
10442 //
10443 // This instruction does not round to 24-bits
10444 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10445   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10446   match(Set dst (MulF src con));
10447 
10448   format %{ "FLD    $src\n\t"
10449             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10450             "FSTP   $dst"  %}
10451   ins_encode %{
10452     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10453     __ fmul_s($constantaddress($con));
10454     __ fstp_d($dst$$reg);
10455   %}
10456   ins_pipe(fpu_reg_reg_con);
10457 %}
10458 
10459 
10460 //
10461 // MACRO1 -- subsume unshared load into mulFPR
10462 // This instruction does not round to 24-bits
10463 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10464   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10465   match(Set dst (MulF (LoadF mem1) src));
10466 
10467   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10468             "FMUL   ST,$src\n\t"
10469             "FSTP   $dst" %}
10470   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10471   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10472               OpcReg_FPR(src),
10473               Pop_Reg_FPR(dst) );
10474   ins_pipe( fpu_reg_reg_mem );
10475 %}
10476 //
10477 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10478 // This instruction does not round to 24-bits
10479 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10480   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10481   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10482   ins_cost(95);
10483 
10484   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10485             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10486             "FADD   ST,$src2\n\t"
10487             "FSTP   $dst" %}
10488   opcode(0xD9); /* LoadF D9 /0 */
10489   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10490               FMul_ST_reg(src1),
10491               FAdd_ST_reg(src2),
10492               Pop_Reg_FPR(dst) );
10493   ins_pipe( fpu_reg_mem_reg_reg );
10494 %}
10495 
10496 // MACRO3 -- addFPR a mulFPR
10497 // This instruction does not round to 24-bits.  It is a '2-address'
10498 // instruction in that the result goes back to src2.  This eliminates
10499 // a move from the macro; possibly the register allocator will have
10500 // to add it back (and maybe not).
10501 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10502   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10503   match(Set src2 (AddF (MulF src0 src1) src2));
10504 
10505   format %{ "FLD    $src0     ===MACRO3===\n\t"
10506             "FMUL   ST,$src1\n\t"
10507             "FADDP  $src2,ST" %}
10508   opcode(0xD9); /* LoadF D9 /0 */
10509   ins_encode( Push_Reg_FPR(src0),
10510               FMul_ST_reg(src1),
10511               FAddP_reg_ST(src2) );
10512   ins_pipe( fpu_reg_reg_reg );
10513 %}
10514 
10515 // MACRO4 -- divFPR subFPR
10516 // This instruction does not round to 24-bits
10517 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10518   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10519   match(Set dst (DivF (SubF src2 src1) src3));
10520 
10521   format %{ "FLD    $src2   ===MACRO4===\n\t"
10522             "FSUB   ST,$src1\n\t"
10523             "FDIV   ST,$src3\n\t"
10524             "FSTP  $dst" %}
10525   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10526   ins_encode( Push_Reg_FPR(src2),
10527               subFPR_divFPR_encode(src1,src3),
10528               Pop_Reg_FPR(dst) );
10529   ins_pipe( fpu_reg_reg_reg_reg );
10530 %}
10531 
10532 // Spill to obtain 24-bit precision
10533 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10534   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10535   match(Set dst (DivF src1 src2));
10536 
10537   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10539   ins_encode( Push_Reg_FPR(src1),
10540               OpcReg_FPR(src2),
10541               Pop_Mem_FPR(dst) );
10542   ins_pipe( fpu_mem_reg_reg );
10543 %}
10544 //
10545 // This instruction does not round to 24-bits
10546 instruct divFPR_reg(regFPR dst, regFPR src) %{
10547   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10548   match(Set dst (DivF dst src));
10549 
10550   format %{ "FDIV   $dst,$src" %}
10551   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10552   ins_encode( Push_Reg_FPR(src),
10553               OpcP, RegOpc(dst) );
10554   ins_pipe( fpu_reg_reg );
10555 %}
10556 
10557 
10558 // Spill to obtain 24-bit precision
10559 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10560   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10561   match(Set dst (ModF src1 src2));
10562   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10563 
10564   format %{ "FMOD   $dst,$src1,$src2" %}
10565   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10566               emitModDPR(),
10567               Push_Result_Mod_DPR(src2),
10568               Pop_Mem_FPR(dst));
10569   ins_pipe( pipe_slow );
10570 %}
10571 //
10572 // This instruction does not round to 24-bits
10573 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10574   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10575   match(Set dst (ModF dst src));
10576   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10577 
10578   format %{ "FMOD   $dst,$src" %}
10579   ins_encode(Push_Reg_Mod_DPR(dst, src),
10580               emitModDPR(),
10581               Push_Result_Mod_DPR(src),
10582               Pop_Reg_FPR(dst));
10583   ins_pipe( pipe_slow );
10584 %}
10585 
10586 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10587   predicate(UseSSE>=1);
10588   match(Set dst (ModF src0 src1));
10589   effect(KILL rax, KILL cr);
10590   format %{ "SUB    ESP,4\t # FMOD\n"
10591           "\tMOVSS  [ESP+0],$src1\n"
10592           "\tFLD_S  [ESP+0]\n"
10593           "\tMOVSS  [ESP+0],$src0\n"
10594           "\tFLD_S  [ESP+0]\n"
10595      "loop:\tFPREM\n"
10596           "\tFWAIT\n"
10597           "\tFNSTSW AX\n"
10598           "\tSAHF\n"
10599           "\tJP     loop\n"
10600           "\tFSTP_S [ESP+0]\n"
10601           "\tMOVSS  $dst,[ESP+0]\n"
10602           "\tADD    ESP,4\n"
10603           "\tFSTP   ST0\t # Restore FPU Stack"
10604     %}
10605   ins_cost(250);
10606   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10607   ins_pipe( pipe_slow );
10608 %}
10609 
10610 
10611 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10613 
10614 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10615   predicate(UseSSE==0);
10616   match(Set dst (RoundFloat src));
10617   ins_cost(125);
10618   format %{ "FST_S  $dst,$src\t# F-round" %}
10619   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10620   ins_pipe( fpu_mem_reg );
10621 %}
10622 
10623 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10624   predicate(UseSSE<=1);
10625   match(Set dst (RoundDouble src));
10626   ins_cost(125);
10627   format %{ "FST_D  $dst,$src\t# D-round" %}
10628   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10629   ins_pipe( fpu_mem_reg );
10630 %}
10631 
// Force rounding to 24-bit precision and 8-bit exponent
10633 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10634   predicate(UseSSE==0);
10635   match(Set dst (ConvD2F src));
10636   format %{ "FST_S  $dst,$src\t# F-round" %}
10637   expand %{
10638     roundFloat_mem_reg(dst,src);
10639   %}
10640 %}
10641 
// Force rounding to 24-bit precision and 8-bit exponent
10643 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10644   predicate(UseSSE==1);
10645   match(Set dst (ConvD2F src));
10646   effect( KILL cr );
10647   format %{ "SUB    ESP,4\n\t"
10648             "FST_S  [ESP],$src\t# F-round\n\t"
10649             "MOVSS  $dst,[ESP]\n\t"
10650             "ADD ESP,4" %}
10651   ins_encode %{
10652     __ subptr(rsp, 4);
10653     if ($src$$reg != FPR1L_enc) {
10654       __ fld_s($src$$reg-1);
10655       __ fstp_s(Address(rsp, 0));
10656     } else {
10657       __ fst_s(Address(rsp, 0));
10658     }
10659     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10660     __ addptr(rsp, 4);
10661   %}
10662   ins_pipe( pipe_slow );
10663 %}
10664 
10665 // Force rounding double precision to single precision
10666 instruct convD2F_reg(regF dst, regD src) %{
10667   predicate(UseSSE>=2);
10668   match(Set dst (ConvD2F src));
10669   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10670   ins_encode %{
10671     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10672   %}
10673   ins_pipe( pipe_slow );
10674 %}
10675 
10676 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10677   predicate(UseSSE==0);
10678   match(Set dst (ConvF2D src));
10679   format %{ "FST_S  $dst,$src\t# D-round" %}
10680   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10681   ins_pipe( fpu_reg_reg );
10682 %}
10683 
10684 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10685   predicate(UseSSE==1);
10686   match(Set dst (ConvF2D src));
10687   format %{ "FST_D  $dst,$src\t# D-round" %}
10688   expand %{
10689     roundDouble_mem_reg(dst,src);
10690   %}
10691 %}
10692 
10693 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10694   predicate(UseSSE==1);
10695   match(Set dst (ConvF2D src));
10696   effect( KILL cr );
10697   format %{ "SUB    ESP,4\n\t"
10698             "MOVSS  [ESP] $src\n\t"
10699             "FLD_S  [ESP]\n\t"
10700             "ADD    ESP,4\n\t"
10701             "FSTP   $dst\t# D-round" %}
10702   ins_encode %{
10703     __ subptr(rsp, 4);
10704     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10705     __ fld_s(Address(rsp, 0));
10706     __ addptr(rsp, 4);
10707     __ fstp_d($dst$$reg);
10708   %}
10709   ins_pipe( pipe_slow );
10710 %}
10711 
10712 instruct convF2D_reg(regD dst, regF src) %{
10713   predicate(UseSSE>=2);
10714   match(Set dst (ConvF2D src));
10715   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10716   ins_encode %{
10717     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10718   %}
10719   ins_pipe( pipe_slow );
10720 %}
10721 
10722 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
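// FIST/FISTp (and CVTTSD2SI below) write the "integer indefinite" value
// 0x80000000 when the source is a NaN or out of int range, so the result is
// compared against 0x80000000 and, on a match, the conversion is redone via
// the d2i_wrapper stub, which sorts out NaN, overflow, and a result that
// really is Integer.MIN_VALUE.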
10723 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10724   predicate(UseSSE<=1);
10725   match(Set dst (ConvD2I src));
10726   effect( KILL tmp, KILL cr );
10727   format %{ "FLD    $src\t# Convert double to int \n\t"
10728             "FLDCW  trunc mode\n\t"
10729             "SUB    ESP,4\n\t"
10730             "FISTp  [ESP + #0]\n\t"
10731             "FLDCW  std/24-bit mode\n\t"
10732             "POP    EAX\n\t"
10733             "CMP    EAX,0x80000000\n\t"
10734             "JNE,s  fast\n\t"
10735             "FLD_D  $src\n\t"
10736             "CALL   d2i_wrapper\n"
10737       "fast:" %}
10738   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10739   ins_pipe( pipe_slow );
10740 %}
10741 
10742 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10743 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10744   predicate(UseSSE>=2);
10745   match(Set dst (ConvD2I src));
10746   effect( KILL tmp, KILL cr );
10747   format %{ "CVTTSD2SI $dst, $src\n\t"
10748             "CMP    $dst,0x80000000\n\t"
10749             "JNE,s  fast\n\t"
10750             "SUB    ESP, 8\n\t"
10751             "MOVSD  [ESP], $src\n\t"
10752             "FLD_D  [ESP]\n\t"
10753             "ADD    ESP, 8\n\t"
10754             "CALL   d2i_wrapper\n"
10755       "fast:" %}
10756   ins_encode %{
10757     Label fast;
10758     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10759     __ cmpl($dst$$Register, 0x80000000);
10760     __ jccb(Assembler::notEqual, fast);
10761     __ subptr(rsp, 8);
10762     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10763     __ fld_d(Address(rsp, 0));
10764     __ addptr(rsp, 8);
10765     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10766     __ bind(fast);
10767   %}
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10772   predicate(UseSSE<=1);
10773   match(Set dst (ConvD2L src));
10774   effect( KILL cr );
10775   format %{ "FLD    $src\t# Convert double to long\n\t"
10776             "FLDCW  trunc mode\n\t"
10777             "SUB    ESP,8\n\t"
10778             "FISTp  [ESP + #0]\n\t"
10779             "FLDCW  std/24-bit mode\n\t"
10780             "POP    EAX\n\t"
10781             "POP    EDX\n\t"
10782             "CMP    EDX,0x80000000\n\t"
10783             "JNE,s  fast\n\t"
10784             "TEST   EAX,EAX\n\t"
10785             "JNE,s  fast\n\t"
10786             "FLD    $src\n\t"
10787             "CALL   d2l_wrapper\n"
10788       "fast:" %}
10789   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10790   ins_pipe( pipe_slow );
10791 %}
10792 
10793 // XMM lacks a float/double->long conversion, so use the old FPU stack.
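// The 64-bit result is checked against the long "integer indefinite" pattern
// 0x8000000000000000 (EDX == 0x80000000 and EAX == 0) before falling back to
// the d2l_wrapper stub, which sorts out NaN, overflow, and a result that
// really is Long.MIN_VALUE.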
10794 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10795   predicate (UseSSE>=2);
10796   match(Set dst (ConvD2L src));
10797   effect( KILL cr );
10798   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10799             "MOVSD  [ESP],$src\n\t"
10800             "FLD_D  [ESP]\n\t"
10801             "FLDCW  trunc mode\n\t"
10802             "FISTp  [ESP + #0]\n\t"
10803             "FLDCW  std/24-bit mode\n\t"
10804             "POP    EAX\n\t"
10805             "POP    EDX\n\t"
10806             "CMP    EDX,0x80000000\n\t"
10807             "JNE,s  fast\n\t"
10808             "TEST   EAX,EAX\n\t"
10809             "JNE,s  fast\n\t"
10810             "SUB    ESP,8\n\t"
10811             "MOVSD  [ESP],$src\n\t"
10812             "FLD_D  [ESP]\n\t"
10813             "ADD    ESP,8\n\t"
10814             "CALL   d2l_wrapper\n"
10815       "fast:" %}
10816   ins_encode %{
10817     Label fast;
10818     __ subptr(rsp, 8);
10819     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10820     __ fld_d(Address(rsp, 0));
10821     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10822     __ fistp_d(Address(rsp, 0));
10823     // Restore the rounding mode, mask the exception
10824     if (Compile::current()->in_24_bit_fp_mode()) {
10825       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10826     } else {
10827       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10828     }
10829     // Load the converted long, adjust CPU stack
10830     __ pop(rax);
10831     __ pop(rdx);
10832     __ cmpl(rdx, 0x80000000);
10833     __ jccb(Assembler::notEqual, fast);
10834     __ testl(rax, rax);
10835     __ jccb(Assembler::notEqual, fast);
10836     __ subptr(rsp, 8);
10837     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10838     __ fld_d(Address(rsp, 0));
10839     __ addptr(rsp, 8);
10840     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10841     __ bind(fast);
10842   %}
10843   ins_pipe( pipe_slow );
10844 %}
10845 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or convert a NaN; we check for this and
// go the slow path if needed.
10852 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10853   predicate(UseSSE==0);
10854   match(Set dst (ConvF2I src));
10855   effect( KILL tmp, KILL cr );
10856   format %{ "FLD    $src\t# Convert float to int \n\t"
10857             "FLDCW  trunc mode\n\t"
10858             "SUB    ESP,4\n\t"
10859             "FISTp  [ESP + #0]\n\t"
10860             "FLDCW  std/24-bit mode\n\t"
10861             "POP    EAX\n\t"
10862             "CMP    EAX,0x80000000\n\t"
10863             "JNE,s  fast\n\t"
10864             "FLD    $src\n\t"
10865             "CALL   d2i_wrapper\n"
10866       "fast:" %}
10867   // DPR2I_encoding works for FPR2I
10868   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10869   ins_pipe( pipe_slow );
10870 %}
10871 
10872 // Convert a float in xmm to an int reg.
10873 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10874   predicate(UseSSE>=1);
10875   match(Set dst (ConvF2I src));
10876   effect( KILL tmp, KILL cr );
10877   format %{ "CVTTSS2SI $dst, $src\n\t"
10878             "CMP    $dst,0x80000000\n\t"
10879             "JNE,s  fast\n\t"
10880             "SUB    ESP, 4\n\t"
10881             "MOVSS  [ESP], $src\n\t"
10882             "FLD    [ESP]\n\t"
10883             "ADD    ESP, 4\n\t"
10884             "CALL   d2i_wrapper\n"
10885       "fast:" %}
10886   ins_encode %{
10887     Label fast;
10888     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10889     __ cmpl($dst$$Register, 0x80000000);
10890     __ jccb(Assembler::notEqual, fast);
10891     __ subptr(rsp, 4);
10892     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10893     __ fld_s(Address(rsp, 0));
10894     __ addptr(rsp, 4);
10895     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10896     __ bind(fast);
10897   %}
10898   ins_pipe( pipe_slow );
10899 %}
10900 
10901 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10902   predicate(UseSSE==0);
10903   match(Set dst (ConvF2L src));
10904   effect( KILL cr );
10905   format %{ "FLD    $src\t# Convert float to long\n\t"
10906             "FLDCW  trunc mode\n\t"
10907             "SUB    ESP,8\n\t"
10908             "FISTp  [ESP + #0]\n\t"
10909             "FLDCW  std/24-bit mode\n\t"
10910             "POP    EAX\n\t"
10911             "POP    EDX\n\t"
10912             "CMP    EDX,0x80000000\n\t"
10913             "JNE,s  fast\n\t"
10914             "TEST   EAX,EAX\n\t"
10915             "JNE,s  fast\n\t"
10916             "FLD    $src\n\t"
10917             "CALL   d2l_wrapper\n"
10918       "fast:" %}
10919   // DPR2L_encoding works for FPR2L
10920   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10921   ins_pipe( pipe_slow );
10922 %}
10923 
10924 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10925 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10926   predicate (UseSSE>=1);
10927   match(Set dst (ConvF2L src));
10928   effect( KILL cr );
10929   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10930             "MOVSS  [ESP],$src\n\t"
10931             "FLD_S  [ESP]\n\t"
10932             "FLDCW  trunc mode\n\t"
10933             "FISTp  [ESP + #0]\n\t"
10934             "FLDCW  std/24-bit mode\n\t"
10935             "POP    EAX\n\t"
10936             "POP    EDX\n\t"
10937             "CMP    EDX,0x80000000\n\t"
10938             "JNE,s  fast\n\t"
10939             "TEST   EAX,EAX\n\t"
10940             "JNE,s  fast\n\t"
10941             "SUB    ESP,4\t# Convert float to long\n\t"
10942             "MOVSS  [ESP],$src\n\t"
10943             "FLD_S  [ESP]\n\t"
10944             "ADD    ESP,4\n\t"
10945             "CALL   d2l_wrapper\n"
10946       "fast:" %}
10947   ins_encode %{
10948     Label fast;
10949     __ subptr(rsp, 8);
10950     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10951     __ fld_s(Address(rsp, 0));
10952     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10953     __ fistp_d(Address(rsp, 0));
10954     // Restore the rounding mode, mask the exception
10955     if (Compile::current()->in_24_bit_fp_mode()) {
10956       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10957     } else {
10958       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10959     }
10960     // Load the converted long, adjust CPU stack
10961     __ pop(rax);
10962     __ pop(rdx);
10963     __ cmpl(rdx, 0x80000000);
10964     __ jccb(Assembler::notEqual, fast);
10965     __ testl(rax, rax);
10966     __ jccb(Assembler::notEqual, fast);
10967     __ subptr(rsp, 4);
10968     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10969     __ fld_s(Address(rsp, 0));
10970     __ addptr(rsp, 4);
10971     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10972     __ bind(fast);
10973   %}
10974   ins_pipe( pipe_slow );
10975 %}
10976 
10977 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10978   predicate( UseSSE<=1 );
10979   match(Set dst (ConvI2D src));
10980   format %{ "FILD   $src\n\t"
10981             "FSTP   $dst" %}
10982   opcode(0xDB, 0x0);  /* DB /0 */
10983   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10984   ins_pipe( fpu_reg_mem );
10985 %}
10986 
10987 instruct convI2D_reg(regD dst, rRegI src) %{
10988   predicate( UseSSE>=2 && !UseXmmI2D );
10989   match(Set dst (ConvI2D src));
10990   format %{ "CVTSI2SD $dst,$src" %}
10991   ins_encode %{
10992     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10993   %}
10994   ins_pipe( pipe_slow );
10995 %}
10996 
10997 instruct convI2D_mem(regD dst, memory mem) %{
10998   predicate( UseSSE>=2 );
10999   match(Set dst (ConvI2D (LoadI mem)));
11000   format %{ "CVTSI2SD $dst,$mem" %}
11001   ins_encode %{
11002     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11003   %}
11004   ins_pipe( pipe_slow );
11005 %}
11006 
11007 instruct convXI2D_reg(regD dst, rRegI src)
11008 %{
11009   predicate( UseSSE>=2 && UseXmmI2D );
11010   match(Set dst (ConvI2D src));
11011 
11012   format %{ "MOVD  $dst,$src\n\t"
11013             "CVTDQ2PD $dst,$dst\t# i2d" %}
11014   ins_encode %{
11015     __ movdl($dst$$XMMRegister, $src$$Register);
11016     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11017   %}
11018   ins_pipe(pipe_slow); // XXX
11019 %}
11020 
11021 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11022   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11023   match(Set dst (ConvI2D (LoadI mem)));
11024   format %{ "FILD   $mem\n\t"
11025             "FSTP   $dst" %}
11026   opcode(0xDB);      /* DB /0 */
11027   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11028               Pop_Reg_DPR(dst));
11029   ins_pipe( fpu_reg_mem );
11030 %}
11031 
11032 // Convert a byte to a float; no rounding step needed.
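// (A value in [0,255] is exactly representable in a 24-bit significand, so the
// usual spill-through-memory rounding step can be skipped even in 24-bit mode.)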
11033 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11034   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11035   match(Set dst (ConvI2F src));
11036   format %{ "FILD   $src\n\t"
11037             "FSTP   $dst" %}
11038 
11039   opcode(0xDB, 0x0);  /* DB /0 */
11040   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11041   ins_pipe( fpu_reg_mem );
11042 %}
11043 
11044 // In 24-bit mode, force exponent rounding by storing back out
11045 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11046   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11047   match(Set dst (ConvI2F src));
11048   ins_cost(200);
11049   format %{ "FILD   $src\n\t"
11050             "FSTP_S $dst" %}
11051   opcode(0xDB, 0x0);  /* DB /0 */
11052   ins_encode( Push_Mem_I(src),
11053               Pop_Mem_FPR(dst));
11054   ins_pipe( fpu_mem_mem );
11055 %}
11056 
11057 // In 24-bit mode, force rounding to 24-bit precision by storing back out
11058 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11059   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11060   match(Set dst (ConvI2F (LoadI mem)));
11061   ins_cost(200);
11062   format %{ "FILD   $mem\n\t"
11063             "FSTP_S $dst" %}
11064   opcode(0xDB);  /* DB /0 */
11065   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11066               Pop_Mem_FPR(dst));
11067   ins_pipe( fpu_mem_mem );
11068 %}
11069 
11070 // This instruction does not round to 24-bits
11071 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11072   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11073   match(Set dst (ConvI2F src));
11074   format %{ "FILD   $src\n\t"
11075             "FSTP   $dst" %}
11076   opcode(0xDB, 0x0);  /* DB /0 */
11077   ins_encode( Push_Mem_I(src),
11078               Pop_Reg_FPR(dst));
11079   ins_pipe( fpu_reg_mem );
11080 %}
11081 
11082 // This instruction does not round to 24-bits
11083 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11084   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11085   match(Set dst (ConvI2F (LoadI mem)));
11086   format %{ "FILD   $mem\n\t"
11087             "FSTP   $dst" %}
11088   opcode(0xDB);      /* DB /0 */
11089   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11090               Pop_Reg_FPR(dst));
11091   ins_pipe( fpu_reg_mem );
11092 %}
11093 
11094 // Convert an int to a float in xmm; no rounding step needed.
11095 instruct convI2F_reg(regF dst, rRegI src) %{
11096   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11097   match(Set dst (ConvI2F src));
11098   format %{ "CVTSI2SS $dst, $src" %}
11099   ins_encode %{
11100     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11101   %}
11102   ins_pipe( pipe_slow );
11103 %}
11104 
11105 instruct convXI2F_reg(regF dst, rRegI src)
11106 %{
11107   predicate( UseSSE>=2 && UseXmmI2F );
11108   match(Set dst (ConvI2F src));
11109 
11110   format %{ "MOVD  $dst,$src\n\t"
11111             "CVTDQ2PS $dst,$dst\t# i2f" %}
11112   ins_encode %{
11113     __ movdl($dst$$XMMRegister, $src$$Register);
11114     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11115   %}
11116   ins_pipe(pipe_slow); // XXX
11117 %}
11118 
11119 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11120   match(Set dst (ConvI2L src));
11121   effect(KILL cr);
11122   ins_cost(375);
11123   format %{ "MOV    $dst.lo,$src\n\t"
11124             "MOV    $dst.hi,$src\n\t"
11125             "SAR    $dst.hi,31" %}
11126   ins_encode(convert_int_long(dst,src));
11127   ins_pipe( ialu_reg_reg_long );
11128 %}
11129 
11130 // Zero-extend convert int to long
11131 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11132   match(Set dst (AndL (ConvI2L src) mask) );
11133   effect( KILL flags );
11134   ins_cost(250);
11135   format %{ "MOV    $dst.lo,$src\n\t"
11136             "XOR    $dst.hi,$dst.hi" %}
11137   opcode(0x33); // XOR
11138   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11139   ins_pipe( ialu_reg_reg_long );
11140 %}
11141 
11142 // Zero-extend long
11143 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11144   match(Set dst (AndL src mask) );
11145   effect( KILL flags );
11146   ins_cost(250);
11147   format %{ "MOV    $dst.lo,$src.lo\n\t"
11148             "XOR    $dst.hi,$dst.hi" %}
11149   opcode(0x33); // XOR
11150   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11151   ins_pipe( ialu_reg_reg_long );
11152 %}
11153 
11154 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11155   predicate (UseSSE<=1);
11156   match(Set dst (ConvL2D src));
11157   effect( KILL cr );
11158   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11159             "PUSH   $src.lo\n\t"
11160             "FILD   ST,[ESP + #0]\n\t"
11161             "ADD    ESP,8\n\t"
11162             "FSTP_D $dst\t# D-round" %}
11163   opcode(0xDF, 0x5);  /* DF /5 */
11164   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11165   ins_pipe( pipe_slow );
11166 %}
11167 
11168 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11169   predicate (UseSSE>=2);
11170   match(Set dst (ConvL2D src));
11171   effect( KILL cr );
11172   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11173             "PUSH   $src.lo\n\t"
11174             "FILD_D [ESP]\n\t"
11175             "FSTP_D [ESP]\n\t"
11176             "MOVSD  $dst,[ESP]\n\t"
11177             "ADD    ESP,8" %}
11178   opcode(0xDF, 0x5);  /* DF /5 */
11179   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11180   ins_pipe( pipe_slow );
11181 %}
11182 
11183 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11184   predicate (UseSSE>=1);
11185   match(Set dst (ConvL2F src));
11186   effect( KILL cr );
11187   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11188             "PUSH   $src.lo\n\t"
11189             "FILD_D [ESP]\n\t"
11190             "FSTP_S [ESP]\n\t"
11191             "MOVSS  $dst,[ESP]\n\t"
11192             "ADD    ESP,8" %}
11193   opcode(0xDF, 0x5);  /* DF /5 */
11194   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11195   ins_pipe( pipe_slow );
11196 %}
11197 
11198 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11199   match(Set dst (ConvL2F src));
11200   effect( KILL cr );
11201   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11202             "PUSH   $src.lo\n\t"
11203             "FILD   ST,[ESP + #0]\n\t"
11204             "ADD    ESP,8\n\t"
11205             "FSTP_S $dst\t# F-round" %}
11206   opcode(0xDF, 0x5);  /* DF /5 */
11207   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11208   ins_pipe( pipe_slow );
11209 %}
11210 
11211 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11212   match(Set dst (ConvL2I src));
11213   effect( DEF dst, USE src );
11214   format %{ "MOV    $dst,$src.lo" %}
11215   ins_encode(enc_CopyL_Lo(dst,src));
11216   ins_pipe( ialu_reg_reg );
11217 %}
11218 
11219 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11220   match(Set dst (MoveF2I src));
11221   effect( DEF dst, USE src );
11222   ins_cost(100);
11223   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11224   ins_encode %{
11225     __ movl($dst$$Register, Address(rsp, $src$$disp));
11226   %}
11227   ins_pipe( ialu_reg_mem );
11228 %}
11229 
11230 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11231   predicate(UseSSE==0);
11232   match(Set dst (MoveF2I src));
11233   effect( DEF dst, USE src );
11234 
11235   ins_cost(125);
11236   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11237   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11238   ins_pipe( fpu_mem_reg );
11239 %}
11240 
11241 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11242   predicate(UseSSE>=1);
11243   match(Set dst (MoveF2I src));
11244   effect( DEF dst, USE src );
11245 
11246   ins_cost(95);
11247   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11248   ins_encode %{
11249     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11250   %}
11251   ins_pipe( pipe_slow );
11252 %}
11253 
11254 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11255   predicate(UseSSE>=2);
11256   match(Set dst (MoveF2I src));
11257   effect( DEF dst, USE src );
11258   ins_cost(85);
11259   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11260   ins_encode %{
11261     __ movdl($dst$$Register, $src$$XMMRegister);
11262   %}
11263   ins_pipe( pipe_slow );
11264 %}
11265 
11266 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11267   match(Set dst (MoveI2F src));
11268   effect( DEF dst, USE src );
11269 
11270   ins_cost(100);
11271   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11272   ins_encode %{
11273     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11274   %}
11275   ins_pipe( ialu_mem_reg );
11276 %}
11277 
11278 
11279 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11280   predicate(UseSSE==0);
11281   match(Set dst (MoveI2F src));
11282   effect(DEF dst, USE src);
11283 
11284   ins_cost(125);
11285   format %{ "FLD_S  $src\n\t"
11286             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11287   opcode(0xD9);               /* D9 /0, FLD m32real */
11288   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11289               Pop_Reg_FPR(dst) );
11290   ins_pipe( fpu_reg_mem );
11291 %}
11292 
11293 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11294   predicate(UseSSE>=1);
11295   match(Set dst (MoveI2F src));
11296   effect( DEF dst, USE src );
11297 
11298   ins_cost(95);
11299   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11300   ins_encode %{
11301     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11302   %}
11303   ins_pipe( pipe_slow );
11304 %}
11305 
11306 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11307   predicate(UseSSE>=2);
11308   match(Set dst (MoveI2F src));
11309   effect( DEF dst, USE src );
11310 
11311   ins_cost(85);
11312   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11313   ins_encode %{
11314     __ movdl($dst$$XMMRegister, $src$$Register);
11315   %}
11316   ins_pipe( pipe_slow );
11317 %}
11318 
11319 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11320   match(Set dst (MoveD2L src));
11321   effect(DEF dst, USE src);
11322 
11323   ins_cost(250);
11324   format %{ "MOV    $dst.lo,$src\n\t"
11325             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11326   opcode(0x8B, 0x8B);
11327   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11328   ins_pipe( ialu_mem_long_reg );
11329 %}
11330 
11331 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11332   predicate(UseSSE<=1);
11333   match(Set dst (MoveD2L src));
11334   effect(DEF dst, USE src);
11335 
11336   ins_cost(125);
11337   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11338   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11339   ins_pipe( fpu_mem_reg );
11340 %}
11341 
11342 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11343   predicate(UseSSE>=2);
11344   match(Set dst (MoveD2L src));
11345   effect(DEF dst, USE src);
11346   ins_cost(95);
11347   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11348   ins_encode %{
11349     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11350   %}
11351   ins_pipe( pipe_slow );
11352 %}
11353 
11354 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11355   predicate(UseSSE>=2);
11356   match(Set dst (MoveD2L src));
11357   effect(DEF dst, USE src, TEMP tmp);
11358   ins_cost(85);
11359   format %{ "MOVD   $dst.lo,$src\n\t"
11360             "PSHUFLW $tmp,$src,0x4E\n\t"
11361             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11362   ins_encode %{
11363     __ movdl($dst$$Register, $src$$XMMRegister);
11364     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11365     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11366   %}
11367   ins_pipe( pipe_slow );
11368 %}
11369 
11370 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11371   match(Set dst (MoveL2D src));
11372   effect(DEF dst, USE src);
11373 
11374   ins_cost(200);
11375   format %{ "MOV    $dst,$src.lo\n\t"
11376             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11377   opcode(0x89, 0x89);
11378   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11379   ins_pipe( ialu_mem_long_reg );
11380 %}
11381 
11382 
11383 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11384   predicate(UseSSE<=1);
11385   match(Set dst (MoveL2D src));
11386   effect(DEF dst, USE src);
11387   ins_cost(125);
11388 
11389   format %{ "FLD_D  $src\n\t"
11390             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11391   opcode(0xDD);               /* DD /0, FLD m64real */
11392   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11393               Pop_Reg_DPR(dst) );
11394   ins_pipe( fpu_reg_mem );
11395 %}
11396 
11397 
11398 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11399   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11400   match(Set dst (MoveL2D src));
11401   effect(DEF dst, USE src);
11402 
11403   ins_cost(95);
11404   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11405   ins_encode %{
11406     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11407   %}
11408   ins_pipe( pipe_slow );
11409 %}
11410 
11411 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11412   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11413   match(Set dst (MoveL2D src));
11414   effect(DEF dst, USE src);
11415 
11416   ins_cost(95);
11417   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11418   ins_encode %{
11419     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11420   %}
11421   ins_pipe( pipe_slow );
11422 %}
11423 
11424 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11425   predicate(UseSSE>=2);
11426   match(Set dst (MoveL2D src));
11427   effect(TEMP dst, USE src, TEMP tmp);
11428   ins_cost(85);
11429   format %{ "MOVD   $dst,$src.lo\n\t"
11430             "MOVD   $tmp,$src.hi\n\t"
11431             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11432   ins_encode %{
11433     __ movdl($dst$$XMMRegister, $src$$Register);
11434     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11435     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11436   %}
11437   ins_pipe( pipe_slow );
11438 %}
11439 
11440 
11441 // =======================================================================
11442 // fast clearing of an array
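// Two variants are provided: the default path uses REP STOS (doubleword stores),
// while REP STOSB is used when UseFastStosb is set (typically CPUs that report
// fast string operations).  Both zero memory through EAX/EDI/ECX, which is why
// those registers are USE_KILL/KILL'd below.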
11443 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11444   predicate(!UseFastStosb);
11445   match(Set dummy (ClearArray cnt base));
11446   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11447   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11448             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11449             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11450   ins_encode %{
11451     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11452   %}
11453   ins_pipe( pipe_slow );
11454 %}
11455 
11456 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11457   predicate(UseFastStosb);
11458   match(Set dummy (ClearArray cnt base));
11459   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11460   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11461             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11462             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11463   ins_encode %{
11464     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11465   %}
11466   ins_pipe( pipe_slow );
11467 %}
11468 
11469 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11470                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11471   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11472   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11473 
11474   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11475   ins_encode %{
11476     __ string_compare($str1$$Register, $str2$$Register,
11477                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11478                       $tmp1$$XMMRegister);
11479   %}
11480   ins_pipe( pipe_slow );
11481 %}
11482 
11483 // fast string equals
11484 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11485                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11486   match(Set result (StrEquals (Binary str1 str2) cnt));
11487   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11488 
11489   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11490   ins_encode %{
11491     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11492                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11493                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11494   %}
11495   ins_pipe( pipe_slow );
11496 %}
11497 
11498 // fast search of substring with known size.
11499 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11500                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11501   predicate(UseSSE42Intrinsics);
11502   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11503   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11504 
11505   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11506   ins_encode %{
11507     int icnt2 = (int)$int_cnt2$$constant;
11508     if (icnt2 >= 8) {
11509       // IndexOf for constant substrings with size >= 8 elements
11510       // which don't need to be loaded through the stack.
11511       __ string_indexofC8($str1$$Register, $str2$$Register,
11512                           $cnt1$$Register, $cnt2$$Register,
11513                           icnt2, $result$$Register,
11514                           $vec$$XMMRegister, $tmp$$Register);
11515     } else {
11516       // Small strings are loaded through the stack if they cross a page boundary.
11517       __ string_indexof($str1$$Register, $str2$$Register,
11518                         $cnt1$$Register, $cnt2$$Register,
11519                         icnt2, $result$$Register,
11520                         $vec$$XMMRegister, $tmp$$Register);
11521     }
11522   %}
11523   ins_pipe( pipe_slow );
11524 %}
11525 
11526 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11527                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11528   predicate(UseSSE42Intrinsics);
11529   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11530   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11531 
11532   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11533   ins_encode %{
11534     __ string_indexof($str1$$Register, $str2$$Register,
11535                       $cnt1$$Register, $cnt2$$Register,
11536                       (-1), $result$$Register,
11537                       $vec$$XMMRegister, $tmp$$Register);
11538   %}
11539   ins_pipe( pipe_slow );
11540 %}
11541 
11542 // fast array equals
11543 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11544                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11545 %{
11546   match(Set result (AryEq ary1 ary2));
11547   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11548   //ins_cost(300);
11549 
11550   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11551   ins_encode %{
11552     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11553                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11554                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11555   %}
11556   ins_pipe( pipe_slow );
11557 %}
11558 
11559 // encode char[] to byte[] in ISO_8859_1
11560 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11561                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11562                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11563   match(Set result (EncodeISOArray src (Binary dst len)));
11564   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11565 
11566   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11567   ins_encode %{
11568     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11569                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11570                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11571   %}
11572   ins_pipe( pipe_slow );
11573 %}
11574 
11575 
11576 //----------Control Flow Instructions------------------------------------------
11577 // Signed compare Instructions
11578 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11579   match(Set cr (CmpI op1 op2));
11580   effect( DEF cr, USE op1, USE op2 );
11581   format %{ "CMP    $op1,$op2" %}
11582   opcode(0x3B);  /* Opcode 3B /r */
11583   ins_encode( OpcP, RegReg( op1, op2) );
11584   ins_pipe( ialu_cr_reg_reg );
11585 %}
11586 
11587 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11588   match(Set cr (CmpI op1 op2));
11589   effect( DEF cr, USE op1 );
11590   format %{ "CMP    $op1,$op2" %}
11591   opcode(0x81,0x07);  /* Opcode 81 /7 */
11592   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11593   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11594   ins_pipe( ialu_cr_reg_imm );
11595 %}
11596 
11597 // Cisc-spilled version of cmpI_eReg
11598 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11599   match(Set cr (CmpI op1 (LoadI op2)));
11600 
11601   format %{ "CMP    $op1,$op2" %}
11602   ins_cost(500);
11603   opcode(0x3B);  /* Opcode 3B /r */
11604   ins_encode( OpcP, RegMem( op1, op2) );
11605   ins_pipe( ialu_cr_reg_mem );
11606 %}
11607 
11608 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11609   match(Set cr (CmpI src zero));
11610   effect( DEF cr, USE src );
11611 
11612   format %{ "TEST   $src,$src" %}
11613   opcode(0x85);
11614   ins_encode( OpcP, RegReg( src, src ) );
11615   ins_pipe( ialu_cr_reg_imm );
11616 %}
11617 
11618 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11619   match(Set cr (CmpI (AndI src con) zero));
11620 
11621   format %{ "TEST   $src,$con" %}
11622   opcode(0xF7,0x00);
11623   ins_encode( OpcP, RegOpc(src), Con32(con) );
11624   ins_pipe( ialu_cr_reg_imm );
11625 %}
11626 
11627 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11628   match(Set cr (CmpI (AndI src mem) zero));
11629 
11630   format %{ "TEST   $src,$mem" %}
11631   opcode(0x85);
11632   ins_encode( OpcP, RegMem( src, mem ) );
11633   ins_pipe( ialu_cr_reg_mem );
11634 %}
11635 
11636 // Unsigned compare Instructions; really, same as signed except they
11637 // produce an eFlagsRegU instead of eFlagsReg.
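// Note that the encodings are identical to the signed compares (same CMP/TEST
// opcodes); only the condition codes chosen by consumers of eFlagsRegU differ
// (e.g. JA/JB instead of JG/JL).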
11638 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11639   match(Set cr (CmpU op1 op2));
11640 
11641   format %{ "CMPu   $op1,$op2" %}
11642   opcode(0x3B);  /* Opcode 3B /r */
11643   ins_encode( OpcP, RegReg( op1, op2) );
11644   ins_pipe( ialu_cr_reg_reg );
11645 %}
11646 
11647 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11648   match(Set cr (CmpU op1 op2));
11649 
11650   format %{ "CMPu   $op1,$op2" %}
11651   opcode(0x81,0x07);  /* Opcode 81 /7 */
11652   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11653   ins_pipe( ialu_cr_reg_imm );
11654 %}
11655 
11656 // // Cisc-spilled version of cmpU_eReg
11657 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11658   match(Set cr (CmpU op1 (LoadI op2)));
11659 
11660   format %{ "CMPu   $op1,$op2" %}
11661   ins_cost(500);
11662   opcode(0x3B);  /* Opcode 3B /r */
11663   ins_encode( OpcP, RegMem( op1, op2) );
11664   ins_pipe( ialu_cr_reg_mem );
11665 %}
11666 
11667 // // Cisc-spilled version of cmpU_eReg
11668 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11669 //  match(Set cr (CmpU (LoadI op1) op2));
11670 //
11671 //  format %{ "CMPu   $op1,$op2" %}
11672 //  ins_cost(500);
11673 //  opcode(0x39);  /* Opcode 39 /r */
11674 //  ins_encode( OpcP, RegMem( op1, op2) );
11675 //%}
11676 
11677 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11678   match(Set cr (CmpU src zero));
11679 
11680   format %{ "TESTu  $src,$src" %}
11681   opcode(0x85);
11682   ins_encode( OpcP, RegReg( src, src ) );
11683   ins_pipe( ialu_cr_reg_imm );
11684 %}
11685 
11686 // Unsigned pointer compare Instructions
11687 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11688   match(Set cr (CmpP op1 op2));
11689 
11690   format %{ "CMPu   $op1,$op2" %}
11691   opcode(0x3B);  /* Opcode 3B /r */
11692   ins_encode( OpcP, RegReg( op1, op2) );
11693   ins_pipe( ialu_cr_reg_reg );
11694 %}
11695 
11696 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11697   match(Set cr (CmpP op1 op2));
11698 
11699   format %{ "CMPu   $op1,$op2" %}
11700   opcode(0x81,0x07);  /* Opcode 81 /7 */
11701   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11702   ins_pipe( ialu_cr_reg_imm );
11703 %}
11704 
11705 // // Cisc-spilled version of cmpP_eReg
11706 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11707   match(Set cr (CmpP op1 (LoadP op2)));
11708 
11709   format %{ "CMPu   $op1,$op2" %}
11710   ins_cost(500);
11711   opcode(0x3B);  /* Opcode 3B /r */
11712   ins_encode( OpcP, RegMem( op1, op2) );
11713   ins_pipe( ialu_cr_reg_mem );
11714 %}
11715 
11716 // // Cisc-spilled version of cmpP_eReg
11717 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11718 //  match(Set cr (CmpP (LoadP op1) op2));
11719 //
11720 //  format %{ "CMPu   $op1,$op2" %}
11721 //  ins_cost(500);
11722 //  opcode(0x39);  /* Opcode 39 /r */
11723 //  ins_encode( OpcP, RegMem( op1, op2) );
11724 //%}
11725 
11726 // Compare raw pointer (used in out-of-heap check).
11727 // Only works because non-oop pointers must be raw pointers
11728 // and raw pointers have no anti-dependencies.
11729 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11730   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11731   match(Set cr (CmpP op1 (LoadP op2)));
11732 
11733   format %{ "CMPu   $op1,$op2" %}
11734   opcode(0x3B);  /* Opcode 3B /r */
11735   ins_encode( OpcP, RegMem( op1, op2) );
11736   ins_pipe( ialu_cr_reg_mem );
11737 %}
11738 
11739 //
11740 // This will generate a signed flags result. This should be ok
11741 // since any compare to a zero should be eq/neq.
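// (TEST reg,reg sets ZF exactly when the register is zero, i.e. when the
// pointer is null, so the signed flags are sufficient for the EQ/NE-against-zero
// uses described above.)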
11742 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11743   match(Set cr (CmpP src zero));
11744 
11745   format %{ "TEST   $src,$src" %}
11746   opcode(0x85);
11747   ins_encode( OpcP, RegReg( src, src ) );
11748   ins_pipe( ialu_cr_reg_imm );
11749 %}
11750 
11751 // Cisc-spilled version of testP_reg
11752 // This will generate a signed flags result. This should be ok
11753 // since any compare to a zero should be eq/neq.
11754 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11755   match(Set cr (CmpP (LoadP op) zero));
11756 
11757   format %{ "TEST   $op,0xFFFFFFFF" %}
11758   ins_cost(500);
11759   opcode(0xF7);               /* Opcode F7 /0 */
11760   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11761   ins_pipe( ialu_cr_reg_imm );
11762 %}
11763 
11764 // Yanked all unsigned pointer compare operations.
11765 // Pointer compares are done with CmpP which is already unsigned.
11766 
11767 //----------Max and Min--------------------------------------------------------
11768 // Min Instructions
11769 ////
11770 //   *** Min and Max using the conditional move are slower than the
11771 //   *** branch version on a Pentium III.
11772 // // Conditional move for min
11773 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11774 //  effect( USE_DEF op2, USE op1, USE cr );
11775 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11776 //  opcode(0x4C,0x0F);
11777 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11778 //  ins_pipe( pipe_cmov_reg );
11779 //%}
11780 //
11781 //// Min Register with Register (P6 version)
11782 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11783 //  predicate(VM_Version::supports_cmov() );
11784 //  match(Set op2 (MinI op1 op2));
11785 //  ins_cost(200);
11786 //  expand %{
11787 //    eFlagsReg cr;
11788 //    compI_eReg(cr,op1,op2);
11789 //    cmovI_reg_lt(op2,op1,cr);
11790 //  %}
11791 //%}
11792 
11793 // Min Register with Register (generic version)
11794 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11795   match(Set dst (MinI dst src));
11796   effect(KILL flags);
11797   ins_cost(300);
11798 
11799   format %{ "MIN    $dst,$src" %}
11800   opcode(0xCC);
11801   ins_encode( min_enc(dst,src) );
11802   ins_pipe( pipe_slow );
11803 %}
11804 
11805 // Max Register with Register
11806 //   *** Min and Max using the conditional move are slower than the
11807 //   *** branch version on a Pentium III.
11808 // // Conditional move for max
11809 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11810 //  effect( USE_DEF op2, USE op1, USE cr );
11811 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11812 //  opcode(0x4F,0x0F);
11813 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11814 //  ins_pipe( pipe_cmov_reg );
11815 //%}
11816 //
11817 // // Max Register with Register (P6 version)
11818 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11819 //  predicate(VM_Version::supports_cmov() );
11820 //  match(Set op2 (MaxI op1 op2));
11821 //  ins_cost(200);
11822 //  expand %{
11823 //    eFlagsReg cr;
11824 //    compI_eReg(cr,op1,op2);
11825 //    cmovI_reg_gt(op2,op1,cr);
11826 //  %}
11827 //%}
11828 
11829 // Max Register with Register (generic version)
11830 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11831   match(Set dst (MaxI dst src));
11832   effect(KILL flags);
11833   ins_cost(300);
11834 
11835   format %{ "MAX    $dst,$src" %}
11836   opcode(0xCC);
11837   ins_encode( max_enc(dst,src) );
11838   ins_pipe( pipe_slow );
11839 %}
11840 
11841 // ============================================================================
11842 // Counted Loop limit node which represents the exact final iterator value.
11843 // Note: the resulting value should fit into the integer range since
11844 // counted loops have a limit check on overflow.
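// For illustration: with init=0, limit=10 and stride=3 the expression below
// yields 0 + 3*((10 - 0 + 3 - 1)/3) = 12, the first induction-variable value
// that fails the i < 10 test (i takes the values 0, 3, 6, 9, then 12).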
11845 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11846   match(Set limit (LoopLimit (Binary init limit) stride));
11847   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11848   ins_cost(300);
11849 
11850   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11851   ins_encode %{
11852     int strd = (int)$stride$$constant;
11853     assert(strd != 1 && strd != -1, "sanity");
11854     int m1 = (strd > 0) ? 1 : -1;
11855     // Convert limit to long (EAX:EDX)
11856     __ cdql();
11857     // Convert init to long (init:tmp)
11858     __ movl($tmp$$Register, $init$$Register);
11859     __ sarl($tmp$$Register, 31);
11860     // $limit - $init
11861     __ subl($limit$$Register, $init$$Register);
11862     __ sbbl($limit_hi$$Register, $tmp$$Register);
11863     // + ($stride - 1)
11864     if (strd > 0) {
11865       __ addl($limit$$Register, (strd - 1));
11866       __ adcl($limit_hi$$Register, 0);
11867       __ movl($tmp$$Register, strd);
11868     } else {
11869       __ addl($limit$$Register, (strd + 1));
11870       __ adcl($limit_hi$$Register, -1);
11871       __ lneg($limit_hi$$Register, $limit$$Register);
11872       __ movl($tmp$$Register, -strd);
11873     }
11874     // signed division: (EAX:EDX) / pos_stride
11875     __ idivl($tmp$$Register);
11876     if (strd < 0) {
11877       // restore sign
11878       __ negl($tmp$$Register);
11879     }
11880     // (EAX) * stride
11881     __ mull($tmp$$Register);
11882     // + init (ignore upper bits)
11883     __ addl($limit$$Register, $init$$Register);
11884   %}
11885   ins_pipe( pipe_slow );
11886 %}
11887 
11888 // ============================================================================
11889 // Branch Instructions
11890 // Jump Table
11891 instruct jumpXtnd(rRegI switch_val) %{
11892   match(Jump switch_val);
11893   ins_cost(350);
11894   format %{  "JMP    [$constantaddress](,$switch_val,1)" %}
11895   ins_encode %{
11896     // Jump to Address(table_base + switch_reg)
11897     Address index(noreg, $switch_val$$Register, Address::times_1);
11898     __ jump(ArrayAddress($constantaddress, index));
11899   %}
11900   ins_pipe(pipe_jmp);
11901 %}
11902 
11903 // Jump Direct - Label defines a relative address from JMP+1
11904 instruct jmpDir(label labl) %{
11905   match(Goto);
11906   effect(USE labl);
11907 
11908   ins_cost(300);
11909   format %{ "JMP    $labl" %}
11910   size(5);
11911   ins_encode %{
11912     Label* L = $labl$$label;
11913     __ jmp(*L, false); // Always long jump
11914   %}
11915   ins_pipe( pipe_jmp );
11916 %}
11917 
11918 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11919 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11920   match(If cop cr);
11921   effect(USE labl);
11922 
11923   ins_cost(300);
11924   format %{ "J$cop    $labl" %}
11925   size(6);
11926   ins_encode %{
11927     Label* L = $labl$$label;
11928     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11929   %}
11930   ins_pipe( pipe_jcc );
11931 %}
11932 
11933 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11934 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11935   match(CountedLoopEnd cop cr);
11936   effect(USE labl);
11937 
11938   ins_cost(300);
11939   format %{ "J$cop    $labl\t# Loop end" %}
11940   size(6);
11941   ins_encode %{
11942     Label* L = $labl$$label;
11943     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11944   %}
11945   ins_pipe( pipe_jcc );
11946 %}
11947 
11948 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11949 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11950   match(CountedLoopEnd cop cmp);
11951   effect(USE labl);
11952 
11953   ins_cost(300);
11954   format %{ "J$cop,u  $labl\t# Loop end" %}
11955   size(6);
11956   ins_encode %{
11957     Label* L = $labl$$label;
11958     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11959   %}
11960   ins_pipe( pipe_jcc );
11961 %}
11962 
11963 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11964   match(CountedLoopEnd cop cmp);
11965   effect(USE labl);
11966 
11967   ins_cost(200);
11968   format %{ "J$cop,u  $labl\t# Loop end" %}
11969   size(6);
11970   ins_encode %{
11971     Label* L = $labl$$label;
11972     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11973   %}
11974   ins_pipe( pipe_jcc );
11975 %}
11976 
11977 // Jump Direct Conditional - using unsigned comparison
11978 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11979   match(If cop cmp);
11980   effect(USE labl);
11981 
11982   ins_cost(300);
11983   format %{ "J$cop,u  $labl" %}
11984   size(6);
11985   ins_encode %{
11986     Label* L = $labl$$label;
11987     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11988   %}
11989   ins_pipe(pipe_jcc);
11990 %}
11991 
11992 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11993   match(If cop cmp);
11994   effect(USE labl);
11995 
11996   ins_cost(200);
11997   format %{ "J$cop,u  $labl" %}
11998   size(6);
11999   ins_encode %{
12000     Label* L = $labl$$label;
12001     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12002   %}
12003   ins_pipe(pipe_jcc);
12004 %}
12005 
12006 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12007   match(If cop cmp);
12008   effect(USE labl);
12009 
12010   ins_cost(200);
12011   format %{ $$template
12012     if ($cop$$cmpcode == Assembler::notEqual) {
12013       $$emit$$"JP,u   $labl\n\t"
12014       $$emit$$"J$cop,u   $labl"
12015     } else {
12016       $$emit$$"JP,u   done\n\t"
12017       $$emit$$"J$cop,u   $labl\n\t"
12018       $$emit$$"done:"
12019     }
12020   %}
12021   ins_encode %{
12022     Label* l = $labl$$label;
12023     if ($cop$$cmpcode == Assembler::notEqual) {
12024       __ jcc(Assembler::parity, *l, false);
12025       __ jcc(Assembler::notEqual, *l, false);
12026     } else if ($cop$$cmpcode == Assembler::equal) {
12027       Label done;
12028       __ jccb(Assembler::parity, done);
12029       __ jcc(Assembler::equal, *l, false);
12030       __ bind(done);
12031     } else {
12032        ShouldNotReachHere();
12033     }
12034   %}
12035   ins_pipe(pipe_jcc);
12036 %}
12037 
12038 // ============================================================================
12039 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12040 // array for an instance of the superklass.  Set a hidden internal cache on a
12041 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12042 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
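// For reference: on a miss the scan leaves a non-zero value in $result (EDI)
// and NZ flags; on a hit EDI is cleared (the XOR in the format below), so a
// caller may test either the register or the flags.  The _vs_Zero variant that
// follows produces only the flags and treats EDI as scratch.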
12043 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12044   match(Set result (PartialSubtypeCheck sub super));
12045   effect( KILL rcx, KILL cr );
12046 
12047   ins_cost(1100);  // slightly larger than the next version
12048   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12049             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12050             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12051             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12052             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12053             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12054             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12055      "miss:\t" %}
12056 
12057   opcode(0x1); // Force a XOR of EDI
12058   ins_encode( enc_PartialSubtypeCheck() );
12059   ins_pipe( pipe_slow );
12060 %}
12061 
12062 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12063   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12064   effect( KILL rcx, KILL result );
12065 
12066   ins_cost(1000);
12067   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12068             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12069             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12070             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12071             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12072             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12073      "miss:\t" %}
12074 
12075   opcode(0x0);  // No need to XOR EDI
12076   ins_encode( enc_PartialSubtypeCheck() );
12077   ins_pipe( pipe_slow );
12078 %}
12079 
12080 // ============================================================================
12081 // Branch Instructions -- short offset versions
12082 //
12083 // These instructions are used to replace jumps of a long offset (the default
12084 // match) with jumps of a shorter offset.  These instructions are all tagged
12085 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12086 // match rules in general matching.  Instead, the ADLC generates a conversion
12087 // method in the MachNode which can be used to do in-place replacement of the
12088 // long variant with the shorter variant.  The compiler uses the
12089 // is_short_branch_offset() predicate in the machine-specific code section of
12090 // the file to decide whether a branch target is close enough for the short form.
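// For reference, the short forms use the 2-byte rel8 encodings (JMP: EB cb,
// Jcc: 7x cb), while the long forms use the 5-byte JMP rel32 (E9 cd) and the
// 6-byte Jcc rel32 (0F 8x cd) encodings; this is reflected in the size(2)
// versus size(5)/size(6) attributes of the corresponding instructions.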
12091 
12092 // Jump Direct - Label defines a relative address from JMP+1
12093 instruct jmpDir_short(label labl) %{
12094   match(Goto);
12095   effect(USE labl);
12096 
12097   ins_cost(300);
12098   format %{ "JMP,s  $labl" %}
12099   size(2);
12100   ins_encode %{
12101     Label* L = $labl$$label;
12102     __ jmpb(*L);
12103   %}
12104   ins_pipe( pipe_jmp );
12105   ins_short_branch(1);
12106 %}
12107 
12108 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12109 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12110   match(If cop cr);
12111   effect(USE labl);
12112 
12113   ins_cost(300);
12114   format %{ "J$cop,s  $labl" %}
12115   size(2);
12116   ins_encode %{
12117     Label* L = $labl$$label;
12118     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12119   %}
12120   ins_pipe( pipe_jcc );
12121   ins_short_branch(1);
12122 %}
12123 
12124 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12125 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12126   match(CountedLoopEnd cop cr);
12127   effect(USE labl);
12128 
12129   ins_cost(300);
12130   format %{ "J$cop,s  $labl\t# Loop end" %}
12131   size(2);
12132   ins_encode %{
12133     Label* L = $labl$$label;
12134     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12135   %}
12136   ins_pipe( pipe_jcc );
12137   ins_short_branch(1);
12138 %}
12139 
12140 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12141 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12142   match(CountedLoopEnd cop cmp);
12143   effect(USE labl);
12144 
12145   ins_cost(300);
12146   format %{ "J$cop,us $labl\t# Loop end" %}
12147   size(2);
12148   ins_encode %{
12149     Label* L = $labl$$label;
12150     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12151   %}
12152   ins_pipe( pipe_jcc );
12153   ins_short_branch(1);
12154 %}
12155 
12156 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12157   match(CountedLoopEnd cop cmp);
12158   effect(USE labl);
12159 
12160   ins_cost(300);
12161   format %{ "J$cop,us $labl\t# Loop end" %}
12162   size(2);
12163   ins_encode %{
12164     Label* L = $labl$$label;
12165     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12166   %}
12167   ins_pipe( pipe_jcc );
12168   ins_short_branch(1);
12169 %}
12170 
12171 // Jump Direct Conditional - using unsigned comparison
12172 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12173   match(If cop cmp);
12174   effect(USE labl);
12175 
12176   ins_cost(300);
12177   format %{ "J$cop,us $labl" %}
12178   size(2);
12179   ins_encode %{
12180     Label* L = $labl$$label;
12181     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12182   %}
12183   ins_pipe( pipe_jcc );
12184   ins_short_branch(1);
12185 %}
12186 
12187 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12188   match(If cop cmp);
12189   effect(USE labl);
12190 
12191   ins_cost(300);
12192   format %{ "J$cop,us $labl" %}
12193   size(2);
12194   ins_encode %{
12195     Label* L = $labl$$label;
12196     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12197   %}
12198   ins_pipe( pipe_jcc );
12199   ins_short_branch(1);
12200 %}
12201 
12202 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12203   match(If cop cmp);
12204   effect(USE labl);
12205 
12206   ins_cost(300);
12207   format %{ $$template
12208     if ($cop$$cmpcode == Assembler::notEqual) {
12209       $$emit$$"JP,u,s   $labl\n\t"
12210       $$emit$$"J$cop,u,s   $labl"
12211     } else {
12212       $$emit$$"JP,u,s   done\n\t"
12213       $$emit$$"J$cop,u,s  $labl\n\t"
12214       $$emit$$"done:"
12215     }
12216   %}
12217   size(4);
12218   ins_encode %{
12219     Label* l = $labl$$label;
12220     if ($cop$$cmpcode == Assembler::notEqual) {
12221       __ jccb(Assembler::parity, *l);
12222       __ jccb(Assembler::notEqual, *l);
12223     } else if ($cop$$cmpcode == Assembler::equal) {
12224       Label done;
12225       __ jccb(Assembler::parity, done);
12226       __ jccb(Assembler::equal, *l);
12227       __ bind(done);
12228     } else {
12229        ShouldNotReachHere();
12230     }
12231   %}
12232   ins_pipe(pipe_jcc);
12233   ins_short_branch(1);
12234 %}
12235 
12236 // ============================================================================
12237 // Long Compare
12238 //
12239 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12240 // is tricky.  The flavor of compare used depends on whether we are testing
12241 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12242 // The GE test is the negated LT test.  The LE test can be had by commuting
12243 // the operands (yielding a GE test) and then negating; negate again for the
12244 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12245 // NE test is negated from that.
12246 
12247 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12248 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12249 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12250 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12251 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12252 // foo match ends up with the wrong leaf.  One fix is to not match both
12253 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12254 // both forms beat the trinary form of long-compare and both are very useful
12255 // on Intel which has so few registers.
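// For illustration, the LT/GE flavor below turns the comparison into a 64-bit
// subtract whose high half sets the flags:
//   CMP  src1.lo,src2.lo      // produce a borrow out of the low halves
//   MOV  tmp,src1.hi
//   SBB  tmp,src2.hi          // LT/GE conditions now reflect the 64-bit compare
// while the EQ/NE-against-zero flavor simply ORs the two halves together.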
12256 
12257 // Manifest a CmpL result in an integer register.  Very painful.
12258 // This is the test to avoid.
12259 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12260   match(Set dst (CmpL3 src1 src2));
12261   effect( KILL flags );
12262   ins_cost(1000);
12263   format %{ "XOR    $dst,$dst\n\t"
12264             "CMP    $src1.hi,$src2.hi\n\t"
12265             "JLT,s  m_one\n\t"
12266             "JGT,s  p_one\n\t"
12267             "CMP    $src1.lo,$src2.lo\n\t"
12268             "JB,s   m_one\n\t"
12269             "JEQ,s  done\n"
12270     "p_one:\tINC    $dst\n\t"
12271             "JMP,s  done\n"
12272     "m_one:\tDEC    $dst\n"
12273      "done:" %}
12274   ins_encode %{
12275     Label p_one, m_one, done;
12276     __ xorptr($dst$$Register, $dst$$Register);
12277     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12278     __ jccb(Assembler::less,    m_one);
12279     __ jccb(Assembler::greater, p_one);
12280     __ cmpl($src1$$Register, $src2$$Register);
12281     __ jccb(Assembler::below,   m_one);
12282     __ jccb(Assembler::equal,   done);
12283     __ bind(p_one);
12284     __ incrementl($dst$$Register);
12285     __ jmpb(done);
12286     __ bind(m_one);
12287     __ decrementl($dst$$Register);
12288     __ bind(done);
12289   %}
12290   ins_pipe( pipe_slow );
12291 %}
12292 
12293 //======
12294 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12295 // compares.  Can be used for LE or GT compares by reversing arguments.
12296 // NOT GOOD FOR EQ/NE tests.
12297 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12298   match( Set flags (CmpL src zero ));
12299   ins_cost(100);
12300   format %{ "TEST   $src.hi,$src.hi" %}
12301   opcode(0x85);
12302   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12303   ins_pipe( ialu_cr_reg_reg );
12304 %}
12305 
12306 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12307 // compares.  Can be used for LE or GT compares by reversing arguments.
12308 // NOT GOOD FOR EQ/NE tests.
12309 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12310   match( Set flags (CmpL src1 src2 ));
12311   effect( TEMP tmp );
12312   ins_cost(300);
12313   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12314             "MOV    $tmp,$src1.hi\n\t"
12315             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12316   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12317   ins_pipe( ialu_cr_reg_reg );
12318 %}
12319 
12320 // Long compares reg < zero/req OR reg >= zero/req.
12321 // Just a wrapper for a normal branch, plus the predicate test.
12322 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12323   match(If cmp flags);
12324   effect(USE labl);
12325   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12326   expand %{
12327     jmpCon(cmp,flags,labl);    // JLT or JGE...
12328   %}
12329 %}
12330 
12331 // Compare 2 longs and CMOVE longs.
12332 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12333   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12334   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12335   ins_cost(400);
12336   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12337             "CMOV$cmp $dst.hi,$src.hi" %}
12338   opcode(0x0F,0x40);
12339   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12340   ins_pipe( pipe_cmov_reg_long );
12341 %}
12342 
12343 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12344   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12345   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12346   ins_cost(500);
12347   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12348             "CMOV$cmp $dst.hi,$src.hi" %}
12349   opcode(0x0F,0x40);
12350   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12351   ins_pipe( pipe_cmov_reg_long );
12352 %}
12353 
12354 // Compare 2 longs and CMOVE ints.
12355 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12356   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12357   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12358   ins_cost(200);
12359   format %{ "CMOV$cmp $dst,$src" %}
12360   opcode(0x0F,0x40);
12361   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12362   ins_pipe( pipe_cmov_reg );
12363 %}
12364 
12365 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12366   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12367   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12368   ins_cost(250);
12369   format %{ "CMOV$cmp $dst,$src" %}
12370   opcode(0x0F,0x40);
12371   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12372   ins_pipe( pipe_cmov_mem );
12373 %}
12374 
12375 // Compare 2 longs and CMOVE ints.
12376 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12377   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12378   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12379   ins_cost(200);
12380   format %{ "CMOV$cmp $dst,$src" %}
12381   opcode(0x0F,0x40);
12382   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12383   ins_pipe( pipe_cmov_reg );
12384 %}
12385 
12386 // Compare 2 longs and CMOVE doubles
12387 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12388   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12389   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12390   ins_cost(200);
12391   expand %{
12392     fcmovDPR_regS(cmp,flags,dst,src);
12393   %}
12394 %}
12395 
12396 // Compare 2 longs and CMOVE doubles
12397 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12398   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12399   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12400   ins_cost(200);
12401   expand %{
12402     fcmovD_regS(cmp,flags,dst,src);
12403   %}
12404 %}
12405 
12406 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12407   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12408   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12409   ins_cost(200);
12410   expand %{
12411     fcmovFPR_regS(cmp,flags,dst,src);
12412   %}
12413 %}
12414 
12415 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12416   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12417   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12418   ins_cost(200);
12419   expand %{
12420     fcmovF_regS(cmp,flags,dst,src);
12421   %}
12422 %}
12423 
12424 //======
12425 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12426 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12427   match( Set flags (CmpL src zero ));
12428   effect(TEMP tmp);
12429   ins_cost(200);
12430   format %{ "MOV    $tmp,$src.lo\n\t"
12431             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12432   ins_encode( long_cmp_flags0( src, tmp ) );
12433   ins_pipe( ialu_reg_reg_long );
12434 %}
12435 
12436 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12437 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12438   match( Set flags (CmpL src1 src2 ));
12439   ins_cost(200+300);
12440   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12441             "JNE,s  skip\n\t"
12442             "CMP    $src1.hi,$src2.hi\n\t"
12443      "skip:\t" %}
12444   ins_encode( long_cmp_flags1( src1, src2 ) );
12445   ins_pipe( ialu_cr_reg_reg );
12446 %}
12447 
12448 // Long compare reg == zero/reg OR reg != zero/reg
12449 // Just a wrapper for a normal branch, plus the predicate test.
12450 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12451   match(If cmp flags);
12452   effect(USE labl);
12453   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12454   expand %{
12455     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12456   %}
12457 %}
12458 
12459 // Compare 2 longs and CMOVE longs.
12460 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12461   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12462   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12463   ins_cost(400);
12464   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12465             "CMOV$cmp $dst.hi,$src.hi" %}
12466   opcode(0x0F,0x40);
12467   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12468   ins_pipe( pipe_cmov_reg_long );
12469 %}
12470 
12471 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12472   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12473   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12474   ins_cost(500);
12475   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12476             "CMOV$cmp $dst.hi,$src.hi" %}
12477   opcode(0x0F,0x40);
12478   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12479   ins_pipe( pipe_cmov_reg_long );
12480 %}
12481 
12482 // Compare 2 longs and CMOVE ints.
12483 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12484   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12485   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12486   ins_cost(200);
12487   format %{ "CMOV$cmp $dst,$src" %}
12488   opcode(0x0F,0x40);
12489   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12490   ins_pipe( pipe_cmov_reg );
12491 %}
12492 
12493 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12494   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12495   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12496   ins_cost(250);
12497   format %{ "CMOV$cmp $dst,$src" %}
12498   opcode(0x0F,0x40);
12499   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12500   ins_pipe( pipe_cmov_mem );
12501 %}
12502 
// Compare 2 longs and CMOVE ptrs.
12504 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12505   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12506   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12507   ins_cost(200);
12508   format %{ "CMOV$cmp $dst,$src" %}
12509   opcode(0x0F,0x40);
12510   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12511   ins_pipe( pipe_cmov_reg );
12512 %}
12513 
12514 // Compare 2 longs and CMOVE doubles
12515 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12517   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12518   ins_cost(200);
12519   expand %{
12520     fcmovDPR_regS(cmp,flags,dst,src);
12521   %}
12522 %}
12523 
12524 // Compare 2 longs and CMOVE doubles
12525 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12527   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12528   ins_cost(200);
12529   expand %{
12530     fcmovD_regS(cmp,flags,dst,src);
12531   %}
12532 %}
12533 
12534 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12536   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12537   ins_cost(200);
12538   expand %{
12539     fcmovFPR_regS(cmp,flags,dst,src);
12540   %}
12541 %}
12542 
12543 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12545   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12546   ins_cost(200);
12547   expand %{
12548     fcmovF_regS(cmp,flags,dst,src);
12549   %}
12550 %}
12551 
12552 //======
12553 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12554 // Same as cmpL_reg_flags_LEGT except must negate src
12555 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12556   match( Set flags (CmpL src zero ));
12557   effect( TEMP tmp );
12558   ins_cost(300);
12559   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12560             "CMP    $tmp,$src.lo\n\t"
12561             "SBB    $tmp,$src.hi\n\t" %}
12562   ins_encode( long_cmp_flags3(src, tmp) );
12563   ins_pipe( ialu_reg_reg_long );
12564 %}
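
// Informal sketch of why the commuted test is correct here: zeroing $tmp and
// then CMP/SBB against the two halves produces the flags for "0 - src", i.e.
// the original compare with its operands swapped.  A source-level "src > 0"
// therefore becomes "0 < src", and cmpOp_commute (used by cmpL_LEGT below)
// supplies the matching swapped LT/GE condition code.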
12565 
12566 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12567 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12568 // requires a commuted test to get the same result.
12569 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12570   match( Set flags (CmpL src1 src2 ));
12571   effect( TEMP tmp );
12572   ins_cost(300);
12573   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12574             "MOV    $tmp,$src2.hi\n\t"
12575             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12576   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12577   ins_pipe( ialu_cr_reg_reg );
12578 %}
12579 
// Long compares reg <= zero/reg OR reg > zero/reg.
12581 // Just a wrapper for a normal branch, plus the predicate test
12582 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12583   match(If cmp flags);
12584   effect(USE labl);
12585   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12586   ins_cost(300);
12587   expand %{
12588     jmpCon(cmp,flags,labl);    // JGT or JLE...
12589   %}
12590 %}
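
// Worked example (illustrative only): for a long test "x > y" the flags above
// are computed for "y - x"; cmpOp_commute rewrites GT into the swapped LT
// condition, so jmpCon emits a JL that is taken exactly when y < x, i.e. when
// x > y.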
12591 
12592 // Compare 2 longs and CMOVE longs.
12593 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12594   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12595   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12596   ins_cost(400);
12597   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12598             "CMOV$cmp $dst.hi,$src.hi" %}
12599   opcode(0x0F,0x40);
12600   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12601   ins_pipe( pipe_cmov_reg_long );
12602 %}
12603 
12604 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12605   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12606   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12607   ins_cost(500);
12608   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12609             "CMOV$cmp $dst.hi,$src.hi+4" %}
12610   opcode(0x0F,0x40);
12611   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12612   ins_pipe( pipe_cmov_reg_long );
12613 %}
12614 
12615 // Compare 2 longs and CMOVE ints.
12616 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12617   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12618   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12619   ins_cost(200);
12620   format %{ "CMOV$cmp $dst,$src" %}
12621   opcode(0x0F,0x40);
12622   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12623   ins_pipe( pipe_cmov_reg );
12624 %}
12625 
12626 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12627   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12628   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12629   ins_cost(250);
12630   format %{ "CMOV$cmp $dst,$src" %}
12631   opcode(0x0F,0x40);
12632   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12633   ins_pipe( pipe_cmov_mem );
12634 %}
12635 
12636 // Compare 2 longs and CMOVE ptrs.
12637 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12638   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12639   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12640   ins_cost(200);
12641   format %{ "CMOV$cmp $dst,$src" %}
12642   opcode(0x0F,0x40);
12643   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12644   ins_pipe( pipe_cmov_reg );
12645 %}
12646 
12647 // Compare 2 longs and CMOVE doubles
12648 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12650   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12651   ins_cost(200);
12652   expand %{
12653     fcmovDPR_regS(cmp,flags,dst,src);
12654   %}
12655 %}
12656 
12657 // Compare 2 longs and CMOVE doubles
12658 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12660   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12661   ins_cost(200);
12662   expand %{
12663     fcmovD_regS(cmp,flags,dst,src);
12664   %}
12665 %}
12666 
12667 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12669   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12670   ins_cost(200);
12671   expand %{
12672     fcmovFPR_regS(cmp,flags,dst,src);
12673   %}
12674 %}
12675 
12676 
12677 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12679   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12680   ins_cost(200);
12681   expand %{
12682     fcmovF_regS(cmp,flags,dst,src);
12683   %}
12684 %}
12685 
12686 
12687 // ============================================================================
12688 // Procedure Call/Return Instructions
12689 // Call Java Static Instruction
12690 // Note: If this code changes, the corresponding ret_addr_offset() and
12691 //       compute_padding() functions will have to be adjusted.
12692 instruct CallStaticJavaDirect(method meth) %{
12693   match(CallStaticJava);
12694   effect(USE meth);
12695 
12696   ins_cost(300);
12697   format %{ "CALL,static " %}
12698   opcode(0xE8); /* E8 cd */
12699   ins_encode( pre_call_resets,
12700               Java_Static_Call( meth ),
12701               call_epilog,
12702               post_call_FPU );
12703   ins_pipe( pipe_slow );
12704   ins_alignment(4);
12705 %}
12706 
12707 // Call Java Dynamic Instruction
12708 // Note: If this code changes, the corresponding ret_addr_offset() and
12709 //       compute_padding() functions will have to be adjusted.
12710 instruct CallDynamicJavaDirect(method meth) %{
12711   match(CallDynamicJava);
12712   effect(USE meth);
12713 
12714   ins_cost(300);
12715   format %{ "MOV    EAX,(oop)-1\n\t"
12716             "CALL,dynamic" %}
12717   opcode(0xE8); /* E8 cd */
12718   ins_encode( pre_call_resets,
12719               Java_Dynamic_Call( meth ),
12720               call_epilog,
12721               post_call_FPU );
12722   ins_pipe( pipe_slow );
12723   ins_alignment(4);
12724 %}
12725 
12726 // Call Runtime Instruction
12727 instruct CallRuntimeDirect(method meth) %{
12728   match(CallRuntime );
12729   effect(USE meth);
12730 
12731   ins_cost(300);
12732   format %{ "CALL,runtime " %}
12733   opcode(0xE8); /* E8 cd */
12734   // Use FFREEs to clear entries in float stack
12735   ins_encode( pre_call_resets,
12736               FFree_Float_Stack_All,
12737               Java_To_Runtime( meth ),
12738               post_call_FPU );
12739   ins_pipe( pipe_slow );
12740 %}
12741 
12742 // Call runtime without safepoint
12743 instruct CallLeafDirect(method meth) %{
12744   match(CallLeaf);
12745   effect(USE meth);
12746 
12747   ins_cost(300);
12748   format %{ "CALL_LEAF,runtime " %}
12749   opcode(0xE8); /* E8 cd */
12750   ins_encode( pre_call_resets,
12751               FFree_Float_Stack_All,
12752               Java_To_Runtime( meth ),
12753               Verify_FPU_For_Leaf, post_call_FPU );
12754   ins_pipe( pipe_slow );
12755 %}
12756 
12757 instruct CallLeafNoFPDirect(method meth) %{
12758   match(CallLeafNoFP);
12759   effect(USE meth);
12760 
12761   ins_cost(300);
12762   format %{ "CALL_LEAF_NOFP,runtime " %}
12763   opcode(0xE8); /* E8 cd */
12764   ins_encode(Java_To_Runtime(meth));
12765   ins_pipe( pipe_slow );
12766 %}
12767 
12768 
12769 // Return Instruction
12770 // Remove the return address & jump to it.
12771 instruct Ret() %{
12772   match(Return);
12773   format %{ "RET" %}
12774   opcode(0xC3);
12775   ins_encode(OpcP);
12776   ins_pipe( pipe_jmp );
12777 %}
12778 
12779 // Tail Call; Jump from runtime stub to Java code.
12780 // Also known as an 'interprocedural jump'.
12781 // Target of jump will eventually return to caller.
12782 // TailJump below removes the return address.
12783 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12784   match(TailCall jump_target method_oop );
12785   ins_cost(300);
12786   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12787   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12788   ins_encode( OpcP, RegOpc(jump_target) );
12789   ins_pipe( pipe_jmp );
12790 %}
12791 
12792 
12793 // Tail Jump; remove the return address; jump to target.
12794 // TailCall above leaves the return address around.
12795 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12796   match( TailJump jump_target ex_oop );
12797   ins_cost(300);
12798   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12799             "JMP    $jump_target " %}
12800   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12801   ins_encode( enc_pop_rdx,
12802               OpcP, RegOpc(jump_target) );
12803   ins_pipe( pipe_jmp );
12804 %}
12805 
12806 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
12808 // just prior to jumping to this handler.  No code emitted.
12809 instruct CreateException( eAXRegP ex_oop )
12810 %{
12811   match(Set ex_oop (CreateEx));
12812 
12813   size(0);
12814   // use the following format syntax
12815   format %{ "# exception oop is in EAX; no code emitted" %}
12816   ins_encode();
12817   ins_pipe( empty );
12818 %}
12819 
12820 
12821 // Rethrow exception:
12822 // The exception oop will come in the first argument position.
12823 // Then JUMP (not call) to the rethrow stub code.
12824 instruct RethrowException()
12825 %{
12826   match(Rethrow);
12827 
12828   // use the following format syntax
12829   format %{ "JMP    rethrow_stub" %}
12830   ins_encode(enc_rethrow);
12831   ins_pipe( pipe_jmp );
12832 %}
12833 
12834 // inlined locking and unlocking
12835 
12836 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12837   predicate(Compile::current()->use_rtm());
12838   match(Set cr (FastLock object box));
12839   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12840   ins_cost(300);
12841   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12842   ins_encode %{
12843     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12844                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12845                  _counters, _rtm_counters, _stack_rtm_counters,
12846                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12847                  true, ra_->C->profile_rtm());
12848   %}
12849   ins_pipe(pipe_slow);
12850 %}
12851 
12852 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12853   predicate(!Compile::current()->use_rtm());
12854   match(Set cr (FastLock object box));
12855   effect(TEMP tmp, TEMP scr, USE_KILL box);
12856   ins_cost(300);
12857   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12858   ins_encode %{
12859     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12860                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12861   %}
12862   ins_pipe(pipe_slow);
12863 %}
12864 
12865 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12866   match(Set cr (FastUnlock object box));
12867   effect(TEMP tmp, USE_KILL box);
12868   ins_cost(300);
12869   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12870   ins_encode %{
12871     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12872   %}
12873   ins_pipe(pipe_slow);
12874 %}
12875 
12876 
12877 
12878 // ============================================================================
12879 // Safepoint Instruction
12880 instruct safePoint_poll(eFlagsReg cr) %{
12881   match(SafePoint);
12882   effect(KILL cr);
12883 
12884   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12885   // On SPARC that might be acceptable as we can generate the address with
12886   // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on index 0 in the D$.  Because of
12888   // alignment (just like the situation at hand) the lower indices tend
12889   // to see more traffic.  It'd be better to change the polling address
12890   // to offset 0 of the last $line in the polling page.
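  //
  // How the poll works (summary, not a specification): the TEST below reads a
  // word from the dedicated polling page.  To request a safepoint the VM
  // protects that page, so the next poll faults and the signal handler brings
  // the thread to a safepoint; otherwise the load is essentially free.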
12891 
12892   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12893   ins_cost(125);
  size(6);
12895   ins_encode( Safepoint_Poll() );
12896   ins_pipe( ialu_reg_mem );
12897 %}
12898 
12899 
12900 // ============================================================================
12901 // This name is KNOWN by the ADLC and cannot be changed.
12902 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12903 // for this guy.
12904 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12905   match(Set dst (ThreadLocal));
12906   effect(DEF dst, KILL cr);
12907 
12908   format %{ "MOV    $dst, Thread::current()" %}
12909   ins_encode %{
12910     Register dstReg = as_Register($dst$$reg);
12911     __ get_thread(dstReg);
12912   %}
12913   ins_pipe( ialu_reg_fat );
12914 %}
12915 
12916 
12917 
12918 //----------PEEPHOLE RULES-----------------------------------------------------
12919 // These must follow all instruction definitions as they use the names
12920 // defined in the instructions definitions.
12921 //
12922 // peepmatch ( root_instr_name [preceding_instruction]* );
12923 //
12924 // peepconstraint %{
12925 // (instruction_number.operand_name relational_op instruction_number.operand_name
12926 //  [, ...] );
12927 // // instruction numbers are zero-based using left to right order in peepmatch
12928 //
12929 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12930 // // provide an instruction_number.operand_name for each operand that appears
12931 // // in the replacement instruction's match rule
12932 //
12933 // ---------VM FLAGS---------------------------------------------------------
12934 //
12935 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12936 //
12937 // Each peephole rule is given an identifying number starting with zero and
12938 // increasing by one in the order seen by the parser.  An individual peephole
12939 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12940 // on the command-line.
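//
// For example (OptoPeephole and OptoPeepholeAt are develop flags, so a
// debug/fastdebug build is assumed):
//   java -XX:-OptoPeephole ...        disable every peephole rule
//   java -XX:OptoPeepholeAt=2 ...     enable only the rule numbered 2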
12941 //
12942 // ---------CURRENT LIMITATIONS----------------------------------------------
12943 //
12944 // Only match adjacent instructions in same basic block
12945 // Only equality constraints
12946 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12947 // Only one replacement instruction
12948 //
12949 // ---------EXAMPLE----------------------------------------------------------
12950 //
12951 // // pertinent parts of existing instructions in architecture description
12952 // instruct movI(rRegI dst, rRegI src) %{
12953 //   match(Set dst (CopyI src));
12954 // %}
12955 //
12956 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12957 //   match(Set dst (AddI dst src));
12958 //   effect(KILL cr);
12959 // %}
12960 //
12961 // // Change (inc mov) to lea
12962 // peephole %{
//   // increment preceded by register-register move
12964 //   peepmatch ( incI_eReg movI );
12965 //   // require that the destination register of the increment
12966 //   // match the destination register of the move
12967 //   peepconstraint ( 0.dst == 1.dst );
12968 //   // construct a replacement instruction that sets
12969 //   // the destination to ( move's source register + one )
12970 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12971 // %}
12972 //
12973 // Implementation no longer uses movX instructions since
12974 // machine-independent system no longer uses CopyX nodes.
12975 //
12976 // peephole %{
12977 //   peepmatch ( incI_eReg movI );
12978 //   peepconstraint ( 0.dst == 1.dst );
12979 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12980 // %}
12981 //
12982 // peephole %{
12983 //   peepmatch ( decI_eReg movI );
12984 //   peepconstraint ( 0.dst == 1.dst );
12985 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12986 // %}
12987 //
12988 // peephole %{
12989 //   peepmatch ( addI_eReg_imm movI );
12990 //   peepconstraint ( 0.dst == 1.dst );
12991 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12992 // %}
12993 //
12994 // peephole %{
12995 //   peepmatch ( addP_eReg_imm movP );
12996 //   peepconstraint ( 0.dst == 1.dst );
12997 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
12998 // %}
12999 
13000 // // Change load of spilled value to only a spill
13001 // instruct storeI(memory mem, rRegI src) %{
13002 //   match(Set mem (StoreI mem src));
13003 // %}
13004 //
13005 // instruct loadI(rRegI dst, memory mem) %{
13006 //   match(Set dst (LoadI mem));
13007 // %}
13008 //
13009 peephole %{
13010   peepmatch ( loadI storeI );
13011   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13012   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13013 %}
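
// In words: when a StoreI is immediately followed by a LoadI of the same
// memory slot into the register that was just stored, the pair collapses to
// the store alone; the register already holds the value, so the reload is
// redundant.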
13014 
13015 //----------SMARTSPILL RULES---------------------------------------------------
13016 // These must follow all instruction definitions as they use the names
13017 // defined in the instructions definitions.