Old src/cpu/x86/vm/x86

   1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
  64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Ok so here's the trick FPR1 is really st(0) except in the midst
  82 // of emission of assembly for a machnode. During the emission the fpu stack
  83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
  84 // the stack will not have this element so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same weirdness with numbering causes
  86 // instruction encoding to have to play games with the register
  87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
  88 // where it does flt->flt moves to see an example
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256
     // "__ foo(...)" expands to "_masm.foo(...)", so a variable named _masm
     // must be in scope wherever this shorthand is used.
 257 #define __ _masm.
 258
 259 // How to find the high register of a Long pair, given the low register:
     // the high half is numbered two above the low half.
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86, so the
 267 // 128-bit aligned slot is carved out of the caller's buffer by hand.
 268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 269   // Clearing the low four address bits rounds 'adr' down to the nearest
 270   // 16-byte boundary, giving a 128-bit aligned operand for SSE instructions.
 271   jlong* const slot = (jlong*)(((uintptr_t)adr) & ((uintptr_t)(~0xF)));
 272   slot[0] = lo;  // low quadword
 273   slot[1] = hi;  // high quadword
 274   return slot;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
     // double_quadword() rounds each slot address DOWN to a 16-byte boundary,
     // so the buffer carries one spare 128-bit entry and the masks start at
     // element [1*2] rather than [0].
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279
 280 // Static initialization during VM startup.
     // Each pointer below is the aligned address returned by double_quadword(),
     // with the same 64-bit pattern stored in both halves of the quadword pair.
     // NOTE(review): presumably the 0x7FFF... patterns serve AbsF/AbsD and the
     // 0x8000... patterns serve NegF/NegD -- confirm against the users.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls: byte count of the instructions that are
 287 // accounted for ahead of the call opcode in the current compilation.
 288 static int pre_call_resets_size() {
 289   Compile* compile = Compile::current();
 290   // fldcw is counted as 6 bytes when the method runs in 24-bit FP mode.
 291   const int fldcw_bytes = compile->in_24_bit_fp_mode() ? 6 : 0;
 292   // vzeroupper is counted as 3 bytes when vectors wider than 16 bytes are used.
 293   const int vzeroupper_bytes = (compile->max_vector_size() > 16) ? 3 : 0;
 294   return fldcw_bytes + vzeroupper_bytes;
 295 }
 298 
 299 // Special hack: every kind of call reports the byte offset from the start
 300 // of its call sequence to the point where the return address will point.
 301 int MachCallStaticJavaNode::ret_addr_offset() {
 302   const int call_bytes = 5;  // from start of call to where return address points
 303   return call_bytes + pre_call_resets_size();
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return pre_call_resets_size() + 10;  // any pre-call resets, then 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;  // -1 means "not recorded yet"
 311
 312 int MachCallRuntimeNode::ret_addr_offset() {
       // The recorded size must be available before this offset can be computed.
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return pre_call_resets_size() + sizeof_FFree_Float_Stack_All + 5;
 315 }
 316 
 317 // Tells the compiler whether a safepoint node takes the polling page address
 318 // as an input.  Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // Padding needed so that the operand following the call opcode is aligned
 328 // (the call must not span a cache line, so that it can be patched).
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   // Step over the pre-call resets (fldcw/vzeroupper, if any) and the call opcode byte.
 331   const int operand_offset = current_offset + pre_call_resets_size() + 1;
 332   return round_to(operand_offset, alignment_required()) - operand_offset;
 333 }
 334 
 335 // Padding needed so that the operand following the call opcode is aligned
 336 // (the call must not span a cache line, so that it can be patched).
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   // Step over the pre-call resets (fldcw/vzeroupper, if any), the 5-byte MOV
 339   // instruction and the call opcode byte.
 340   const int operand_offset = current_offset + pre_call_resets_size() + 5 + 1;
 341   return round_to(operand_offset, alignment_required()) - operand_offset;
 342 }
 343 
 344 // EMIT_RM(): pack three fields into one byte as (f1 << 6) | (f2 << 3) | f3 and emit it.
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   const int packed = (f1 << 6) | (f2 << 3) | f3;
 347   cbuf.insts()->emit_int8((unsigned char)packed);
 348 }
 349 
 350 // EMIT_CC(): emit the single byte formed by f1 | f2.
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   const int combined = f1 | f2;
 353   cbuf.insts()->emit_int8((unsigned char)combined);
 354 }
 355 
 356 // EMIT_OPCODE(): append 'code', truncated to one byte, to cbuf's insts section.
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
     // Records 'reloc' at cbuf.insts_mark() + offset, then emits the opcode byte
     // through the plain emit_opcode() overload.
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8(): append 'd8', truncated to one byte, to cbuf's insts section.
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16(): append 'd16' to cbuf's insts section via emit_int16().
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32(): append 'd32' to cbuf's insts section via emit_int32().
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
     // The relocation is recorded at cbuf.insts_mark() with the given 'format'
     // before the value itself is emitted.
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
     // The relocation is recorded at cbuf.insts_mark() with the given 'format'
     // before the value itself is emitted.
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
       // ASSERT builds only: for an oop relocation, any value other than 0 and
       // Universe::non_oop_word() must be an oop, and may only be scavengable
       // when ScavengeRootsInCode is set.
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store: emit 'opcode' with an [ESP + disp] operand.
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   const bool disp_fits_byte = ( -128 <= disp && disp <= 127 );
 405   // R/M byte: mod 0x01 announces an 8-bit displacement, 0x02 a 32-bit one
 406   emit_rm( cbuf, disp_fits_byte ? 0x01 : 0x02, rm_field, ESP_enc );
 407   emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 408   if( disp_fits_byte ) {
 409     emit_d8 (cbuf, disp);                    // 8-bit displacement
 410   } else {
 411     emit_d32(cbuf, disp);                    // 32-bit displacement
 412   }
 413 }
 414 
 415 // Emit the ModRM byte, plus the SIB byte and displacement where needed, for
 416 // a memory operand [base + index*scale + displace]  (emit_reg_mem).
 417 //   reg_encoding  value for the reg field of the ModRM byte
 418 //   base          base register encoding; -1 is the special flag for an
 419 //                 absolute address
 420 //   index, scale  index 0x4 together with scale 0 means "no index"
 421 //   disp_reloc    anything but relocInfo::none rules out the 8-bit form; a
 422 //                 32-bit displacement is emitted with that relocation
 423 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 424   const bool has_reloc   = (disp_reloc != relocInfo::none);
 425   // The form without SIB byte needs: no index, no scale, base other than ESP.
 426   const bool needs_sib   = !((index == 0x4) && (scale == 0) && (base != ESP_enc));
 427   const bool is_absolute = !needs_sib && (base == -1);
 428
 429   if (is_absolute) {
 430     // mod 0x0 with r/m 0x5 means "32-bit displacement only".  This case is
 431     // decided before looking at the size of 'displace', so that a zero or
 432     // byte-sized absolute displacement is never routed to the register-based
 433     // forms, which would pack the -1 flag into the r/m field.
 434     emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435     // (manual lies; no SIB needed here)
 436   } else {
 437     // mod 0x0: no displacement (not available with EBP as base);
 438     // mod 0x1: 8-bit displacement (only without relocation);
 439     // mod 0x2: 32-bit displacement.
 440     int mod = 0x2;
 441     if ((displace == 0) && (base != EBP_enc)) {
 442       mod = 0x0;
 443     } else if ((displace >= -128) && (displace <= 127) && !has_reloc) {
 444       mod = 0x1;
 445     }
 446     if (needs_sib) {
 447       emit_rm(cbuf, mod, reg_encoding, 0x4);   // r/m 0x4: a SIB byte follows
 448       emit_rm(cbuf, scale, index, base);       // SIB byte
 449     } else {
 450       emit_rm(cbuf, mod, reg_encoding, base);
 451     }
 452     if (mod == 0x0) {
 453       return;
 454     }
 455     if (mod == 0x1) {
 456       emit_d8(cbuf, displace);
 457       return;
 458     }
 459   }
 460
 461   // 32-bit displacement, with relocation if requested
 462   if (has_reloc) {
 463     emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 464   } else {
 465     emit_d32      (cbuf, displace);
 466   }
 467 }
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   // A copy of a register onto itself needs no code at all (empty encoding).
 485   if( dst_encoding == src_encoding ) return;
 486   // Opcode 0x8B followed by a register-direct (mod 0x3) R/M byte.
 487   emit_opcode( cbuf, 0x8B );
 488   emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   // comiss/ucomiss instructions set ZF,PF,CF flags and zero OF,AF,SF
 494   // for NaN values; without PF set there is nothing to fix up.
 495   Label skip_fixup;
 496   __ jccb(Assembler::noParity, skip_fixup);
 497   //
 498   // Fixup flags by zeroing ZF,PF so that compare of NaN
 499   // values returns 'less than' result (CF is set).
 500   // The mask is applied to the flags image saved on the stack:
 501   //
 502   //    7 6 5 4 3 2 1 0
 503   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 504   //    0 0 1 0 1 0 1 1   (0x2B)
 505   //
 506   __ pushf();
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(skip_fixup);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label finished;
 514   __ movl(dst, -1);                    // dst stays -1 if either branch below is taken
 515   __ jcc(Assembler::parity, finished);
 516   __ jcc(Assembler::below, finished);
 517   __ setb(Assembler::notEqual, dst);   // otherwise: low byte set on "not equal"
 518   __ movzbl(dst, dst);                 // zero-extend that byte into all of dst
 519   __ bind(finished);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;  // bound to the empty register mask
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
       // Always 0 on this platform.
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }  // never requested here
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
       // requires_postalloc_expand() returns false, so a call here is treated as an error.
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding: nothing is written to cbuf (consistent with size() returning 0).
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
       // emit() produces no bytes, so the size is 0.
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
     // Non-PRODUCT builds only: prints a one-line placeholder for this node to 'st'.
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
     // Non-PRODUCT builds only: prints a textual description of the method prolog
     // to 'st'.  Nothing is emitted here; 'framesize' is only tracked so that the
     // printed offsets and sizes are consistent with each other.
     // NOTE(review): presumably this mirrors what verified_entry() emits -- confirm.
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560
 561   if (C->need_stack_bang(bangsize)) {
         // With a stack bang, EBP is shown as pushed first, so its word is
         // taken out of the size subtracted from ESP afterwards.
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
         // Without a stack bang, the whole frame is shown as allocated at once
         // and EBP is stored into the topmost word of it.
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588
 589   if (VerifyStackAtCalls) {
         // The cookie is shown in the word just below the saved EBP.
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618
 619   const int frame_bytes = C->frame_size_in_bytes();
 620   const int bang_bytes  = C->bang_size_in_bytes();
 621   // Pass 0 in place of the bang size when no stack bang is needed.
 622   const int stack_bang  = C->need_stack_bang(bang_bytes) ? bang_bytes : 0;
 623   __ verified_entry(frame_bytes, stack_bang, C->in_24_bit_fp_mode());
 624
 625   C->set_frame_complete(cbuf.insts_size());
 626
 627   if (!C->has_mach_constant_base_node()) return;
 628   // NOTE: We set the table base offset here because users might be
 629   // emitted before MachConstantBaseNode.
 630   Compile::ConstantTable& table = C->constant_table();
 631   table.set_table_base_offset(table.calculate_table_base_offset());
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
       // No closed-form byte count is kept for the prolog; defer to the generic
       // MachNode::size() computation.
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
       // NOTE(review): the value returned is 0, which does not match the
       // "large enough number" wording of the comment below -- confirm intent.
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
     // Non-PRODUCT builds only: prints a textual description of the method epilog
     // to 'st', one line per step, each followed by a tab for the next line.
     // The conditions are the same ones MachEpilogNode::emit() tests, except that
     // the reserved stack check emitted there is not listed here.
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp,
 649   framesize -= 2*wordSize;
 650
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674
 675   // Clear upper bits of YMM registers when current compiled code uses
 676   // wide vectors to avoid AVX <-> SSE transition penalty during call.
 677   if (C->max_vector_size() > 16) {
 678     __ vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684
 685   int frame_bytes = C->frame_size_in_bytes();
 686   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp,
 688   frame_bytes -= 2*wordSize;
 689
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691   if (frame_bytes != 0) {
 692     // add  SP, #framesize: opcode 0x83 carries an 8-bit immediate, 0x81 a 32-bit one
 693     const bool short_form = (frame_bytes < 128);
 694     emit_opcode(cbuf, short_form ? 0x83 : 0x81);
 695     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 696     if (short_form) {
 697       emit_d8(cbuf, frame_bytes);
 698     } else {
 699       emit_d32(cbuf, frame_bytes);
 700     }
 701   }
 702   emit_opcode(cbuf, 0x58 | EBP_enc);  // POPL EBP
 703   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 704     __ reserved_stack_check();
 705   }
 706
 707   if (do_polling() && C->is_method_compilation()) {
 708     // TEST PollPage,EAX -- the safepoint poll, marked with a poll_return relocation
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   int bytes = 1;                                                // popl rbp,
 719   // If method set FPU control word, restore to standard control word
 720   if (C->in_24_bit_fp_mode())                     bytes += 6;   // fldcw
 721   if (C->max_vector_size() > 16)                  bytes += 3;   // vzeroupper
 722   if (do_polling() && C->is_method_compilation()) bytes += 6;   // safepoint poll
 723
 724   int framesize = C->frame_size_in_bytes();
 725   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 726   // Remove two words for return addr and rbp,
 727   framesize -= 2*wordSize;
 728
 729   // add ESP: 6 bytes with a 32-bit immediate, 3 with an 8-bit one, none for 0
 730   if (framesize >= 128)     bytes += 6;
 731   else if (framesize != 0)  bytes += 3;
 732
 733   // Fixed allowance added to support ReservedStackAccess; it is applied
 734   // unconditionally, whether or not emit() produces the reserved stack check.
 735   bytes += 64;
 736   return bytes;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, 
 799     //                          it maps more cases to single byte displacement
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
 851     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
 863     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
 891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
 909     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1045                 "popq    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
1047                 "popq    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
1350 
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1381   /* WARNING these NOPs are critical so that verified entry point is properly
1382      aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
1390 
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
1402 // This is UltraSparc specific, true just means we have fast l2f conversion
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1412   // The passed offset is relative to address of the branch.
1413   // On 86 a branch displacement is calculated relative to address
1414   // of a next instruction.
1415   offset -= br_size;
1416 
1417   // the short version of jmpConUCF2 contains multiple branches,
1418   // making the reach slightly less
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
1425   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1426   return false;
1427 }
1428 
// The ecx parameter to rep stos for the ClearArray node is in dwords,
// not in bytes, hence false.
const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
// Does the CPU require late expand (see block.cpp for description of late expand)?
// Not here: the flag is false.
const bool Matcher::require_postalloc_expand = false;
1440 
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// The flag is false: no explicit masking is requested.
const bool Matcher::need_masked_shift_count = false;
1444 
1445 bool Matcher::narrow_oop_use_complex_address() {
1446   ShouldNotCallThis();
1447   return true;
1448 }
1449 
1450 bool Matcher::narrow_klass_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
// Hence true on this platform.
const bool Matcher::rematerialize_float_constants = true;
1461 
// If the CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
// The flag is true here, so no fixup is requested.
const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {  // Replace the memory operand owning input edge idx with its *_win95_safe variant.
1470   // Get the memory operand from the node: walk the operands, summing their edge counts, until idx is covered
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {  // idx lies beyond the current operand's edges
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];  // operand whose edges include idx
1484   MachOper *new_memory = NULL;             // replacement operand, chosen by opcode below
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:  // displacement is carried over to the replacement
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:  // scale is carried over to the replacement
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:  // both scale and displacement are carried over
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
1510     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1511     return;
1512   default:  // any other operand kind is unexpected; leave the node untouched
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;  // install the replacement operand in the same slot
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.  This port advertises that it does.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
1523 // Are floats converted to double when stored to stack during deoptimization?
1524 // On x32 they are stored with conversion only when the FPU is used for floats (UseSSE == 0).
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?  Just half here.
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
1533 // arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;                 // integer argument registers
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;   // XMM0/XMM1 count only when UseSSE>=1
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;   // the XMM0b/XMM1b slots count only when UseSSE>=2
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {  // Same answer as can_be_java_arg() for this port.
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546   // Use the hardware integer DIV instruction when
1547   // it is faster than code which uses multiplies.
1548   // Only when the constant divisor fits into 32 bits
1549   // (min_jint is excluded to get only correct
1550   // positive 32 bit values from negative).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);  // round-trips through int, and is not min_jint
1553 }
1554 
1555 // Register for DIVI projection of divmodI: the quotient is found in EAX.
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI: the remainder is found in EDX.
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL.  Not expected to be reached on this port.
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();  // empty mask; only reached if ShouldNotReachHere() returns
1569 }
1570 
1571 // Register for MODL projection of divmodL.  Not expected to be reached on this port.
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();  // empty mask; only reached if ShouldNotReachHere() returns
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {  // Returns the NO_REG mask on this port.
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value are known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {  // AndL with a constant mask whose upper 32 bits are clear
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {  // long constant with upper 32 bits clear
1591     return true;
1592   }
1593   return false;  // anything else: not known to be zero
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
1603 // interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (ie - encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode (the instruction's first opcode value) as one opcode byte
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode (the instruction's second opcode value) as one opcode byte
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly: the byte is taken from the constant argument d8
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{    // Emit the 0x66 prefix byte
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many): R/M byte only, mode 0x3, dst in the reg field and src in the r/m field
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many): given opcode byte, then a reg-reg R/M byte
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{    // MOV dst,0 using the full 32-bit immediate form
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : eax: dividend                         min_int
1667     //         ecx: divisor                          -1
1668     //
1669     // output: eax: quotient  (= eax idiv ecx)       min_int
1670     //         edx: remainder (= eax irem ecx)       0
1671     //
1672     //  Code sequence (the divisor compare is hard-wired to ECX by the 83 F9 bytes):
1673     //
1674     //  81 F8 00 00 00 80    cmp         eax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         edx,edx
1677     //  83 F9 FF             cmp         ecx,-1     (imm8 0FFh, sign-extended)
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case (0x0B skips xor(2)+cmp(3)+je(6))
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done (0x03 skips cdq(1) plus the 2-byte idiv)
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule, immediately after, as the 2-byte F7 F9 form)
1698     // done:
1699   %}
1700 
1701   // Dense encoding for older common ops: the register number is added into the opcode byte
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
1707   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m: as OpcSE, followed by the R/M byte for dst
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit (0x02) in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.  The immediate itself is not emitted here.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Emit the immediate as a single byte when it fits in [-128,127] (same test as OpcSE/OpcSErm)
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate as 32 bits
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{    // Low-half op of a long reg/immediate operation
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);   // short form: 8-bit immediate
1749     else                               emit_d32(cbuf,con);   // long form: 32-bit immediate
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{    // High-half op of a long reg/immediate operation
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode; targets the high register of the pair.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);   // short form: 8-bit immediate
1760     else                               emit_d32(cbuf,con);   // long form: 32-bit immediate
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP: secondary opcode combined with the dst register number
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP of a long: byte-swap each half, then exchange the halves
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...: R/M byte with the secondary opcode in the reg field
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV: primary byte, then secondary combined with the condition code
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV on the FPU stack: two-byte opcode built from 0xDA00
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);  // condition code and (src register number - 1) are added in
1792     emit_d8(cbuf, op >> 8 );    // high byte first
1793     emit_d8(cbuf, op & 255);    // then low byte
1794   %}
1795 
1796   // Emulate a CMOV with a conditional branch around a MOV; brOffs is the 8-bit branch displacement
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV (flip the low bit of the condition code)
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{    // Slow-path subtype check on fixed registers (ESI sub, EAX super)
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {   // only when the instruction's primary opcode is non-zero
1815       __ xorptr(Redi, Redi);   // zero the result register on the fall-through (non-miss) path
1816     }
1817     __ bind(miss);   // the miss path skips the xor above
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All: clean (or verify) the FPU stack and check the emitted size
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();   // remember where this encoding starts, to measure its size
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {   // first emission: record the size
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {                                    // later emissions must match the recorded size
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{    // Emits an FPU verification only when VerifyFPU is set; otherwise emits nothing
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime; the displacement is relative to the end of the 4-byte field
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {   // float/double results are fixed up after the call only in SSE2+ mode
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {   // move the float result from the FPU stack into xmm0 via a 4-byte stack temp
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {  // same for a double result, via an 8-byte stack temp
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878 
1879   enc_class pre_call_resets %{    // State resets emitted before a call; the size must match pre_call_resets_size()
1880     // If method sets FPU control word restore it here (load the standard control word)
1881     debug_only(int off0 = cbuf.insts_size());
1882     if (ra_->C->in_24_bit_fp_mode()) {
1883       MacroAssembler _masm(&cbuf);
1884       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1885     }
1886     if (ra_->C->max_vector_size() > 16) {
1887       // Clear upper bits of YMM registers when current compiled code uses
1888       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1889       MacroAssembler _masm(&cbuf);
1890       __ vzeroupper();
1891     }
1892     debug_only(int off1 = cbuf.insts_size());
1893     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1894   %}
1895 
1896   enc_class post_call_FPU %{    // After a call: reload the 24-bit FPU control word if the method runs in that mode
1897     // If method sets FPU control word do it here also
1898     if (Compile::current()->in_24_bit_fp_mode()) {
1899       MacroAssembler masm(&cbuf);
1900       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1901     }
1902   %}
1903 
1904   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1905     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1906     // who we intended to call.
1907     cbuf.set_insts_mark();
1908     $$$emit8$primary;
1909 
1910     if (!_method) {   // no resolved method: emit as a runtime call
1911       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1912                      runtime_call_Relocation::spec(),
1913                      RELOC_IMM32);
1914     } else {          // resolved method: optimized-virtual or static call relocation
1915       int method_index = resolved_method_index(cbuf);
1916       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1917                                                   : static_call_Relocation::spec(method_index);
1918       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919                      rspec, RELOC_DISP32);
1920       // Emit stubs for static call.
1921       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1922       if (stub == NULL) {   // stub emission failed: record the failure and stop encoding
1923         ciEnv::current()->record_failure("CodeCache is full");
1924         return;
1925       }
1926     }
1927   %}
1928 
1929   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL: delegated to MacroAssembler::ic_call
1930     MacroAssembler _masm(&cbuf);
1931     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1932   %}
1933 
1934   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1935     int disp = in_bytes(Method::from_compiled_offset());
1936     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");  // must fit the 8-bit displacement emitted below
1937 
1938     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1939     cbuf.set_insts_mark();
1940     $$$emit8$primary;
1941     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1942     emit_d8(cbuf, disp);             // Displacement
1943 
1944   %}
1945 
1946 //   Following encoding is no longer used, but may be restored if calling
1947 //   convention changes significantly.
1948 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1949 //
1950 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1951 //     // int ic_reg     = Matcher::inline_cache_reg();
1952 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1953 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1954 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1955 //
1956 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1957 //     // // so we load it immediately before the call
1958 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1959 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1960 //
1961 //     // xor rbp,ebp
1962 //     emit_opcode(cbuf, 0x33);
1963 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1964 //
1965 //     // CALL to interpreter.
1966 //     cbuf.set_insts_mark();
1967 //     $$$emit8$primary;
1968 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1969 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1970 //   %}
1971 
1972   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR: primary byte, R/M byte with secondary, 8-bit count
1973     $$$emit8$primary;
1974     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1975     $$$emit8$shift$$constant;
1976   %}
1977 
1978   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate: 0xB8+reg followed by the 32-bit constant
1979     // Load immediate does not have a zero or sign extended version
1980     // for 8-bit immediates
1981     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1982     $$$emit32$src$$constant;
1983   %}
1984 
1985   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate: like LdImmI, but the opcode base is the primary opcode
1986     // Load immediate does not have a zero or sign extended version
1987     // for 8-bit immediates
1988     emit_opcode(cbuf, $primary + $dst$$reg);
1989     $$$emit32$src$$constant;
1990   %}
1991 
1992   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate: low 32 bits of a long constant into the low register
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     int dst_enc = $dst$$reg;
1996     int src_con = $src$$constant & 0x0FFFFFFFFL;   // keep only the low 32 bits
1997     if (src_con == 0) {
1998       // xor dst, dst (used instead of a load when the half is zero)
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2001     } else {
2002       emit_opcode(cbuf, $primary + dst_enc);
2003       emit_d32(cbuf, src_con);
2004     }
2005   %}
2006 
2007   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate: high 32 bits of a long constant into the high register
2008     // Load immediate does not have a zero or sign extended version
2009     // for 8-bit immediates
2010     int dst_enc = $dst$$reg + 2;   // NOTE(review): sibling *_Hi encodings use HIGH_FROM_LOW(); presumably equivalent to +2 -- confirm
2011     int src_con = ((julong)($src$$constant)) >> 32;   // unsigned shift keeps only the high 32 bits
2012     if (src_con == 0) {
2013       // xor dst, dst (used instead of a load when the half is zero)
2014       emit_opcode(cbuf, 0x33);
2015       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2016     } else {
2017       emit_opcode(cbuf, $primary + dst_enc);
2018       emit_d32(cbuf, src_con);
2019     }
2020   %}
2021 
2022 
2023   // Encode a reg-reg copy.  If it is useless, then empty encoding.  The work is done by encode_Copy().
2024   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2025     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2026   %}
2027 
2028   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{    // Copy the low register of a long pair into an int register
2029     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2030   %}
2031 
2032   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many).  NOTE(review): an identical enc_class RegReg appears earlier in this encode block
2033     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2034   %}
2035 
2036   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many): primary opcode, then R/M byte for the low registers
2037     $$$emit8$primary;
2038     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2039   %}
2040 
2041   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many): secondary opcode, then R/M byte for the high registers
2042     $$$emit8$secondary;
2043     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2044   %}
2045 
2046   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many): R/M byte only, low registers of both pairs
2047     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2048   %}
2049 
2050   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many): R/M byte only, high registers of both pairs
2051     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2052   %}
2053 
2054   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{    // R/M byte only: int dst in the reg field, high register of src in the r/m field
2055     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2056   %}
2057 
2058   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2059     // Output immediate as 32 bits
2060     $$$emit32$src$$constant;
2061   %}
2062 
2063   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2064     // Output Float immediate bits: the float constant is emitted as its 32-bit pattern
2065     jfloat jf = $src$$constant;
2066     int    jf_as_bits = jint_cast( jf );
2067     emit_d32(cbuf, jf_as_bits);
2068   %}
2069 
2070   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2071     // Output Float immediate bits: same body as Con32FPR_as_bits, for an immF operand
2072     jfloat jf = $src$$constant;
2073     int    jf_as_bits = jint_cast( jf );
2074     emit_d32(cbuf, jf_as_bits);
2075   %}
2076 
2077   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2078     // Output immediate as 16 bits
2079     $$$emit16$src$$constant;
2080   %}
2081 
2082   enc_class Con_d32(immI src) %{    // Emit the constant as 32 bits through emit_d32()
2083     emit_d32(cbuf,$src$$constant);
2084   %}
2085 
2086   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2087     // Output immediate memory reference: R/M byte (mode 0x0, r/m 0x05) followed by a zero 32-bit displacement
2088     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2089     emit_d32(cbuf, 0x00);
2090   %}
2091 
2092   enc_class lock_prefix( ) %{    // Emit the 0xF0 lock prefix, but only when os::is_MP() is true
2093     if( os::is_MP() )
2094       emit_opcode(cbuf,0xF0);         // [Lock]
2095   %}
2096 
2097   // Cmp-xchg long value.
2098   // Note: we need to swap ebx and ecx before and after the
2099   //       cmpxchg8 instruction because the instruction uses
2100   //       ecx as the high order word of the new value to store but
2101   //       our register encoding uses ebx.
2102   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2103 
2104     // XCHG  ebx,ecx
2105     emit_opcode(cbuf,0x87);
2106     emit_opcode(cbuf,0xD9);
2107     // [Lock] (only when os::is_MP())
2108     if( os::is_MP() )
2109       emit_opcode(cbuf,0xF0);
2110     // CMPXCHG8 [Eptr]
2111     emit_opcode(cbuf,0x0F);
2112     emit_opcode(cbuf,0xC7);
2113     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2114     // XCHG  ebx,ecx (swap back)
2115     emit_opcode(cbuf,0x87);
2116     emit_opcode(cbuf,0xD9);
2117   %}
2118 
2119   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{    // Compare-and-exchange at [mem_ptr]; reg field of the R/M byte is fixed at 1
2120     // [Lock] (only when os::is_MP())
2121     if( os::is_MP() )
2122       emit_opcode(cbuf,0xF0);
2123 
2124     // CMPXCHG [Eptr]
2125     emit_opcode(cbuf,0x0F);
2126     emit_opcode(cbuf,0xB1);
2127     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2128   %}
2129 
2130   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{    // Byte-sized variant of enc_cmpxchg (opcode 0F B0 instead of 0F B1)
2131     // [Lock] (only when os::is_MP())
2132     if( os::is_MP() )
2133       emit_opcode(cbuf,0xF0);
2134 
2135     // CMPXCHGB [Eptr]
2136     emit_opcode(cbuf,0x0F);
2137     emit_opcode(cbuf,0xB0);
2138     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2139   %}
2140 
2141   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{    // 16-bit variant of enc_cmpxchg: same opcode bytes behind a 0x66 prefix
2142     // [Lock] (only when os::is_MP())
2143     if( os::is_MP() )
2144       emit_opcode(cbuf,0xF0);
2145 
2146     // 16-bit mode (0x66 prefix, emitted after the optional lock prefix)
2147     emit_opcode(cbuf, 0x66);
2148 
2149     // CMPXCHGW [Eptr]
2150     emit_opcode(cbuf,0x0F);
2151     emit_opcode(cbuf,0xB1);
2152     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2153   %}
2154 
2155   enc_class enc_flags_ne_to_boolean( iRegI res ) %{    // res = 0 if the flags say not-equal, else 1
2156     int res_encoding = $res$$reg;
2157 
2158     // MOV  res,0
2159     emit_opcode( cbuf, 0xB8 + res_encoding);
2160     emit_d32( cbuf, 0 );
2161     // JNE,s  fail (the offset 5 skips the 5-byte MOV res,1 below)
2162     emit_opcode(cbuf,0x75);
2163     emit_d8(cbuf, 5 );
2164     // MOV  res,1
2165     emit_opcode( cbuf, 0xB8 + res_encoding);
2166     emit_d32( cbuf, 1 );
2167     // fail:
2168   %}
2169 
2170   enc_class set_instruction_start( ) %{    // Emits no bytes; only records the instruction mark in the code buffer
2171     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2172   %}
2173 
2174   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem: register + memory operand bytes via encode_RegMem()
2175     int reg_encoding = $ereg$$reg;
2176     int base  = $mem$$base;
2177     int index = $mem$$index;
2178     int scale = $mem$$scale;
2179     int displace = $mem$$disp;
2180     relocInfo::relocType disp_reloc = $mem->disp_reloc();   // displacement relocation type is passed through
2181     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2182   %}
2183 
2184   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem for the high half of a long: high register, address + 4
2185     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2186     int base  = $mem$$base;
2187     int index = $mem$$index;
2188     int scale = $mem$$scale;
2189     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2190     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2191     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2192   %}
2193 
2194   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{    // Long shift by 1..31: a 0F-prefixed two-register op, then a plain shift of r1
2195     int r1, r2;   // roles of the lo/hi registers depend on the tertiary opcode
2196     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2197     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2198     emit_opcode(cbuf,0x0F);          // first instruction: 0F <tertiary> r/m(r1,r2) cnt
2199     emit_opcode(cbuf,$tertiary);
2200     emit_rm(cbuf, 0x3, r1, r2);
2201     emit_d8(cbuf,$cnt$$constant);
2202     emit_d8(cbuf,$primary);          // second instruction: <primary> r/m(secondary,r1) cnt
2203     emit_rm(cbuf, 0x3, $secondary, r1);
2204     emit_d8(cbuf,$cnt$$constant);
2205   %}
2206 
2207   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{    // Long shift by 32..63 that fills the high half by shifting it by 31
2208     emit_opcode( cbuf, 0x8B ); // Move the high register into the low register
2209     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2210     if( $cnt$$constant > 32 ) { // Shift, if not by zero: low register shifted by cnt-32
2211       emit_d8(cbuf,$primary);
2212       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2213       emit_d8(cbuf,$cnt$$constant-32);
2214     }
2215     emit_d8(cbuf,$primary);    // always: high register shifted by 31
2216     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2217     emit_d8(cbuf,31);
2218   %}
2219 
2220   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{    // Long shift by 32..63 that clears the vacated half
2221     int r1, r2;   // r1 receives the moved half, r2 is cleared; roles depend on the secondary opcode
2222     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2223     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2224 
2225     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2226     emit_rm(cbuf, 0x3, r1, r2);
2227     if( $cnt$$constant > 32 ) { // Shift, if not by zero: r1 shifted by cnt-32
2228       emit_opcode(cbuf,$primary);
2229       emit_rm(cbuf, 0x3, $secondary, r1);
2230       emit_d8(cbuf,$cnt$$constant-32);
2231     }
2232     emit_opcode(cbuf,0x33);  // XOR r2,r2
2233     emit_rm(cbuf, 0x3, r2, r2);
2234   %}
2235 
2236   // Clone of RegMem but accepts an extra parameter to access each
2237   // half of a double in memory; it never needs relocation info.
2238   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2239     emit_opcode(cbuf,$opcode$$constant);   // the opcode byte is emitted here, unlike RegMem
2240     int reg_encoding = $rm_reg$$reg;
2241     int base     = $mem$$base;
2242     int index    = $mem$$index;
2243     int scale    = $mem$$scale;
2244     int displace = $mem$$disp + $disp_for_half$$constant;   // disp_for_half is added to select the half
2245     relocInfo::relocType disp_reloc = relocInfo::none;
2246     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2247   %}
2248 
2249   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2250   //
2251   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2252   // and it never needs relocation information (asserted below).
2253   // Frequently used to move data between FPU's Stack Top and memory.
2254   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2255     int rm_byte_opcode = $rm_opcode$$constant;
2256     int base     = $mem$$base;
2257     int index    = $mem$$index;
2258     int scale    = $mem$$scale;
2259     int displace = $mem$$disp;
2260     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2261     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2262   %}
2263 
2264   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{    // reg/mem operand with constant reg/opcode field; keeps $mem's reloc
2265     int rm_byte_opcode = $rm_opcode$$constant; // value for the reg/opcode field of the RM byte
2266     int base     = $mem$$base;
2267     int index    = $mem$$index;
2268     int scale    = $mem$$scale;
2269     int displace = $mem$$disp;
2270     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2271     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2272   %}
2273 
2274   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea: operand bytes for [$src0 + $src1] with $dst in the reg field; no opcode is emitted here
2275     int reg_encoding = $dst$$reg;
2276     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2277     int index        = 0x04;            // 0x04 indicates no index
2278     int scale        = 0x00;            // 0x00 indicates no scale
2279     int displace     = $src1$$constant; // 0x00 indicates no displacement
2280     relocInfo::relocType disp_reloc = relocInfo::none; // constant displacement, never relocated
2281     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2282   %}
2283 
2284   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN: compare, then conditionally copy src into dst
2285     // CMP dst,src
2286     emit_opcode(cbuf,0x3B);
2287     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2288     // short jump (0x7C, taken when dst < src) over the 2-byte move below
2289     emit_opcode(cbuf,0x7C);
2290     emit_d8(cbuf,2);
2291     // MOV dst,src
2292     emit_opcode(cbuf,0x8B);
2293     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2294   %}
2295 
2296   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX: compare, then conditionally copy src into dst
2297     // CMP dst,src
2298     emit_opcode(cbuf,0x3B);
2299     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2300     // short jump (0x7F, taken when dst > src) over the 2-byte move below
2301     emit_opcode(cbuf,0x7F);
2302     emit_d8(cbuf,2);
2303     // MOV dst,src
2304     emit_opcode(cbuf,0x8B);
2305     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2306   %}
2307 
2308   enc_class enc_FPR_store(memory mem, regDPR src) %{    // store FPU register $src to $mem using opcode $primary
2309     // If src is FPR1, we can just FST to store it.
2310     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2311     int reg_encoding = 0x2; // Just store
2312     int base  = $mem$$base;
2313     int index = $mem$$index;
2314     int scale = $mem$$scale;
2315     int displace = $mem$$disp;
2316     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2317     if( $src$$reg != FPR1L_enc ) {
2318       reg_encoding = 0x3;  // Store & pop
2319       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2320       emit_d8( cbuf, 0xC0-1+$src$$reg ); // second byte of FLD: 0xC0 + ($src encoding - 1)
2321     }
2322     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2323     emit_opcode(cbuf,$primary);
2324     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2325   %}
2326 
2327   enc_class neg_reg(rRegI dst) %{    // negate $dst in place
2328     // NEG $dst  (opcode 0xF7 with 0x03 in the RM byte's reg/opcode field)
2329     emit_opcode(cbuf,0xF7);
2330     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2331   %}
2332 
2333   enc_class setLT_reg(eCXRegI dst) %{    // set $dst's low byte from the "less than" condition
2334     // SETLT $dst  (two-byte opcode 0x0F 0x9C)
2335     emit_opcode(cbuf,0x0F);
2336     emit_opcode(cbuf,0x9C);
2337     emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); // NOTE(review): reg/opcode field is 0x4 here; presumably ignored by SETcc -- confirm
2338   %}
2339 
2340   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT: p = (p - q) + (mask & y), mask taken from the SUB's borrow
2341     int tmpReg = $tmp$$reg;
2342
2343     // SUB $p,$q
2344     emit_opcode(cbuf,0x2B);
2345     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2346     // SBB $tmp,$tmp  (tmp becomes 0 or all-ones depending on the carry left by SUB)
2347     emit_opcode(cbuf,0x1B);
2348     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2349     // AND $tmp,$y
2350     emit_opcode(cbuf,0x23);
2351     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2352     // ADD $p,$tmp
2353     emit_opcode(cbuf,0x03);
2354     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2355   %}
2356 
2357   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{    // 64-bit left shift of the $dst register pair by a count in ECX
2358     // TEST shift,32  (encoded against ECX_enc directly; $shift is declared eCXRegI)
2359     emit_opcode(cbuf,0xF7);
2360     emit_rm(cbuf, 0x3, 0, ECX_enc);
2361     emit_d32(cbuf,0x20);
2362     // JEQ,s small  (displacement 4 = the MOV and CLR below, 2 bytes each)
2363     emit_opcode(cbuf, 0x74);
2364     emit_d8(cbuf, 0x04);
2365     // MOV    $dst.hi,$dst.lo
2366     emit_opcode( cbuf, 0x8B );
2367     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2368     // CLR    $dst.lo  (XOR $dst.lo,$dst.lo)
2369     emit_opcode(cbuf, 0x33);
2370     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2371 // small:
2372     // SHLD   $dst.hi,$dst.lo,$shift
2373     emit_opcode(cbuf,0x0F);
2374     emit_opcode(cbuf,0xA5);
2375     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2376     // SHL    $dst.lo,$shift
2377     emit_opcode(cbuf,0xD3);
2378     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2379   %}
2380 
2381   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{    // 64-bit logical right shift of the $dst register pair by a count in ECX
2382     // TEST shift,32  (encoded against ECX_enc directly; $shift is declared eCXRegI)
2383     emit_opcode(cbuf,0xF7);
2384     emit_rm(cbuf, 0x3, 0, ECX_enc);
2385     emit_d32(cbuf,0x20);
2386     // JEQ,s small  (displacement 4 = the MOV and CLR below, 2 bytes each)
2387     emit_opcode(cbuf, 0x74);
2388     emit_d8(cbuf, 0x04);
2389     // MOV    $dst.lo,$dst.hi
2390     emit_opcode( cbuf, 0x8B );
2391     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2392     // CLR    $dst.hi  (XOR $dst.hi,$dst.hi)
2393     emit_opcode(cbuf, 0x33);
2394     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2395 // small:
2396     // SHRD   $dst.lo,$dst.hi,$shift
2397     emit_opcode(cbuf,0x0F);
2398     emit_opcode(cbuf,0xAD);
2399     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2400     // SHR    $dst.hi,$shift
2401     emit_opcode(cbuf,0xD3);
2402     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2403   %}
2404 
2405   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{    // 64-bit arithmetic right shift of the $dst register pair by a count in ECX
2406     // TEST shift,32  (encoded against ECX_enc directly; $shift is declared eCXRegI)
2407     emit_opcode(cbuf,0xF7);
2408     emit_rm(cbuf, 0x3, 0, ECX_enc);
2409     emit_d32(cbuf,0x20);
2410     // JEQ,s small  (displacement 5 = 2-byte MOV plus 3-byte SAR-by-immediate below)
2411     emit_opcode(cbuf, 0x74);
2412     emit_d8(cbuf, 0x05);
2413     // MOV    $dst.lo,$dst.hi
2414     emit_opcode( cbuf, 0x8B );
2415     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2416     // SAR    $dst.hi,31  (fills the high word with copies of its sign bit)
2417     emit_opcode(cbuf, 0xC1);
2418     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2419     emit_d8(cbuf, 0x1F );
2420 // small:
2421     // SHRD   $dst.lo,$dst.hi,$shift
2422     emit_opcode(cbuf,0x0F);
2423     emit_opcode(cbuf,0xAD);
2424     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2425     // SAR    $dst.hi,$shift
2426     emit_opcode(cbuf,0xD3);
2427     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2428   %}
2429 
2430 
2431   // ----------------- Encodings for floating point unit -----------------
2432   // Emits $primary followed by a register-form RM byte ($secondary, $src); may leave result in FPU-TOS or FPU reg depending on opcodes
2433   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2434     $$$emit8$primary;
2435     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2436   %}
2437 
2438   // Pop argument in FPR0 with FSTP ST(0)  (fixed two-byte sequence 0xDD 0xD8)
2439   enc_class PopFPU() %{
2440     emit_opcode( cbuf, 0xDD );
2441     emit_d8( cbuf, 0xD8 );
2442   %}
2443 
2444   // Pop FPU top-of-stack into $dst with FSTP.  !!!!! emits the same bytes as Pop_Reg_FPR
2445   enc_class Pop_Reg_DPR( regDPR dst ) %{
2446     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2447     emit_d8( cbuf, 0xD8+$dst$$reg );     // i = $dst encoding
2448   %}
2449 
2450   enc_class Push_Reg_DPR( regDPR dst ) %{    // push a copy of FPU register $dst onto the FPU stack
2451     emit_opcode( cbuf, 0xD9 );
2452     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2453   %}
2454 
2455   enc_class strictfp_bias1( regDPR dst ) %{    // multiply $dst by the 80-bit constant at addr_fpu_subnormal_bias1()
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );           // RM byte for the FLD; the 32-bit absolute address follows
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   enc_class strictfp_bias2( regDPR dst ) %{    // multiply $dst by the 80-bit constant at addr_fpu_subnormal_bias2()
2464     emit_opcode( cbuf, 0xDB );           // FLD m80real
2465     emit_opcode( cbuf, 0x2D );           // RM byte for the FLD; the 32-bit absolute address follows
2466     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2467     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2468     emit_opcode( cbuf, 0xC8+$dst$$reg );
2469   %}
2470 
2471   // Special case for moving an integer register to a stack slot: delegates to store_to_stackslot with opcode $primary.
2472   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2473     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2474   %}
2475 
2476   // Special case for moving a register to a stack slot: emits only the operand bytes for [ESP + disp32].
2477   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2478     // Opcode already emitted
2479     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2480     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2481     emit_d32(cbuf, $dst$$disp);   // Displacement
2482   %}
2483 
2484   // Push the integer in stackSlot 'src' onto FP-stack (opcode $primary, reg/opcode field $secondary)
2485   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2486     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); // helper is reused here only for its [ESP+disp] operand encoding
2487   %}
2488 
2489   // Store FPU's TOS float to a stack-slot, and pop FPU-stack
2490   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2491     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2492   %}
2493 
2494   // Same as Pop_Mem_FPR except for opcode (0xDD instead of 0xD9)
2495   // Store FPU's TOS double to a stack-slot, and pop FPU-stack
2496   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2497     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2498   %}
2499 
2500   enc_class Pop_Reg_FPR( regFPR dst ) %{    // pop FPU top-of-stack into $dst
2501     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2502     emit_d8( cbuf, 0xD8+$dst$$reg );     // i = $dst encoding
2503   %}
2504 
2505   enc_class Push_Reg_FPR( regFPR dst ) %{    // push a copy of FPU register $dst onto the FPU stack
2506     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2507     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2508   %}
2509 
2510   // Store FPU register $src as a float to a stack-slot: plain FST if $src is FPR1, else FLD it first and FSTP
2511   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2512     int pop = 0x02;                      // reg/opcode field: store without pop
2513     if ($src$$reg != FPR1L_enc) {
2514       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2515       emit_d8( cbuf, 0xC0-1+$src$$reg );
2516       pop = 0x03;                        // store and pop the copy just pushed
2517     }
2518     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2519   %}
2520 
2521   // Store FPU register $src as a double to a stack-slot: plain FST if $src is FPR1, else FLD it first and FSTP
2522   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2523     int pop = 0x02;                      // reg/opcode field: store without pop
2524     if ($src$$reg != FPR1L_enc) {
2525       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2526       emit_d8( cbuf, 0xC0-1+$src$$reg );
2527       pop = 0x03;                        // store and pop the copy just pushed
2528     }
2529     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2530   %}
2531 
2532   // Copy FPU register $src into FPU register $dst: plain FST if $src is FPR1, else FLD $src and FSTP into $dst
2533   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2534     int pop = 0xD0 - 1; // -1 since we skip FLD
2535     if ($src$$reg != FPR1L_enc) {
2536       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2537       emit_d8( cbuf, 0xC0-1+$src$$reg );
2538       pop = 0xD8;                        // base byte of the popping form
2539     }
2540     emit_opcode( cbuf, 0xDD );
2541     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2542   %}
2543 
2544 
2545   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{    // push $dst, then (if $src is not FPR1) exchange $src with the slot under the new top
2546     // load dst in FPR0
2547     emit_opcode( cbuf, 0xD9 );
2548     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2549     if ($src$$reg != FPR1L_enc) {
2550       // fincstp  (step past the value just pushed)
2551       emit_opcode (cbuf, 0xD9);
2552       emit_opcode (cbuf, 0xF7);
2553       // swap src with FPR1:
2554       // FXCH FPR1 with src
2555       emit_opcode(cbuf, 0xD9);
2556       emit_d8(cbuf, 0xC8-1+$src$$reg );
2557       // fdecstp  (step back so the pushed value is on top again)
2558       emit_opcode (cbuf, 0xD9);
2559       emit_opcode (cbuf, 0xF6);
2560     }
2561   %}
2562 
2563   enc_class Push_ModD_encoding(regD src0, regD src1) %{    // push XMM doubles $src1 then $src0 onto the FPU stack via an 8-byte stack temp
2564     MacroAssembler _masm(&cbuf);
2565     __ subptr(rsp, 8);                                // reserve the temp; it is not released here
2566     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2567     __ fld_d(Address(rsp, 0));
2568     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);  // $src0 is loaded last
2569     __ fld_d(Address(rsp, 0));
2570   %}
2571 
2572   enc_class Push_ModF_encoding(regF src0, regF src1) %{    // push XMM floats $src1 then $src0 onto the FPU stack via a 4-byte stack temp
2573     MacroAssembler _masm(&cbuf);
2574     __ subptr(rsp, 4);                                // reserve the temp; it is not released here
2575     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2576     __ fld_s(Address(rsp, 0));
2577     __ movflt(Address(rsp, 0), $src0$$XMMRegister);  // $src0 is loaded last
2578     __ fld_s(Address(rsp, 0));
2579   %}
2580 
2581   enc_class Push_ResultD(regD dst) %{    // pop the FPU top as a double into XMM $dst through [rsp], then release 8 bytes of stack
2582     MacroAssembler _masm(&cbuf);
2583     __ fstp_d(Address(rsp, 0));
2584     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2585     __ addptr(rsp, 8);
2586   %}
2587 
2588   enc_class Push_ResultF(regF dst, immI d8) %{    // pop the FPU top as a float into XMM $dst through [rsp], then release $d8 bytes of stack
2589     MacroAssembler _masm(&cbuf);
2590     __ fstp_s(Address(rsp, 0));
2591     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2592     __ addptr(rsp, $d8$$constant);
2593   %}
2594 
2595   enc_class Push_SrcD(regD src) %{    // push XMM double $src onto the FPU stack via an 8-byte stack temp
2596     MacroAssembler _masm(&cbuf);
2597     __ subptr(rsp, 8);                               // reserve the temp; it is not released here
2598     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2599     __ fld_d(Address(rsp, 0));
2600   %}
2601 
2602   enc_class push_stack_temp_qword() %{    // reserve 8 bytes on the CPU stack
2603     MacroAssembler _masm(&cbuf);
2604     __ subptr(rsp, 8);
2605   %}
2606 
2607   enc_class pop_stack_temp_qword() %{    // release 8 bytes from the CPU stack
2608     MacroAssembler _masm(&cbuf);
2609     __ addptr(rsp, 8);
2610   %}
2611 
2612   enc_class push_xmm_to_fpr1(regD src) %{    // push XMM double $src onto the FPU stack through [rsp]; rsp itself is not adjusted here
2613     MacroAssembler _masm(&cbuf);
2614     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2615     __ fld_d(Address(rsp, 0));
2616   %}
2617 
2618   enc_class Push_Result_Mod_DPR( regDPR src) %{    // if $src is not FPR1, exchange $src with the slot under the FPU top; emits nothing otherwise
2619     if ($src$$reg != FPR1L_enc) {
2620       // fincstp  (step past the current top)
2621       emit_opcode (cbuf, 0xD9);
2622       emit_opcode (cbuf, 0xF7);
2623       // FXCH FPR1 with src
2624       emit_opcode(cbuf, 0xD9);
2625       emit_d8(cbuf, 0xC8-1+$src$$reg );
2626       // fdecstp  (step back to the original top)
2627       emit_opcode (cbuf, 0xD9);
2628       emit_opcode (cbuf, 0xF6);
2629     }
2630     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2631     // // FSTP   FPR$dst$$reg
2632     // emit_opcode( cbuf, 0xDD );
2633     // emit_d8( cbuf, 0xD8+$dst$$reg );
2634   %}
2635 
2636   enc_class fnstsw_sahf_skip_parity() %{    // copy FPU status into CPU flags, then branch 5 bytes forward when parity is clear
2637     // fnstsw ax
2638     emit_opcode( cbuf, 0xDF );
2639     emit_opcode( cbuf, 0xE0 );
2640     // sahf
2641     emit_opcode( cbuf, 0x9E );
2642     // jnp  ::skip  (the 5 skipped bytes are emitted by whatever encoding follows this one)
2643     emit_opcode( cbuf, 0x7B );
2644     emit_opcode( cbuf, 0x05 );
2645   %}
2646 
2647   enc_class emitModDPR() %{    // FPREM loop: repeat fprem while the parity flag is set after fnstsw/sahf
2648     // fprem must be iterative
2649     // :: loop
2650     // fprem
2651     emit_opcode( cbuf, 0xD9 );
2652     emit_opcode( cbuf, 0xF8 );
2653     // wait
2654     emit_opcode( cbuf, 0x9b );
2655     // fnstsw ax
2656     emit_opcode( cbuf, 0xDF );
2657     emit_opcode( cbuf, 0xE0 );
2658     // sahf
2659     emit_opcode( cbuf, 0x9E );
2660     // jp  ::loop  (near form 0F 8A with rel32 = 0xFFFFFFF4 = -12, the 12 bytes emitted by this encoding)
2661     emit_opcode( cbuf, 0x0F );
2662     emit_opcode( cbuf, 0x8A );
2663     emit_opcode( cbuf, 0xF4 );
2664     emit_opcode( cbuf, 0xFF );
2665     emit_opcode( cbuf, 0xFF );
2666     emit_opcode( cbuf, 0xFF );
2667   %}
2668 
2669   enc_class fpu_flags() %{    // move FPU compare status into CPU flags, forcing AH to 1 when status bit 0x0400 is set
2670     // fnstsw_ax
2671     emit_opcode( cbuf, 0xDF);
2672     emit_opcode( cbuf, 0xE0);
2673     // test ax,0x0400
2674     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2675     emit_opcode( cbuf, 0xA9 );
2676     emit_d16   ( cbuf, 0x0400 );
2677     // // // This sequence works, but stalls for 12-16 cycles on PPro
2678     // // test rax,0x0400
2679     // emit_opcode( cbuf, 0xA9 );
2680     // emit_d32   ( cbuf, 0x00000400 );
2681     //
2682     // jz +2 (no unordered comparison): skips only the 2-byte "mov ah,1"; sahf still runs
2683     emit_opcode( cbuf, 0x74 );
2684     emit_d8    ( cbuf, 0x02 );
2685     // mov ah,1 - treat as LT case (set carry flag)
2686     emit_opcode( cbuf, 0xB4 );
2687     emit_d8    ( cbuf, 0x01 );
2688     // sahf
2689     emit_opcode( cbuf, 0x9E);
2690   %}
2691 
2692   enc_class cmpF_P6_fixup() %{    // when the parity flag is set, overwrite the flags via AH=1 + SAHF
2693     // Fixup the integer flags in case comparison involved a NaN
2694     //
2695     // JNP exit (no unordered comparison, P-flag is set by NaN); +3 skips MOV AH (2 bytes) and SAHF (1 byte)
2696     emit_opcode( cbuf, 0x7B );
2697     emit_d8    ( cbuf, 0x03 );
2698     // MOV AH,1 - treat as LT case (set carry flag)
2699     emit_opcode( cbuf, 0xB4 );
2700     emit_d8    ( cbuf, 0x01 );
2701     // SAHF
2702     emit_opcode( cbuf, 0x9E);
2703     // NOP     // target for branch to avoid branch to branch
2704     emit_opcode( cbuf, 0x90);
2705   %}
2706 
2707 //     fnstsw_ax();
2708 //     sahf();
2709 //     movl(dst, nan_result);
2710 //     jcc(Assembler::parity, exit);
2711 //     movl(dst, less_result);
2712 //     jcc(Assembler::below, exit);
2713 //     movl(dst, equal_result);
2714 //     jcc(Assembler::equal, exit);
2715 //     movl(dst, greater_result);
2716
2717 // less_result     = -1;
2718 // greater_result  =  1;
2719 // equal_result    = 0;
2720 // nan_result      = -1;
2721
2722   enc_class CmpF_Result(rRegI dst) %{    // materialize the FPU compare outcome in $dst as -1 (NaN or less), 0 (equal) or 1 (greater)
2723     // fnstsw_ax();
2724     emit_opcode( cbuf, 0xDF);
2725     emit_opcode( cbuf, 0xE0);
2726     // sahf
2727     emit_opcode( cbuf, 0x9E);
2728     // movl(dst, nan_result);
2729     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2730     emit_d32( cbuf, -1 );
2731     // jcc(Assembler::parity, exit);  0x13 = 19 bytes: three 5-byte moves plus two 2-byte jumps
2732     emit_opcode( cbuf, 0x7A );
2733     emit_d8    ( cbuf, 0x13 );
2734     // movl(dst, less_result);
2735     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2736     emit_d32( cbuf, -1 );
2737     // jcc(Assembler::below, exit);  0x0C = 12 bytes: two 5-byte moves plus one 2-byte jump
2738     emit_opcode( cbuf, 0x72 );
2739     emit_d8    ( cbuf, 0x0C );
2740     // movl(dst, equal_result);
2741     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2742     emit_d32( cbuf, 0 );
2743     // jcc(Assembler::equal, exit);  0x05 = the final 5-byte move
2744     emit_opcode( cbuf, 0x74 );
2745     emit_d8    ( cbuf, 0x05 );
2746     // movl(dst, greater_result);
2747     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2748     emit_d32( cbuf, 1 );
2749   %}
2750 
2751 
2752   // Compare the longs and set flags: compares the high words, and the low words only when the high words are equal
2753   // BROKEN!  Do Not use as-is
2754   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2755     // CMP    $src1.hi,$src2.hi
2756     emit_opcode( cbuf, 0x3B );
2757     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2758     // JNE,s  done  (+2 skips the 2-byte low-word CMP)
2759     emit_opcode(cbuf,0x75);
2760     emit_d8(cbuf, 2 );
2761     // CMP    $src1.lo,$src2.lo
2762     emit_opcode( cbuf, 0x3B );
2763     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2764 // done:
2765   %}
2766 
2767   enc_class convert_int_long( regL dst, rRegI src ) %{    // sign-extend int $src into the $dst register pair
2768     // mov $dst.lo,$src
2769     int dst_encoding = $dst$$reg;
2770     int src_encoding = $src$$reg;
2771     encode_Copy( cbuf, dst_encoding  , src_encoding );
2772     // mov $dst.hi,$src
2773     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2774     // sar $dst.hi,31  (leaves only copies of the sign bit in the high word)
2775     emit_opcode( cbuf, 0xC1 );
2776     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2777     emit_d8(cbuf, 0x1F );
2778   %}
2779 
2780   enc_class convert_long_double( eRegL src ) %{    // push the long on the CPU stack, FILD it onto the FPU stack, then release the 8 bytes
2781     // push $src.hi
2782     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2783     // push $src.lo
2784     emit_opcode(cbuf, 0x50+$src$$reg  );
2785     // fild 64-bits at [SP]
2786     emit_opcode(cbuf,0xdf);
2787     emit_d8(cbuf, 0x6C);   // RM byte
2788     emit_d8(cbuf, 0x24);   // SIB byte
2789     emit_d8(cbuf, 0x00);   // 8-bit displacement of zero
2790     // pop stack
2791     emit_opcode(cbuf, 0x83); // add  SP, #8
2792     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2793     emit_d8(cbuf, 0x8);
2794   %}
2795 
2796   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{    // one-operand IMUL by $src1, then SAR $dst by ($cnt - 32)
2797     // IMUL   EDX:EAX,$src1
2798     emit_opcode( cbuf, 0xF7 );
2799     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2800     // SAR    EDX,$cnt-32
2801     int shift_count = ((int)$cnt$$constant) - 32;
2802     if (shift_count > 0) {    // nothing is emitted for a remaining shift of zero
2803       emit_opcode(cbuf, 0xC1);
2804       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2805       emit_d8(cbuf, shift_count);
2806     }
2807   %}
2808 
2809   // Same as convert_long_double, but this version doesn't have add sp, 8 (the pushed 8 bytes stay on the stack)
2810   enc_class convert_long_double2( eRegL src ) %{
2811     // push $src.hi
2812     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2813     // push $src.lo
2814     emit_opcode(cbuf, 0x50+$src$$reg  );
2815     // fild 64-bits at [SP]
2816     emit_opcode(cbuf,0xdf);
2817     emit_d8(cbuf, 0x6C);   // RM byte
2818     emit_d8(cbuf, 0x24);   // SIB byte
2819     emit_d8(cbuf, 0x00);   // 8-bit displacement of zero
2820   %}
2821 
2822   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{    // one-operand IMUL by $src (0xF7 with reg/opcode field 5)
2823     // Basic idea: long = (long)int * (long)int
2824     // IMUL EDX:EAX, src
2825     emit_opcode( cbuf, 0xF7 );
2826     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2827   %}
2828 
2829   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{    // one-operand MUL by $src (0xF7 with reg/opcode field 4)
2830     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2831     // MUL EDX:EAX, src
2832     emit_opcode( cbuf, 0xF7 );
2833     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2834   %}
2835 
2836   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{    // 64x64->64 multiply of $dst by $src using $tmp for the cross terms
2837     // Basic idea: lo(result) = lo(x_lo * y_lo)
2838     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2839     // MOV    $tmp,$src.lo
2840     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2841     // IMUL   $tmp,EDX
2842     emit_opcode( cbuf, 0x0F );
2843     emit_opcode( cbuf, 0xAF );
2844     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2845     // MOV    EDX,$src.hi
2846     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2847     // IMUL   EDX,EAX
2848     emit_opcode( cbuf, 0x0F );
2849     emit_opcode( cbuf, 0xAF );
2850     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2851     // ADD    $tmp,EDX  ($tmp now holds the sum of both cross terms)
2852     emit_opcode( cbuf, 0x03 );
2853     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2854     // MUL   EDX:EAX,$src.lo
2855     emit_opcode( cbuf, 0xF7 );
2856     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2857     // ADD    EDX,$tmp
2858     emit_opcode( cbuf, 0x03 );
2859     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2860   %}
2861 
2862   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{    // multiply the $dst register pair by the small constant $src
2863     // Basic idea: lo(result) = lo(src * y_lo)
2864     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2865     // IMUL   $tmp,EDX,$src  (8-bit immediate form)
2866     emit_opcode( cbuf, 0x6B );
2867     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2868     emit_d8( cbuf, (int)$src$$constant );
2869     // MOV    EDX,$src
2870     emit_opcode(cbuf, 0xB8 + EDX_enc);
2871     emit_d32( cbuf, (int)$src$$constant );
2872     // MUL   EDX:EAX,EDX
2873     emit_opcode( cbuf, 0xF7 );
2874     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2875     // ADD    EDX,$tmp
2876     emit_opcode( cbuf, 0x03 );
2877     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2878   %}
2879 
2880   enc_class long_div( eRegL src1, eRegL src2 ) %{    // push both longs, CALL SharedRuntime::ldiv, then release the 16 pushed bytes
2881     // PUSH src1.hi  (HIGH_FROM_LOW is applied to the register encoding, as in convert_long_double)
2882     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2883     // PUSH src1.lo
2884     emit_opcode(cbuf,               0x50+$src1$$reg  );
2885     // PUSH src2.hi
2886     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2887     // PUSH src2.lo
2888     emit_opcode(cbuf,               0x50+$src2$$reg  );
2889     // CALL directly to the runtime
2890     cbuf.set_insts_mark();
2891     emit_opcode(cbuf,0xE8);       // Call into runtime
2892     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2893     // Restore stack
2894     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2895     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2896     emit_d8(cbuf, 4*4);      // four 4-byte pushes
2897   %}
2898 
2899   enc_class long_mod( eRegL src1, eRegL src2 ) %{    // push both longs, CALL SharedRuntime::lrem, then release the 16 pushed bytes
2900     // PUSH src1.hi  (HIGH_FROM_LOW is applied to the register encoding, as in convert_long_double)
2901     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2902     // PUSH src1.lo
2903     emit_opcode(cbuf,               0x50+$src1$$reg  );
2904     // PUSH src2.hi
2905     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2906     // PUSH src2.lo
2907     emit_opcode(cbuf,               0x50+$src2$$reg  );
2908     // CALL directly to the runtime
2909     cbuf.set_insts_mark();
2910     emit_opcode(cbuf,0xE8);       // Call into runtime
2911     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2912     // Restore stack
2913     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2914     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2915     emit_d8(cbuf, 4*4);      // four 4-byte pushes
2916   %}
2917 
2918   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{    // OR both halves of $src into $tmp, setting flags for the combined value
2919     // MOV   $tmp,$src.lo
2920     emit_opcode(cbuf, 0x8B);
2921     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2922     // OR    $tmp,$src.hi
2923     emit_opcode(cbuf, 0x0B);
2924     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2925   %}
2926 
2927   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{    // compare low words; compare high words only if the low words are equal
2928     // CMP    $src1.lo,$src2.lo
2929     emit_opcode( cbuf, 0x3B );
2930     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2931     // JNE,s  skip  (+2 skips the 2-byte high-word CMP)
2932     emit_cc(cbuf, 0x70, 0x5);
2933     emit_d8(cbuf,2);
2934     // CMP    $src1.hi,$src2.hi
2935     emit_opcode( cbuf, 0x3B );
2936     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2937   %}
2938 
2939   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{    // flags of the 64-bit subtraction src1 - src2, computed via CMP + SBB into $tmp
2940     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2941     emit_opcode( cbuf, 0x3B );
2942     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2943     // MOV    $tmp,$src1.hi
2944     emit_opcode( cbuf, 0x8B );
2945     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2946     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2947     emit_opcode( cbuf, 0x1B );
2948     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2949   %}
2950 
2951   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{    // flags of the 64-bit subtraction 0 - src, computed via CMP + SBB on a zeroed $tmp
2952     // XOR    $tmp,$tmp
2953     emit_opcode(cbuf,0x33);  // XOR
2954     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2955     // CMP    $tmp,$src.lo
2956     emit_opcode( cbuf, 0x3B );
2957     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2958     // SBB    $tmp,$src.hi
2959     emit_opcode( cbuf, 0x1B );
2960     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2961   %}
2962 
2963  // Negate the $dst register pair in three instructions.  Sniff, sniff... smells like Gnu Superoptimizer
2964   enc_class neg_long( eRegL dst ) %{
2965     emit_opcode(cbuf,0xF7);    // NEG hi
2966     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2967     emit_opcode(cbuf,0xF7);    // NEG lo
2968     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2969     emit_opcode(cbuf,0x83);    // SBB hi,0  (subtracts the borrow produced by NEG lo)
2970     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2971     emit_d8    (cbuf,0 );
2972   %}
2973 
2974   enc_class enc_pop_rdx() %{    // single-byte pop of the stack top into rdx
2975     emit_opcode(cbuf,0x5A);      // POP EDX
2976   %}
2977 
2978   enc_class enc_rethrow() %{    // relocated jump to OptoRuntime::rethrow_stub()
2979     cbuf.set_insts_mark();
2980     emit_opcode(cbuf, 0xE9);        // jmp    entry
2981     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,   // target minus end of this 4-byte field
2982                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2983   %}
2984 
2985 
2986   // Convert a double to an int.  Java semantics require we do complex
2987   // manglelations in the corner cases.  So we set the rounding mode to
2988   // 'zero', store the darned double down as an int, and restore the
2989   // standard (or 24-bit) control word.  If the stored value is 0x80000000,
2990   // $src is pushed again and StubRoutines::d2i_wrapper() is called.
2991   enc_class DPR2I_encoding( regDPR src ) %{
2992     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2993     // exceptions here, so that a NAN or other corner-case value will
2994     // thrown an exception (but normal values get converted at full speed).
2995     // However, I2C adapters and other float-stack manglers leave pending
2996     // invalid-op exceptions hanging.  We would have to clear them before
2997     // enabling them and that is more expensive than just testing for the
2998     // invalid value Intel stores down in the corner cases.
2999     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3000     emit_opcode(cbuf,0x2D);
3001     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3002     // Allocate a word
3003     emit_opcode(cbuf,0x83);            // SUB ESP,4
3004     emit_opcode(cbuf,0xEC);
3005     emit_d8(cbuf,0x04);
3006     // Encoding assumes a double has been pushed into FPR0.
3007     // Store down the double as an int, popping the FPU stack
3008     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3009     emit_opcode(cbuf,0x1C);
3010     emit_d8(cbuf,0x24);
3011     // Restore the rounding mode; mask the exception
3012     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3013     emit_opcode(cbuf,0x2D);
3014     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3015         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3016         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3017
3018     // Load the converted int; adjust CPU stack
3019     emit_opcode(cbuf,0x58);       // POP EAX
3020     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3021     emit_d32   (cbuf,0x80000000); //         0x80000000
3022     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3023     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD + 5-byte CALL
3024     // Push src onto stack slow-path
3025     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3026     emit_d8    (cbuf,0xC0-1+$src$$reg );
3027     // CALL directly to the runtime
3028     cbuf.set_insts_mark();
3029     emit_opcode(cbuf,0xE8);       // Call into runtime
3030     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3031     // Carry on here...
3032   %}
3033 
3034   enc_class DPR2L_encoding( regDPR src ) %{    // double -> long: truncating FISTP, then call d2l_wrapper() when the result is 0x80000000:00000000
3035     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3036     emit_opcode(cbuf,0x2D);
3037     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3038     // Allocate two words (8 bytes)
3039     emit_opcode(cbuf,0x83);            // SUB ESP,8
3040     emit_opcode(cbuf,0xEC);
3041     emit_d8(cbuf,0x08);
3042     // Encoding assumes a double has been pushed into FPR0.
3043     // Store down the double as a long, popping the FPU stack
3044     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3045     emit_opcode(cbuf,0x3C);
3046     emit_d8(cbuf,0x24);
3047     // Restore the rounding mode; mask the exception
3048     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3049     emit_opcode(cbuf,0x2D);
3050     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3051         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3052         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3053
3054     // Load the converted long into EDX:EAX; adjust CPU stack
3055     emit_opcode(cbuf,0x58);       // POP EAX
3056     emit_opcode(cbuf,0x5A);       // POP EDX
3057     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3058     emit_d8    (cbuf,0xFA);       // RM byte selecting rdx
3059     emit_d32   (cbuf,0x80000000); //         0x80000000
3060     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3061     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the 4 bytes of TEST + JNE below
3062     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3063     emit_opcode(cbuf,0xC0);       // RM byte: rax,rax
3064     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3065     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD + 5-byte CALL
3066     // Push src onto stack slow-path
3067     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3068     emit_d8    (cbuf,0xC0-1+$src$$reg );
3069     // CALL directly to the runtime
3070     cbuf.set_insts_mark();
3071     emit_opcode(cbuf,0xE8);       // Call into runtime
3072     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3073     // Carry on here...
3074   %}
3075 
3076   enc_class FMul_ST_reg( eRegFPR src1 ) %{    // multiply the FPU stack top by $src1
3077     // Operand was loaded from memory into fp ST (stack top)
3078     // FMUL   ST,$src  /* D8 C8+i */
3079     emit_opcode(cbuf, 0xD8);
3080     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3081   %}
3082 
3083   enc_class FAdd_ST_reg( eRegFPR src2 ) %{    // add $src2 to the FPU stack top (non-popping form)
3084     // FADD   ST,src2  /* D8 C0+i */
3085     emit_opcode(cbuf, 0xD8);
3086     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3087     //could use FADDP  src2,fpST  /* DE C0+i */
3088   %}
3089 
3090   enc_class FAddP_reg_ST( eRegFPR src2 ) %{  // Emits the two bytes DE, C0+i (add into src2, then pop)
3091     // FADDP  src2,ST  /* DE C0+i */
3092     emit_opcode(cbuf, 0xDE);                  // first opcode byte
3093     emit_opcode(cbuf, 0xC0 + $src2$$reg);     // second byte: 0xC0 plus the register number of src2
3094   %}
3095 
3096   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{  // Emits a subtract of src1 followed by a divide by src2, both targeting ST
3097     // Operand has been loaded into fp ST (stack top)
3098       // FSUB   ST,$src1  /* D8 E0+i */
3099       emit_opcode(cbuf, 0xD8);
3100       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3101 
3102       // FDIV   ST,$src2  /* D8 F0+i */
3103       emit_opcode(cbuf, 0xD8);
3104       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3105   %}
3106 
3107   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{  // Emits an add of src1 and then a multiply by src2, both into ST
3108     // Operand was loaded from memory into fp ST (stack top)
3109     // FADD   ST,$src1  /* D8 C0+i */  -- NOTE(review): despite the class name, the add is emitted first
3110     emit_opcode(cbuf, 0xD8);
3111     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3112 
3113     // FMUL  ST,src2  /* D8 C8+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3116   %}
3117 
3118 
3119   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{  // Like MulFAddF, but the multiply is the popping DE form that targets src2
3120     // Operand was loaded from memory into fp ST (stack top)
3121     // FADD   ST,$src1  /* D8 C0+i */  -- the add is emitted first
3122     emit_opcode(cbuf, 0xD8);
3123     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3124 
3125     // FMULP  src2,ST  /* DE C8+i */
3126     emit_opcode(cbuf, 0xDE);
3127     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3128   %}
3129 
3130   // Atomically load the volatile long
3131   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{  // Two FPU instructions: DF /5 on mem, then DF /7 on the dst stack slot
3132     emit_opcode(cbuf,0xDF);                       // opcode byte of the first instruction (DF /5 = FILD m64int)
3133     int rm_byte_opcode = 0x05;                    // the /5 opcode extension handed to encode_RegMem
3134     int base     = $mem$$base;                    // addressing components of the memory operand
3135     int index    = $mem$$index;
3136     int scale    = $mem$$scale;
3137     int displace = $mem$$disp;
3138     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3139     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);  // emits the addressing bytes for mem
3140     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // DF /7 (FISTP m64int) targeting the dst slot
3141   %}
3142 
3143   // Volatile Store Long.  Must be atomic, so move it into
3144   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3145   // target address before the store (for null-ptr checks)
3146   // so the memory operand is used twice in the encoding.
3147   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{  // Two FPU instructions: DF /5 on the src stack slot, then DF /7 on mem
3148     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // DF /5 is FILD m64int: despite the helper's name this reads the src slot -- presumably the helper only emits opcode + slot addressing; confirm
3149     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3150     emit_opcode(cbuf,0xDF);           // opcode byte of the store (DF /7 = FISTP m64int)
3151     int rm_byte_opcode = 0x07;        // the /7 opcode extension handed to encode_RegMem
3152     int base     = $mem$$base;        // addressing components of the memory operand
3153     int index    = $mem$$index;
3154     int scale    = $mem$$scale;
3155     int displace = $mem$$disp;
3156     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3157     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);  // emits the addressing bytes for mem
3158   %}
3159 
3160   // Safepoint Poll.  This polls the safepoint page, and causes an
3161   // exception if it is not readable. Unfortunately, it kills the condition code
3162   // in the process
3163   // We currently use TESTL [spp],EDI
3164   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3165 
3166   enc_class Safepoint_Poll() %{   // Emits opcode 0x85 with a 32-bit absolute address: a read of the polling page
3167     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);  // attach a poll_type relocation at the current instruction mark
3168     emit_opcode(cbuf,0x85);                                     // 0x85 = TEST r/m32,r32
3169     emit_rm (cbuf, 0x0, 0x7, 0x5);                              // fields 0/7/5 -- presumably mod=0, reg=7 (EDI), r/m=5 (disp32 only); confirm emit_rm's argument order
3170     emit_d32(cbuf, (intptr_t)os::get_polling_page());           // polling page address, truncated to 32 bits by emit_d32
3171   %}
3172 %}
3173 
3174 
3175 //----------FRAME--------------------------------------------------------------
3176 // Definition of frame structure and management information.
3177 //
3178 //  S T A C K   L A Y O U T    Allocators stack-slot number
3179 //                             |   (to get allocators register number
3180 //  G  Owned by    |        |  v    add OptoReg::stack0())
3181 //  r   CALLER     |        |
3182 //  o     |        +--------+      pad to even-align allocators stack-slot
3183 //  w     V        |  pad0  |        numbers; owned by CALLER
3184 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3185 //  h     ^        |   in   |  5
3186 //        |        |  args  |  4   Holes in incoming args owned by SELF
3187 //  |     |        |        |  3
3188 //  |     |        +--------+
3189 //  V     |        | old out|      Empty on Intel, window on Sparc
3190 //        |    old |preserve|      Must be even aligned.
3191 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3192 //        |        |   in   |  3   area for Intel ret address
3193 //     Owned by    |preserve|      Empty on Sparc.
3194 //       SELF      +--------+
3195 //        |        |  pad2  |  2   pad to align old SP
3196 //        |        +--------+  1
3197 //        |        | locks  |  0
3198 //        |        +--------+----> OptoReg::stack0(), even aligned
3199 //        |        |  pad1  | 11   pad to align new SP
3200 //        |        +--------+
3201 //        |        |        | 10
3202 //        |        | spills |  9   spills
3203 //        V        |        |  8   (pad0 slot for callee)
3204 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3205 //        ^        |  out   |  7
3206 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3207 //     Owned by    +--------+
3208 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3209 //        |    new |preserve|      Must be even-aligned.
3210 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3211 //        |        |        |
3212 //
3213 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3214 //         known from SELF's arguments and the Java calling convention.
3215 //         Region 6-7 is determined per call site.
3216 // Note 2: If the calling convention leaves holes in the incoming argument
3217 //         area, those holes are owned by SELF.  Holes in the outgoing area
3218 //         are owned by the CALLEE.  Holes should not be necessary in the
3219 //         incoming area, as the Java calling convention is completely under
3220 //         the control of the AD file.  Doubles can be sorted and packed to
3221 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3222 //         varargs C calling conventions.
3223 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3224 //         even aligned with pad0 as needed.
3225 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3226 //         region 6-11 is even aligned; it may be padded out more so that
3227 //         the region from SP to FP meets the minimum stack alignment.
3228 
3229 frame %{
3230   // What direction does stack grow in (assumed to be same for C & Java)
3231   stack_direction(TOWARDS_LOW);
3232 
3233   // The following two registers define part of the calling convention
3234   // between compiled code and the interpreter.
3235   inline_cache_reg(EAX);                // Inline Cache Register
3236   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3237 
3238   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3239   cisc_spilling_operand_name(indOffset32);
3240 
3241   // Number of stack slots consumed by locking an object
3242   sync_stack_slots(1);
3243 
3244   // Compiled code's Frame Pointer
3245   frame_pointer(ESP);
3246   // Interpreter stores its frame pointer in a register which is
3247   // stored to the stack by I2CAdaptors.
3248   // I2CAdaptors convert from interpreted java to compiled java.
3249   interpreter_frame_pointer(EBP);
3250 
3251   // Stack alignment requirement
3252   // Alignment size in bytes (128-bit -> 16 bytes)
3253   stack_alignment(StackAlignmentInBytes);
3254 
3255   // Number of stack slots between incoming argument block and the start of
3256   // a new frame.  The PROLOG must add this many slots to the stack.  The
3257   // EPILOG must remove this many slots.  Intel needs one slot for
3258   // return address and one for rbp, (must save rbp)
3259   in_preserve_stack_slots(2+VerifyStackAtCalls);   // the value of VerifyStackAtCalls is added on top of those two slots
3260 
3261   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3262   // for calls to C.  Supports the var-args backing area for register parms.
3263   varargs_C_out_slots_killed(0);
3264 
3265   // The after-PROLOG location of the return address.  Location of
3266   // return address specifies a type (REG or STACK) and a number
3267   // representing the register number (i.e. - use a register name) or
3268   // stack slot.
3269   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3270   // Otherwise, it is above the locks and verification slot and alignment word
3271   return_addr(STACK - 1 +
3272               round_to((Compile::current()->in_preserve_stack_slots() +
3273                         Compile::current()->fixed_slots()),
3274                        stack_alignment_in_slots()));   // preserve + fixed slots, rounded up to the stack alignment, minus one
3275 
3276   // Body of function which returns an integer array locating
3277   // arguments either in registers or in stack slots.  Passed an array
3278   // of ideal registers called "sig" and a "length" count.  Stack-slot
3279   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3280   // arguments for a CALLEE.  Incoming stack arguments are
3281   // automatically biased by the preserve_stack_slots field above.
3282   calling_convention %{
3283     // No difference between ingoing/outgoing just pass false
3284     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3285   %}
3286 
3287 
3288   // Same contract as calling_convention above, but for calls to C code:
3289   // locates arguments either in registers or in stack slots.  Passed an array
3290   // of ideal registers called "sig" and a "length" count.  Stack-slot
3291   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3292   // arguments for a CALLEE.  Incoming stack arguments are
3293   // automatically biased by the preserve_stack_slots field above.
3294   c_calling_convention %{
3295     // This is obviously always outgoing
3296     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);   // return value deliberately discarded
3297   %}
3298 
3299   // Location of C & interpreter return values
3300   c_return_value %{
3301     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );   // ideal_reg indexes the lo/hi tables below
3302     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3303     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3304 
3305     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3306     // that C functions return float and double results in XMM0.
3307     if( ideal_reg == Op_RegD && UseSSE>=2 )
3308       return OptoRegPair(XMM0b_num,XMM0_num);
3309     if( ideal_reg == Op_RegF && UseSSE>=2 )
3310       return OptoRegPair(OptoReg::Bad,XMM0_num);
3311 
3312     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);   // everything else comes from the tables
3313   %}
3314 
3315   // Location of return values
3316   return_value %{
3317     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );   // ideal_reg indexes the lo/hi tables below
3318     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3319     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3320     if( ideal_reg == Op_RegD && UseSSE>=2 )
3321       return OptoRegPair(XMM0b_num,XMM0_num);
3322     if( ideal_reg == Op_RegF && UseSSE>=1 )   // floats switch to XMM0 from UseSSE 1 upward here; c_return_value above requires 2
3323       return OptoRegPair(OptoReg::Bad,XMM0_num);
3324     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3325   %}
3326 
3327 %}
3328 
3329 //----------ATTRIBUTES---------------------------------------------------------
3330 //----------Operand Attributes-------------------------------------------------
3331 op_attrib op_cost(0);        // Required cost attribute (declared with value 0; presumably the default for operands that omit op_cost)
3332 
3333 //----------Instruction Attributes---------------------------------------------
3334 ins_attrib ins_cost(100);       // Required cost attribute
3335 ins_attrib ins_size(8);         // Required size attribute (in bits)
3336 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3337                                 // non-matching short branch variant of some
3338                                 // long branch?
3339 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3340                                 // specifies the alignment that some part of the instruction (not
3341                                 // necessarily the start) requires.  If > 1, a compute_padding()
3342                                 // function must be provided for the instruction
3343 
3344 //----------OPERANDS-----------------------------------------------------------
3345 // Operand definitions must precede instruction definitions for correct parsing
3346 // in the ADLC because operands constitute user defined types which are used in
3347 // instruction definitions.
3348 
3349 //----------Simple Operands----------------------------------------------------
3350 // Immediate Operands
3351 // Integer Immediate
3352 operand immI() %{   // Any ConI node: no predicate restricts the value
3353   match(ConI);
3354 
3355   op_cost(10);
3356   format %{ %}
3357   interface(CONST_INTER);
3358 %}
3359 
3360 // Constant for test vs zero
3361 operand immI0() %{   // ConI whose value is exactly 0
3362   predicate(n->get_int() == 0);
3363   match(ConI);
3364 
3365   op_cost(0);
3366   format %{ %}
3367   interface(CONST_INTER);
3368 %}
3369 
3370 // Constant for increment
3371 operand immI1() %{   // ConI whose value is exactly 1
3372   predicate(n->get_int() == 1);
3373   match(ConI);
3374 
3375   op_cost(0);
3376   format %{ %}
3377   interface(CONST_INTER);
3378 %}
3379 
3380 // Constant for decrement
3381 operand immI_M1() %{   // ConI whose value is exactly -1
3382   predicate(n->get_int() == -1);
3383   match(ConI);
3384 
3385   op_cost(0);
3386   format %{ %}
3387   interface(CONST_INTER);
3388 %}
3389 
3390 // Valid scale values for addressing modes
3391 operand immI2() %{   // ConI in the inclusive range 0..3; no op_cost is given here
3392   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3393   match(ConI);
3394 
3395   format %{ %}
3396   interface(CONST_INTER);
3397 %}
3398 
3399 operand immI8() %{   // ConI that fits in a signed byte (-128..127)
3400   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3401   match(ConI);
3402 
3403   op_cost(5);
3404   format %{ %}
3405   interface(CONST_INTER);
3406 %}
3407 
3408 operand immI16() %{   // ConI that fits in a signed 16-bit value (-32768..32767)
3409   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3410   match(ConI);
3411 
3412   op_cost(10);
3413   format %{ %}
3414   interface(CONST_INTER);
3415 %}
3416 
3417 // Int Immediate non-negative
3418 operand immU31()
3419 %{
3420   predicate(n->get_int() >= 0);   // accepts zero and every positive int, i.e. values with the sign bit clear
3421   match(ConI);
3422 
3423   op_cost(0);
3424   format %{ %}
3425   interface(CONST_INTER);
3426 %}
3427 
3428 // Constant for long shifts
3429 operand immI_32() %{   // ConI whose value is exactly 32
3430   predicate( n->get_int() == 32 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_1_31() %{   // ConI in the inclusive range 1..31
3439   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 operand immI_32_63() %{   // ConI in the inclusive range 32..63
3448   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3449   match(ConI);
3450   op_cost(0);
3451 
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 operand immI_1() %{   // ConI whose value is exactly 1 (same predicate as immI1)
3457   predicate( n->get_int() == 1 );
3458   match(ConI);
3459 
3460   op_cost(0);
3461   format %{ %}
3462   interface(CONST_INTER);
3463 %}
3464 
3465 operand immI_2() %{   // ConI whose value is exactly 2
3466   predicate( n->get_int() == 2 );
3467   match(ConI);
3468 
3469   op_cost(0);
3470   format %{ %}
3471   interface(CONST_INTER);
3472 %}
3473 
3474 operand immI_3() %{   // ConI whose value is exactly 3
3475   predicate( n->get_int() == 3 );
3476   match(ConI);
3477 
3478   op_cost(0);
3479   format %{ %}
3480   interface(CONST_INTER);
3481 %}
3482 
3483 // Pointer Immediate
3484 operand immP() %{   // Any ConP node: no predicate restricts the value
3485   match(ConP);
3486 
3487   op_cost(10);
3488   format %{ %}
3489   interface(CONST_INTER);
3490 %}
3491 
3492 // NULL Pointer Immediate
3493 operand immP0() %{   // ConP whose pointer value is 0
3494   predicate( n->get_ptr() == 0 );
3495   match(ConP);
3496   op_cost(0);
3497 
3498   format %{ %}
3499   interface(CONST_INTER);
3500 %}
3501 
3502 // Long Immediate
3503 operand immL() %{   // Any ConL node: no predicate restricts the value
3504   match(ConL);
3505 
3506   op_cost(20);
3507   format %{ %}
3508   interface(CONST_INTER);
3509 %}
3510 
3511 // Long Immediate zero
3512 operand immL0() %{   // ConL whose value is exactly 0
3513   predicate( n->get_long() == 0L );
3514   match(ConL);
3515   op_cost(0);
3516 
3517   format %{ %}
3518   interface(CONST_INTER);
3519 %}
3520 
3521 // Long Immediate minus one
3522 operand immL_M1() %{   // ConL whose value is exactly -1
3523   predicate( n->get_long() == -1L );
3524   match(ConL);
3525   op_cost(0);
3526 
3527   format %{ %}
3528   interface(CONST_INTER);
3529 %}
3530 
3531 // Long immediate from 0 to 127.
3532 // Used for a shorter form of long mul by 10.
3533 operand immL_127() %{   // ConL in the inclusive range 0..127
3534   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3535   match(ConL);
3536   op_cost(0);
3537 
3538   format %{ %}
3539   interface(CONST_INTER);
3540 %}
3541 
3542 // Long Immediate: low 32-bit mask
3543 operand immL_32bits() %{   // ConL equal to the single value 0xFFFFFFFF (low 32 bits set)
3544   predicate(n->get_long() == 0xFFFFFFFFL);
3545   match(ConL);
3546   op_cost(0);
3547 
3548   format %{ %}
3549   interface(CONST_INTER);
3550 %}
3551 
3552 // Long Immediate: value is unchanged by a round trip through int
3553 operand immL32() %{   // ConL whose value survives a cast to int, i.e. fits in a signed 32-bit value
3554   predicate(n->get_long() == (int)(n->get_long()));
3555   match(ConL);
3556   op_cost(20);
3557 
3558   format %{ %}
3559   interface(CONST_INTER);
3560 %}
3561 
3562 //Double Immediate zero
3563 operand immDPR0() %{   // ConD equal to 0.0, only when UseSSE<=1
3564   // Do additional (and counter-intuitive) test against NaN to work around VC++
3565   // bug that generates code such that NaNs compare equal to 0.0
3566   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );   // NOTE(review): -0.0 == 0.0 is true, so negative zero passes as well -- confirm users tolerate that
3567   match(ConD);
3568 
3569   op_cost(5);
3570   format %{ %}
3571   interface(CONST_INTER);
3572 %}
3573 
3574 // Double Immediate one
3575 operand immDPR1() %{   // ConD equal to 1.0, only when UseSSE<=1
3576   predicate( UseSSE<=1 && n->getd() == 1.0 );
3577   match(ConD);
3578 
3579   op_cost(5);
3580   format %{ %}
3581   interface(CONST_INTER);
3582 %}
3583 
3584 // Double Immediate
3585 operand immDPR() %{   // Any ConD, only when UseSSE<=1
3586   predicate(UseSSE<=1);
3587   match(ConD);
3588 
3589   op_cost(5);
3590   format %{ %}
3591   interface(CONST_INTER);
3592 %}
3593 
3594 operand immD() %{   // Any ConD, only when UseSSE>=2 (the complement of immDPR's condition)
3595   predicate(UseSSE>=2);
3596   match(ConD);
3597 
3598   op_cost(5);
3599   format %{ %}
3600   interface(CONST_INTER);
3601 %}
3602 
3603 // Double Immediate zero
3604 operand immD0() %{   // ConD selected by jlong_cast(value) == 0, only when UseSSE>=2
3605   // The test is an integer compare on jlong_cast of the value rather than a
3606   // floating-point compare.  Presumably jlong_cast yields the raw 64-bit pattern,
3607   // so only +0.0 matches and both -0.0 and NaN are rejected -- confirm.
3608   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3609   match(ConD);
3610 
3611   format %{ %}
3612   interface(CONST_INTER);
3613 %}
3614 
3615 // Float Immediate zero
3616 operand immFPR0() %{   // ConF equal to 0.0F, only when UseSSE == 0
3617   predicate(UseSSE == 0 && n->getf() == 0.0F);   // NOTE(review): -0.0F == 0.0F is true, so negative zero passes as well -- confirm users tolerate that
3618   match(ConF);
3619 
3620   op_cost(5);
3621   format %{ %}
3622   interface(CONST_INTER);
3623 %}
3624 
3625 // Float Immediate one
3626 operand immFPR1() %{   // ConF equal to 1.0F, only when UseSSE == 0
3627   predicate(UseSSE == 0 && n->getf() == 1.0F);
3628   match(ConF);
3629 
3630   op_cost(5);
3631   format %{ %}
3632   interface(CONST_INTER);
3633 %}
3634 
3635 // Float Immediate
3636 operand immFPR() %{   // Any ConF, only when UseSSE == 0
3637   predicate( UseSSE == 0 );
3638   match(ConF);
3639 
3640   op_cost(5);
3641   format %{ %}
3642   interface(CONST_INTER);
3643 %}
3644 
3645 // Float Immediate
3646 operand immF() %{   // Any ConF, only when UseSSE >= 1 (the complement of immFPR's condition)
3647   predicate(UseSSE >= 1);
3648   match(ConF);
3649 
3650   op_cost(5);
3651   format %{ %}
3652   interface(CONST_INTER);
3653 %}
3654 
3655 // Float Immediate zero.  Zero and not -0.0
3656 operand immF0() %{   // ConF selected by jint_cast(value) == 0, only when UseSSE >= 1
3657   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );   // integer compare on jint_cast, presumably the raw 32-bit pattern, so -0.0 is excluded
3658   match(ConF);
3659 
3660   op_cost(5);
3661   format %{ %}
3662   interface(CONST_INTER);
3663 %}
3664 
3665 // Immediates for special shifts (sign extend)
3666 
3667 // Constants 16 and 24 (shift counts)
3668 operand immI_16() %{   // ConI whose value is exactly 16; no op_cost is given here
3669   predicate( n->get_int() == 16 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 operand immI_24() %{   // ConI whose value is exactly 24; no op_cost is given here
3677   predicate( n->get_int() == 24 );
3678   match(ConI);
3679 
3680   format %{ %}
3681   interface(CONST_INTER);
3682 %}
3683 
3684 // Constant for byte-wide masking
3685 operand immI_255() %{   // ConI whose value is exactly 255 (0xFF)
3686   predicate( n->get_int() == 255 );
3687   match(ConI);
3688 
3689   format %{ %}
3690   interface(CONST_INTER);
3691 %}
3692 
3693 // Constant for short-wide masking
3694 operand immI_65535() %{   // ConI whose value is exactly 65535 (0xFFFF)
3695   predicate(n->get_int() == 65535);
3696   match(ConI);
3697 
3698   format %{ %}
3699   interface(CONST_INTER);
3700 %}
3701 
3702 // Register Operands
3703 // Integer Register
3704 operand rRegI() %{   // Int value allocated in register class int_reg
3705   constraint(ALLOC_IN_RC(int_reg));
3706   match(RegI);
3707   match(xRegI);      // the narrower int operands below are accepted here as well
3708   match(eAXRegI);
3709   match(eBXRegI);
3710   match(eCXRegI);
3711   match(eDXRegI);
3712   match(eDIRegI);
3713   match(eSIRegI);
3714 
3715   format %{ %}
3716   interface(REG_INTER);
3717 %}
3718 
3719 // Subset of Integer Register
3720 operand xRegI(rRegI reg) %{   // Int value restricted to register class int_x_reg
3721   constraint(ALLOC_IN_RC(int_x_reg));
3722   match(reg);
3723   match(eAXRegI);    // explicit matches cover only the EAX/EBX/ECX/EDX operands
3724   match(eBXRegI);
3725   match(eCXRegI);
3726   match(eDXRegI);
3727 
3728   format %{ %}
3729   interface(REG_INTER);
3730 %}
3731 
3732 // Special Registers
3733 operand eAXRegI(xRegI reg) %{   // Int value pinned to register class eax_reg; printed as "EAX"
3734   constraint(ALLOC_IN_RC(eax_reg));
3735   match(reg);
3736   match(rRegI);
3737 
3738   format %{ "EAX" %}
3739   interface(REG_INTER);
3740 %}
3741 
3742 // Special Registers
3743 operand eBXRegI(xRegI reg) %{   // Int value pinned to register class ebx_reg; printed as "EBX"
3744   constraint(ALLOC_IN_RC(ebx_reg));
3745   match(reg);
3746   match(rRegI);
3747 
3748   format %{ "EBX" %}
3749   interface(REG_INTER);
3750 %}
3751 
3752 operand eCXRegI(xRegI reg) %{   // Int value pinned to register class ecx_reg; printed as "ECX"
3753   constraint(ALLOC_IN_RC(ecx_reg));
3754   match(reg);
3755   match(rRegI);
3756 
3757   format %{ "ECX" %}
3758   interface(REG_INTER);
3759 %}
3760 
3761 operand eDXRegI(xRegI reg) %{   // Int value pinned to register class edx_reg; printed as "EDX"
3762   constraint(ALLOC_IN_RC(edx_reg));
3763   match(reg);
3764   match(rRegI);
3765 
3766   format %{ "EDX" %}
3767   interface(REG_INTER);
3768 %}
3769 
3770 operand eDIRegI(xRegI reg) %{   // Int value pinned to register class edi_reg; printed as "EDI"
3771   constraint(ALLOC_IN_RC(edi_reg));
3772   match(reg);
3773   match(rRegI);
3774 
3775   format %{ "EDI" %}
3776   interface(REG_INTER);
3777 %}
3778 
3779 operand naxRegI() %{   // Int value in register class nax_reg
3780   constraint(ALLOC_IN_RC(nax_reg));
3781   match(RegI);
3782   match(eCXRegI);    // explicit matches: ECX, EDX, ESI and EDI operands (no eAXRegI)
3783   match(eDXRegI);
3784   match(eSIRegI);
3785   match(eDIRegI);
3786 
3787   format %{ %}
3788   interface(REG_INTER);
3789 %}
3790 
3791 operand nadxRegI() %{   // Int value in register class nadx_reg
3792   constraint(ALLOC_IN_RC(nadx_reg));
3793   match(RegI);
3794   match(eBXRegI);    // explicit matches: EBX, ECX, ESI and EDI operands (neither eAXRegI nor eDXRegI)
3795   match(eCXRegI);
3796   match(eSIRegI);
3797   match(eDIRegI);
3798 
3799   format %{ %}
3800   interface(REG_INTER);
3801 %}
3802 
3803 operand ncxRegI() %{   // Int value in register class ncx_reg
3804   constraint(ALLOC_IN_RC(ncx_reg));
3805   match(RegI);
3806   match(eAXRegI);    // explicit matches: EAX, EDX, ESI and EDI operands (no eCXRegI)
3807   match(eDXRegI);
3808   match(eSIRegI);
3809   match(eDIRegI);
3810 
3811   format %{ %}
3812   interface(REG_INTER);
3813 %}
3814 
3815 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3816 // //
3817 operand eSIRegI(xRegI reg) %{   // Int value pinned to register class esi_reg; printed as "ESI"
3818    constraint(ALLOC_IN_RC(esi_reg));
3819    match(reg);
3820    match(rRegI);
3821 
3822    format %{ "ESI" %}
3823    interface(REG_INTER);
3824 %}
3825 
3826 // Pointer Register
3827 operand anyRegP() %{   // Pointer value in register class any_reg
3828   constraint(ALLOC_IN_RC(any_reg));
3829   match(RegP);
3830   match(eAXRegP);    // explicit matches: EAX, EBX, ECX, EDI pointer operands and eRegP
3831   match(eBXRegP);
3832   match(eCXRegP);
3833   match(eDIRegP);
3834   match(eRegP);
3835 
3836   format %{ %}
3837   interface(REG_INTER);
3838 %}
3839 
3840 operand eRegP() %{   // Pointer value in register class int_reg (the same class rRegI uses)
3841   constraint(ALLOC_IN_RC(int_reg));
3842   match(RegP);
3843   match(eAXRegP);
3844   match(eBXRegP);
3845   match(eCXRegP);
3846   match(eDIRegP);
3847 
3848   format %{ %}
3849   interface(REG_INTER);
3850 %}
3851 
3852 // On windows95, EBP is not safe to use for implicit null tests.
3853 operand eRegP_no_EBP() %{   // Pointer value in register class int_reg_no_ebp
3854   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3855   match(RegP);
3856   match(eAXRegP);
3857   match(eBXRegP);
3858   match(eCXRegP);
3859   match(eDIRegP);
3860 
3861   op_cost(100);      // the highest operand cost in this group of pointer operands
3862   format %{ %}
3863   interface(REG_INTER);
3864 %}
3865 
3866 operand naxRegP() %{   // Pointer value in register class nax_reg
3867   constraint(ALLOC_IN_RC(nax_reg));
3868   match(RegP);
3869   match(eBXRegP);    // explicit matches: EBX, EDX, ECX, ESI and EDI pointer operands (no eAXRegP)
3870   match(eDXRegP);
3871   match(eCXRegP);
3872   match(eSIRegP);
3873   match(eDIRegP);
3874 
3875   format %{ %}
3876   interface(REG_INTER);
3877 %}
3878 
3879 operand nabxRegP() %{   // Pointer value in register class nabx_reg
3880   constraint(ALLOC_IN_RC(nabx_reg));
3881   match(RegP);
3882   match(eCXRegP);    // explicit matches: ECX, EDX, ESI and EDI pointer operands (neither eAXRegP nor eBXRegP)
3883   match(eDXRegP);
3884   match(eSIRegP);
3885   match(eDIRegP);
3886 
3887   format %{ %}
3888   interface(REG_INTER);
3889 %}
3890 
3891 operand pRegP() %{   // Pointer value in register class p_reg
3892   constraint(ALLOC_IN_RC(p_reg));
3893   match(RegP);
3894   match(eBXRegP);    // explicit matches: EBX, EDX, ESI and EDI pointer operands
3895   match(eDXRegP);
3896   match(eSIRegP);
3897   match(eDIRegP);
3898 
3899   format %{ %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 // Special Registers
3904 // Return a pointer value
3905 operand eAXRegP(eRegP reg) %{   // Pointer value pinned to register class eax_reg; printed as "EAX"
3906   constraint(ALLOC_IN_RC(eax_reg));
3907   match(reg);
3908   format %{ "EAX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Used in AtomicAdd
3913 operand eBXRegP(eRegP reg) %{   // Pointer value pinned to register class ebx_reg; printed as "EBX"
3914   constraint(ALLOC_IN_RC(ebx_reg));
3915   match(reg);
3916   format %{ "EBX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 // Tail-call (interprocedural jump) to interpreter
3921 operand eCXRegP(eRegP reg) %{   // Pointer value pinned to register class ecx_reg; printed as "ECX"
3922   constraint(ALLOC_IN_RC(ecx_reg));
3923   match(reg);
3924   format %{ "ECX" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 operand eSIRegP(eRegP reg) %{   // Pointer value pinned to register class esi_reg; printed as "ESI"
3929   constraint(ALLOC_IN_RC(esi_reg));
3930   match(reg);
3931   format %{ "ESI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 // Used in rep stosw
3936 operand eDIRegP(eRegP reg) %{   // Pointer value pinned to register class edi_reg; printed as "EDI"
3937   constraint(ALLOC_IN_RC(edi_reg));
3938   match(reg);
3939   format %{ "EDI" %}
3940   interface(REG_INTER);
3941 %}
3942 
3943 operand eRegL() %{   // Long value in register class long_reg
3944   constraint(ALLOC_IN_RC(long_reg));
3945   match(RegL);
3946   match(eADXRegL);   // only eADXRegL is listed explicitly; eBCXRegL is not
3947 
3948   format %{ %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eADXRegL( eRegL reg ) %{   // Long value pinned to register class eadx_reg; printed as "EDX:EAX"
3953   constraint(ALLOC_IN_RC(eadx_reg));
3954   match(reg);
3955 
3956   format %{ "EDX:EAX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 operand eBCXRegL( eRegL reg ) %{   // Long value pinned to register class ebcx_reg; printed as "EBX:ECX"
3961   constraint(ALLOC_IN_RC(ebcx_reg));
3962   match(reg);
3963 
3964   format %{ "EBX:ECX" %}
3965   interface(REG_INTER);
3966 %}
3967 
3968 // Special case for integer high multiply
3969 operand eADXRegL_low_only() %{   // Same register class as eADXRegL (eadx_reg), but matches RegL directly
3970   constraint(ALLOC_IN_RC(eadx_reg));
3971   match(RegL);
3972 
3973   format %{ "EAX" %}
3974   interface(REG_INTER);      // note the format above prints only "EAX" although the class is eadx_reg
3975 %}
3976 
3977 // Flags register, used as output of compare instructions
3978 operand eFlagsReg() %{   // RegFlags value in register class int_flags; printed as "EFLAGS"
3979   constraint(ALLOC_IN_RC(int_flags));
3980   match(RegFlags);
3981 
3982   format %{ "EFLAGS" %}
3983   interface(REG_INTER);
3984 %}
3985 
3986 // Flags register, used as output of FLOATING POINT compare instructions
3987 operand eFlagsRegU() %{   // Same class and match as eFlagsReg; a distinct operand type printed as "EFLAGS_U"
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990 
3991   format %{ "EFLAGS_U" %}
3992   interface(REG_INTER);
3993 %}
3994 
3995 operand eFlagsRegUCF() %{   // Flags operand printed as "EFLAGS_U_CF"
3996   constraint(ALLOC_IN_RC(int_flags));
3997   match(RegFlags);
3998   predicate(false);          // constant-false predicate: the match rule above can never succeed on its own
3999 
4000   format %{ "EFLAGS_U_CF" %}
4001   interface(REG_INTER);
4002 %}
4003 
4004 // Condition Code Register used by long compare
4005 operand flagsReg_long_LTGE() %{   // RegFlags in int_flags; a distinct operand type printed as "FLAGS_LTGE"
4006   constraint(ALLOC_IN_RC(int_flags));
4007   match(RegFlags);
4008   format %{ "FLAGS_LTGE" %}
4009   interface(REG_INTER);
4010 %}
4011 operand flagsReg_long_EQNE() %{   // RegFlags in int_flags; a distinct operand type printed as "FLAGS_EQNE"
4012   constraint(ALLOC_IN_RC(int_flags));
4013   match(RegFlags);
4014   format %{ "FLAGS_EQNE" %}
4015   interface(REG_INTER);
4016 %}
4017 operand flagsReg_long_LEGT() %{   // RegFlags in int_flags; a distinct operand type printed as "FLAGS_LEGT"
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_LEGT" %}
4021   interface(REG_INTER);
4022 %}
4023 
4024 // Double register operands
4025 operand regDPR() %{   // Double (RegD) value in register class fp_dbl_reg
4026   predicate( UseSSE < 2 );   // only available when UseSSE is 0 or 1
4027   constraint(ALLOC_IN_RC(fp_dbl_reg));
4028   match(RegD);
4029   match(regDPR1);
4030   match(regDPR2);
4031   format %{ %}
4032   interface(REG_INTER);
4033 %}
4034 
4035 operand regDPR1(regDPR reg) %{   // Double value pinned to register class fp_dbl_reg0; printed as "FPR1"
4036   predicate( UseSSE < 2 );
4037   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4038   match(reg);
4039   format %{ "FPR1" %}
4040   interface(REG_INTER);
4041 %}
4042 
4043 operand regDPR2(regDPR reg) %{   // Double value pinned to register class fp_dbl_reg1; printed as "FPR2"
4044   predicate( UseSSE < 2 );
4045   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4046   match(reg);
4047   format %{ "FPR2" %}
4048   interface(REG_INTER);
4049 %}
4050 
4051 operand regnotDPR1(regDPR reg) %{   // Double value in register class fp_dbl_notreg0
4052   predicate( UseSSE < 2 );
4053   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4054   match(reg);
4055   format %{ %}
4056   interface(REG_INTER);
4057 %}
4058 
4059 // Float register operands
4060 operand regFPR() %{   // Float (RegF) value in register class fp_flt_reg
4061   predicate( UseSSE < 2 );   // NOTE(review): regF below is enabled for UseSSE>=1, so both predicates hold at UseSSE==1 -- confirm intended
4062   constraint(ALLOC_IN_RC(fp_flt_reg));
4063   match(RegF);
4064   match(regFPR1);
4065   format %{ %}
4066   interface(REG_INTER);
4067 %}
4068 
4069 // Float register operands
4070 operand regFPR1(regFPR reg) %{   // Float value pinned to register class fp_flt_reg0; printed as "FPR1"
4071   predicate( UseSSE < 2 );
4072   constraint(ALLOC_IN_RC(fp_flt_reg0));
4073   match(reg);
4074   format %{ "FPR1" %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 // XMM Float register operands
4079 operand regF() %{   // Float (RegF) value in register class float_reg_legacy
4080   predicate( UseSSE>=1 );
4081   constraint(ALLOC_IN_RC(float_reg_legacy));
4082   match(RegF);
4083   format %{ %}
4084   interface(REG_INTER);
4085 %}
4086 
4087 // XMM Double register operands
4088 operand regD() %{   // Double (RegD) value in register class double_reg_legacy
4089   predicate( UseSSE>=2 );    // the complement of regDPR's UseSSE < 2
4090   constraint(ALLOC_IN_RC(double_reg_legacy));
4091   match(RegD);
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4097 // runtime code generation via reg_class_dynamic.
4098 operand vecS() %{   // VecS value in register class vectors_reg_legacy; no predicate
4099   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4100   match(VecS);
4101 
4102   format %{ %}
4103   interface(REG_INTER);
4104 %}
4105 
4106 operand vecD() %{   // VecD value in register class vectord_reg_legacy; no predicate
4107   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4108   match(VecD);
4109 
4110   format %{ %}
4111   interface(REG_INTER);
4112 %}
4113 
4114 operand vecX() %{   // VecX value in register class vectorx_reg_legacy; no predicate
4115   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4116   match(VecX);
4117 
4118   format %{ %}
4119   interface(REG_INTER);
4120 %}
4121 
4122 operand vecY() %{   // VecY value in register class vectory_reg_legacy; no predicate
4123   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4124   match(VecY);
4125 
4126   format %{ %}
4127   interface(REG_INTER);
4128 %}
4129 
4130 //----------Memory Operands----------------------------------------------------
4131 // Direct Memory Operand
4132 operand direct(immP addr) %{   // Memory operand whose whole address is the pointer constant addr
4133   match(addr);
4134 
4135   format %{ "[$addr]" %}
4136   interface(MEMORY_INTER) %{
4137     base(0xFFFFFFFF);        // presumably a sentinel meaning "no base register" -- confirm against the encoder
4138     index(0x4);              // 0x4: presumably "no index" (the ESP slot in x86 SIB encoding) -- confirm
4139     scale(0x0);
4140     disp($addr);             // the constant supplies the displacement
4141   %}
4142 %}
4143 
4144 // Indirect Memory Operand
4145 operand indirect(eRegP reg) %{   // Memory operand addressed by a pointer register alone: [reg]
4146   constraint(ALLOC_IN_RC(int_reg));
4147   match(reg);
4148 
4149   format %{ "[$reg]" %}
4150   interface(MEMORY_INTER) %{
4151     base($reg);
4152     index(0x4);              // 0x4: presumably "no index" (the ESP slot in x86 SIB encoding) -- confirm
4153     scale(0x0);
4154     disp(0x0);               // zero displacement
4155   %}
4156 %}
4157 
4158 // Indirect Memory Plus Short Offset Operand
4159 operand indOffset8(eRegP reg, immI8 off) %{   // [reg + off] where off is an immI8 constant
4160   match(AddP reg off);
4161 
4162   format %{ "[$reg + $off]" %}
4163   interface(MEMORY_INTER) %{
4164     base($reg);
4165     index(0x4);              // 0x4: presumably "no index" (the ESP slot in x86 SIB encoding) -- confirm
4166     scale(0x0);
4167     disp($off);
4168   %}
4169 %}
4170 
4171 // Indirect Memory Plus Long Offset Operand
4172 operand indOffset32(eRegP reg, immI off) %{   // [reg + off] where off is any int constant (immI)
4173   match(AddP reg off);
4174 
4175   format %{ "[$reg + $off]" %}
4176   interface(MEMORY_INTER) %{
4177     base($reg);
4178     index(0x4);              // 0x4: presumably "no index" (the ESP slot in x86 SIB encoding) -- confirm
4179     scale(0x0);
4180     disp($off);
4181   %}
4182 %}
4183 
4184 // Integer Register Plus Pointer-Immediate Operand: matches (AddP off reg) with off an immP and reg an rRegI
4185 operand indOffset32X(rRegI reg, immP off) %{
4186   match(AddP off reg);  // note the operand order: the pointer immediate is the first AddP input
4187
4188   format %{ "[$reg + $off]" %}
4189   interface(MEMORY_INTER) %{
4190     base($reg);  // the integer register is encoded as the base
4191     index(0x4);  // 0x4: no index
4192     scale(0x0);  // no scale
4193     disp($off);  // the pointer immediate is encoded as the displacement
4194   %}
4195 %}
4196 
4197 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $ireg + $off], unscaled index
4198 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4199   match(AddP (AddP reg ireg) off);
4200
4201   op_cost(10);
4202   format %{"[$reg + $off + $ireg]" %}
4203   interface(MEMORY_INTER) %{
4204     base($reg);
4205     index($ireg);
4206     scale(0x0);   // index is not scaled
4207     disp($off);
4208   %}
4209 %}
4210 
4211 // Indirect Memory Plus Index Register Operand: [$reg + $ireg], unscaled index and no offset
4212 operand indIndex(eRegP reg, rRegI ireg) %{
4213   match(AddP reg ireg);
4214
4215   op_cost(10);
4216   format %{"[$reg + $ireg]" %}
4217   interface(MEMORY_INTER) %{
4218     base($reg);
4219     index($ireg);
4220     scale(0x0);   // index is not scaled
4221     disp(0x0);    // no displacement
4222   %}
4223 %}
4224 
4225 // // -------------------------------------------------------------------------
4226 // // 486 architecture doesn't support "scale * index + offset" without a base
4227 // // -------------------------------------------------------------------------
4228 // // Scaled Memory Operands
4229 // // Indirect Memory Times Scale Plus Offset Operand
4230 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4231 //   match(AddP off (LShiftI ireg scale));
4232 //
4233 //   op_cost(10);
4234 //   format %{"[$off + $ireg << $scale]" %}
4235 //   interface(MEMORY_INTER) %{
4236 //     base(0x4);
4237 //     index($ireg);
4238 //     scale($scale);
4239 //     disp($off);
4240 //   %}
4241 // %}
4242 
4243 // Indirect Memory Plus Scaled Index Register Operand: [$reg + ($ireg << $scale)], no offset
4244 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4245   match(AddP reg (LShiftI ireg scale));
4246
4247   op_cost(10);
4248   format %{"[$reg + $ireg << $scale]" %}
4249   interface(MEMORY_INTER) %{
4250     base($reg);
4251     index($ireg);
4252     scale($scale);  // shift amount taken from the matched LShiftI
4253     disp(0x0);      // no displacement
4254   %}
4255 %}
4256 
4257 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [$reg + ($ireg << $scale) + $off]
4258 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4259   match(AddP (AddP reg (LShiftI ireg scale)) off);
4260
4261   op_cost(10);
4262   format %{"[$reg + $off + $ireg << $scale]" %}
4263   interface(MEMORY_INTER) %{
4264     base($reg);
4265     index($ireg);
4266     scale($scale);  // shift amount taken from the matched LShiftI
4267     disp($off);
4268   %}
4269 %}
4270 
4271 //----------Load Long Memory Operands------------------------------------------
4272 // The load-long idiom will use its address expression again after loading
4273 // the first word of the long.  If the load-long destination overlaps with
4274 // registers used in the addressing expression, the 2nd half will be loaded
4275 // from a clobbered address.  Fix this by requiring that load-long use
4276 // address registers that do not overlap with the load-long target.
4277 
4278 // load-long support: pointer register operand restricted to the esi_reg class
4279 operand load_long_RegP() %{
4280   constraint(ALLOC_IN_RC(esi_reg));  // only the esi_reg register class is allowed
4281   match(RegP);
4282   match(eSIRegP);
4283   op_cost(100);
4284   format %{  %}
4285   interface(REG_INTER);
4286 %}
4287 
4288 // Indirect Memory Operand Long: [$reg] where $reg is a load_long_RegP
4289 operand load_long_indirect(load_long_RegP reg) %{
4290   constraint(ALLOC_IN_RC(esi_reg));
4291   match(reg);
4292
4293   format %{ "[$reg]" %}
4294   interface(MEMORY_INTER) %{
4295     base($reg);
4296     index(0x4);  // 0x4: no index
4297     scale(0x0);  // no scale
4298     disp(0x0);   // no displacement
4299   %}
4300 %}
4301 
4302 // Indirect Memory Plus Long Offset Operand (load-long variant): [$reg + $off], $reg a load_long_RegP
4303 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4304   match(AddP reg off);
4305
4306   format %{ "[$reg + $off]" %}
4307   interface(MEMORY_INTER) %{
4308     base($reg);
4309     index(0x4);  // 0x4: no index
4310     scale(0x0);  // no scale
4311     disp($off);
4312   %}
4313 %}
4314 
4315 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long address forms
4316 
4317 
4318 //----------Special Memory Operands--------------------------------------------
4319 // Stack Slot Operand - This operand is used for loading and storing temporary
4320 //                      values on the stack where a match requires a value to
4321 //                      flow through memory.  This variant takes an sRegP slot.
4322 operand stackSlotP(sRegP reg) %{
4323   constraint(ALLOC_IN_RC(stack_slots));
4324   // No match rule because this operand is only generated in matching
4325   format %{ "[$reg]" %}
4326   interface(MEMORY_INTER) %{
4327     base(0x4);   // ESP
4328     index(0x4);  // No Index
4329     scale(0x0);  // No Scale
4330     disp($reg);  // Stack Offset
4331   %}
4332 %}
4333 
4334 operand stackSlotI(sRegI reg) %{  // Stack slot operand for an sRegI: ESP-based address, slot as displacement
4335   constraint(ALLOC_IN_RC(stack_slots));
4336   // No match rule because this operand is only generated in matching
4337   format %{ "[$reg]" %}
4338   interface(MEMORY_INTER) %{
4339     base(0x4);   // ESP
4340     index(0x4);  // No Index
4341     scale(0x0);  // No Scale
4342     disp($reg);  // Stack Offset
4343   %}
4344 %}
4345 
4346 operand stackSlotF(sRegF reg) %{  // Stack slot operand for an sRegF: ESP-based address, slot as displacement
4347   constraint(ALLOC_IN_RC(stack_slots));
4348   // No match rule because this operand is only generated in matching
4349   format %{ "[$reg]" %}
4350   interface(MEMORY_INTER) %{
4351     base(0x4);   // ESP
4352     index(0x4);  // No Index
4353     scale(0x0);  // No Scale
4354     disp($reg);  // Stack Offset
4355   %}
4356 %}
4357 
4358 operand stackSlotD(sRegD reg) %{  // Stack slot operand for an sRegD: ESP-based address, slot as displacement
4359   constraint(ALLOC_IN_RC(stack_slots));
4360   // No match rule because this operand is only generated in matching
4361   format %{ "[$reg]" %}
4362   interface(MEMORY_INTER) %{
4363     base(0x4);   // ESP
4364     index(0x4);  // No Index
4365     scale(0x0);  // No Scale
4366     disp($reg);  // Stack Offset
4367   %}
4368 %}
4369 
4370 operand stackSlotL(sRegL reg) %{  // Stack slot operand for an sRegL: ESP-based address, slot as displacement
4371   constraint(ALLOC_IN_RC(stack_slots));
4372   // No match rule because this operand is only generated in matching
4373   format %{ "[$reg]" %}
4374   interface(MEMORY_INTER) %{
4375     base(0x4);   // ESP
4376     index(0x4);  // No Index
4377     scale(0x0);  // No Scale
4378     disp($reg);  // Stack Offset
4379   %}
4380 %}
4381 
4382 //----------Memory Operands - Win95 Implicit Null Variants----------------
4383 // Indirect Memory Operand, win95-safe variant: [$reg] with the base typed eRegP_no_EBP and op_cost 100
4384 operand indirect_win95_safe(eRegP_no_EBP reg)
4385 %{
4386   constraint(ALLOC_IN_RC(int_reg));
4387   match(reg);
4388
4389   op_cost(100);
4390   format %{ "[$reg]" %}
4391   interface(MEMORY_INTER) %{
4392     base($reg);
4393     index(0x4);  // 0x4: no index
4394     scale(0x0);  // no scale
4395     disp(0x0);   // no displacement
4396   %}
4397 %}
4398 
4399 // Indirect Memory Plus Short Offset Operand, win95-safe variant: base typed eRegP_no_EBP, op_cost 100
4400 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4401 %{
4402   match(AddP reg off);
4403
4404   op_cost(100);
4405   format %{ "[$reg + $off]" %}
4406   interface(MEMORY_INTER) %{
4407     base($reg);
4408     index(0x4);  // 0x4: no index
4409     scale(0x0);  // no scale
4410     disp($off);
4411   %}
4412 %}
4413 
4414 // Indirect Memory Plus Long Offset Operand, win95-safe variant: base typed eRegP_no_EBP, op_cost 100
4415 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4416 %{
4417   match(AddP reg off);
4418
4419   op_cost(100);
4420   format %{ "[$reg + $off]" %}
4421   interface(MEMORY_INTER) %{
4422     base($reg);
4423     index(0x4);  // 0x4: no index
4424     scale(0x0);  // no scale
4425     disp($off);
4426   %}
4427 %}
4428 
4429 // Indirect Memory Plus Index Register Plus Offset Operand, win95-safe variant: base typed eRegP_no_EBP, op_cost 100
4430 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4431 %{
4432   match(AddP (AddP reg ireg) off);
4433
4434   op_cost(100);
4435   format %{"[$reg + $off + $ireg]" %}
4436   interface(MEMORY_INTER) %{
4437     base($reg);
4438     index($ireg);
4439     scale(0x0);   // index is not scaled
4440     disp($off);
4441   %}
4442 %}
4443 
4444 // Indirect Memory Plus Scaled Index Register Operand, win95-safe variant: base typed eRegP_no_EBP, op_cost 100
4445 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4446 %{
4447   match(AddP reg (LShiftI ireg scale));
4448
4449   op_cost(100);
4450   format %{"[$reg + $ireg << $scale]" %}
4451   interface(MEMORY_INTER) %{
4452     base($reg);
4453     index($ireg);
4454     scale($scale);  // shift amount taken from the matched LShiftI
4455     disp(0x0);      // no displacement
4456   %}
4457 %}
4458 
4459 // Indirect Memory Plus Scaled Index Register Plus Offset Operand, win95-safe variant: base typed eRegP_no_EBP, op_cost 100
4460 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4461 %{
4462   match(AddP (AddP reg (LShiftI ireg scale)) off);
4463
4464   op_cost(100);
4465   format %{"[$reg + $off + $ireg << $scale]" %}
4466   interface(MEMORY_INTER) %{
4467     base($reg);
4468     index($ireg);
4469     scale($scale);  // shift amount taken from the matched LShiftI
4470     disp($off);
4471   %}
4472 %}
4473 
4474 //----------Conditional Branch Operands----------------------------------------
4475 // Comparison Op  - This is the operation of the comparison, and is limited to
4476 //                  the following set of codes:
4477 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4478 //
4479 // Other attributes of the comparison, such as unsignedness, are specified
4480 // by the comparison instruction that sets a condition code flags register.
4481 // That result is represented by a flags operand whose subtype is appropriate
4482 // to the unsignedness (etc.) of the comparison.
4483 //
4484 // Later, the instruction which matches both the Comparison Op (a Bool) and
4485 // the flags (produced by the Cmp) specifies the coding of the comparison op
4486 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4487 
4488 // Comparison Code: matches any Bool and pairs each test with a signed-condition code and mnemonic
4489 operand cmpOp() %{
4490   match(Bool);
4491
4492   format %{ "" %}
4493   interface(COND_INTER) %{  // each entry: (encoding value, mnemonic); presumably the x86 condition-code nibble -- confirm
4494     equal(0x4, "e");
4495     not_equal(0x5, "ne");
4496     less(0xC, "l");
4497     greater_equal(0xD, "ge");
4498     less_equal(0xE, "le");
4499     greater(0xF, "g");
4500     overflow(0x0, "o");
4501     no_overflow(0x1, "no");
4502   %}
4503 %}
4504 
4505 // Comparison Code, unsigned compare.  Used by FP also, with
4506 // C2 (unordered) turned into GT or LT already.  The other bits
4507 // C0 and C3 are turned into Carry & Zero flags.
4508 operand cmpOpU() %{
4509   match(Bool);
4510
4511   format %{ "" %}
4512   interface(COND_INTER) %{  // ordering tests use the below/above mnemonics (b, nb, be, nbe)
4513     equal(0x4, "e");
4514     not_equal(0x5, "ne");
4515     less(0x2, "b");
4516     greater_equal(0x3, "nb");
4517     less_equal(0x6, "be");
4518     greater(0x7, "nbe");
4519     overflow(0x0, "o");
4520     no_overflow(0x1, "no");
4521   %}
4522 %}
4523 
4524 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
4525 operand cmpOpUCF() %{
4526   match(Bool);
4527   predicate(n->as_Bool()->_test._test == BoolTest::lt ||  // accept only the four ordering tests
4528             n->as_Bool()->_test._test == BoolTest::ge ||
4529             n->as_Bool()->_test._test == BoolTest::le ||
4530             n->as_Bool()->_test._test == BoolTest::gt);
4531   format %{ "" %}
4532   interface(COND_INTER) %{  // same encodings as cmpOpU
4533     equal(0x4, "e");
4534     not_equal(0x5, "ne");
4535     less(0x2, "b");
4536     greater_equal(0x3, "nb");
4537     less_equal(0x6, "be");
4538     greater(0x7, "nbe");
4539     overflow(0x0, "o");
4540     no_overflow(0x1, "no");
4541   %}
4542 %}
4543 
4544 
4545 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
4546 operand cmpOpUCF2() %{
4547   match(Bool);
4548   predicate(n->as_Bool()->_test._test == BoolTest::ne ||  // accept only the equality tests
4549             n->as_Bool()->_test._test == BoolTest::eq);
4550   format %{ "" %}
4551   interface(COND_INTER) %{  // same encodings as cmpOpU
4552     equal(0x4, "e");
4553     not_equal(0x5, "ne");
4554     less(0x2, "b");
4555     greater_equal(0x3, "nb");
4556     less_equal(0x6, "be");
4557     greater(0x7, "nbe");
4558     overflow(0x0, "o");
4559     no_overflow(0x1, "no");
4560   %}
4561 %}
4562 
4563 // Comparison Code for FP conditional move; overflow/no_overflow tests are rejected by the predicate
4564 operand cmpOp_fcmov() %{
4565   match(Bool);
4566
4567   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4568             n->as_Bool()->_test._test != BoolTest::no_overflow);
4569   format %{ "" %}
4570   interface(COND_INTER) %{  // NOTE(review): values presumably encode the FCMOVcc opcode bytes -- confirm against the encoder
4571     equal        (0x0C8);
4572     not_equal    (0x1C8);
4573     less         (0x0C0);
4574     greater_equal(0x1C0);
4575     less_equal   (0x0D0);
4576     greater      (0x1D0);
4577     overflow(0x0, "o"); // not really supported by the instruction
4578     no_overflow(0x1, "no"); // not really supported by the instruction
4579   %}
4580 %}
4581 
4582 // Comparison Code used in long compares: ordering tests are encoded as their operand-swapped mirror
4583 operand cmpOp_commute() %{
4584   match(Bool);
4585
4586   format %{ "" %}
4587   interface(COND_INTER) %{
4588     equal(0x4, "e");
4589     not_equal(0x5, "ne");
4590     less(0xF, "g");            // less          -> g
4591     greater_equal(0xE, "le");  // greater_equal -> le
4592     less_equal(0xD, "ge");     // less_equal    -> ge
4593     greater(0xC, "l");         // greater       -> l
4594     overflow(0x0, "o");
4595     no_overflow(0x1, "no");
4596   %}
4597 %}
4598 
4599 //----------OPERAND CLASSES----------------------------------------------------
4600 // Operand Classes are groups of operands that are used to simplify
4601 // instruction definitions by not requiring the AD writer to specify separate
4602 // instructions for every form of operand when the instruction accepts
4603 // multiple operand types with the same basic encoding and format.  The classic
4604 // case of this is memory operands.
4605 
4606 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,  // general memory operand class
4607                indIndex, indIndexScale, indIndexScaleOffset);
4608 
4609 // Long memory operations are encoded in 2 instructions and a +4 offset.
4610 // This means some kind of offset is always required and you cannot use
4611 // an oop as the offset (done when working on static globals).
4612 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,  // same list as "memory" minus indOffset32X
4613                     indIndex, indIndexScale, indIndexScaleOffset);
4614 
4615 
4616 //----------PIPELINE-----------------------------------------------------------
4617 // Rules which define the behavior of the target architectures pipeline.
4618 pipeline %{
4619 
4620 //----------ATTRIBUTES---------------------------------------------------------
4621 attributes %{
4622   variable_size_instructions;        // Variable size instructions
4623   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4624   instruction_unit_size = 1;         // An instruction unit is 1 byte long
4625   instruction_fetch_unit_size = 16;  // The processor fetches one line
4626   instruction_fetch_units = 1;       // of 16 bytes
4627
4628   // List of nop instructions
4629   nops( MachNop );
4630 %}
4631 
4632 //----------RESOURCES----------------------------------------------------------
4633 // Resources are the functional units available to the machine
4634 
4635 // Generic P2/P3 pipeline
4636 // 3 decoders (D0, D1, D2), only D0 handles big operands; a "bundle" is the
4637 // limit of 3 instructions decoded per cycle.
4638 // 2 load/store ops per cycle (MS0, MS1), 1 branch unit (BR), 1 FPU,
4639 // 2 ALUs (ALU0, ALU1), only ALU0 handles mul/div instructions.
4640 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4641            MS0, MS1, MEM = MS0 | MS1,
4642            BR, FPU,
4643            ALU0, ALU1, ALU = ALU0 | ALU1 );
4644 
4645 //----------PIPELINE DESCRIPTION-----------------------------------------------
4646 // Pipeline Description specifies the stages in the machine's pipeline
4647 
4648 // Generic P2/P3 pipeline: six stages, S0 through S5
4649 pipe_desc(S0, S1, S2, S3, S4, S5);
4650 
4651 //----------PIPELINE CLASSES---------------------------------------------------
4652 // Pipeline Classes describe the stages in which input and output are
4653 // referenced by the hardware pipeline.
4654 
4655 // Naming convention: ialu or fpu
4656 // Then: _reg
4657 // Then: _reg if there is a 2nd register
4658 // Then: _long if it's a pair of instructions implementing a long
4659 // Then: _fat if it requires the big decoder
4660 //   Or: _mem if it requires the big decoder and a memory unit.
4661 
4662 // Integer ALU reg operation: one instruction that reads and writes the same register
4663 pipe_class ialu_reg(rRegI dst) %{
4664     single_instruction;
4665     dst    : S4(write);  // result written in S4
4666     dst    : S3(read);   // same register read in S3
4667     DECODE : S0;        // any decoder
4668     ALU    : S3;        // any alu
4669 %}
4670 
4671 // Long ALU reg operation: two instructions on a long register pair
4672 pipe_class ialu_reg_long(eRegL dst) %{
4673     instruction_count(2);
4674     dst    : S4(write);  // result written in S4
4675     dst    : S3(read);   // same register read in S3
4676     DECODE : S0(2);     // any 2 decoders
4677     ALU    : S3(2);     // both alus
4678 %}
4679 
4680 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
4681 pipe_class ialu_reg_fat(rRegI dst) %{
4682     single_instruction;
4683     dst    : S4(write);  // result written in S4
4684     dst    : S3(read);   // same register read in S3
4685     D0     : S0;        // big decoder only
4686     ALU    : S3;        // any alu
4687 %}
4688 
4689 // Long ALU reg operation using big decoder: two instructions, both through D0
4690 pipe_class ialu_reg_long_fat(eRegL dst) %{
4691     instruction_count(2);
4692     dst    : S4(write);  // result written in S4
4693     dst    : S3(read);   // same register read in S3
4694     D0     : S0(2);     // big decoder only; twice
4695     ALU    : S3(2);     // any 2 alus
4696 %}
4697 
4698 // Integer ALU reg-reg operation: one instruction, src read in S3, dst written in S4
4699 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4700     single_instruction;
4701     dst    : S4(write);
4702     src    : S3(read);
4703     DECODE : S0;        // any decoder
4704     ALU    : S3;        // any alu
4705 %}
4706 
4707 // Long ALU reg-reg operation: two instructions on long register pairs
4708 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4709     instruction_count(2);
4710     dst    : S4(write);
4711     src    : S3(read);
4712     DECODE : S0(2);     // any 2 decoders
4713     ALU    : S3(2);     // both alus
4714 %}
4715 
4716 // Integer ALU operation using big decoder; note src is declared as a memory operand, and no MEM unit is listed
4717 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4718     single_instruction;
4719     dst    : S4(write);
4720     src    : S3(read);
4721     D0     : S0;        // big decoder only
4722     ALU    : S3;        // any alu
4723 %}
4724 
4725 // Long ALU reg-reg operation using big decoder: two instructions, both through D0
4726 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4727     instruction_count(2);
4728     dst    : S4(write);
4729     src    : S3(read);
4730     D0     : S0(2);     // big decoder only; twice
4731     ALU    : S3(2);     // both alus
4732 %}
4733 
4734 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
4735 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4736     single_instruction;
4737     dst    : S5(write);
4738     mem    : S3(read);
4739     D0     : S0;        // big decoder only
4740     ALU    : S4;        // any alu
4741     MEM    : S3;        // any mem
4742 %}
4743 
4744 // Long ALU reg-mem operation: two instructions; the memory operand is a load_long_memory
4745 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4746     instruction_count(2);
4747     dst    : S5(write);
4748     mem    : S3(read);
4749     D0     : S0(2);     // big decoder only; twice
4750     ALU    : S4(2);     // any 2 alus
4751     MEM    : S3(2);     // both mems
4752 %}
4753 
4754 // Integer mem operation (prefetch): memory operand only, no register result and no ALU
4755 pipe_class ialu_mem(memory mem)
4756 %{
4757     single_instruction;
4758     mem    : S3(read);
4759     D0     : S0;        // big decoder only
4760     MEM    : S3;        // any mem
4761 %}
4762 
4763 // Integer Store to Memory: address read in S3, source register read in S5
4764 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4765     single_instruction;
4766     mem    : S3(read);
4767     src    : S5(read);
4768     D0     : S0;        // big decoder only
4769     ALU    : S4;        // any alu
4770     MEM    : S3;        // any mem
4771 %}
4772 
4773 // Long Store to Memory: two instructions storing a long register pair
4774 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4775     instruction_count(2);
4776     mem    : S3(read);
4777     src    : S5(read);
4778     D0     : S0(2);     // big decoder only; twice
4779     ALU    : S4(2);     // any 2 alus
4780     MEM    : S3(2);     // Both mems
4781 %}
4782 
4783 // Integer Store of an immediate to Memory: only the memory operand is tracked
4784 pipe_class ialu_mem_imm(memory mem) %{
4785     single_instruction;
4786     mem    : S3(read);
4787     D0     : S0;        // big decoder only
4788     ALU    : S4;        // any alu
4789     MEM    : S3;        // any mem
4790 %}
4791 
4792 // Integer ALU0 reg-reg operation: like a reg-reg op but pinned to ALU0 and the big decoder
4793 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4794     single_instruction;
4795     dst    : S4(write);
4796     src    : S3(read);
4797     D0     : S0;        // Big decoder only
4798     ALU0   : S3;        // only alu0
4799 %}
4800 
4801 // Integer ALU0 reg-mem operation: like a reg-mem op but pinned to ALU0
4802 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4803     single_instruction;
4804     dst    : S5(write);
4805     mem    : S3(read);
4806     D0     : S0;        // big decoder only
4807     ALU0   : S4;        // ALU0 only
4808     MEM    : S3;        // any mem
4809 %}
4810 
4811 // Integer ALU reg-reg operation producing flags: two sources read in S3, cr written in S4
4812 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4813     single_instruction;
4814     cr     : S4(write);
4815     src1   : S3(read);
4816     src2   : S3(read);
4817     DECODE : S0;        // any decoder
4818     ALU    : S3;        // any alu
4819 %}
4820 
4821 // Integer ALU reg-imm operation producing flags: one source read in S3, cr written in S4
4822 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4823     single_instruction;
4824     cr     : S4(write);
4825     src1   : S3(read);
4826     DECODE : S0;        // any decoder
4827     ALU    : S3;        // any alu
4828 %}
4829 
4830 // Integer ALU reg-mem operation producing flags: register and memory sources, cr written in S4
4831 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4832     single_instruction;
4833     cr     : S4(write);
4834     src1   : S3(read);
4835     src2   : S3(read);
4836     D0     : S0;        // big decoder only
4837     ALU    : S4;        // any alu
4838     MEM    : S3;        // any mem
4839 %}
4840 
4841 // Four-instruction sequence reading p, q and y; only decoder usage is modeled (no ALU/MEM unit listed)
4842 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4843     instruction_count(4);
4844     y      : S4(read);
4845     q      : S3(read);
4846     p      : S3(read);
4847     DECODE : S0(4);     // any decoder
4848 %}
4849 
4850 // Conditional move reg-reg: src and flags read in S3, dst written in S4
4851 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4852     single_instruction;
4853     dst    : S4(write);
4854     src    : S3(read);
4855     cr     : S3(read);
4856     DECODE : S0;        // any decoder
4857 %}
4858 
4859 // Conditional move reg-mem: memory source and flags read in S3, dst written in S4
4860 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4861     single_instruction;
4862     dst    : S4(write);
4863     src    : S3(read);
4864     cr     : S3(read);
4865     DECODE : S0;        // any decoder
4866     MEM    : S3;        // any mem
4867 %}
4868 
4869 // Conditional move reg-reg long
4870 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4871     single_instruction;  // NOTE(review): declared single_instruction although two decoders are reserved -- confirm intent
4872     dst    : S4(write);
4873     src    : S3(read);
4874     cr     : S3(read);
4875     DECODE : S0(2);     // any 2 decoders
4876 %}
4877 
4878 // Conditional move double reg-reg: dst is a regDPR1, src a regDPR
4879 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4880     single_instruction;
4881     dst    : S4(write);
4882     src    : S3(read);
4883     cr     : S3(read);
4884     DECODE : S0;        // any decoder
4885 %}
4886 
4887 // Float reg operation: two instructions on a single register, which is only marked as read
4888 pipe_class fpu_reg(regDPR dst) %{
4889     instruction_count(2);
4890     dst    : S3(read);
4891     DECODE : S0(2);     // any 2 decoders
4892     FPU    : S3;
4893 %}
4894 
4895 // Float reg-reg operation: two instructions, src read in S3, dst written in S4
4896 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4897     instruction_count(2);
4898     dst    : S4(write);
4899     src    : S3(read);
4900     DECODE : S0(2);     // any 2 decoders
4901     FPU    : S3;
4902 %}
4903 
4904 // Float reg-reg-reg operation: three instructions, two register sources
4905 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4906     instruction_count(3);
4907     dst    : S4(write);
4908     src1   : S3(read);
4909     src2   : S3(read);
4910     DECODE : S0(3);     // any 3 decoders
4911     FPU    : S3(2);
4912 %}
4913 
4914 // Float reg-reg-reg-reg operation: four instructions, three register sources
4915 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4916     instruction_count(4);
4917     dst    : S4(write);
4918     src1   : S3(read);
4919     src2   : S3(read);
4920     src3   : S3(read);
4921     DECODE : S0(4);     // any decoder; usage count 4
4922     FPU    : S3(2);
4923 %}
4924 
4925 // Float reg-mem-reg-reg operation: four instructions, one memory source and two register sources
4926 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4927     instruction_count(4);
4928     dst    : S4(write);
4929     src1   : S3(read);
4930     src2   : S3(read);
4931     src3   : S3(read);
4932     DECODE : S1(3);     // any 3 decoders
4933     D0     : S0;        // Big decoder only
4934     FPU    : S3(2);
4935     MEM    : S3;        // any mem
4936 %}
4937 
4938 // Float reg-mem operation: two instructions, memory read in S3, dst written in S5
4939 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4940     instruction_count(2);
4941     dst    : S5(write);
4942     mem    : S3(read);
4943     D0     : S0;        // big decoder only
4944     DECODE : S1;        // any decoder for FPU POP
4945     FPU    : S4;
4946     MEM    : S3;        // any mem
4947 %}
4948 
4949 // Float reg-reg-mem operation: three instructions, one register and one memory source
4950 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4951     instruction_count(3);
4952     dst    : S5(write);
4953     src1   : S3(read);
4954     mem    : S3(read);
4955     D0     : S0;        // big decoder only
4956     DECODE : S1(2);     // any decoder for FPU POP
4957     FPU    : S4;
4958     MEM    : S3;        // any mem
4959 %}
4960 
4961 // Float mem-reg operation: two instructions, register source read in S5, memory operand read in S3
4962 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4963     instruction_count(2);
4964     src    : S5(read);
4965     mem    : S3(read);
4966     DECODE : S0;        // any decoder for FPU PUSH
4967     D0     : S1;        // big decoder only
4968     FPU    : S4;
4969     MEM    : S3;        // any mem
4970 %}
4971 
4972 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{  // Float mem-reg-reg operation: three instructions
4973     instruction_count(3);
4974     src1   : S3(read);
4975     src2   : S3(read);
4976     mem    : S3(read);
4977     DECODE : S0(2);     // any decoder for FPU PUSH
4978     D0     : S1;        // big decoder only
4979     FPU    : S4;
4980     MEM    : S3;        // any mem
4981 %}
4982 
4983 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{  // Float mem-reg-mem operation: three instructions
4984     instruction_count(3);
4985     src1   : S3(read);
4986     src2   : S3(read);
4987     mem    : S4(read);
4988     DECODE : S0;        // any decoder for FPU PUSH
4989     D0     : S0(2);     // big decoder only
4990     FPU    : S4;
4991     MEM    : S3(2);     // any mem
4992 %}
4993 
4994 pipe_class fpu_mem_mem(memory dst, memory src1) %{  // Float mem-mem operation: two instructions, no FPU unit listed
4995     instruction_count(2);
4996     src1   : S3(read);
4997     dst    : S4(read);   // the destination address operand is marked as read
4998     D0     : S0(2);     // big decoder only
4999     MEM    : S3(2);     // any mem
5000 %}
5001 
5002 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{  // Float mem-mem-mem operation: three instructions
5003     instruction_count(3);
5004     src1   : S3(read);
5005     src2   : S3(read);
5006     dst    : S4(read);   // the destination address operand is marked as read
5007     D0     : S0(3);     // big decoder only
5008     FPU    : S4;
5009     MEM    : S3(3);     // any mem
5010 %}
5011 
5012 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{  // Float mem-reg-constant operation: three instructions
5013     instruction_count(3);
5014     src1   : S4(read);
5015     mem    : S4(read);
5016     DECODE : S0;        // any decoder for FPU PUSH
5017     D0     : S0(2);     // big decoder only
5018     FPU    : S4;
5019     MEM    : S3(2);     // any mem
5020 %}
5021 
5022 // Float load constant: two instructions, dst written in S5, one memory unit used for the load
5023 pipe_class fpu_reg_con(regDPR dst) %{
5024     instruction_count(2);
5025     dst    : S5(write);
5026     D0     : S0;        // big decoder only for the load
5027     DECODE : S1;        // any decoder for FPU POP
5028     FPU    : S4;
5029     MEM    : S3;        // any mem
5030 %}
5031 
5032 // Float reg-reg operation with a loaded constant: three instructions, one register source
5033 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5034     instruction_count(3);
5035     dst    : S5(write);
5036     src    : S3(read);
5037     D0     : S0;        // big decoder only for the load
5038     DECODE : S1(2);     // any decoder for FPU POP
5039     FPU    : S4;
5040     MEM    : S3;        // any mem
5041 %}
5042 
5043 // Unconditional branch: one instruction using the branch unit in S3
5044 pipe_class pipe_jmp( label labl ) %{
5045     single_instruction;
5046     BR   : S3;
5047 %}
5048 
5049 // Conditional branch: flags read in S1, branch unit used in S3
5050 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5051     single_instruction;
5052     cr    : S1(read);
5053     BR    : S3;
5054 %}
5055 
5056 // Allocation idiom: serializing compare-and-exchange with a fixed latency of 6
5057 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5058     instruction_count(1); force_serialization;
5059     fixed_latency(6);
5060     heap_ptr : S3(read);
5061     DECODE   : S0(3);
5062     D0       : S2;
5063     MEM      : S3;
5064     ALU      : S3(2);
5065     dst      : S5(write);
5066     BR       : S5;
5067 %}
5068 
5069 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serializing, fixed latency 100
5070 pipe_class pipe_slow(  ) %{
5071     instruction_count(10); multiple_bundles; force_serialization;
5072     fixed_latency(100);
5073     D0  : S0(2);
5074     MEM : S3(2);
5075 %}
5076 
5077 // The real do-nothing guy: zero instructions and no resources
5078 pipe_class empty( ) %{
5079     instruction_count(0);
5080 %}
5081 
5082 // Define the class for the Nop node: MachNop uses the "empty" pipe_class
5083 define %{
5084    MachNop = empty;
5085 %}
5086 
5087 %}
5088 
5089 //----------INSTRUCTIONS-------------------------------------------------------
5090 //
5091 // match      -- States which machine-independent subtree may be replaced
5092 //               by this instruction.
5093 // ins_cost   -- The estimated cost of this instruction is used by instruction
5094 //               selection to identify a minimum cost tree of machine
5095 //               instructions that matches a tree of machine-independent
5096 //               instructions.
5097 // format     -- A string providing the disassembly for this instruction.
5098 //               The value of an instruction's operand may be inserted
5099 //               by referring to it with a '$' prefix.
5100 // opcode     -- Three instruction opcodes may be provided.  These are referred
5101 //               to within an encode class as $primary, $secondary, and $tertiary
5102 //               respectively.  The primary opcode is commonly used to
5103 //               indicate the type of machine instruction, while secondary
5104 //               and tertiary are often used for prefix options or addressing
5105 //               modes.
5106 // ins_encode -- A list of encode classes with parameters. The encode class
5107 //               name must have been defined in an 'enc_class' specification
5108 //               in the encode section of the architecture description.
5109 
5110 //----------BSWAP-Instruction--------------------------------------------------
5111 instruct bytes_reverse_int(rRegI dst) %{  // Byte-reverse a 32-bit int in place with BSWAP
5112   match(Set dst (ReverseBytesI dst));  // source and destination are the same register
5113
5114   format %{ "BSWAP  $dst" %}
5115   opcode(0x0F, 0xC8);  // primary 0x0F, secondary 0xC8
5116   ins_encode( OpcP, OpcSReg(dst) );  // NOTE(review): presumably emits $primary, then $secondary combined with dst's register number -- confirm in the encode section
5117   ins_pipe( ialu_reg );
5118 %}
5119 
5120 instruct bytes_reverse_long(eRegL dst) %{  // Byte-reverse a 64-bit long held in a register pair, in place
5121   match(Set dst (ReverseBytesL dst));
5122
5123   format %{ "BSWAP  $dst.lo\n\t"
5124             "BSWAP  $dst.hi\n\t"
5125             "XCHG   $dst.lo $dst.hi" %}
5126
5127   ins_cost(125);
5128   ins_encode( bswap_long_bytes(dst) );  // encode class defined elsewhere; per the format: BSWAP each half, then exchange the halves
5129   ins_pipe( ialu_reg_reg);
5130 %}
5131 
5132 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{  // Byte-reverse an unsigned 16-bit value in place
5133   match(Set dst (ReverseBytesUS dst));
5134   effect(KILL cr);  // flags register is declared killed
5135
5136   format %{ "BSWAP  $dst\n\t"
5137             "SHR    $dst,16" %}
5138   ins_encode %{
5139     __ bswapl($dst$$Register);     // reverse all four bytes of the 32-bit register
5140     __ shrl($dst$$Register, 16);   // logical shift: swapped halfword moves to the low 16 bits, upper bits become zero
5141   %}
5142   ins_pipe( ialu_reg );
5143 %}
5144 
5145 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{  // Byte-reverse a signed 16-bit value in place
5146   match(Set dst (ReverseBytesS dst));
5147   effect(KILL cr);  // flags register is declared killed
5148
5149   format %{ "BSWAP  $dst\n\t"
5150             "SAR    $dst,16" %}
5151   ins_encode %{
5152     __ bswapl($dst$$Register);     // reverse all four bytes of the 32-bit register
5153     __ sarl($dst$$Register, 16);   // arithmetic shift: swapped halfword moves to the low 16 bits, sign-extended
5154   %}
5155   ins_pipe( ialu_reg );
5156 %}
5157 
5158 
5159 //---------- Zeros Count Instructions ------------------------------------------
5160 
5161 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
       // CountLeadingZerosI as a single LZCNT. Only selected when
       // UseCountLeadingZerosInstruction is set; the flags are declared killed.
5162   predicate(UseCountLeadingZerosInstruction);
5163   match(Set dst (CountLeadingZerosI src));
5164   effect(KILL cr);
5165
5166   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5167   ins_encode %{
         Register dst_reg = $dst$$Register;
         Register src_reg = $src$$Register;
5168     __ lzcntl(dst_reg, src_reg);
5169   %}
5170   ins_pipe(ialu_reg);
5171 %}
5172 
5173 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
       // CountLeadingZerosI fallback used when UseCountLeadingZerosInstruction
       // is off. The sequence computes (BitsPerInt - 1) - BSR(src); when the
       // branch after BSR is not taken (zero source) dst is forced to -1, so
       // the final result is BitsPerInt.
5174   predicate(!UseCountLeadingZerosInstruction);
5175   match(Set dst (CountLeadingZerosI src));
5176   effect(KILL cr);
5177
5178   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5179             "JNZ    skip\n\t"
5180             "MOV    $dst, -1\n"
5181       "skip:\n\t"
5182             "NEG    $dst\n\t"
5183             "ADD    $dst, 31" %}
5184   ins_encode %{
5185     Register dst_reg = $dst$$Register;
5187     Label skip;
5188     __ bsrl(dst_reg, $src$$Register);
5189     __ jccb(Assembler::notZero, skip);   // a set bit was found
5190     __ movl(dst_reg, -1);                // zero source: pretend index -1
5191     __ bind(skip);
5192     __ negl(dst_reg);
5193     __ addl(dst_reg, BitsPerInt - 1);
5194   %}
5195   ins_pipe(ialu_reg);
5196 %}
5197 
5198 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
       // CountLeadingZerosL with LZCNT on a 32-bit register pair: count in the
       // high word first; only if that LZCNT leaves the carry set (the branch
       // on carryClear is not taken) is the low word counted and BitsPerInt
       // added. dst is a TEMP because it is written before src.lo is read.
5199   predicate(UseCountLeadingZerosInstruction);
5200   match(Set dst (CountLeadingZerosL src));
5201   effect(TEMP dst, KILL cr);
5202
5203   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5204             "JNC    done\n\t"
5205             "LZCNT  $dst, $src.lo\n\t"
5206             "ADD    $dst, 32\n"
5207       "done:" %}
5208   ins_encode %{
5209     Register dst_reg = $dst$$Register;
5210     Register src_lo  = $src$$Register;
         Register src_hi  = HIGH_FROM_LOW(src_lo);
5211     Label done;
5212     __ lzcntl(dst_reg, src_hi);
5213     __ jccb(Assembler::carryClear, done);
5214     __ lzcntl(dst_reg, src_lo);
5215     __ addl(dst_reg, BitsPerInt);
5216     __ bind(done);
5217   %}
5218   ins_pipe(ialu_reg);
5219 %}
5220 
5221 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
       // CountLeadingZerosL fallback used when UseCountLeadingZerosInstruction
       // is off. BSR is tried on the high word; if it reports a set bit the
       // index is biased by BitsPerInt, otherwise BSR is tried on the low word.
       // If both words are zero dst is forced to -1. The tail computes
       // (BitsPerLong - 1) - index, which yields BitsPerLong for a zero input.
       // dst is a TEMP because it is written before src.lo is read.
5222   predicate(!UseCountLeadingZerosInstruction);
5223   match(Set dst (CountLeadingZerosL src));
5224   effect(TEMP dst, KILL cr);
5225
5226   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5227             "JZ     msw_is_zero\n\t"
5228             "ADD    $dst, 32\n\t"
5229             "JMP    not_zero\n"
5230       "msw_is_zero:\n\t"
5231             "BSR    $dst, $src.lo\n\t"
5232             "JNZ    not_zero\n\t"
5233             "MOV    $dst, -1\n"
5234       "not_zero:\n\t"
5235             "NEG    $dst\n\t"
5236             "ADD    $dst, 63" %}
5237   ins_encode %{
5238     Register Rdst = $dst$$Register;
5239     Register Rsrc = $src$$Register;
5240     Label msw_is_zero;
5241     Label not_zero;
5242     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5243     __ jccb(Assembler::zero, msw_is_zero);
5244     __ addl(Rdst, BitsPerInt);          // bit index within the 64-bit value
5245     __ jmpb(not_zero);
5246     __ bind(msw_is_zero);
5247     __ bsrl(Rdst, Rsrc);
5248     __ jccb(Assembler::notZero, not_zero);
5249     __ movl(Rdst, -1);                  // whole long is zero
5250     __ bind(not_zero);
5251     __ negl(Rdst);
5252     __ addl(Rdst, BitsPerLong - 1);
5253   %}
5254   ins_pipe(ialu_reg);
5255 %}
5256 
5257 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
       // CountTrailingZerosI as a single TZCNT. Only selected when
       // UseCountTrailingZerosInstruction is set; the flags are declared killed.
5258   predicate(UseCountTrailingZerosInstruction);
5259   match(Set dst (CountTrailingZerosI src));
5260   effect(KILL cr);
5261
5262   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5263   ins_encode %{
         Register dst_reg = $dst$$Register;
         Register src_reg = $src$$Register;
5264     __ tzcntl(dst_reg, src_reg);
5265   %}
5266   ins_pipe(ialu_reg);
5267 %}
5268 
5269 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
       // CountTrailingZerosI fallback used when
       // UseCountTrailingZerosInstruction is off: BSF gives the index of the
       // lowest set bit; when the branch on notZero is not taken (zero source)
       // the result is set to BitsPerInt.
5270   predicate(!UseCountTrailingZerosInstruction);
5271   match(Set dst (CountTrailingZerosI src));
5272   effect(KILL cr);
5273
5274   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5275             "JNZ    done\n\t"
5276             "MOV    $dst, 32\n"
5277       "done:" %}
5278   ins_encode %{
5279     Register dst_reg = $dst$$Register;
         Register src_reg = $src$$Register;
5280     Label done;
5281     __ bsfl(dst_reg, src_reg);
5282     __ jccb(Assembler::notZero, done);
5283     __ movl(dst_reg, BitsPerInt);
5284     __ bind(done);
5285   %}
5286   ins_pipe(ialu_reg);
5287 %}
5288 
5289 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
       // CountTrailingZerosL with TZCNT on a 32-bit register pair: count in the
       // low word first; only if that TZCNT leaves the carry set (the branch on
       // carryClear is not taken) is the high word counted and BitsPerInt
       // added. dst is a TEMP because it is written before src.hi is read.
5290   predicate(UseCountTrailingZerosInstruction);
5291   match(Set dst (CountTrailingZerosL src));
5292   effect(TEMP dst, KILL cr);
5293
5294   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5295             "JNC    done\n\t"
5296             "TZCNT  $dst, $src.hi\n\t"
5297             "ADD    $dst, 32\n"
5298       "done:" %}
5299   ins_encode %{
5300     Register dst_reg = $dst$$Register;
5301     Register src_lo  = $src$$Register;
         Register src_hi  = HIGH_FROM_LOW(src_lo);
5302     Label done;
5303     __ tzcntl(dst_reg, src_lo);
5304     __ jccb(Assembler::carryClear, done);
5305     __ tzcntl(dst_reg, src_hi);
5306     __ addl(dst_reg, BitsPerInt);
5307     __ bind(done);
5308   %}
5309   ins_pipe(ialu_reg);
5310 %}
5311 
5312 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
       // CountTrailingZerosL fallback used when
       // UseCountTrailingZerosInstruction is off. BSF is tried on the low word;
       // if no bit is found it is tried on the high word and BitsPerInt is
       // added. When both words are zero dst is first set to BitsPerInt, so the
       // shared ADD produces 2 * BitsPerInt.
       // dst is a TEMP because it is written before src.hi is read.
5313   predicate(!UseCountTrailingZerosInstruction);
5314   match(Set dst (CountTrailingZerosL src));
5315   effect(TEMP dst, KILL cr);
5316
5317   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5318             "JNZ    done\n\t"
5319             "BSF    $dst, $src.hi\n\t"
5320             "JNZ    msw_not_zero\n\t"
5321             "MOV    $dst, 32\n"
5322       "msw_not_zero:\n\t"
5323             "ADD    $dst, 32\n"
5324       "done:" %}
5325   ins_encode %{
5326     Register dst_reg = $dst$$Register;
5327     Register src_lo  = $src$$Register;
         Register src_hi  = HIGH_FROM_LOW(src_lo);
5328     Label msw_not_zero;
5329     Label done;
5330     __ bsfl(dst_reg, src_lo);
5331     __ jccb(Assembler::notZero, done);
5332     __ bsfl(dst_reg, src_hi);
5333     __ jccb(Assembler::notZero, msw_not_zero);
5334     __ movl(dst_reg, BitsPerInt);
5335     __ bind(msw_not_zero);
5336     __ addl(dst_reg, BitsPerInt);
5337     __ bind(done);
5338   %}
5339   ins_pipe(ialu_reg);
5340 %}
5341 
5342 
5343 //---------- Population Count Instructions -------------------------------------
5344 
5345 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
       // PopCountI of a register as a single POPCNT; only selected when
       // UsePopCountInstruction is set. The flags are declared killed.
5346   predicate(UsePopCountInstruction);
5347   match(Set dst (PopCountI src));
5348   effect(KILL cr);
5349
5350   format %{ "POPCNT $dst, $src" %}
5351   ins_encode %{
         Register dst_reg = $dst$$Register;
         Register src_reg = $src$$Register;
5352     __ popcntl(dst_reg, src_reg);
5353   %}
5354   ins_pipe(ialu_reg);
5355 %}
5356 
5357 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
       // PopCountI with the LoadI folded into POPCNT's memory operand; only
       // selected when UsePopCountInstruction is set.
5358   predicate(UsePopCountInstruction);
5359   match(Set dst (PopCountI (LoadI mem)));
5360   effect(KILL cr);
5361
5362   format %{ "POPCNT $dst, $mem" %}
5363   ins_encode %{
         Register dst_reg = $dst$$Register;
5364     __ popcntl(dst_reg, $mem$$Address);
5365   %}
5366   ins_pipe(ialu_reg);
5367 %}
5368 
5369 // Note: Long.bitCount(long) returns an int, so the result operand is rRegI.
5370 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
       // PopCountL of a register pair: POPCNT each 32-bit half (low into dst,
       // high into tmp) and add the two counts. dst and tmp are TEMPs because
       // dst is written before src.hi is read.
5371   predicate(UsePopCountInstruction);
5372   match(Set dst (PopCountL src));
5373   effect(KILL cr, TEMP tmp, TEMP dst);
5374
5375   format %{ "POPCNT $dst, $src.lo\n\t"
5376             "POPCNT $tmp, $src.hi\n\t"
5377             "ADD    $dst, $tmp" %}
5378   ins_encode %{
         Register dst_reg = $dst$$Register;
         Register tmp_reg = $tmp$$Register;
         Register src_lo  = $src$$Register;
5379     __ popcntl(dst_reg, src_lo);
5380     __ popcntl(tmp_reg, HIGH_FROM_LOW(src_lo));
5381     __ addl(dst_reg, tmp_reg);
5382   %}
5383   ins_pipe(ialu_reg);
5384 %}
5385 
5386 // Note: Long.bitCount(long) returns an int, so the result operand is rRegI.
5387 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
       // PopCountL with the LoadL folded in: POPCNT the 32-bit word at $mem into
       // dst and the word at $mem+4 into tmp, then add the two counts.
5388   predicate(UsePopCountInstruction);
5389   match(Set dst (PopCountL (LoadL mem)));
5390   effect(KILL cr, TEMP tmp, TEMP dst);
5391
5392   format %{ "POPCNT $dst, $mem\n\t"
5393             "POPCNT $tmp, $mem+4\n\t"
5394             "ADD    $dst, $tmp" %}
5395   ins_encode %{
5396     //__ popcntl($dst$$Register, $mem$$Address$$first);
5397     //__ popcntl($tmp$$Register, $mem$$Address$$second);
         // Build the two word addresses explicitly from the operand's parts.
         Address mem_lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
         Address mem_hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5398     __ popcntl($dst$$Register, mem_lo);
5399     __ popcntl($tmp$$Register, mem_hi);
5400     __ addl($dst$$Register, $tmp$$Register);
5401   %}
5402   ins_pipe(ialu_reg);
5403 %}
5404 
5405 
5406 //----------Load/Store/Move Instructions---------------------------------------
5407 //----------Load Instructions--------------------------------------------------
5408 // Load Byte (8 bit signed): sign-extending byte load into an int register
5409 instruct loadB(xRegI dst, memory mem) %{
5410   match(Set dst (LoadB mem));
5411
5412   ins_cost(125);
5413   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5414
5415   ins_encode %{
         Register dst_reg = $dst$$Register;
5416     __ movsbl(dst_reg, $mem$$Address);
5417   %}
5418
5419   ins_pipe(ialu_reg_mem);
5420 %}
5421 
5422 // Load Byte (8 bit signed) into Long Register: ConvI2L folded into the load
5423 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5424   match(Set dst (ConvI2L (LoadB mem)));
5425   effect(KILL cr);
5426
5427   ins_cost(375);
5428   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5429             "MOV    $dst.hi,$dst.lo\n\t"
5430             "SAR    $dst.hi,7" %}
5431
5432   ins_encode %{
         Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register
5433     __ movsbl(dst_lo, $mem$$Address);
5434     __ movl(dst_hi, dst_lo);
         // The upper 24+1 bits of the low word already equal the sign bit, so a
         // shift by 7 is enough to fill the high word with it.
5435     __ sarl(dst_hi, 7);
5436   %}
5437
5438   ins_pipe(ialu_reg_mem);
5439 %}
5440 
5441 // Load Unsigned Byte (8 bit UNsigned): zero-extending byte load
5442 instruct loadUB(xRegI dst, memory mem) %{
5443   match(Set dst (LoadUB mem));
5444
5445   ins_cost(125);
5446   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5447
5448   ins_encode %{
         Register dst_reg = $dst$$Register;
5449     __ movzbl(dst_reg, $mem$$Address);
5450   %}
5451
5452   ins_pipe(ialu_reg_mem);
5453 %}
5454 
5455 // Load Unsigned Byte (8 bit UNsigned) into Long Register.
     // The high word is cleared with XOR, which is why the flags are killed.
5456 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5457   match(Set dst (ConvI2L (LoadUB mem)));
5458   effect(KILL cr);
5459
5460   ins_cost(250);
5461   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5462             "XOR    $dst.hi,$dst.hi" %}
5463
5464   ins_encode %{
5465     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5466     __ movzbl(dst_lo, $mem$$Address);
5467     __ xorl(dst_hi, dst_hi);
5468   %}
5469
5470   ins_pipe(ialu_reg_mem);
5471 %}
5472 
5473 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register.
     // Only the low 8 bits of the mask can matter after a zero-extending byte
     // load, so the immediate is trimmed with right_n_bits(8).
5474 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5475   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5476   effect(KILL cr);
5477
5478   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5479             "XOR    $dst.hi,$dst.hi\n\t"
5480             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5481   ins_encode %{
5482     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5483     __ movzbl(dst_lo, $mem$$Address);
5484     __ xorl(dst_hi, dst_hi);
5485     __ andl(dst_lo, $mask$$constant & right_n_bits(8));
5486   %}
5487   ins_pipe(ialu_reg_mem);
5488 %}
5489 
5490 // Load Short (16 bit signed): sign-extending word load
5491 instruct loadS(rRegI dst, memory mem) %{
5492   match(Set dst (LoadS mem));
5493
5494   ins_cost(125);
5495   format %{ "MOVSX  $dst,$mem\t# short" %}
5496
5497   ins_encode %{
         Register dst_reg = $dst$$Register;
5498     __ movswl(dst_reg, $mem$$Address);
5499   %}
5500
5501   ins_pipe(ialu_reg_mem);
5502 %}
5503 
5504 // Load Short (16 bit signed) to Byte (8 bit signed).
     // The shift-left/shift-right pair by 24 keeps only the sign-extended low
     // byte, so the whole pattern becomes a sign-extending byte load.
5505 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5506   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5507
5508   ins_cost(125);
5509   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5510   ins_encode %{
         Register dst_reg = $dst$$Register;
5511     __ movsbl(dst_reg, $mem$$Address);
5512   %}
5513   ins_pipe(ialu_reg_mem);
5514 %}
5515 
5516 // Load Short (16 bit signed) into Long Register: ConvI2L folded into the load
5517 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5518   match(Set dst (ConvI2L (LoadS mem)));
5519   effect(KILL cr);
5520
5521   ins_cost(375);
5522   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5523             "MOV    $dst.hi,$dst.lo\n\t"
5524             "SAR    $dst.hi,15" %}
5525
5526   ins_encode %{
         Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register
5527     __ movswl(dst_lo, $mem$$Address);
5528     __ movl(dst_hi, dst_lo);
         // The upper 16+1 bits of the low word already equal the sign bit, so a
         // shift by 15 is enough to fill the high word with it.
5529     __ sarl(dst_hi, 15);
5530   %}
5531
5532   ins_pipe(ialu_reg_mem);
5533 %}
5534 
5535 // Load Unsigned Short/Char (16 bit unsigned): zero-extending word load
5536 instruct loadUS(rRegI dst, memory mem) %{
5537   match(Set dst (LoadUS mem));
5538
5539   ins_cost(125);
5540   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5541
5542   ins_encode %{
         Register dst_reg = $dst$$Register;
5543     __ movzwl(dst_reg, $mem$$Address);
5544   %}
5545
5546   ins_pipe(ialu_reg_mem);
5547 %}
5548 
5549 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
     // The shift-left/shift-right pair by 24 keeps only the sign-extended low
     // byte, so the whole pattern becomes a sign-extending byte load.
5550 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5551   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5552
5553   ins_cost(125);
5554   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5555   ins_encode %{
         Register dst_reg = $dst$$Register;
5556     __ movsbl(dst_reg, $mem$$Address);
5557   %}
5558   ins_pipe(ialu_reg_mem);
5559 %}
5560 
5561 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register.
     // The high word is cleared with XOR, which is why the flags are killed.
5562 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5563   match(Set dst (ConvI2L (LoadUS mem)));
5564   effect(KILL cr);
5565
5566   ins_cost(250);
5567   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5568             "XOR    $dst.hi,$dst.hi" %}
5569
5570   ins_encode %{
         Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5571     __ movzwl(dst_lo, $mem$$Address);
5572     __ xorl(dst_hi, dst_hi);
5573   %}
5574
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register.
     // Masking with 0xFF keeps only the low byte, so a zero-extending byte load
     // of the same address replaces the word load plus AND.
5579 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5580   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5581   effect(KILL cr);
5582
5583   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5584             "XOR    $dst.hi,$dst.hi" %}
5585   ins_encode %{
5586     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5587     __ movzbl(dst_lo, $mem$$Address);
5588     __ xorl(dst_hi, dst_hi);
5589   %}
5590   ins_pipe(ialu_reg_mem);
5591 %}
5592 
5593 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register.
     // Only the low 16 bits of the mask can matter after a zero-extending word
     // load, so the immediate is trimmed with right_n_bits(16).
5594 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5595   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5596   effect(KILL cr);
5597
5598   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5599             "XOR    $dst.hi,$dst.hi\n\t"
5600             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5601   ins_encode %{
5602     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5603     __ movzwl(dst_lo, $mem$$Address);
5604     __ xorl(dst_hi, dst_hi);
5605     __ andl(dst_lo, $mask$$constant & right_n_bits(16));
5606   %}
5607   ins_pipe(ialu_reg_mem);
5608 %}
5609 
5610 // Load Integer: plain 32-bit load
5611 instruct loadI(rRegI dst, memory mem) %{
5612   match(Set dst (LoadI mem));
5613
5614   ins_cost(125);
5615   format %{ "MOV    $dst,$mem\t# int" %}
5616
5617   ins_encode %{
         Register dst_reg = $dst$$Register;
5618     __ movl(dst_reg, $mem$$Address);
5619   %}
5620
5621   ins_pipe(ialu_reg_mem);
5622 %}
5623 
5624 // Load Integer (32 bit signed) to Byte (8 bit signed).
     // The shift-left/shift-right pair by 24 keeps only the sign-extended low
     // byte, so the whole pattern becomes a sign-extending byte load.
5625 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5626   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5627
5628   ins_cost(125);
5629   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5630   ins_encode %{
         Register dst_reg = $dst$$Register;
5631     __ movsbl(dst_reg, $mem$$Address);
5632   %}
5633   ins_pipe(ialu_reg_mem);
5634 %}
5635 
5636 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned).
     // AND with the immI_255 mask keeps only the low byte, so the pattern
     // becomes a zero-extending byte load.
5637 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5638   match(Set dst (AndI (LoadI mem) mask));
5639
5640   ins_cost(125);
5641   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5642   ins_encode %{
         Register dst_reg = $dst$$Register;
5643     __ movzbl(dst_reg, $mem$$Address);
5644   %}
5645   ins_pipe(ialu_reg_mem);
5646 %}
5647 
5648 // Load Integer (32 bit signed) to Short (16 bit signed).
     // The shift-left/shift-right pair by 16 keeps only the sign-extended low
     // word, so the whole pattern becomes a sign-extending word load.
5649 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5650   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5651
5652   ins_cost(125);
5653   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5654   ins_encode %{
         Register dst_reg = $dst$$Register;
5655     __ movswl(dst_reg, $mem$$Address);
5656   %}
5657   ins_pipe(ialu_reg_mem);
5658 %}
5659 
5660 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned).
     // AND with the immI_65535 mask keeps only the low word, so the pattern
     // becomes a zero-extending word load.
5661 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5662   match(Set dst (AndI (LoadI mem) mask));
5663
5664   ins_cost(125);
5665   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5666   ins_encode %{
         Register dst_reg = $dst$$Register;
5667     __ movzwl(dst_reg, $mem$$Address);
5668   %}
5669   ins_pipe(ialu_reg_mem);
5670 %}
5671 
5672 // Load Integer into Long Register: ConvI2L folded into the load
5673 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5674   match(Set dst (ConvI2L (LoadI mem)));
5675   effect(KILL cr);
5676
5677   ins_cost(375);
5678   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5679             "MOV    $dst.hi,$dst.lo\n\t"
5680             "SAR    $dst.hi,31" %}
5681
5682   ins_encode %{
         Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register
5683     __ movl(dst_lo, $mem$$Address);
5684     __ movl(dst_hi, dst_lo);
5685     __ sarl(dst_hi, 31);                      // replicate the sign bit
5686   %}
5687
5688   ins_pipe(ialu_reg_mem);
5689 %}
5690 
5691 // Load Integer with mask 0xFF into Long Register.
     // Only the low byte survives the mask, so a zero-extending byte load plus
     // a cleared high word is sufficient.
5692 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5693   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5694   effect(KILL cr);
5695
5696   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5697             "XOR    $dst.hi,$dst.hi" %}
5698   ins_encode %{
5699     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5700     __ movzbl(dst_lo, $mem$$Address);
5701     __ xorl(dst_hi, dst_hi);
5702   %}
5703   ins_pipe(ialu_reg_mem);
5704 %}
5705 
5706 // Load Integer with mask 0xFFFF into Long Register.
     // Only the low word survives the mask, so a zero-extending word load plus
     // a cleared high word is sufficient.
5707 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5708   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5709   effect(KILL cr);
5710
5711   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5712             "XOR    $dst.hi,$dst.hi" %}
5713   ins_encode %{
5714     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5715     __ movzwl(dst_lo, $mem$$Address);
5716     __ xorl(dst_hi, dst_hi);
5717   %}
5718   ins_pipe(ialu_reg_mem);
5719 %}
5720 
5721 // Load Integer with 31-bit mask into Long Register.
     // The encoding clears the high word instead of sign-extending.
     // NOTE(review): this relies on immU31 only admitting masks with bit 31
     // clear (operand defined outside this view) -- confirm.
5722 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5723   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5724   effect(KILL cr);
5725
5726   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5727             "XOR    $dst.hi,$dst.hi\n\t"
5728             "AND    $dst.lo,$mask" %}
5729   ins_encode %{
5730     Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5731     __ movl(dst_lo, $mem$$Address);
5732     __ xorl(dst_hi, dst_hi);
5733     __ andl(dst_lo, $mask$$constant);
5734   %}
5735   ins_pipe(ialu_reg_mem);
5736 %}
5737 
5738 // Load Unsigned Integer into Long Register.
     // Matches ConvI2L of a loaded int ANDed with the immL_32bits mask, i.e. a
     // zero extension: load the low word and clear the high word.
5739 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5740   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5741   effect(KILL cr);
5742
5743   ins_cost(250);
5744   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5745             "XOR    $dst.hi,$dst.hi" %}
5746
5747   ins_encode %{
         Register dst_lo = $dst$$Register;
         Register dst_hi = HIGH_FROM_LOW(dst_lo);
5748     __ movl(dst_lo, $mem$$Address);
5749     __ xorl(dst_hi, dst_hi);
5750   %}
5751
5752   ins_pipe(ialu_reg_mem);
5753 %}
5754 
5755 // Load Long (non-atomic: two 32-bit loads, low word first).  The address
5756 // must not be clobbered while loading, so the address register is
     // restricted to ESI via the load_long_memory operand.
5757 instruct loadL(eRegL dst, load_long_memory mem) %{
5758   predicate(!((LoadLNode*)n)->require_atomic_access());
5759   match(Set dst (LoadL mem));
5760
5761   ins_cost(250);
5762   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5763             "MOV    $dst.hi,$mem+4" %}
5764
5765   ins_encode %{
         Register dst_lo = $dst$$Register;
5766     Address mem_lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5767     Address mem_hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5768     __ movl(dst_lo, mem_lo);
5769     __ movl(HIGH_FROM_LOW(dst_lo), mem_hi);
5770   %}
5771
5772   ins_pipe(ialu_reg_long_mem);
5773 %}
5774 
5775 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5776 // then store it down to the stack and reload on the int
5777 // side.  Selected only when UseSSE<=1 and the LoadL node requires atomic
     // access; the result is produced in a long stack slot.  The bytes are
     // emitted by the enc_loadL_volatile encode class (defined outside this view).
5778 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5779   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5780   match(Set dst (LoadL mem));
5781
5782   ins_cost(200);
5783   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5784             "FISTp  $dst" %}
5785   ins_encode(enc_loadL_volatile(mem,dst));
5786   ins_pipe( fpu_reg_mem );
5787 %}
5788 
5789 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
       // Atomic long load for UseSSE>=2 with a stack-slot result: one 64-bit
       // load into the XMM temp, then one 64-bit store to the slot at
       // rsp + $dst$$disp.
5790   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5791   match(Set dst (LoadL mem));
5792   effect(TEMP tmp);
5793   ins_cost(180);
5794   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5795             "MOVSD  $dst,$tmp" %}
5796   ins_encode %{
         XMMRegister tmp_xmm = $tmp$$XMMRegister;
5797     __ movdbl(tmp_xmm, $mem$$Address);
5798     __ movdbl(Address(rsp, $dst$$disp), tmp_xmm);
5799   %}
5800   ins_pipe( pipe_slow );
5801 %}
5802 
5803 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
       // Atomic long load for UseSSE>=2 with a register-pair result: one 64-bit
       // load into the XMM temp, MOVD the low 32 bits out, shift the temp right
       // by 32 and MOVD the former high 32 bits out.
5804   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5805   match(Set dst (LoadL mem));
5806   effect(TEMP tmp);
5807   ins_cost(160);
5808   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5809             "MOVD   $dst.lo,$tmp\n\t"
5810             "PSRLQ  $tmp,32\n\t"
5811             "MOVD   $dst.hi,$tmp" %}
5812   ins_encode %{
         XMMRegister tmp_xmm = $tmp$$XMMRegister;
         Register    dst_lo  = $dst$$Register;
5813     __ movdbl(tmp_xmm, $mem$$Address);
5814     __ movdl(dst_lo, tmp_xmm);
5815     __ psrlq(tmp_xmm, 32);
5816     __ movdl(HIGH_FROM_LOW(dst_lo), tmp_xmm);
5817   %}
5818   ins_pipe( pipe_slow );
5819 %}
5820 
5821 // Load Range: matches a LoadRange node and loads it into an int register.
5822 instruct loadRange(rRegI dst, memory mem) %{
5823   match(Set dst (LoadRange mem));
5824
5825   ins_cost(125);
5826   format %{ "MOV    $dst,$mem" %}
       // $primary = 0x8B (MOV r32,r/m32, matching the format text) is consumed
       // by OpcP; RegMem(dst,mem) is an encode class defined outside this view.
5827   opcode(0x8B);
5828   ins_encode( OpcP, RegMem(dst,mem));
5829   ins_pipe( ialu_reg_mem );
5830 %}
5831 
5832 
5833 // Load Pointer: matches a LoadP node and loads it into a pointer register.
5834 instruct loadP(eRegP dst, memory mem) %{
5835   match(Set dst (LoadP mem));
5836
5837   ins_cost(125);
5838   format %{ "MOV    $dst,$mem" %}
       // $primary = 0x8B (MOV r32,r/m32, matching the format text) is consumed
       // by OpcP; RegMem(dst,mem) is an encode class defined outside this view.
5839   opcode(0x8B);
5840   ins_encode( OpcP, RegMem(dst,mem));
5841   ins_pipe( ialu_reg_mem );
5842 %}
5843 
5844 // Load Klass Pointer: matches a LoadKlass node; same encoding as loadP.
5845 instruct loadKlass(eRegP dst, memory mem) %{
5846   match(Set dst (LoadKlass mem));
5847
5848   ins_cost(125);
5849   format %{ "MOV    $dst,$mem" %}
       // $primary = 0x8B (MOV r32,r/m32, matching the format text) is consumed
       // by OpcP; RegMem(dst,mem) is an encode class defined outside this view.
5850   opcode(0x8B);
5851   ins_encode( OpcP, RegMem(dst,mem));
5852   ins_pipe( ialu_reg_mem );
5853 %}
5854 
5855 // Load Double into an x87 register; used only when UseSSE<=1.
5856 instruct loadDPR(regDPR dst, memory mem) %{
5857   predicate(UseSSE<=1);
5858   match(Set dst (LoadD mem));
5859
5860   ins_cost(150);
5861   format %{ "FLD_D  ST,$mem\n\t"
5862             "FSTP   $dst" %}
       // Per the format text: push the 64-bit value onto the FPU stack
       // (opcode DD with /0 as the ModRM opcode extension), then pop it into
       // $dst.  RMopc_Mem and Pop_Reg_DPR are encode classes defined outside
       // this view.
5863   opcode(0xDD);               /* DD /0 */
5864   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5865               Pop_Reg_DPR(dst) );
5866   ins_pipe( fpu_reg_mem );
5867 %}
5868 
5869 // Load Double to XMM; variant selected when UseSSE>=2 and
     // UseXmmLoadAndClearUpper is set.
5870 instruct loadD(regD dst, memory mem) %{
5871   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5872   match(Set dst (LoadD mem));
5873   ins_cost(145);
5874   format %{ "MOVSD  $dst,$mem" %}
5875   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
5876     __ movdbl(dst_xmm, $mem$$Address);
5877   %}
5878   ins_pipe( pipe_slow );
5879 %}
5880 
5881 instruct loadD_partial(regD dst, memory mem) %{
       // Load Double to XMM; variant selected when UseSSE>=2 and
       // UseXmmLoadAndClearUpper is off.  The encode body is the same movdbl
       // call as loadD; only the predicate and the format text differ here.
5882   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5883   match(Set dst (LoadD mem));
5884   ins_cost(145);
5885   format %{ "MOVLPD $dst,$mem" %}
5886   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
5887     __ movdbl(dst_xmm, $mem$$Address);
5888   %}
5889   ins_pipe( pipe_slow );
5890 %}
5891 
5892 // Load Float to XMM register (single-precision floating point),
5893 // MOVSS instruction; used when UseSSE>=1.
5894 instruct loadF(regF dst, memory mem) %{
5895   predicate(UseSSE>=1);
5896   match(Set dst (LoadF mem));
5897   ins_cost(145);
5898   format %{ "MOVSS  $dst,$mem" %}
5899   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
5900     __ movflt(dst_xmm, $mem$$Address);
5901   %}
5902   ins_pipe( pipe_slow );
5903 %}
5904 
5905 // Load Float into an x87 register; used only when UseSSE==0.
5906 instruct loadFPR(regFPR dst, memory mem) %{
5907   predicate(UseSSE==0);
5908   match(Set dst (LoadF mem));
5909
5910   ins_cost(150);
5911   format %{ "FLD_S  ST,$mem\n\t"
5912             "FSTP   $dst" %}
       // Per the format text: push the 32-bit value onto the FPU stack
       // (opcode D9 with /0 as the ModRM opcode extension), then pop it into
       // $dst.  RMopc_Mem and Pop_Reg_FPR are encode classes defined outside
       // this view.
5913   opcode(0xD9);               /* D9 /0 */
5914   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5915               Pop_Reg_FPR(dst) );
5916   ins_pipe( fpu_reg_mem );
5917 %}
5918 
5919 // Load Effective Address.  The match rule is the address operand itself
     // (indOffset8, no Load node): the computed address is put in $dst with LEA.
5920 instruct leaP8(eRegP dst, indOffset8 mem) %{
5921   match(Set dst mem);
5922
5923   ins_cost(110);
5924   format %{ "LEA    $dst,$mem" %}
       // $primary = 0x8D (LEA, matching the format text), consumed by OpcP.
5925   opcode(0x8D);
5926   ins_encode( OpcP, RegMem(dst,mem));
5927   ins_pipe( ialu_reg_reg_fat );
5928 %}
5929 
5930 instruct leaP32(eRegP dst, indOffset32 mem) %{
       // Load Effective Address for an indOffset32 address operand: the match
       // rule is the operand itself and the address is put in $dst with LEA.
5931   match(Set dst mem);
5932
5933   ins_cost(110);
5934   format %{ "LEA    $dst,$mem" %}
       // $primary = 0x8D (LEA, matching the format text), consumed by OpcP.
5935   opcode(0x8D);
5936   ins_encode( OpcP, RegMem(dst,mem));
5937   ins_pipe( ialu_reg_reg_fat );
5938 %}
5939 
5940 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
       // Load Effective Address for an indIndexOffset address operand: the
       // match rule is the operand itself and the address is put in $dst with
       // LEA.
5941   match(Set dst mem);
5942
5943   ins_cost(110);
5944   format %{ "LEA    $dst,$mem" %}
       // $primary = 0x8D (LEA, matching the format text), consumed by OpcP.
5945   opcode(0x8D);
5946   ins_encode( OpcP, RegMem(dst,mem));
5947   ins_pipe( ialu_reg_reg_fat );
5948 %}
5949 
5950 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
       // Load Effective Address for an indIndexScale address operand: the
       // match rule is the operand itself and the address is put in $dst with
       // LEA.
5951   match(Set dst mem);
5952
5953   ins_cost(110);
5954   format %{ "LEA    $dst,$mem" %}
       // $primary = 0x8D (LEA, matching the format text), consumed by OpcP.
5955   opcode(0x8D);
5956   ins_encode( OpcP, RegMem(dst,mem));
5957   ins_pipe( ialu_reg_reg_fat );
5958 %}
5959 
5960 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
       // Load Effective Address for an indIndexScaleOffset address operand: the
       // match rule is the operand itself and the address is put in $dst with
       // LEA.
5961   match(Set dst mem);
5962
5963   ins_cost(110);
5964   format %{ "LEA    $dst,$mem" %}
       // $primary = 0x8D (LEA, matching the format text), consumed by OpcP.
5965   opcode(0x8D);
5966   ins_encode( OpcP, RegMem(dst,mem));
5967   ins_pipe( ialu_reg_reg_fat );
5968 %}
5969 
5970 // Load Constant: materialise an int immediate in a register.  The bytes are
     // produced by the LdImmI encode class (defined outside this view).
5971 instruct loadConI(rRegI dst, immI src) %{
5972   match(Set dst src);
5973
5974   format %{ "MOV    $dst,$src" %}
5975   ins_encode( LdImmI(dst, src) );
5976   ins_pipe( ialu_reg_fat );
5977 %}
5978 
5979 // Load Constant zero: clears the register with XOR $dst,$dst instead of a
     // MOV of an immediate.  XOR writes the flags, hence KILL cr; the lower
     // ins_cost(50) is what distinguishes this rule from loadConI for immI0.
5980 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5981   match(Set dst src);
5982   effect(KILL cr);
5983
5984   ins_cost(50);
5985   format %{ "XOR    $dst,$dst" %}
5986   opcode(0x33);  /* + rd */
5987   ins_encode( OpcP, RegReg( dst, dst ) );
5988   ins_pipe( ialu_reg );
5989 %}
5990 
5991 instruct loadConP(eRegP dst, immP src) %{
       // Load a pointer constant into a pointer register.  opcode() provides
       // $primary = 0xB8 (annotated "+ rd"); the bytes are produced by the
       // LdImmP encode class, which is defined outside this view.
5992   match(Set dst src);
5993
5994   format %{ "MOV    $dst,$src" %}
5995   opcode(0xB8);  /* + rd */
5996   ins_encode( LdImmP(dst, src) );
5997   ins_pipe( ialu_reg_fat );
5998 %}
5999 
6000 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
       // Load a long constant into a register pair, one 32-bit half at a time,
       // through the LdImmL_Lo / LdImmL_Hi encode classes (defined outside this
       // view).
       // NOTE(review): the format shows plain MOVs yet cr is declared killed;
       // presumably the encode classes may emit a flag-writing form for some
       // constants -- confirm in LdImmL_Lo/LdImmL_Hi.
6001   match(Set dst src);
6002   effect(KILL cr);
6003   ins_cost(200);
6004   format %{ "MOV    $dst.lo,$src.lo\n\t"
6005             "MOV    $dst.hi,$src.hi" %}
6006   opcode(0xB8);
6007   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6008   ins_pipe( ialu_reg_long_fat );
6009 %}
6010 
6011 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
       // Load long constant zero: both halves of the register pair are cleared
       // with XOR (opcode 0x33 given as both $primary and $secondary).  XOR
       // writes the flags, hence KILL cr.  Cheaper (150) than loadConL (200).
6012   match(Set dst src);
6013   effect(KILL cr);
6014   ins_cost(150);
6015   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6016             "XOR    $dst.hi,$dst.hi" %}
6017   opcode(0x33,0x33);
6018   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6019   ins_pipe( ialu_reg_long );
6020 %}
6021 
6022 // Load a float constant from the constant table into an x87 register.
     // The instruction usage is guarded by predicate in operand immFPR().
6023 instruct loadConFPR(regFPR dst, immFPR con) %{
6024   match(Set dst con);
6025   ins_cost(125);
6026   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6027             "FSTP   $dst" %}
6028   ins_encode %{
         int dst_fpu = $dst$$reg;
6029     __ fld_s($constantaddress($con));   // push the constant
6030     __ fstp_d(dst_fpu);                 // pop it into the destination
6031   %}
6032   ins_pipe(fpu_reg_con);
6033 %}
6034 
6035 // Load float constant 0.0 into an x87 register using FLDZ.
     // The instruction usage is guarded by predicate in operand immFPR0().
6036 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6037   match(Set dst con);
6038   ins_cost(125);
6039   format %{ "FLDZ   ST\n\t"
6040             "FSTP   $dst" %}
6041   ins_encode %{
         int dst_fpu = $dst$$reg;
6042     __ fldz();                          // push +0.0
6043     __ fstp_d(dst_fpu);                 // pop it into the destination
6044   %}
6045   ins_pipe(fpu_reg_con);
6046 %}
6047 
6048 // Load float constant 1.0 into an x87 register using FLD1.
     // The instruction usage is guarded by predicate in operand immFPR1().
6049 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6050   match(Set dst con);
6051   ins_cost(125);
6052   format %{ "FLD1   ST\n\t"
6053             "FSTP   $dst" %}
6054   ins_encode %{
         int dst_fpu = $dst$$reg;
6055     __ fld1();                          // push 1.0
6056     __ fstp_d(dst_fpu);                 // pop it into the destination
6057   %}
6058   ins_pipe(fpu_reg_con);
6059 %}
6060 
6061 // Load a float constant from the constant table into an XMM register.
     // The instruction usage is guarded by predicate in operand immF().
6062 instruct loadConF(regF dst, immF con) %{
6063   match(Set dst con);
6064   ins_cost(125);
6065   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6066   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
6067     __ movflt(dst_xmm, $constantaddress($con));
6068   %}
6069   ins_pipe(pipe_slow);
6070 %}
6071 
6072 // Load float constant 0.0 into an XMM register by XORing it with itself.
     // The instruction usage is guarded by predicate in operand immF0().
6073 instruct loadConF0(regF dst, immF0 src) %{
6074   match(Set dst src);
6075   ins_cost(100);
6076   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6077   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
6078     __ xorps(dst_xmm, dst_xmm);
6079   %}
6080   ins_pipe(pipe_slow);
6081 %}
6082 
6083 // Load a double constant from the constant table into an x87 register.
     // The instruction usage is guarded by predicate in operand immDPR().
6084 instruct loadConDPR(regDPR dst, immDPR con) %{
6085   match(Set dst con);
6086   ins_cost(125);
6087
6088   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6089             "FSTP   $dst" %}
6090   ins_encode %{
         int dst_fpu = $dst$$reg;
6091     __ fld_d($constantaddress($con));   // push the constant
6092     __ fstp_d(dst_fpu);                 // pop it into the destination
6093   %}
6094   ins_pipe(fpu_reg_con);
6095 %}
6096 
6097 // Load double constant 0.0 into an x87 register using FLDZ.
     // The instruction usage is guarded by predicate in operand immDPR0().
6098 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6099   match(Set dst con);
6100   ins_cost(125);
6101
6102   format %{ "FLDZ   ST\n\t"
6103             "FSTP   $dst" %}
6104   ins_encode %{
         int dst_fpu = $dst$$reg;
6105     __ fldz();                          // push +0.0
6106     __ fstp_d(dst_fpu);                 // pop it into the destination
6107   %}
6108   ins_pipe(fpu_reg_con);
6109 %}
6110 
6111 // Load double constant 1.0 into an x87 register using FLD1.
     // The instruction usage is guarded by predicate in operand immDPR1().
6112 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6113   match(Set dst con);
6114   ins_cost(125);
6115
6116   format %{ "FLD1   ST\n\t"
6117             "FSTP   $dst" %}
6118   ins_encode %{
         int dst_fpu = $dst$$reg;
6119     __ fld1();                          // push 1.0
6120     __ fstp_d(dst_fpu);                 // pop it into the destination
6121   %}
6122   ins_pipe(fpu_reg_con);
6123 %}
6124 
6125 // Load a double constant from the constant table into an XMM register.
     // The instruction usage is guarded by predicate in operand immD().
6126 instruct loadConD(regD dst, immD con) %{
6127   match(Set dst con);
6128   ins_cost(125);
6129   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6130   ins_encode %{
         XMMRegister dst_xmm = $dst$$XMMRegister;
6131     __ movdbl(dst_xmm, $constantaddress($con));
6132   %}
6133   ins_pipe(pipe_slow);
6134 %}
6135 
6136 // Load double 0.0 into an XMM register by XORing the register with itself. The instruction usage is guarded by predicate in operand immD0().
6137 instruct loadConD0(regD dst, immD0 src) %{
6138   match(Set dst src);
6139   ins_cost(100);  // below loadConD's 125
6140   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6141   ins_encode %{
6142     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);  // dst ^ dst: no constant-table operand is referenced
6143   %}
6144   ins_pipe( pipe_slow );
6145 %}
6146 
6147 // Load Stack Slot: copy an int from a stack slot into a general-purpose register.
6148 instruct loadSSI(rRegI dst, stackSlotI src) %{
6149   match(Set dst src);
6150   ins_cost(125);
6151 
6152   format %{ "MOV    $dst,$src" %}
6153   opcode(0x8B);  // MOV r32, r/m32
6154   ins_encode( OpcP, RegMem(dst,src));
6155   ins_pipe( ialu_reg_mem );
6156 %}
6157 
6158 instruct loadSSL(eRegL dst, stackSlotL src) %{  // Load Stack Slot (long): two 32-bit MOVs, low half then high half
6159   match(Set dst src);
6160 
6161   ins_cost(200);
6162   format %{ "MOV    $dst,$src.lo\n\t"
6163             "MOV    $dst+4,$src.hi" %}
6164   opcode(0x8B, 0x8B);  // primary and secondary opcode are both MOV r32, r/m32
6165   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6166   ins_pipe( ialu_mem_long_reg );
6167 %}
6168 
6169 // Load Stack Slot: copy a pointer from a stack slot into a pointer register.
6170 instruct loadSSP(eRegP dst, stackSlotP src) %{
6171   match(Set dst src);
6172   ins_cost(125);
6173 
6174   format %{ "MOV    $dst,$src" %}
6175   opcode(0x8B);  // MOV r32, r/m32
6176   ins_encode( OpcP, RegMem(dst,src));
6177   ins_pipe( ialu_reg_mem );
6178 %}
6179 
6180 // Load Stack Slot: load a float stack slot onto the x87 stack (FLD_S) and pop it into an FPR register.
6181 instruct loadSSF(regFPR dst, stackSlotF src) %{
6182   match(Set dst src);
6183   ins_cost(125);
6184 
6185   format %{ "FLD_S  $src\n\t"
6186             "FSTP   $dst" %}
6187   opcode(0xD9);               /* D9 /0, FLD m32real */
6188   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6189               Pop_Reg_FPR(dst) );
6190   ins_pipe( fpu_reg_mem );
6191 %}
6192 
6193 // Load Stack Slot: load a double stack slot onto the x87 stack (FLD_D) and pop it into a DPR register.
6194 instruct loadSSD(regDPR dst, stackSlotD src) %{
6195   match(Set dst src);
6196   ins_cost(125);
6197 
6198   format %{ "FLD_D  $src\n\t"
6199             "FSTP   $dst" %}
6200   opcode(0xDD);               /* DD /0, FLD m64real */
6201   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6202               Pop_Reg_DPR(dst) );
6203   ins_pipe( fpu_reg_mem );
6204 %}
6205 
6206 // Prefetch instructions for allocation.
6207 // Must be safe to execute with invalid address (cannot fault).
6208 
6209 instruct prefetchAlloc0( memory mem ) %{  // Allocation prefetch that emits no code
6210   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);  // no SSE, and PREFETCHW (AllocatePrefetchInstr==3) not requested
6211   match(PrefetchAllocation mem);
6212   ins_cost(0);
6213   size(0);  // zero bytes, consistent with the empty ins_encode() below
6214   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6215   ins_encode();
6216   ins_pipe(empty);
6217 %}
6218 
6219 instruct prefetchAlloc( memory mem ) %{  // Allocation prefetch using PREFETCHW
6220   predicate(AllocatePrefetchInstr==3);  // selected regardless of UseSSE
6221   match( PrefetchAllocation mem );
6222   ins_cost(100);
6223 
6224   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6225   ins_encode %{
6226     __ prefetchw($mem$$Address);
6227   %}
6228   ins_pipe(ialu_mem);
6229 %}
6230 
6231 instruct prefetchAllocNTA( memory mem ) %{  // Allocation prefetch using PREFETCHNTA
6232   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);  // needs SSE; AllocatePrefetchInstr value 0 selects this form
6233   match(PrefetchAllocation mem);
6234   ins_cost(100);
6235 
6236   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6237   ins_encode %{
6238     __ prefetchnta($mem$$Address);
6239   %}
6240   ins_pipe(ialu_mem);
6241 %}
6242 
6243 instruct prefetchAllocT0( memory mem ) %{  // Allocation prefetch using PREFETCHT0
6244   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);  // needs SSE; AllocatePrefetchInstr value 1 selects this form
6245   match(PrefetchAllocation mem);
6246   ins_cost(100);
6247 
6248   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6249   ins_encode %{
6250     __ prefetcht0($mem$$Address);
6251   %}
6252   ins_pipe(ialu_mem);
6253 %}
6254 
6255 instruct prefetchAllocT2( memory mem ) %{  // Allocation prefetch using PREFETCHT2
6256   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);  // needs SSE; AllocatePrefetchInstr value 2 selects this form
6257   match(PrefetchAllocation mem);
6258   ins_cost(100);
6259 
6260   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6261   ins_encode %{
6262     __ prefetcht2($mem$$Address);
6263   %}
6264   ins_pipe(ialu_mem);
6265 %}
6266 
6267 //----------Store Instructions-------------------------------------------------
6268 
6269 // Store Byte: write the low 8 bits of a register to memory.
6270 instruct storeB(memory mem, xRegI src) %{  // NOTE(review): xRegI presumably limits src to byte-addressable registers; confirm in the operand definition
6271   match(Set mem (StoreB mem src));
6272 
6273   ins_cost(125);
6274   format %{ "MOV8   $mem,$src" %}
6275   opcode(0x88);  // MOV r/m8, r8
6276   ins_encode( OpcP, RegMem( src, mem ) );
6277   ins_pipe( ialu_mem_reg );
6278 %}
6279 
6280 // Store Char/Short: 16-bit store from a register to memory.
6281 instruct storeC(memory mem, rRegI src) %{
6282   match(Set mem (StoreC mem src));
6283 
6284   ins_cost(125);
6285   format %{ "MOV16  $mem,$src" %}
6286   opcode(0x89, 0x66);  // primary 0x89 = MOV r/m, r; secondary 0x66 = operand-size prefix
6287   ins_encode( OpcS, OpcP, RegMem( src, mem ) );  // secondary (the prefix) is listed before the primary opcode
6288   ins_pipe( ialu_mem_reg );
6289 %}
6290 
6291 // Store Integer: 32-bit store from a register to memory.
6292 instruct storeI(memory mem, rRegI src) %{
6293   match(Set mem (StoreI mem src));
6294 
6295   ins_cost(125);
6296   format %{ "MOV    $mem,$src" %}
6297   opcode(0x89);  // MOV r/m32, r32
6298   ins_encode( OpcP, RegMem( src, mem ) );
6299   ins_pipe( ialu_mem_reg );
6300 %}
6301 
6302 // Store Long (non-atomic): written as two separate 32-bit MOVs, low half then high half.
6303 instruct storeL(long_memory mem, eRegL src) %{
6304   predicate(!((StoreLNode*)n)->require_atomic_access());  // stores that require atomic access are matched by the *_volatile forms
6305   match(Set mem (StoreL mem src));
6306 
6307   ins_cost(200);
6308   format %{ "MOV    $mem,$src.lo\n\t"
6309             "MOV    $mem+4,$src.hi" %}
6310   opcode(0x89, 0x89);  // primary and secondary opcode are both MOV r/m32, r32
6311   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6312   ins_pipe( ialu_mem_long_reg );
6313 %}
6314 
6315 // Store Long to Integer: folds the ConvL2I into the store by writing only the register named by $src (the low half, per the format).
6316 instruct storeL2I(memory mem, eRegL src) %{
6317   match(Set mem (StoreI mem (ConvL2I src)));
6318 
6319   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6320   ins_encode %{
6321     __ movl($mem$$Address, $src$$Register);  // single 32-bit store
6322   %}
6323   ins_pipe(ialu_mem_reg);
6324 %}
6325 
6326 // Volatile Store Long.  Must be atomic, so move it into
6327 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6328 // target address before the store (for null-ptr checks)
6329 // so the memory operand is used twice in the encoding.
6330 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6331   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());  // x87 path, used when SSE2 is not available
6332   match(Set mem (StoreL mem src));
6333   effect( KILL cr );  // the probing CMP overwrites the flags
6334   ins_cost(400);
6335   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6336             "FILD   $src\n\t"
6337             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6338   opcode(0x3B);  // CMP r32, r/m32 (the address probe)
6339   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6340   ins_pipe( fpu_reg_mem );
6341 %}
6342 
6343 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{  // Atomic long store from a stack slot via an XMM temp
6344   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6345   match(Set mem (StoreL mem src));
6346   effect( TEMP tmp, KILL cr );  // tmp is scratch; the probing CMP overwrites the flags
6347   ins_cost(380);
6348   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6349             "MOVSD  $tmp,$src\n\t"
6350             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6351   ins_encode %{
6352     __ cmpl(rax, $mem$$Address);                              // probe the target address before the store
6353     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));  // read the 64-bit stack slot at rsp + disp
6354     __ movdbl($mem$$Address, $tmp$$XMMRegister);             // single 64-bit store to the destination
6355   %}
6356   ins_pipe( pipe_slow );
6357 %}
6358 
6359 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{  // Atomic long store from a register pair via XMM temps
6360   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6361   match(Set mem (StoreL mem src));
6362   effect( TEMP tmp2 , TEMP tmp, KILL cr );  // both XMM temps are scratch; the probing CMP overwrites the flags
6363   ins_cost(360);
6364   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6365             "MOVD   $tmp,$src.lo\n\t"
6366             "MOVD   $tmp2,$src.hi\n\t"
6367             "PUNPCKLDQ $tmp,$tmp2\n\t"
6368             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6369   ins_encode %{
6370     __ cmpl(rax, $mem$$Address);                                  // probe the target address before the store
6371     __ movdl($tmp$$XMMRegister, $src$$Register);                  // low 32 bits -> tmp
6372     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));  // high 32 bits -> tmp2
6373     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);          // interleave the two dwords into one 64-bit value in tmp
6374     __ movdbl($mem$$Address, $tmp$$XMMRegister);                  // single 64-bit store to the destination
6375   %}
6376   ins_pipe( pipe_slow );
6377 %}
6378 
6379 // Store Pointer; for storing unknown oops and raw pointers (32-bit register-to-memory MOV).
6380 instruct storeP(memory mem, anyRegP src) %{
6381   match(Set mem (StoreP mem src));
6382 
6383   ins_cost(125);
6384   format %{ "MOV    $mem,$src" %}
6385   opcode(0x89);  // MOV r/m32, r32
6386   ins_encode( OpcP, RegMem( src, mem ) );
6387   ins_pipe( ialu_mem_reg );
6388 %}
6389 
6390 // Store Integer Immediate: write a 32-bit immediate directly to memory.
6391 instruct storeImmI(memory mem, immI src) %{
6392   match(Set mem (StoreI mem src));
6393 
6394   ins_cost(150);
6395   format %{ "MOV    $mem,$src" %}
6396   opcode(0xC7);               /* C7 /0 */
6397   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));  // opcode, ModRM with /0 extension, then the 32-bit immediate
6398   ins_pipe( ialu_mem_imm );
6399 %}
6400 
6401 // Store Short/Char Immediate: write a 16-bit immediate directly to memory.
6402 instruct storeImmI16(memory mem, immI16 src) %{
6403   predicate(UseStoreImmI16);  // only selected when the UseStoreImmI16 flag is set
6404   match(Set mem (StoreC mem src));
6405 
6406   ins_cost(150);
6407   format %{ "MOV16  $mem,$src" %}
6408   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6409   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));  // size prefix first, 16-bit immediate last
6410   ins_pipe( ialu_mem_imm );
6411 %}
6412 
6413 // Store Pointer Immediate; null pointers or constant oops that do not
6414 // need card-mark barriers. Encoded like storeImmI: C7 /0 followed by a 32-bit immediate.
6415 instruct storeImmP(memory mem, immP src) %{
6416   match(Set mem (StoreP mem src));
6417 
6418   ins_cost(150);
6419   format %{ "MOV    $mem,$src" %}
6420   opcode(0xC7);               /* C7 /0 */
6421   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6422   ins_pipe( ialu_mem_imm );
6423 %}
6424 
6425 // Store Byte Immediate: write an 8-bit immediate directly to memory.
6426 instruct storeImmB(memory mem, immI8 src) %{
6427   match(Set mem (StoreB mem src));
6428 
6429   ins_cost(150);
6430   format %{ "MOV8   $mem,$src" %}
6431   opcode(0xC6);               /* C6 /0 */
6432   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));  // NOTE(review): presumably emits the 8-bit form since src is immI8; confirm in Con8or32
6433   ins_pipe( ialu_mem_imm );
6434 %}
6435 
6436 // Store CMS card-mark Immediate: same byte-immediate encoding as storeImmB, but matches the StoreCM node.
6437 instruct storeImmCM(memory mem, immI8 src) %{
6438   match(Set mem (StoreCM mem src));
6439 
6440   ins_cost(150);
6441   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6442   opcode(0xC6);               /* C6 /0 */
6443   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6444   ins_pipe( ialu_mem_imm );
6445 %}
6446 
6447 // Store Double from the x87 register class regDPR1 to memory (FST_D).
6448 instruct storeDPR( memory mem, regDPR1 src) %{
6449   predicate(UseSSE<=1);  // x87 doubles are only used when SSE2 is not in use
6450   match(Set mem (StoreD mem src));
6451 
6452   ins_cost(100);
6453   format %{ "FST_D  $mem,$src" %}
6454   opcode(0xDD);       /* DD /2 */
6455   ins_encode( enc_FPR_store(mem,src) );
6456   ins_pipe( fpu_mem_reg );
6457 %}
6458 
6459 // Store double does rounding on x86, so a RoundDouble feeding a StoreD is folded into the plain FST_D store.
6460 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6461   predicate(UseSSE<=1);
6462   match(Set mem (StoreD mem (RoundDouble src)));  // same encoding as storeDPR; only the matched tree differs
6463 
6464   ins_cost(100);
6465   format %{ "FST_D  $mem,$src\t# round" %}
6466   opcode(0xDD);       /* DD /2 */
6467   ins_encode( enc_FPR_store(mem,src) );
6468   ins_pipe( fpu_mem_reg );
6469 %}
6470 
6471 // Store XMM register to memory (double-precision floating points)
6472 // MOVSD instruction; selected only when UseSSE>=2.
6473 instruct storeD(memory mem, regD src) %{
6474   predicate(UseSSE>=2);
6475   match(Set mem (StoreD mem src));
6476   ins_cost(95);  // below the 100 used by the x87 storeDPR form
6477   format %{ "MOVSD  $mem,$src" %}
6478   ins_encode %{
6479     __ movdbl($mem$$Address, $src$$XMMRegister);
6480   %}
6481   ins_pipe( pipe_slow );
6482 %}
6483 
6484 // Store XMM register to memory (single-precision floating point)
6485 // MOVSS instruction; selected only when UseSSE>=1.
6486 instruct storeF(memory mem, regF src) %{
6487   predicate(UseSSE>=1);
6488   match(Set mem (StoreF mem src));
6489   ins_cost(95);  // below the 100 used by the x87 storeFPR form
6490   format %{ "MOVSS  $mem,$src" %}
6491   ins_encode %{
6492     __ movflt($mem$$Address, $src$$XMMRegister);
6493   %}
6494   ins_pipe( pipe_slow );
6495 %}
6496 
6497 // Store Float from the x87 register class regFPR1 to memory (FST_S).
6498 instruct storeFPR( memory mem, regFPR1 src) %{
6499   predicate(UseSSE==0);  // x87 floats are only used when SSE is off entirely
6500   match(Set mem (StoreF mem src));
6501 
6502   ins_cost(100);
6503   format %{ "FST_S  $mem,$src" %}
6504   opcode(0xD9);       /* D9 /2 */
6505   ins_encode( enc_FPR_store(mem,src) );
6506   ins_pipe( fpu_mem_reg );
6507 %}
6508 
6509 // Store Float does rounding on x86, so a RoundFloat feeding a StoreF is folded into the plain FST_S store.
6510 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6511   predicate(UseSSE==0);
6512   match(Set mem (StoreF mem (RoundFloat src)));  // same encoding as storeFPR; only the matched tree differs
6513 
6514   ins_cost(100);
6515   format %{ "FST_S  $mem,$src\t# round" %}
6516   opcode(0xD9);       /* D9 /2 */
6517   ins_encode( enc_FPR_store(mem,src) );
6518   ins_pipe( fpu_mem_reg );
6519 %}
6520 
6521 // Store Float does rounding on x86, so a ConvD2F of an x87 double feeding a StoreF is folded into one FST_S store.
6522 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6523   predicate(UseSSE<=1);  // source is an x87 double (regDPR1), hence UseSSE<=1 rather than ==0
6524   match(Set mem (StoreF mem (ConvD2F src)));
6525 
6526   ins_cost(100);
6527   format %{ "FST_S  $mem,$src\t# D-round" %}
6528   opcode(0xD9);       /* D9 /2 */
6529   ins_encode( enc_FPR_store(mem,src) );
6530   ins_pipe( fpu_mem_reg );
6531 %}
6532 
6533 // Store immediate Float value (it is faster than store from FPU register): the float's bit pattern is written with an integer MOV.
6534 // The instruction usage is guarded by predicate in operand immFPR().
6535 instruct storeFPR_imm( memory mem, immFPR src) %{
6536   match(Set mem (StoreF mem src));
6537 
6538   ins_cost(50);  // lower than the register-source StoreF forms (95/100)
6539   format %{ "MOV    $mem,$src\t# store float" %}
6540   opcode(0xC7);               /* C7 /0 */
6541   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6542   ins_pipe( ialu_mem_imm );
6543 %}
6544 
6545 // Store immediate Float value (it is faster than store from XMM register): the float's bit pattern is written with an integer MOV.
6546 // The instruction usage is guarded by predicate in operand immF().
6547 instruct storeF_imm( memory mem, immF src) %{
6548   match(Set mem (StoreF mem src));
6549 
6550   ins_cost(50);  // lower than the register-source StoreF forms (95/100)
6551   format %{ "MOV    $mem,$src\t# store float" %}
6552   opcode(0xC7);               /* C7 /0 */
6553   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6554   ins_pipe( ialu_mem_imm );
6555 %}
6556 
6557 // Store Integer to stack slot: copy an int register into a stack slot.
6558 instruct storeSSI(stackSlotI dst, rRegI src) %{
6559   match(Set dst src);
6560 
6561   ins_cost(100);
6562   format %{ "MOV    $dst,$src" %}
6563   opcode(0x89);  // MOV r/m32, r32
6564   ins_encode( OpcPRegSS( dst, src ) );
6565   ins_pipe( ialu_mem_reg );
6566 %}
6567 
6568 // Store Pointer to stack slot: copy a pointer register into a stack slot.
6569 instruct storeSSP(stackSlotP dst, eRegP src) %{
6570   match(Set dst src);
6571 
6572   ins_cost(100);
6573   format %{ "MOV    $dst,$src" %}
6574   opcode(0x89);  // MOV r/m32, r32
6575   ins_encode( OpcPRegSS( dst, src ) );
6576   ins_pipe( ialu_mem_reg );
6577 %}
6578 
6579 // Store Long to stack slot: two 32-bit MOVs, low half then high half.
6580 instruct storeSSL(stackSlotL dst, eRegL src) %{
6581   match(Set dst src);
6582 
6583   ins_cost(200);
6584   format %{ "MOV    $dst,$src.lo\n\t"
6585             "MOV    $dst+4,$src.hi" %}
6586   opcode(0x89, 0x89);  // primary and secondary opcode are both MOV r/m32, r32
6587   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6588   ins_pipe( ialu_mem_long_reg );
6589 %}
6590 
6591 //----------MemBar Instructions-----------------------------------------------
6592 // Memory barrier flavors
6593 
6594 instruct membar_acquire() %{  // Acquire barrier: matched but emits no code
6595   match(MemBarAcquire);
6596   match(LoadFence);  // LoadFence nodes are handled by the same empty encoding
6597   ins_cost(400);
6598 
6599   size(0);  // zero bytes, consistent with the empty ins_encode() below
6600   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6601   ins_encode();
6602   ins_pipe(empty);
6603 %}
6604 
6605 instruct membar_acquire_lock() %{  // Acquire barrier after a lock: emits no code (see format text for the rationale)
6606   match(MemBarAcquireLock);
6607   ins_cost(0);
6608 
6609   size(0);  // zero bytes, consistent with the empty ins_encode() below
6610   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6611   ins_encode( );
6612   ins_pipe(empty);
6613 %}
6614 
6615 instruct membar_release() %{  // Release barrier: matched but emits no code
6616   match(MemBarRelease);
6617   match(StoreFence);  // StoreFence nodes are handled by the same empty encoding
6618   ins_cost(400);
6619 
6620   size(0);  // zero bytes, consistent with the empty ins_encode() below
6621   format %{ "MEMBAR-release ! (empty encoding)" %}
6622   ins_encode( );
6623   ins_pipe(empty);
6624 %}
6625 
6626 instruct membar_release_lock() %{  // Release barrier before an unlock: emits no code (see format text for the rationale)
6627   match(MemBarReleaseLock);
6628   ins_cost(0);
6629 
6630   size(0);  // zero bytes, consistent with the empty ins_encode() below
6631   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6632   ins_encode( );
6633   ins_pipe(empty);
6634 %}
6635 
6636 instruct membar_volatile(eFlagsReg cr) %{  // Volatile (StoreLoad) barrier: the only barrier form in this section that emits code
6637   match(MemBarVolatile);
6638   effect(KILL cr);  // the format shows a LOCK ADDL on MP systems, which overwrites the flags
6639   ins_cost(400);
6640 
6641   format %{
6642     $$template
6643     if (os::is_MP()) {
6644       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6645     } else {
6646       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6647     }
6648   %}
6649   ins_encode %{
6650     __ membar(Assembler::StoreLoad);  // NOTE(review): the assembler presumably emits nothing on uniprocessors, matching the format; confirm in membar()
6651   %}
6652   ins_pipe(pipe_slow);
6653 %}
6654 
6655 instruct unnecessary_membar_volatile() %{  // Volatile barrier that can be dropped: emits no code
6656   match(MemBarVolatile);
6657   predicate(Matcher::post_store_load_barrier(n));  // only when the matcher reports the barrier is redundant for node n
6658   ins_cost(0);  // cost 0 versus membar_volatile's 400, so this form wins whenever the predicate holds
6659 
6660   size(0);
6661   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6662   ins_encode( );
6663   ins_pipe(empty);
6664 %}
6665 
6666 instruct membar_storestore() %{  // StoreStore barrier: matched but emits no code
6667   match(MemBarStoreStore);
6668   ins_cost(0);
6669 
6670   size(0);  // zero bytes, consistent with the empty ins_encode() below
6671   format %{ "MEMBAR-storestore (empty encoding)" %}
6672   ins_encode( );
6673   ins_pipe(empty);
6674 %}
6675 
6676 //----------Move Instructions--------------------------------------------------
6677 instruct castX2P(eAXRegP dst, eAXRegI src) %{  // Reinterpret an int as a pointer; NOTE(review): both operand classes look EAX-only, which would explain the empty encoding; confirm
6678   match(Set dst (CastX2P src));
6679   format %{ "# X2P  $dst, $src" %}
6680   ins_encode( /*empty encoding*/ );
6681   ins_cost(0);
6682   ins_pipe(empty);
6683 %}
6684 
6685 instruct castP2X(rRegI dst, eRegP src ) %{  // Reinterpret a pointer as an int by copying it into an int register
6686   match(Set dst (CastP2X src));
6687   ins_cost(50);
6688   format %{ "MOV    $dst, $src\t# CastP2X" %}
6689   ins_encode( enc_Copy( dst, src) );  // register-to-register copy (see format)
6690   ins_pipe( ialu_reg_reg );
6691 %}
6692 
6693 //----------Conditional Move---------------------------------------------------
6694 // Conditional move (signed compare) for CPUs without CMOV: emulated with a short branch over a MOV.
6695 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6696   predicate(!VM_Version::supports_cmov() );  // fallback only; the cmovI_* forms are used when CMOV exists
6697   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6698   ins_cost(200);
6699   format %{ "J$cop,us skip\t# signed cmove\n\t"
6700             "MOV    $dst,$src\n"
6701       "skip:" %}
6702   ins_encode %{
6703     Label Lskip;
6704     // Invert sense of branch from sense of CMOV. NOTE(review): the format above prints J$cop, not the inverted condition that is actually emitted.
6705     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);  // flipping the low bit of the condition code negates it
6706     __ movl($dst$$Register, $src$$Register);                   // executed only when the original condition holds
6707     __ bind(Lskip);
6708   %}
6709   ins_pipe( pipe_cmov_reg );
6710 %}
6711 
6712 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{  // Unsigned-compare variant of jmovI_reg (branch-over-MOV emulation)
6713   predicate(!VM_Version::supports_cmov() );  // fallback only; the cmovI_* forms are used when CMOV exists
6714   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6715   ins_cost(200);
6716   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6717             "MOV    $dst,$src\n"
6718       "skip:" %}
6719   ins_encode %{
6720     Label Lskip;
6721     // Invert sense of branch from sense of CMOV. NOTE(review): the format above prints J$cop, not the inverted condition that is actually emitted.
6722     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);  // flipping the low bit of the condition code negates it
6723     __ movl($dst$$Register, $src$$Register);                   // executed only when the original condition holds
6724     __ bind(Lskip);
6725   %}
6726   ins_pipe( pipe_cmov_reg );
6727 %}
6728 
6729 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{  // Int conditional move, signed compare, register source
6730   predicate(VM_Version::supports_cmov() );
6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both the "else" input and the result
6732   ins_cost(200);
6733   format %{ "CMOV$cop $dst,$src" %}
6734   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6735   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6736   ins_pipe( pipe_cmov_reg );
6737 %}
6738 
6739 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{  // Int conditional move, unsigned compare, register source
6740   predicate(VM_Version::supports_cmov() );
6741   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both the "else" input and the result
6742   ins_cost(200);
6743   format %{ "CMOV$cop $dst,$src" %}
6744   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6745   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6746   ins_pipe( pipe_cmov_reg );
6747 %}
6748 
6749 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{  // Same match as cmovI_regU but for the UCF operand/flag classes
6750   predicate(VM_Version::supports_cmov() );
6751   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6752   ins_cost(200);
6753   expand %{
6754     cmovI_regU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
6755   %}
6756 %}
6757 
6758 // Conditional move (signed compare) with the source loaded directly from memory.
6759 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6760   predicate(VM_Version::supports_cmov() );
6761   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));  // the LoadI is folded into the CMOV's memory operand
6762   ins_cost(250);  // 50 more than the register-source form
6763   format %{ "CMOV$cop $dst,$src" %}
6764   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6765   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6766   ins_pipe( pipe_cmov_mem );
6767 %}
6768 
6769 // Conditional move (unsigned compare) with the source loaded directly from memory.
6770 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6771   predicate(VM_Version::supports_cmov() );
6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));  // the LoadI is folded into the CMOV's memory operand
6773   ins_cost(250);  // 50 more than the register-source form
6774   format %{ "CMOV$cop $dst,$src" %}
6775   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6776   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6777   ins_pipe( pipe_cmov_mem );
6778 %}
6779 
6780 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{  // Same match as cmovI_memU but for the UCF operand/flag classes
6781   predicate(VM_Version::supports_cmov() );
6782   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6783   ins_cost(250);
6784   expand %{
6785     cmovI_memU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
6786   %}
6787 %}
6788 
6789 // Conditional move of a pointer (signed compare), register source; requires CMOV support.
6790 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6791   predicate(VM_Version::supports_cmov() );
6792   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6793   ins_cost(200);  // cheaper than the branch-based cmovP_reg_nonP6 (300)
6794   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6795   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6796   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6797   ins_pipe( pipe_cmov_reg );
6798 %}
6799 
6800 // Conditional move (non-P6 version)
6801 // Note:  a CMoveP is generated for  stubs and native wrappers
6802 //        regardless of whether we are on a P6, so we
6803 //        emulate a cmov here
6804 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{  // no predicate: always available as a fallback
6805   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6806   ins_cost(300);  // higher than cmovP_reg (200), so the real CMOV is preferred when its predicate holds
6807   format %{ "Jn$cop   skip\n\t"
6808           "MOV    $dst,$src\t# pointer\n"
6809       "skip:" %}
6810   opcode(0x8b);  // MOV r32, r/m32
6811   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));  // NOTE(review): 0x2 is presumably the branch displacement, i.e. the 2-byte MOV that follows; confirm in enc_cmov_branch
6812   ins_pipe( pipe_cmov_reg );
6813 %}
6814 
6815 // Conditional move of a pointer (unsigned compare), register source; requires CMOV support.
6816 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6817   predicate(VM_Version::supports_cmov() );
6818   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6819   ins_cost(200);
6820   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6821   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
6822   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6823   ins_pipe( pipe_cmov_reg );
6824 %}
6825 
6826 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{  // Same match as cmovP_regU but for the UCF operand/flag classes
6827   predicate(VM_Version::supports_cmov() );
6828   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6829   ins_cost(200);
6830   expand %{
6831     cmovP_regU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
6832   %}
6833 %}
6834 
6835 // DISABLED: Requires the ADLC to emit a bottom_type call that
6836 // correctly meets the two pointer arguments; one is an incoming
6837 // register but the other is a memory operand.  ALSO appears to
6838 // be buggy with implicit null checks.
6839 //
6840 //// Conditional move
6841 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6842 //  predicate(VM_Version::supports_cmov() );
6843 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6844 //  ins_cost(250);
6845 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6846 //  opcode(0x0F,0x40);
6847 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6848 //  ins_pipe( pipe_cmov_mem );
6849 //%}
6850 //
6851 //// Conditional move
6852 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6853 //  predicate(VM_Version::supports_cmov() );
6854 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6855 //  ins_cost(250);
6856 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6857 //  opcode(0x0F,0x40);
6858 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6859 //  ins_pipe( pipe_cmov_mem );
6860 //%}
6861 
6862 // Conditional move of an x87 double on an unsigned-flags compare, using FCMOVcc; dst is in class regDPR1.
6863 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6864   predicate(UseSSE<=1);  // x87 doubles are only used when SSE2 is not in use
6865   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6866   ins_cost(200);
6867   format %{ "FCMOV$cop $dst,$src\t# double" %}
6868   opcode(0xDA);
6869   ins_encode( enc_cmov_dpr(cop,src) );
6870   ins_pipe( pipe_cmovDPR_reg );
6871 %}
6872 
6873 // Conditional move of an x87 float on an unsigned-flags compare, using FCMOVcc; shares the enc_cmov_dpr encoding with the double form.
6874 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6875   predicate(UseSSE==0);  // x87 floats are only used when SSE is off entirely
6876   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6877   ins_cost(200);
6878   format %{ "FCMOV$cop $dst,$src\t# float" %}
6879   opcode(0xDA);
6880   ins_encode( enc_cmov_dpr(cop,src) );
6881   ins_pipe( pipe_cmovDPR_reg );
6882 %}
6883 
6884 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned, so the signed x87 double case is emulated with a branch over a register move.
6885 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6886   predicate(UseSSE<=1);
6887   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6888   ins_cost(200);
6889   format %{ "Jn$cop   skip\n\t"
6890             "MOV    $dst,$src\t# double\n"
6891       "skip:" %}
6892   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6893   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );  // NOTE(review): 0x4 is presumably the byte length of the skipped push + store-pop pair; confirm in enc_cmov_branch
6894   ins_pipe( pipe_cmovDPR_reg );
6895 %}
6896 
6897 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned, so the signed x87 float case is emulated with a branch over a register move.
6898 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6899   predicate(UseSSE==0);
6900   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6901   ins_cost(200);
6902   format %{ "Jn$cop    skip\n\t"
6903             "MOV    $dst,$src\t# float\n"
6904       "skip:" %}
6905   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6906   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );  // NOTE(review): 0x4 is presumably the byte length of the skipped push + store-pop pair; confirm in enc_cmov_branch
6907   ins_pipe( pipe_cmovDPR_reg );
6908 %}
6909 
6910 // No CMOVE with SSE/SSE2: float conditional move (signed compare) on XMM registers is a short branch over a MOVSS.
6911 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6912   predicate (UseSSE>=1);
6913   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6914   ins_cost(200);
6915   format %{ "Jn$cop   skip\n\t"
6916             "MOVSS  $dst,$src\t# float\n"
6917       "skip:" %}
6918   ins_encode %{
6919     Label skip;
6920     // Invert sense of branch from sense of CMOV
6921     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);  // flipping the low bit of the condition code negates it
6922     __ movflt($dst$$XMMRegister, $src$$XMMRegister);          // executed only when the original condition holds
6923     __ bind(skip);
6924   %}
6925   ins_pipe( pipe_slow );
6926 %}
6927 
6928 // No CMOVE with SSE/SSE2: double conditional move (signed compare) on XMM registers is a short branch over a MOVSD.
6929 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6930   predicate (UseSSE>=2);
6931   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6932   ins_cost(200);
6933   format %{ "Jn$cop   skip\n\t"
6934             "MOVSD  $dst,$src\t# double\n"
6935       "skip:" %}
6936   ins_encode %{
6937     Label skip;
6938     // Invert sense of branch from sense of CMOV
6939     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);  // flipping the low bit of the condition code negates it
6940     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);          // executed only when the original condition holds
6941     __ bind(skip);
6942   %}
6943   ins_pipe( pipe_slow );
6944 %}
6945 
6946 // unsigned version: float conditional move on XMM registers for unsigned-flags compares (short branch over a MOVSS).
6947 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6948   predicate (UseSSE>=1);
6949   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6950   ins_cost(200);
6951   format %{ "Jn$cop   skip\n\t"
6952             "MOVSS  $dst,$src\t# float\n"
6953       "skip:" %}
6954   ins_encode %{
6955     Label skip;
6956     // Invert sense of branch from sense of CMOV
6957     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);  // flipping the low bit of the condition code negates it
6958     __ movflt($dst$$XMMRegister, $src$$XMMRegister);          // executed only when the original condition holds
6959     __ bind(skip);
6960   %}
6961   ins_pipe( pipe_slow );
6962 %}
6963 
6964 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{  // Same match as fcmovF_regU but for the UCF operand/flag classes
6965   predicate (UseSSE>=1);
6966   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6967   ins_cost(200);
6968   expand %{
6969     fcmovF_regU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
6970   %}
6971 %}
6972 
6973 // unsigned version: double conditional move on XMM registers for unsigned-flags compares (short branch over a MOVSD).
6974 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6975   predicate (UseSSE>=2);
6976   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6977   ins_cost(200);
6978   format %{ "Jn$cop   skip\n\t"
6979             "MOVSD  $dst,$src\t# double\n"
6980       "skip:" %}
6981   ins_encode %{
6982     Label skip;
6983     // Invert sense of branch from sense of CMOV
6984     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);  // flipping the low bit of the condition code negates it
6985     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);          // executed only when the original condition holds
6986     __ bind(skip);
6987   %}
6988   ins_pipe( pipe_slow );
6989 %}
6990 
6991 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{  // Same match as fcmovD_regU but for the UCF operand/flag classes
6992   predicate (UseSSE>=2);
6993   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6994   ins_cost(200);
6995   expand %{
6996     fcmovD_regU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
6997   %}
6998 %}
6999 
7000 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{  // Long conditional move (signed compare): one CMOV per 32-bit half
7001   predicate(VM_Version::supports_cmov() );
7002   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7005             "CMOV$cop $dst.hi,$src.hi" %}
7006   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
7007   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );  // same condition applied to the low and then the high half
7008   ins_pipe( pipe_cmov_reg_long );
7009 %}
7010 
7011 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{  // Long conditional move (unsigned compare): one CMOV per 32-bit half
7012   predicate(VM_Version::supports_cmov() );
7013   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7014   ins_cost(200);
7015   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7016             "CMOV$cop $dst.hi,$src.hi" %}
7017   opcode(0x0F,0x40);  // 0F 40+cc is the CMOVcc opcode family
7018   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );  // same condition applied to the low and then the high half
7019   ins_pipe( pipe_cmov_reg_long );
7020 %}
7021 
7022 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{  // Same match as cmovL_regU but for the UCF operand/flag classes
7023   predicate(VM_Version::supports_cmov() );
7024   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7025   ins_cost(200);
7026   expand %{
7027     cmovL_regU(cop, cr, dst, src);  // no encoding of its own: expands to the unsigned form
7028   %}
7029 %}
7030 
7031 //----------Arithmetic Instructions--------------------------------------------
7032 //----------Addition Instructions----------------------------------------------
7033 
7034 // Integer Addition Instructions
7035 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{  // Int add, register += register
7036   match(Set dst (AddI dst src));
7037   effect(KILL cr);  // ADD overwrites the flags
7038 
7039   size(2);  // opcode byte + ModRM byte
7040   format %{ "ADD    $dst,$src" %}
7041   opcode(0x03);  // ADD r32, r/m32
7042   ins_encode( OpcP, RegReg( dst, src) );
7043   ins_pipe( ialu_reg_reg );
7044 %}
7045 
7046 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{  // Int add, register += immediate
7047   match(Set dst (AddI dst src));
7048   effect(KILL cr);  // ADD overwrites the flags
7049 
7050   format %{ "ADD    $dst,$src" %}
7051   opcode(0x81, 0x00); /* /0 id */
7052   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // NOTE(review): presumably picks the sign-extended 8-bit form when src fits in a byte; confirm in OpcSErm/Con8or32
7053   ins_pipe( ialu_reg );
7054 %}
7055 
7056 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{  // Int add of the constant matched by immI1, emitted as INC
7057   predicate(UseIncDec);  // only when the UseIncDec flag is set
7058   match(Set dst (AddI dst src));
7059   effect(KILL cr);  // INC modifies the flags
7060 
7061   size(1);  // single-byte instruction
7062   format %{ "INC    $dst" %}
7063   opcode(0x40); /*  */
7064   ins_encode( Opc_plus( primary, dst ) );  // the register number is combined with the primary opcode (0x40 + reg is INC r32)
7065   ins_pipe( ialu_reg );
7066 %}
7067 
7068 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{  // Int add of register + immediate into a different register, via LEA
7069   match(Set dst (AddI src0 src1));  // three-operand form: dst need not equal src0; no eFlagsReg operand or KILL effect is declared
7070   ins_cost(110);
7071 
7072   format %{ "LEA    $dst,[$src0 + $src1]" %}
7073   opcode(0x8D); /* 0x8D /r */
7074   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7075   ins_pipe( ialu_reg_reg );
7076 %}
7077 
7078 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{  // Pointer + immediate offset into a different register, via LEA
7079   match(Set dst (AddP src0 src1));  // three-operand form: dst need not equal src0; no eFlagsReg operand or KILL effect is declared
7080   ins_cost(110);
7081 
7082   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7083   opcode(0x8D); /* 0x8D /r */
7084   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7085   ins_pipe( ialu_reg_reg );
7086 %}
7087 
7088 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{  // Int decrement: matches dst + (immI_M1 constant) and emits DEC.
7089   predicate(UseIncDec);  // only eligible when the UseIncDec flag is set
7090   match(Set dst (AddI dst src));
7091   effect(KILL cr);  // the flags register does not survive this instruction
7092 
7093   size(1);  // fixed one-byte encoding
7094   format %{ "DEC    $dst" %}
7095   opcode(0x48); /*  */
7096   ins_encode( Opc_plus( primary, dst ) );  // NOTE(review): presumably emits 0x48 + the register number of dst - confirm in enc_class Opc_plus
7097   ins_pipe( ialu_reg );
7098 %}
7099 
7100 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{  // Pointer plus int offset, register form: dst = dst + src.
7101   match(Set dst (AddP dst src));
7102   effect(KILL cr);  // the flags register does not survive this instruction
7103 
7104   size(2);  // fixed two-byte encoding
7105   format %{ "ADD    $dst,$src" %}
7106   opcode(0x03);  // same opcode byte as the int rule addI_eReg
7107   ins_encode( OpcP, RegReg( dst, src) );
7108   ins_pipe( ialu_reg_reg );
7109 %}
7110 
7111 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{  // Pointer plus immediate offset, in place: dst = dst + src.
7112   match(Set dst (AddP dst src));
7113   effect(KILL cr);  // the flags register does not survive this instruction
7114 
7115   format %{ "ADD    $dst,$src" %}
7116   opcode(0x81,0x00); /* Opcode 81 /0 id */
7117   // ins_encode( RegImm( dst, src) );
7118   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // same encoder pair as the int rule addI_eReg_imm
7119   ins_pipe( ialu_reg );
7120 %}
7121 
7122 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{  // Int add with the right operand loaded from memory: dst = dst + [src].
7123   match(Set dst (AddI dst (LoadI src)));  // the LoadI is folded into the ADD's memory operand
7124   effect(KILL cr);  // the flags register does not survive this instruction
7125 
7126   ins_cost(125);
7127   format %{ "ADD    $dst,$src" %}
7128   opcode(0x03);
7129   ins_encode( OpcP, RegMem( dst, src) );
7130   ins_pipe( ialu_reg_mem );
7131 %}
7132 
7133 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{  // Read-modify-write int add: [dst] = [dst] + src.
7134   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load, add and store on the same address collapse into one instruction
7135   effect(KILL cr);  // the flags register does not survive this instruction
7136 
7137   ins_cost(150);
7138   format %{ "ADD    $dst,$src" %}
7139   opcode(0x01);  /* Opcode 01 /r */
7140   ins_encode( OpcP, RegMem( src, dst ) );  // note the argument order: register first, memory operand second
7141   ins_pipe( ialu_mem_reg );
7142 %}
7143 
7144 // Add Memory with Immediate
7145 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{  // Read-modify-write int add of an immediate: [dst] = [dst] + src.
7146   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7147   effect(KILL cr);  // the flags register does not survive this instruction
7148 
7149   ins_cost(125);
7150   format %{ "ADD    $dst,$src" %}
7151   opcode(0x81);               /* Opcode 81 /0 id */
7152   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );  // 0x00 is the /0 opcode extension named in the comment above
7153   ins_pipe( ialu_mem_imm );
7154 %}
7155 
7156 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{  // Read-modify-write increment: matches [dst] + (immI1 constant) and emits INC on memory. Unlike incI_eReg, no UseIncDec predicate is declared.
7157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7158   effect(KILL cr);  // the flags register does not survive this instruction
7159 
7160   ins_cost(125);
7161   format %{ "INC    $dst" %}
7162   opcode(0xFF);               /* Opcode FF /0 */
7163   ins_encode( OpcP, RMopc_Mem(0x00,dst));  // 0x00 is the /0 opcode extension
7164   ins_pipe( ialu_mem_imm );
7165 %}
7166 
7167 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{  // Read-modify-write decrement: matches [dst] + (immI_M1 constant) and emits DEC on memory. Unlike decI_eReg, no UseIncDec predicate is declared.
7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7169   effect(KILL cr);  // the flags register does not survive this instruction
7170 
7171   ins_cost(125);
7172   format %{ "DEC    $dst" %}
7173   opcode(0xFF);               /* Opcode FF /1 */
7174   ins_encode( OpcP, RMopc_Mem(0x01,dst));  // 0x01 is the /1 opcode extension
7175   ins_pipe( ialu_mem_imm );
7176 %}
7177 
7178 
7179 instruct checkCastPP( eRegP dst ) %{  // CheckCastPP produces no machine code: dst is both input and output.
7180   match(Set dst (CheckCastPP dst));
7181 
7182   size(0);  // zero bytes emitted
7183   format %{ "#checkcastPP of $dst" %}
7184   ins_encode( /*empty encoding*/ );
7185   ins_pipe( empty );
7186 %}
7187 
7188 instruct castPP( eRegP dst ) %{  // CastPP produces no machine code: dst is both input and output.
7189   match(Set dst (CastPP dst));
7190   format %{ "#castPP of $dst" %}
7191   ins_encode( /*empty encoding*/ );
7192   ins_pipe( empty );
7193 %}
7194 
7195 instruct castII( rRegI dst ) %{  // CastII produces no machine code: dst is both input and output.
7196   match(Set dst (CastII dst));
7197   format %{ "#castII of $dst" %}
7198   ins_encode( /*empty encoding*/ );
7199   ins_cost(0);  // explicitly free for the matcher's cost model
7200   ins_pipe( empty );
7201 %}
7202 
7203 
7204 // Load-locked - same as a regular pointer load when used with compare-swap
7205 instruct loadPLocked(eRegP dst, memory mem) %{  // LoadPLocked is emitted as an ordinary pointer-sized MOV from memory.
7206   match(Set dst (LoadPLocked mem));
7207 
7208   ins_cost(125);
7209   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7210   opcode(0x8B);  // x86 MOV r32, r/m32
7211   ins_encode( OpcP, RegMem(dst,mem));
7212   ins_pipe( ialu_reg_mem );
7213 %}
7214 
7215 // Conditional-store of the updated heap-top.
7216 // Used during allocation of the shared heap.
7217 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7218 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{  // Pointer conditional store; the result is the flags register (Set cr), not a value register.
7219   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7220   // EAX is killed if there is contention, but then it's also unused.
7221   // In the common case of no contention, EAX holds the new oop address.
7222   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7223   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );  // 0F B1 is CMPXCHG r/m32, r32; oldval is pinned to EAX by its operand class
7224   ins_pipe( pipe_cmpxchg );
7225 %}
7226 
7227 // Conditional-store of an int value.
7228 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7229 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{  // Int conditional store; the result is the flags register (Set cr).
7230   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7231   effect(KILL oldval);  // EAX is declared clobbered here, unlike storePConditional above
7232   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7233   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );  // 0F B1 is CMPXCHG r/m32, r32
7234   ins_pipe( pipe_cmpxchg );
7235 %}
7236 
7237 // Conditional-store of a long value.
7238 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7239 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{  // Long conditional store via CMPXCHG8; the result is the flags register (Set cr).
7240   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7241   effect(KILL oldval);  // the oldval register pair is declared clobbered
7242   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7243             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7244             "XCHG   EBX,ECX"
7245   %}
7246   ins_encode %{
7247     // Note: we need to swap rbx, and rcx before and after the
7248     //       cmpxchg8 instruction because the instruction uses
7249     //       rcx as the high order word of the new value to store but
7250     //       our register encoding uses rbx.
7251     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7252     if( os::is_MP() )  // LOCK prefix is only emitted when os::is_MP() is true
7253       __ lock();
7254     __ cmpxchg8($mem$$Address);
7255     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));  // undo the swap so newval is back in its allocated halves
7256   %}
7257   ins_pipe( pipe_cmpxchg );
7258 %}
7259 
7260 // No flag versions for CompareAndSwap{B,S,I,L,P} because matcher can't match them
7261 
7262 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{  // Long compare-and-swap yielding a boolean in res (1 = swapped, 0 = not, per the format below).
7263   predicate(VM_Version::supports_cx8());  // only eligible when the CPU reports cx8 support
7264   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7265   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));  // the weak variant shares this rule
7266   effect(KILL cr, KILL oldval);
7267   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7268             "MOV    $res,0\n\t"
7269             "JNE,s  fail\n\t"
7270             "MOV    $res,1\n"
7271           "fail:" %}
7272   ins_encode( enc_cmpxchg8(mem_ptr),
7273               enc_flags_ne_to_boolean(res) );
7274   ins_pipe( pipe_cmpxchg );
7275 %}
7276 
7277 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{  // Pointer compare-and-swap yielding a boolean in res (1 = swapped, 0 = not, per the format below).
7278   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7279   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));  // the weak variant shares this rule
7280   effect(KILL cr, KILL oldval);
7281   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7282             "MOV    $res,0\n\t"
7283             "JNE,s  fail\n\t"
7284             "MOV    $res,1\n"
7285           "fail:" %}
7286   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );  // same encoder pair as compareAndSwapI
7287   ins_pipe( pipe_cmpxchg );
7288 %}
7289 
7290 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{  // Byte compare-and-swap yielding a boolean in res (1 = swapped, 0 = not, per the format below).
7291   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7292   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));  // the weak variant shares this rule
7293   effect(KILL cr, KILL oldval);
7294   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7295             "MOV    $res,0\n\t"
7296             "JNE,s  fail\n\t"
7297             "MOV    $res,1\n"
7298           "fail:" %}
7299   ins_encode( enc_cmpxchgb(mem_ptr),
7300               enc_flags_ne_to_boolean(res) );
7301   ins_pipe( pipe_cmpxchg );
7302 %}
7303 
7304 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{  // Short compare-and-swap yielding a boolean in res (1 = swapped, 0 = not, per the format below).
7305   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7306   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));  // the weak variant shares this rule
7307   effect(KILL cr, KILL oldval);
7308   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7309             "MOV    $res,0\n\t"
7310             "JNE,s  fail\n\t"
7311             "MOV    $res,1\n"
7312           "fail:" %}
7313   ins_encode( enc_cmpxchgw(mem_ptr),
7314               enc_flags_ne_to_boolean(res) );
7315   ins_pipe( pipe_cmpxchg );
7316 %}
7317 
7318 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{  // Int compare-and-swap yielding a boolean in res (1 = swapped, 0 = not, per the format below).
7319   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7320   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));  // the weak variant shares this rule
7321   effect(KILL cr, KILL oldval);
7322   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7323             "MOV    $res,0\n\t"
7324             "JNE,s  fail\n\t"
7325             "MOV    $res,1\n"
7326           "fail:" %}
7327   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7328   ins_pipe( pipe_cmpxchg );
7329 %}
7330 
7331 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{  // Long compare-and-exchange: the result is delivered in the oldval register pair (Set oldval), so no boolean is materialized.
7332   predicate(VM_Version::supports_cx8());  // only eligible when the CPU reports cx8 support
7333   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7334   effect(KILL cr);
7335   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7336   ins_encode( enc_cmpxchg8(mem_ptr) );  // same encoder as compareAndSwapL, without the flags-to-boolean step
7337   ins_pipe( pipe_cmpxchg );
7338 %}
7339 
7340 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{  // Pointer compare-and-exchange: the result is delivered in oldval (Set oldval), so no boolean is materialized.
7341   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7342   effect(KILL cr);
7343   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7344   ins_encode( enc_cmpxchg(mem_ptr) );  // same encoder as compareAndSwapP, without the flags-to-boolean step
7345   ins_pipe( pipe_cmpxchg );
7346 %}
7347 
7348 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{  // Byte compare-and-exchange: the result is delivered in oldval (Set oldval), so no boolean is materialized.
7349   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7350   effect(KILL cr);
7351   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7352   ins_encode( enc_cmpxchgb(mem_ptr) );  // same encoder as compareAndSwapB, without the flags-to-boolean step
7353   ins_pipe( pipe_cmpxchg );
7354 %}
7355 
7356 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{  // Short compare-and-exchange: the result is delivered in oldval (Set oldval), so no boolean is materialized.
7357   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7358   effect(KILL cr);
7359   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7360   ins_encode( enc_cmpxchgw(mem_ptr) );  // same encoder as compareAndSwapS, without the flags-to-boolean step
7361   ins_pipe( pipe_cmpxchg );
7362 %}
7363 
7364 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{  // Int compare-and-exchange: the result is delivered in oldval (Set oldval), so no boolean is materialized.
7365   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7366   effect(KILL cr);
7367   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7368   ins_encode( enc_cmpxchg(mem_ptr) );  // same encoder as compareAndSwapI, without the flags-to-boolean step
7369   ins_pipe( pipe_cmpxchg );
7370 %}
7371 
7372 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{  // Byte get-and-add whose fetched value is unused: emitted as an ADD of an immediate to memory.
7373   predicate(n->as_LoadStore()->result_not_used());  // only eligible when nothing consumes the node's result
7374   match(Set dummy (GetAndAddB mem add));  // dummy stands in for the unused result
7375   effect(KILL cr);
7376   format %{ "ADDB  [$mem],$add" %}
7377   ins_encode %{
7378     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7379     __ addb($mem$$Address, $add$$constant);
7380   %}
7381   ins_pipe( pipe_cmpxchg );
7382 %}
7383 
7384 // Important to match to xRegI: only 8-bit regs.
7385 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{  // Byte get-and-add via XADD: newval supplies the addend and is also the rule's result register.
7386   match(Set newval (GetAndAddB mem newval));
7387   effect(KILL cr);
7388   format %{ "XADDB  [$mem],$newval" %}
7389   ins_encode %{
7390     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7391     __ xaddb($mem$$Address, $newval$$Register);
7392   %}
7393   ins_pipe( pipe_cmpxchg );
7394 %}
7395 
7396 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{  // Short get-and-add whose fetched value is unused: emitted as an ADD of an immediate to memory.
7397   predicate(n->as_LoadStore()->result_not_used());  // only eligible when nothing consumes the node's result
7398   match(Set dummy (GetAndAddS mem add));  // dummy stands in for the unused result
7399   effect(KILL cr);
7400   format %{ "ADDS  [$mem],$add" %}
7401   ins_encode %{
7402     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7403     __ addw($mem$$Address, $add$$constant);  // the assembler call is addw even though the format string prints ADDS
7404   %}
7405   ins_pipe( pipe_cmpxchg );
7406 %}
7407 
7408 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{  // Short get-and-add via XADD: newval supplies the addend and is also the rule's result register.
7409   match(Set newval (GetAndAddS mem newval));
7410   effect(KILL cr);
7411   format %{ "XADDS  [$mem],$newval" %}
7412   ins_encode %{
7413     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7414     __ xaddw($mem$$Address, $newval$$Register);  // the assembler call is xaddw even though the format string prints XADDS
7415   %}
7416   ins_pipe( pipe_cmpxchg );
7417 %}
7418 
7419 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{  // Int get-and-add whose fetched value is unused: emitted as an ADD of an immediate to memory.
7420   predicate(n->as_LoadStore()->result_not_used());  // only eligible when nothing consumes the node's result
7421   match(Set dummy (GetAndAddI mem add));  // dummy stands in for the unused result
7422   effect(KILL cr);
7423   format %{ "ADDL  [$mem],$add" %}
7424   ins_encode %{
7425     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7426     __ addl($mem$$Address, $add$$constant);
7427   %}
7428   ins_pipe( pipe_cmpxchg );
7429 %}
7430 
7431 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{  // Int get-and-add via XADD: newval supplies the addend and is also the rule's result register.
7432   match(Set newval (GetAndAddI mem newval));
7433   effect(KILL cr);
7434   format %{ "XADDL  [$mem],$newval" %}
7435   ins_encode %{
7436     if (os::is_MP()) { __ lock(); }  // LOCK prefix is only emitted when os::is_MP() is true
7437     __ xaddl($mem$$Address, $newval$$Register);
7438   %}
7439   ins_pipe( pipe_cmpxchg );
7440 %}
7441 
7442 // Important to match to xRegI: only 8-bit regs.
7443 instruct xchgB( memory mem, xRegI newval) %{  // Byte get-and-set via XCHG: newval goes in and is also the rule's result register. No flags operand is declared.
7444   match(Set newval (GetAndSetB mem newval));
7445   format %{ "XCHGB  $newval,[$mem]" %}
7446   ins_encode %{
7447     __ xchgb($newval$$Register, $mem$$Address);  // no explicit lock() call here, unlike the xadd rules
7448   %}
7449   ins_pipe( pipe_cmpxchg );
7450 %}
7451 
7452 instruct xchgS( memory mem, rRegI newval) %{  // Short get-and-set via XCHG: newval goes in and is also the rule's result register. No flags operand is declared.
7453   match(Set newval (GetAndSetS mem newval));
7454   format %{ "XCHGW  $newval,[$mem]" %}
7455   ins_encode %{
7456     __ xchgw($newval$$Register, $mem$$Address);  // no explicit lock() call here, unlike the xadd rules
7457   %}
7458   ins_pipe( pipe_cmpxchg );
7459 %}
7460 
7461 instruct xchgI( memory mem, rRegI newval) %{  // Int get-and-set via XCHG: newval goes in and is also the rule's result register. No flags operand is declared.
7462   match(Set newval (GetAndSetI mem newval));
7463   format %{ "XCHGL  $newval,[$mem]" %}
7464   ins_encode %{
7465     __ xchgl($newval$$Register, $mem$$Address);  // no explicit lock() call here, unlike the xadd rules
7466   %}
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
7470 instruct xchgP( memory mem, pRegP newval) %{  // Pointer get-and-set via XCHG: newval goes in and is also the rule's result register. No flags operand is declared.
7471   match(Set newval (GetAndSetP mem newval));
7472   format %{ "XCHGL  $newval,[$mem]" %}
7473   ins_encode %{
7474     __ xchgl($newval$$Register, $mem$$Address);  // same 32-bit exchange call as xchgI
7475   %}
7476   ins_pipe( pipe_cmpxchg );
7477 %}
7478 
7479 //----------Subtraction Instructions-------------------------------------------
7480 
7481 // Integer Subtraction Instructions
7482 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{  // Int subtract, register form: dst = dst - src.
7483   match(Set dst (SubI dst src));
7484   effect(KILL cr);  // the flags register does not survive this instruction
7485 
7486   size(2);  // fixed two-byte encoding
7487   format %{ "SUB    $dst,$src" %}
7488   opcode(0x2B);  // x86 SUB r32, r/m32
7489   ins_encode( OpcP, RegReg( dst, src) );
7490   ins_pipe( ialu_reg_reg );
7491 %}
7492 
7493 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{  // Int subtract of an immediate: dst = dst - src.
7494   match(Set dst (SubI dst src));
7495   effect(KILL cr);  // the flags register does not survive this instruction
7496 
7497   format %{ "SUB    $dst,$src" %}
7498   opcode(0x81,0x05);  /* Opcode 81 /5 */
7499   // ins_encode( RegImm( dst, src) );
7500   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // same encoder pair as addI_eReg_imm; only the /5 extension differs
7501   ins_pipe( ialu_reg );
7502 %}
7503 
7504 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{  // Int subtract with the right operand loaded from memory: dst = dst - [src].
7505   match(Set dst (SubI dst (LoadI src)));  // the LoadI is folded into the SUB's memory operand
7506   effect(KILL cr);  // the flags register does not survive this instruction
7507 
7508   ins_cost(125);
7509   format %{ "SUB    $dst,$src" %}
7510   opcode(0x2B);
7511   ins_encode( OpcP, RegMem( dst, src) );
7512   ins_pipe( ialu_reg_mem );
7513 %}
7514 
7515 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{  // Read-modify-write int subtract: [dst] = [dst] - src.
7516   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load, subtract and store on the same address collapse into one instruction
7517   effect(KILL cr);  // the flags register does not survive this instruction
7518 
7519   ins_cost(150);
7520   format %{ "SUB    $dst,$src" %}
7521   opcode(0x29);  /* Opcode 29 /r */
7522   ins_encode( OpcP, RegMem( src, dst ) );  // note the argument order: register first, memory operand second
7523   ins_pipe( ialu_mem_reg );
7524 %}
7525 
7526 // Subtract from a pointer
7527 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{  // Pointer minus int: recognizes dst + (0 - src) and emits a single SUB.
7528   match(Set dst (AddP dst (SubI zero src)));  // zero is matched but not referenced by the encoding
7529   effect(KILL cr);  // the flags register does not survive this instruction
7530 
7531   size(2);  // fixed two-byte encoding
7532   format %{ "SUB    $dst,$src" %}
7533   opcode(0x2B);
7534   ins_encode( OpcP, RegReg( dst, src) );
7535   ins_pipe( ialu_reg_reg );
7536 %}
7537 
7538 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{  // Int negate: recognizes 0 - dst and emits NEG.
7539   match(Set dst (SubI zero dst));  // zero is matched but not referenced by the encoding
7540   effect(KILL cr);  // the flags register does not survive this instruction
7541 
7542   size(2);  // fixed two-byte encoding
7543   format %{ "NEG    $dst" %}
7544   opcode(0xF7,0x03);  // Opcode F7 /3
7545   ins_encode( OpcP, RegOpc( dst ) );
7546   ins_pipe( ialu_reg );
7547 %}
7548 
7549 //----------Multiplication/Division Instructions-------------------------------
7550 // Integer Multiplication Instructions
7551 // Multiply Register
7552 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{  // Int multiply, register form: dst = dst * src.
7553   match(Set dst (MulI dst src));
7554   effect(KILL cr);  // the flags register does not survive this instruction
7555 
7556   size(3);  // fixed three-byte encoding
7557   ins_cost(300);
7558   format %{ "IMUL   $dst,$src" %}
7559   opcode(0xAF, 0x0F);  // primary 0xAF, secondary 0x0F
7560   ins_encode( OpcS, OpcP, RegReg( dst, src) );  // secondary byte is emitted first, giving the 0F AF sequence
7561   ins_pipe( ialu_reg_reg_alu0 );
7562 %}
7563 
7564 // Multiply 32-bit Immediate
7565 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{  // Int multiply by an immediate, three-operand form: dst = src * imm.
7566   match(Set dst (MulI src imm));  // dst is not an input, so src is left intact
7567   effect(KILL cr);  // the flags register does not survive this instruction
7568 
7569   ins_cost(300);
7570   format %{ "IMUL   $dst,$src,$imm" %}
7571   opcode(0x69);  /* 69 /r id */
7572   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7573   ins_pipe( ialu_reg_reg_alu0 );
7574 %}
7575 
7576 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{  // Materializes an immL32 constant into the eADXRegL_low_only operand class, which the mulI_imm_*high rules take as src2.
7577   match(Set dst src);
7578   effect(KILL cr);
7579 
7580   // Note that this is artificially increased to make it more expensive than loadConL
7581   ins_cost(250);
7582   format %{ "MOV    EAX,$src\t// low word only" %}
7583   opcode(0xB8);
7584   ins_encode( LdImmL_Lo(dst, src) );  // only the low half of the long is loaded (see format)
7585   ins_pipe( ialu_reg_fat );
7586 %}
7587 
7588 // Multiply by 32-bit Immediate, taking the shifted high order results
7589 //  (special case for shift by 32)
7590 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{  // High word of a widening multiply: (int)(((long)src1 * src2) >> cnt), with the result in EDX.
7591   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7592   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7593              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7594              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );  // the multiplier must be a long constant within [min_jint, max_jint]
7595   effect(USE src1, KILL cr);
7596 
7597   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7598   ins_cost(0*100 + 1*400 - 150);
7599   format %{ "IMUL   EDX:EAX,$src1" %}
7600   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );  // same encoder as mulI_imm_RShift_high
7601   ins_pipe( pipe_slow );
7602 %}
7603 
7604 // Multiply by 32-bit Immediate, taking the shifted high order results
7605 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{  // Like mulI_imm_high but for shift counts of the immI_32_63 class; the format shows an extra SAR by cnt-32 on the high word.
7606   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7607   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7608              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7609              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );  // the multiplier must be a long constant within [min_jint, max_jint]
7610   effect(USE src1, KILL cr);
7611 
7612   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7613   ins_cost(1*100 + 1*400 - 150);  // 100 more than mulI_imm_high
7614   format %{ "IMUL   EDX:EAX,$src1\n\t"
7615             "SAR    EDX,$cnt-32" %}
7616   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7617   ins_pipe( pipe_slow );
7618 %}
7619 
7620 // Multiply Memory 32-bit Immediate
7621 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{  // Int multiply of a memory operand by an immediate: dst = [src] * imm.
7622   match(Set dst (MulI (LoadI src) imm));  // the LoadI is folded into the IMUL's memory operand
7623   effect(KILL cr);  // the flags register does not survive this instruction
7624 
7625   ins_cost(300);
7626   format %{ "IMUL   $dst,$src,$imm" %}
7627   opcode(0x69);  /* 69 /r id */
7628   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7629   ins_pipe( ialu_reg_mem_alu0 );
7630 %}
7631 
7632 // Multiply Memory
7633 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{  // Int multiply with the right operand loaded from memory: dst = dst * [src].
7634   match(Set dst (MulI dst (LoadI src)));
7635   effect(KILL cr);  // the flags register does not survive this instruction
7636 
7637   ins_cost(350);
7638   format %{ "IMUL   $dst,$src" %}
7639   opcode(0xAF, 0x0F);  // primary 0xAF, secondary 0x0F
7640   ins_encode( OpcS, OpcP, RegMem( dst, src) );  // secondary byte is emitted first, giving the 0F AF sequence
7641   ins_pipe( ialu_reg_mem_alu0 );
7642 %}
7643 
7644 // Multiply Register Int to Long
7645 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{  // Signed 32x32 -> 64 multiply; src is pinned to EAX and the long result uses the eADXRegL pair.
7646   // Basic Idea: long = (long)int * (long)int
7647   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7648   effect(DEF dst, USE src, USE src1, KILL flags);
7649 
7650   ins_cost(300);
7651   format %{ "IMUL   $dst,$src1" %}
7652 
7653   ins_encode( long_int_multiply( dst, src1 ) );  // src is not passed to the encoder; only dst and src1 are
7654   ins_pipe( ialu_reg_reg_alu0 );
7655 %}
7656 
7657 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{  // Unsigned 32x32 -> 64 multiply: both int inputs are widened and masked with the same immL_32bits constant.
7658   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7659   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7660   effect(KILL flags);
7661 
7662   ins_cost(300);
7663   format %{ "MUL    $dst,$src1" %}
7664 
7665   ins_encode( long_uint_multiply(dst, src1) );  // src is not passed to the encoder; only dst and src1 are
7666   ins_pipe( ialu_reg_reg_alu0 );
7667 %}
7668 
7669 // Multiply Register Long
7670 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{  // General 64x64 -> 64 multiply built from 32-bit multiplies: dst = dst * src, with dst in EDX:EAX.
7671   match(Set dst (MulL dst src));
7672   effect(KILL cr, TEMP tmp);  // tmp is a scratch register for the cross products
7673   ins_cost(4*100+3*400);
7674 // Basic idea: lo(result) = lo(x_lo * y_lo)
7675 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7676   format %{ "MOV    $tmp,$src.lo\n\t"
7677             "IMUL   $tmp,EDX\n\t"
7678             "MOV    EDX,$src.hi\n\t"
7679             "IMUL   EDX,EAX\n\t"
7680             "ADD    $tmp,EDX\n\t"
7681             "MUL    EDX:EAX,$src.lo\n\t"
7682             "ADD    EDX,$tmp" %}
7683   ins_encode( long_multiply( dst, src, tmp ) );
7684   ins_pipe( pipe_slow );
7685 %}
7686 
7687 // Multiply Register Long where the left operand's high 32 bits are zero
7688 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{  // 64-bit multiply specialized for a left operand whose high 32 bits are zero, saving one cross product.
7689   predicate(is_operand_hi32_zero(n->in(1)));  // input 1 is the left (dst) operand
7690   match(Set dst (MulL dst src));
7691   effect(KILL cr, TEMP tmp);
7692   ins_cost(2*100+2*400);
7693 // Basic idea: lo(result) = lo(x_lo * y_lo)
7694 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7695   format %{ "MOV    $tmp,$src.hi\n\t"
7696             "IMUL   $tmp,EAX\n\t"
7697             "MUL    EDX:EAX,$src.lo\n\t"
7698             "ADD    EDX,$tmp" %}
7699   ins_encode %{
7700     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));  // tmp = y_hi
7701     __ imull($tmp$$Register, rax);                            // tmp = lo(x_lo * y_hi)
7702     __ mull($src$$Register);                                  // EDX:EAX = x_lo * y_lo
7703     __ addl(rdx, $tmp$$Register);                             // add the cross product into the high word
7704   %}
7705   ins_pipe( pipe_slow );
7706 %}
7707 
7708 // Multiply Register Long where the right operand's high 32 bits are zero
7709 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{  // 64-bit multiply specialized for a right operand whose high 32 bits are zero, saving one cross product.
7710   predicate(is_operand_hi32_zero(n->in(2)));  // input 2 is the right (src) operand
7711   match(Set dst (MulL dst src));
7712   effect(KILL cr, TEMP tmp);
7713   ins_cost(2*100+2*400);
7714 // Basic idea: lo(result) = lo(x_lo * y_lo)
7715 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7716   format %{ "MOV    $tmp,$src.lo\n\t"
7717             "IMUL   $tmp,EDX\n\t"
7718             "MUL    EDX:EAX,$src.lo\n\t"
7719             "ADD    EDX,$tmp" %}
7720   ins_encode %{
7721     __ movl($tmp$$Register, $src$$Register);  // tmp = y_lo
7722     __ imull($tmp$$Register, rdx);            // tmp = lo(x_hi * y_lo)
7723     __ mull($src$$Register);                  // EDX:EAX = x_lo * y_lo
7724     __ addl(rdx, $tmp$$Register);             // add the cross product into the high word
7725   %}
7726   ins_pipe( pipe_slow );
7727 %}
7728 
7729 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7730 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{  // 64-bit multiply when both operands have zero high halves: a single MUL suffices and no temp register is needed.
7731   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7732   match(Set dst (MulL dst src));
7733   effect(KILL cr);
7734   ins_cost(1*400);
7735 // Basic idea: lo(result) = lo(x_lo * y_lo)
7736 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7737   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7738   ins_encode %{
7739     __ mull($src$$Register);  // EDX:EAX = x_lo * y_lo
7740   %}
7741   ins_pipe( pipe_slow );
7742 %}
7743 
7744 // Multiply Register Long by small constant
7745 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{  // 64-bit multiply by a constant of the immL_127 operand class: dst = dst * src.
7746   match(Set dst (MulL dst src));
7747   effect(KILL cr, TEMP tmp);  // tmp holds the src * EDX partial product (see format)
7748   ins_cost(2*100+2*400);
7749   size(12);  // fixed twelve-byte encoding
7750 // Basic idea: lo(result) = lo(src * EAX)
7751 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7752   format %{ "IMUL   $tmp,EDX,$src\n\t"
7753             "MOV    EDX,$src\n\t"
7754             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7755             "ADD    EDX,$tmp" %}
7756   ins_encode( long_multiply_con( dst, src, tmp ) );
7757   ins_pipe( pipe_slow );
7758 %}
7759 
7760 // Integer DIV with Register
7761 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{  // Int divide: EAX = EAX / ECX. The format shows a guard that skips IDIV when EAX is 0x80000000 and the divisor is -1.
7762   match(Set rax (DivI rax div));
7763   effect(KILL rdx, KILL cr);  // EDX is clobbered and its value is not a result of this rule
7764   size(26);  // fixed size of the whole guarded sequence
7765   ins_cost(30*100+10*100);
7766   format %{ "CMP    EAX,0x80000000\n\t"
7767             "JNE,s  normal\n\t"
7768             "XOR    EDX,EDX\n\t"
7769             "CMP    ECX,-1\n\t"
7770             "JE,s   done\n"
7771     "normal: CDQ\n\t"
7772             "IDIV   $div\n\t"
7773     "done:"        %}
7774   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7775   ins_encode( cdq_enc, OpcP, RegOpc(div) );  // NOTE(review): cdq_enc presumably emits the guard and the CDQ shown in the format - confirm in the enc_class
7776   ins_pipe( ialu_reg_reg_alu0 );
7777 %}
7778 
7779 // Divide Register Long
7780 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{  // General long divide: per the format, pushes both operands and calls SharedRuntime::ldiv.
7781   match(Set dst (DivL src1 src2));
7782   effect( KILL cr, KILL cx, KILL bx );  // ECX and EBX are declared clobbered in addition to the flags
7783   ins_cost(10000);  // far costlier than divL_eReg_imm32 (1000)
7784   format %{ "PUSH   $src1.hi\n\t"
7785             "PUSH   $src1.lo\n\t"
7786             "PUSH   $src2.hi\n\t"
7787             "PUSH   $src2.lo\n\t"
7788             "CALL   SharedRuntime::ldiv\n\t"
7789             "ADD    ESP,16" %}
7790   ins_encode( long_div(src1,src2) );
7791   ins_pipe( pipe_slow );
7792 %}
7793 
7794 // Integer DIVMOD with Register, both quotient and mod results
7795 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{  // Combined int divide and remainder from one IDIV sequence; uses the same encoders as divI_eReg.
7796   match(DivModI rax div);  // no "Set": the match has no single destination operand
7797   effect(KILL cr);  // only flags are killed here; rdx is not, unlike in divI_eReg
7798   size(26);  // fixed size of the whole guarded sequence
7799   ins_cost(30*100+10*100);
7800   format %{ "CMP    EAX,0x80000000\n\t"
7801             "JNE,s  normal\n\t"
7802             "XOR    EDX,EDX\n\t"
7803             "CMP    ECX,-1\n\t"
7804             "JE,s   done\n"
7805     "normal: CDQ\n\t"
7806             "IDIV   $div\n\t"
7807     "done:"        %}
7808   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7809   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7810   ins_pipe( pipe_slow );
7811 %}
7812 
7813 // Integer MOD with Register
7814 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{  // Int remainder: EDX = EAX % ECX; uses the same encoders as divI_eReg.
7815   match(Set rdx (ModI rax div));
7816   effect(KILL rax, KILL cr);  // EAX is clobbered and its value is not a result of this rule
7817 
7818   size(26);  // same fixed size as divI_eReg
7819   ins_cost(300);
7820   format %{ "CDQ\n\t"
7821             "IDIV   $div" %}
7822   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7823   ins_encode( cdq_enc, OpcP, RegOpc(div) );  // NOTE(review): size(26) and these encoders match divI_eReg, so the format above looks abbreviated - confirm
7824   ins_pipe( ialu_reg_reg_alu0 );
7825 %}
7826 
7827 // Remainder Register Long
7828 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{  // General long remainder: per the format, pushes both operands and calls SharedRuntime::lrem.
7829   match(Set dst (ModL src1 src2));
7830   effect( KILL cr, KILL cx, KILL bx );  // ECX and EBX are declared clobbered in addition to the flags
7831   ins_cost(10000);  // far costlier than modL_eReg_imm32 (1000)
7832   format %{ "PUSH   $src1.hi\n\t"
7833             "PUSH   $src1.lo\n\t"
7834             "PUSH   $src2.hi\n\t"
7835             "PUSH   $src2.lo\n\t"
7836             "CALL   SharedRuntime::lrem\n\t"
7837             "ADD    ESP,16" %}
7838   ins_encode( long_mod(src1,src2) );
7839   ins_pipe( pipe_slow );
7840 %}
7841 
7842 // Divide Register Long (no special case since divisor != -1)
7843 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{  // Inline long divide by a 32-bit constant using unsigned DIV; dst (EDX:EAX) is both dividend and quotient.
7844   match(Set dst (DivL dst imm));
7845   effect( TEMP tmp, TEMP tmp2, KILL cr );  // tmp holds |imm|; tmp2 holds the saved low word and then the high quotient word
7846   ins_cost(1000);
7847   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7848             "XOR    $tmp2,$tmp2\n\t"
7849             "CMP    $tmp,EDX\n\t"
7850             "JA,s   fast\n\t"
7851             "MOV    $tmp2,EAX\n\t"
7852             "MOV    EAX,EDX\n\t"
7853             "MOV    EDX,0\n\t"
7854             "JLE,s  pos\n\t"
7855             "LNEG   EAX : $tmp2\n\t"
7856             "DIV    $tmp # unsigned division\n\t"
7857             "XCHG   EAX,$tmp2\n\t"
7858             "DIV    $tmp\n\t"
7859             "LNEG   $tmp2 : EAX\n\t"
7860             "JMP,s  done\n"
7861     "pos:\n\t"
7862             "DIV    $tmp\n\t"
7863             "XCHG   EAX,$tmp2\n"
7864     "fast:\n\t"
7865             "DIV    $tmp\n"
7866     "done:\n\t"
7867             "MOV    EDX,$tmp2\n\t"
7868             "NEG    EDX:EAX # if $imm < 0" %}
7869   ins_encode %{
7870     int con = (int)$imm$$constant;
7871     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");  // these divisors must never reach this rule
7872     int pcon = (con > 0) ? con : -con;  // |con|; cannot overflow because min_jint is excluded above
7873     Label Lfast, Lpos, Ldone;
7874 
7875     __ movl($tmp$$Register, pcon);
7876     __ xorl($tmp2$$Register,$tmp2$$Register);  // high quotient word defaults to 0 for the fast path
7877     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7878     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7879 
7880     __ movl($tmp2$$Register, $dst$$Register); // save
7881     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7882     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7883     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7884 
7885     // Negative dividend.
7886     // convert value to positive to use unsigned division
7887     __ lneg($dst$$Register, $tmp2$$Register);
7888     __ divl($tmp$$Register);  // divide the high word; the remainder stays in EDX for the next step
7889     __ xchgl($dst$$Register, $tmp2$$Register);  // tmp2 = high quotient, EAX = low dividend word
7890     __ divl($tmp$$Register);  // divide remainder:low word
7891     // revert result back to negative
7892     __ lneg($tmp2$$Register, $dst$$Register);
7893     __ jmpb(Ldone);
7894 
7895     __ bind(Lpos);
7896     __ divl($tmp$$Register); // Use unsigned division
7897     __ xchgl($dst$$Register, $tmp2$$Register);
7898     // Fall through for final divide, tmp2 has 32 bit hi result
7899 
7900     __ bind(Lfast);
7901     // fast path: src is positive
7902     __ divl($tmp$$Register); // Use unsigned division
7903 
7904     __ bind(Ldone);
7905     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);  // install the high quotient word
7906     if (con < 0) {
7907       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);  // the division used |con|, so negate the quotient for a negative divisor
7908     }
7909   %}
7910   ins_pipe( pipe_slow );
7911 %}
7912 
7913 // Remainder Register Long (remainder fit into 32 bits)
     // Matches ModL of the long held in dst (EDX:EAX per the format string) by a
     // constant imm. The encoding divides by abs(imm) with one or two unsigned
     // 32-bit DIVs and leaves the remainder in dst as a sign-extended 32-bit
     // value. The sign of imm is not consulted once abs(imm) has been formed;
     // the remainder is negated only on the negative-dividend path.
7914 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7915   match(Set dst (ModL dst imm));
7916   effect( TEMP tmp, TEMP tmp2, KILL cr );
7917   ins_cost(1000);
7918   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7919             "CMP    $tmp,EDX\n\t"
7920             "JA,s   fast\n\t"
7921             "MOV    $tmp2,EAX\n\t"
7922             "MOV    EAX,EDX\n\t"
7923             "MOV    EDX,0\n\t"
7924             "JLE,s  pos\n\t"
7925             "LNEG   EAX : $tmp2\n\t"
7926             "DIV    $tmp # unsigned division\n\t"
7927             "MOV    EAX,$tmp2\n\t"
7928             "DIV    $tmp\n\t"
7929             "NEG    EDX\n\t"
7930             "JMP,s  done\n"
7931     "pos:\n\t"
7932             "DIV    $tmp\n\t"
7933             "MOV    EAX,$tmp2\n"
7934     "fast:\n\t"
7935             "DIV    $tmp\n"
7936     "done:\n\t"
7937             "MOV    EAX,EDX\n\t"
7938             "SAR    EDX,31\n\t" %}
7939   ins_encode %{
7940     int con = (int)$imm$$constant;
         // 0, -1 and min_jint are rejected here; for min_jint the negation below
         // would overflow an int.
7941     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7942     int pcon = (con > 0) ? con : -con;
7943     Label  Lfast, Lpos, Ldone;
7944 
7945     __ movl($tmp$$Register, pcon);
         // Unsigned compare of abs(imm) with the high word: "above" means
         // high < abs(imm), so one DIV of the whole 64-bit value cannot overflow
         // its 32-bit quotient.
7946     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7947     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7948 
         // Slow paths: stash the low word in tmp2, move the high word into the
         // low register and clear the high register, so the first DIV below
         // operates on (0 : original high word).
7949     __ movl($tmp2$$Register, $dst$$Register); // save
7950     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
         // MOV leaves the flags of the CMP above intact for the signed test below.
7951     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
         // Signed abs(imm) <= high word: the dividend is non-negative.
7952     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7953 
7954     // Negative dividend.
7955     // convert value to positive to use unsigned division
         // The low register of dst holds the original high word and tmp2 the
         // original low word; lneg negates that 64-bit pair.
7956     __ lneg($dst$$Register, $tmp2$$Register);
         // First DIV handles the high word; its remainder stays in the high
         // register and becomes the upper half of the second DIV's dividend.
7957     __ divl($tmp$$Register);
7958     __ movl($dst$$Register, $tmp2$$Register);
7959     __ divl($tmp$$Register);
7960     // revert remainder back to negative
7961     __ negl(HIGH_FROM_LOW($dst$$Register));
7962     __ jmpb(Ldone);
7963 
7964     __ bind(Lpos);
         // Non-negative dividend: same two-step division, without any negation.
         // After reloading the low word this falls through into Lfast.
7965     __ divl($tmp$$Register);
7966     __ movl($dst$$Register, $tmp2$$Register);
7967 
7968     __ bind(Lfast);
7969     // fast path: src is positive
7970     __ divl($tmp$$Register);
7971 
7972     __ bind(Ldone);
         // DIV left the remainder in the high register: copy it to the low
         // register, then replicate its sign bit across the high register.
7973     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7974     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7975 
7976   %}
7977   ins_pipe( pipe_slow );
7978 %}
7979 
7980 // Integer Shift Instructions
7981 // Shift Left by one
7982 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7983   match(Set dst (LShiftI dst shift));
7984   effect(KILL cr);
7985 
7986   size(2);
7987   format %{ "SHL    $dst,$shift" %}
7988   opcode(0xD1, 0x4);  /* D1 /4 */
7989   ins_encode( OpcP, RegOpc( dst ) );
7990   ins_pipe( ialu_reg );
7991 %}
7992 
7993 // Shift Left by 8-bit immediate
7994 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7995   match(Set dst (LShiftI dst shift));
7996   effect(KILL cr);
7997 
7998   size(3);
7999   format %{ "SHL    $dst,$shift" %}
8000   opcode(0xC1, 0x4);  /* C1 /4 ib */
8001   ins_encode( RegOpcImm( dst, shift) );
8002   ins_pipe( ialu_reg );
8003 %}
8004 
8005 // Shift Left by variable
8006 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8007   match(Set dst (LShiftI dst shift));
8008   effect(KILL cr);
8009 
8010   size(2);
8011   format %{ "SHL    $dst,$shift" %}
8012   opcode(0xD3, 0x4);  /* D3 /4 */
8013   ins_encode( OpcP, RegOpc( dst ) );
8014   ins_pipe( ialu_reg_reg );
8015 %}
8016 
8017 // Arithmetic shift right by one
8018 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8019   match(Set dst (RShiftI dst shift));
8020   effect(KILL cr);
8021 
8022   size(2);
8023   format %{ "SAR    $dst,$shift" %}
8024   opcode(0xD1, 0x7);  /* D1 /7 */
8025   ins_encode( OpcP, RegOpc( dst ) );
8026   ins_pipe( ialu_reg );
8027 %}
8028 
8029 // Arithmetic shift right by one
8030 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8031   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8032   effect(KILL cr);
8033   format %{ "SAR    $dst,$shift" %}
8034   opcode(0xD1, 0x7);  /* D1 /7 */
8035   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8036   ins_pipe( ialu_mem_imm );
8037 %}
8038 
8039 // Arithmetic Shift Right by 8-bit immediate
     // Register form: matches RShiftI of dst by an immI8 count and emits
     // "C1 /7 ib" (3 bytes). The flags register is declared killed.
8040 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8041   match(Set dst (RShiftI dst shift));
8042   effect(KILL cr);
8043 
8044   size(3);
8045   format %{ "SAR    $dst,$shift" %}
8046   opcode(0xC1, 0x7);  /* C1 /7 ib */
8047   ins_encode( RegOpcImm( dst, shift ) );
8048   ins_pipe( ialu_reg );  // register-only operation: same pipe class as salI_eReg_imm / shrI_eReg_imm
8049 %}
8050 
8051 // Arithmetic Shift Right by 8-bit immediate
8052 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8053   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8054   effect(KILL cr);
8055 
8056   format %{ "SAR    $dst,$shift" %}
8057   opcode(0xC1, 0x7);  /* C1 /7 ib */
8058   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8059   ins_pipe( ialu_mem_imm );
8060 %}
8061 
8062 // Arithmetic Shift Right by variable
8063 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8064   match(Set dst (RShiftI dst shift));
8065   effect(KILL cr);
8066 
8067   size(2);
8068   format %{ "SAR    $dst,$shift" %}
8069   opcode(0xD3, 0x7);  /* D3 /7 */
8070   ins_encode( OpcP, RegOpc( dst ) );
8071   ins_pipe( ialu_reg_reg );
8072 %}
8073 
8074 // Logical shift right by one
8075 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8076   match(Set dst (URShiftI dst shift));
8077   effect(KILL cr);
8078 
8079   size(2);
8080   format %{ "SHR    $dst,$shift" %}
8081   opcode(0xD1, 0x5);  /* D1 /5 */
8082   ins_encode( OpcP, RegOpc( dst ) );
8083   ins_pipe( ialu_reg );
8084 %}
8085 
8086 // Logical Shift Right by 8-bit immediate
8087 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8088   match(Set dst (URShiftI dst shift));
8089   effect(KILL cr);
8090 
8091   size(3);
8092   format %{ "SHR    $dst,$shift" %}
8093   opcode(0xC1, 0x5);  /* C1 /5 ib */
8094   ins_encode( RegOpcImm( dst, shift) );
8095   ins_pipe( ialu_reg );
8096 %}
8097 
8098 
8099 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8100 // This idiom is used by the compiler for the i2b bytecode.
     // The pair of shifts sign-extends the low byte of src, so it is emitted as
     // a single byte-to-dword MOVSX (movsbl). No flags operand is declared.
8101 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8102   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8103 
8104   size(3);
8105   format %{ "MOVSX  $dst,$src :8" %}
8106   ins_encode %{
8107     __ movsbl($dst$$Register, $src$$Register);
8108   %}
8109   ins_pipe(ialu_reg_reg);
8110 %}
8111 
8112 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8113 // This idiom is used by the compiler for the i2s bytecode.
     // The pair of shifts sign-extends the low 16 bits of src, so it is emitted
     // as a single word-to-dword MOVSX (movswl). No flags operand is declared.
8114 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8115   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8116 
8117   size(3);
8118   format %{ "MOVSX  $dst,$src :16" %}
8119   ins_encode %{
8120     __ movswl($dst$$Register, $src$$Register);
8121   %}
8122   ins_pipe(ialu_reg_reg);
8123 %}
8124 
8125 
8126 // Logical Shift Right by variable
8127 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8128   match(Set dst (URShiftI dst shift));
8129   effect(KILL cr);
8130 
8131   size(2);
8132   format %{ "SHR    $dst,$shift" %}
8133   opcode(0xD3, 0x5);  /* D3 /5 */
8134   ins_encode( OpcP, RegOpc( dst ) );
8135   ins_pipe( ialu_reg_reg );
8136 %}
8137 
8138 
8139 //----------Logical Instructions-----------------------------------------------
8140 //----------Integer Logical Instructions---------------------------------------
8141 // And Instructions
8142 // And Register with Register
8143 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8144   match(Set dst (AndI dst src));
8145   effect(KILL cr);
8146 
8147   size(2);
8148   format %{ "AND    $dst,$src" %}
8149   opcode(0x23);
8150   ins_encode( OpcP, RegReg( dst, src) );
8151   ins_pipe( ialu_reg_reg );
8152 %}
8153 
8154 // And Register with Immediate
8155 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8156   match(Set dst (AndI dst src));
8157   effect(KILL cr);
8158 
8159   format %{ "AND    $dst,$src" %}
8160   opcode(0x81,0x04);  /* Opcode 81 /4 */
8161   // ins_encode( RegImm( dst, src) );
8162   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8163   ins_pipe( ialu_reg );
8164 %}
8165 
8166 // And Register with Memory
8167 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8168   match(Set dst (AndI dst (LoadI src)));
8169   effect(KILL cr);
8170 
8171   ins_cost(125);
8172   format %{ "AND    $dst,$src" %}
8173   opcode(0x23);
8174   ins_encode( OpcP, RegMem( dst, src) );
8175   ins_pipe( ialu_reg_mem );
8176 %}
8177 
8178 // And Memory with Register
8179 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8180   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8181   effect(KILL cr);
8182 
8183   ins_cost(150);
8184   format %{ "AND    $dst,$src" %}
8185   opcode(0x21);  /* Opcode 21 /r */
8186   ins_encode( OpcP, RegMem( src, dst ) );
8187   ins_pipe( ialu_mem_reg );
8188 %}
8189 
8190 // And Memory with Immediate
8191 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8192   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8193   effect(KILL cr);
8194 
8195   ins_cost(125);
8196   format %{ "AND    $dst,$src" %}
8197   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8198   // ins_encode( MemImm( dst, src) );
8199   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8200   ins_pipe( ialu_mem_imm );
8201 %}
8202 
8203 // BMI1 instructions
8204 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8205   match(Set dst (AndI (XorI src1 minus_1) src2));
8206   predicate(UseBMI1Instructions);
8207   effect(KILL cr);
8208 
8209   format %{ "ANDNL  $dst, $src1, $src2" %}
8210 
8211   ins_encode %{
8212     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8213   %}
8214   ins_pipe(ialu_reg);
8215 %}
8216 
8217 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8218   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8219   predicate(UseBMI1Instructions);
8220   effect(KILL cr);
8221 
8222   ins_cost(125);
8223   format %{ "ANDNL  $dst, $src1, $src2" %}
8224 
8225   ins_encode %{
8226     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8227   %}
8228   ins_pipe(ialu_reg_mem);
8229 %}
8230 
8231 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8232   match(Set dst (AndI (SubI imm_zero src) src));
8233   predicate(UseBMI1Instructions);
8234   effect(KILL cr);
8235 
8236   format %{ "BLSIL  $dst, $src" %}
8237 
8238   ins_encode %{
8239     __ blsil($dst$$Register, $src$$Register);
8240   %}
8241   ins_pipe(ialu_reg);
8242 %}
8243 
8244 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8245   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8246   predicate(UseBMI1Instructions);
8247   effect(KILL cr);
8248 
8249   ins_cost(125);
8250   format %{ "BLSIL  $dst, $src" %}
8251 
8252   ins_encode %{
8253     __ blsil($dst$$Register, $src$$Address);
8254   %}
8255   ins_pipe(ialu_reg_mem);
8256 %}
8257 
8258 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8259 %{
8260   match(Set dst (XorI (AddI src minus_1) src));
8261   predicate(UseBMI1Instructions);
8262   effect(KILL cr);
8263 
8264   format %{ "BLSMSKL $dst, $src" %}
8265 
8266   ins_encode %{
8267     __ blsmskl($dst$$Register, $src$$Register);
8268   %}
8269 
8270   ins_pipe(ialu_reg);
8271 %}
8272 
8273 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8274 %{
8275   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8276   predicate(UseBMI1Instructions);
8277   effect(KILL cr);
8278 
8279   ins_cost(125);
8280   format %{ "BLSMSKL $dst, $src" %}
8281 
8282   ins_encode %{
8283     __ blsmskl($dst$$Register, $src$$Address);
8284   %}
8285 
8286   ins_pipe(ialu_reg_mem);
8287 %}
8288 
8289 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8290 %{
8291   match(Set dst (AndI (AddI src minus_1) src) );
8292   predicate(UseBMI1Instructions);
8293   effect(KILL cr);
8294 
8295   format %{ "BLSRL  $dst, $src" %}
8296 
8297   ins_encode %{
8298     __ blsrl($dst$$Register, $src$$Register);
8299   %}
8300 
8301   ins_pipe(ialu_reg);
8302 %}
8303 
8304 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8305 %{
8306   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8307   predicate(UseBMI1Instructions);
8308   effect(KILL cr);
8309 
8310   ins_cost(125);
8311   format %{ "BLSRL  $dst, $src" %}
8312 
8313   ins_encode %{
8314     __ blsrl($dst$$Register, $src$$Address);
8315   %}
8316 
8317   ins_pipe(ialu_reg_mem);
8318 %}
8319 
8320 // Or Instructions
8321 // Or Register with Register
8322 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8323   match(Set dst (OrI dst src));
8324   effect(KILL cr);
8325 
8326   size(2);
8327   format %{ "OR     $dst,$src" %}
8328   opcode(0x0B);
8329   ins_encode( OpcP, RegReg( dst, src) );
8330   ins_pipe( ialu_reg_reg );
8331 %}
8332 
8333 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8334   match(Set dst (OrI dst (CastP2X src)));
8335   effect(KILL cr);
8336 
8337   size(2);
8338   format %{ "OR     $dst,$src" %}
8339   opcode(0x0B);
8340   ins_encode( OpcP, RegReg( dst, src) );
8341   ins_pipe( ialu_reg_reg );
8342 %}
8343 
8344 
8345 // Or Register with Immediate
8346 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8347   match(Set dst (OrI dst src));
8348   effect(KILL cr);
8349 
8350   format %{ "OR     $dst,$src" %}
8351   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8352   // ins_encode( RegImm( dst, src) );
8353   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8354   ins_pipe( ialu_reg );
8355 %}
8356 
8357 // Or Register with Memory
8358 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8359   match(Set dst (OrI dst (LoadI src)));
8360   effect(KILL cr);
8361 
8362   ins_cost(125);
8363   format %{ "OR     $dst,$src" %}
8364   opcode(0x0B);
8365   ins_encode( OpcP, RegMem( dst, src) );
8366   ins_pipe( ialu_reg_mem );
8367 %}
8368 
8369 // Or Memory with Register
8370 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8371   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8372   effect(KILL cr);
8373 
8374   ins_cost(150);
8375   format %{ "OR     $dst,$src" %}
8376   opcode(0x09);  /* Opcode 09 /r */
8377   ins_encode( OpcP, RegMem( src, dst ) );
8378   ins_pipe( ialu_mem_reg );
8379 %}
8380 
8381 // Or Memory with Immediate
8382 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8383   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8384   effect(KILL cr);
8385 
8386   ins_cost(125);
8387   format %{ "OR     $dst,$src" %}
8388   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8389   // ins_encode( MemImm( dst, src) );
8390   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8391   ins_pipe( ialu_mem_imm );
8392 %}
8393 
8394 // ROL/ROR
8395 // ROL expand
8396 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8397   effect(USE_DEF dst, USE shift, KILL cr);
8398 
8399   format %{ "ROL    $dst, $shift" %}
8400   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8401   ins_encode( OpcP, RegOpc( dst ));
8402   ins_pipe( ialu_reg );
8403 %}
8404 
8405 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8406   effect(USE_DEF dst, USE shift, KILL cr);
8407 
8408   format %{ "ROL    $dst, $shift" %}
8409   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8410   ins_encode( RegOpcImm(dst, shift) );
8411   ins_pipe(ialu_reg);
8412 %}
8413 
8414 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8415   effect(USE_DEF dst, USE shift, KILL cr);
8416 
8417   format %{ "ROL    $dst, $shift" %}
8418   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8419   ins_encode(OpcP, RegOpc(dst));
8420   ins_pipe( ialu_reg_reg );
8421 %}
8422 // end of ROL expand
8423 
8424 // ROL 32bit by one once
     // Recognizes (dst << lshift) | (dst >>> rshift) where lshift is of operand
     // class immI1 and rshift of class immI_M1, and expands it into the single
     // rotate-left-by-one instruct rolI_eReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1, i.e. a right-shift
     // count of 31 once reduced to 5 bits -- confirm against the operand definition.
8425 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8426   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8427 
8428   expand %{
8429     rolI_eReg_imm1(dst, lshift, cr);
8430   %}
8431 %}
8432 
8433 // ROL 32bit var by imm8 once
     // Recognizes (dst << lshift) | (dst >>> rshift) for two 8-bit immediates and
     // expands it into a rotate-left by lshift (rolI_eReg_imm8).
8434 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
       // n is the OrI node: in(1)->in(2) is the LShiftI count and in(2)->in(2) the
       // URShiftI count. The rule applies only when the two counts add up to a
       // multiple of 32, i.e. when the two shifts together form a rotate.
8435   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8436   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8437 
8438   expand %{
8439     rolI_eReg_imm8(dst, lshift, cr);
8440   %}
8441 %}
8442 
8443 // ROL 32bit var by var once
     // Recognizes (dst << shift) | (dst >>> (0 - shift)) and expands it into a
     // rotate-left by the variable count (rolI_eReg_CL).
     // NOTE(review): this relies on int shift counts being taken modulo 32, so
     // that (0 - shift) acts as (32 - shift) -- confirm against the ideal-graph
     // shift semantics.
8444 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8445   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8446 
8447   expand %{
8448     rolI_eReg_CL(dst, shift, cr);
8449   %}
8450 %}
8451 
8452 // ROL 32bit var by var once
     // Recognizes (dst << shift) | (dst >>> (32 - shift)), the explicit spelling
     // of a 32-bit rotate left, and expands it into a rotate-left by the
     // variable count (rolI_eReg_CL).
8453 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8454   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8455 
8456   expand %{
8457     rolI_eReg_CL(dst, shift, cr);
8458   %}
8459 %}
8460 
8461 // ROR expand
8462 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8463   effect(USE_DEF dst, USE shift, KILL cr);
8464 
8465   format %{ "ROR    $dst, $shift" %}
8466   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8467   ins_encode( OpcP, RegOpc( dst ) );
8468   ins_pipe( ialu_reg );
8469 %}
8470 
8471 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8472   effect (USE_DEF dst, USE shift, KILL cr);
8473 
8474   format %{ "ROR    $dst, $shift" %}
8475   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8476   ins_encode( RegOpcImm(dst, shift) );
8477   ins_pipe( ialu_reg );
8478 %}
8479 
8480 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8481   effect(USE_DEF dst, USE shift, KILL cr);
8482 
8483   format %{ "ROR    $dst, $shift" %}
8484   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8485   ins_encode(OpcP, RegOpc(dst));
8486   ins_pipe( ialu_reg_reg );
8487 %}
8488 // end of ROR expand
8489 
8490 // ROR right once
     // Recognizes (dst >>> rshift) | (dst << lshift) where rshift is of operand
     // class immI1 and lshift of class immI_M1, and expands it into the single
     // rotate-right-by-one instruct rorI_eReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1, i.e. a left-shift
     // count of 31 once reduced to 5 bits -- confirm against the operand definition.
8491 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8492   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8493 
8494   expand %{
8495     rorI_eReg_imm1(dst, rshift, cr);
8496   %}
8497 %}
8498 
8499 // ROR 32bit by immI8 once
     // Recognizes (dst >>> rshift) | (dst << lshift) for two 8-bit immediates and
     // expands it into a rotate-right by rshift (rorI_eReg_imm8).
8500 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
       // n is the OrI node: in(1)->in(2) is the URShiftI count and in(2)->in(2) the
       // LShiftI count. The rule applies only when the two counts add up to a
       // multiple of 32, i.e. when the two shifts together form a rotate.
8501   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8502   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8503 
8504   expand %{
8505     rorI_eReg_imm8(dst, rshift, cr);
8506   %}
8507 %}
8508 
8509 // ROR 32bit var by var once
     // Recognizes (dst >>> shift) | (dst << (0 - shift)) and expands it into a
     // rotate-right by the variable count (rorI_eReg_CL).
     // NOTE(review): this relies on int shift counts being taken modulo 32, so
     // that (0 - shift) acts as (32 - shift) -- confirm against the ideal-graph
     // shift semantics.
8510 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8511   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8512 
8513   expand %{
8514     rorI_eReg_CL(dst, shift, cr);
8515   %}
8516 %}
8517 
8518 // ROR 32bit var by var once
     // Recognizes (dst >>> shift) | (dst << (32 - shift)), the explicit spelling
     // of a 32-bit rotate right, and expands it into a rotate-right by the
     // variable count (rorI_eReg_CL).
8519 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8520   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8521 
8522   expand %{
8523     rorI_eReg_CL(dst, shift, cr);
8524   %}
8525 %}
8526 
8527 // Xor Instructions
8528 // Xor Register with Register
8529 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8530   match(Set dst (XorI dst src));
8531   effect(KILL cr);
8532 
8533   size(2);
8534   format %{ "XOR    $dst,$src" %}
8535   opcode(0x33);
8536   ins_encode( OpcP, RegReg( dst, src) );
8537   ins_pipe( ialu_reg_reg );
8538 %}
8539 
8540 // Xor Register with Immediate -1
     // XOR with the immI_M1 operand is emitted as a one-operand NOT of dst.
     // Unlike the other XOR rules in this group, no eFlagsReg operand or KILL
     // effect is declared here (the x86 NOT instruction leaves the flags alone).
8541 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8542   match(Set dst (XorI dst imm));
8543 
8544   size(2);
8545   format %{ "NOT    $dst" %}
8546   ins_encode %{
8547      __ notl($dst$$Register);
8548   %}
8549   ins_pipe( ialu_reg );
8550 %}
8551 
8552 // Xor Register with Immediate
8553 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8554   match(Set dst (XorI dst src));
8555   effect(KILL cr);
8556 
8557   format %{ "XOR    $dst,$src" %}
8558   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8559   // ins_encode( RegImm( dst, src) );
8560   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8561   ins_pipe( ialu_reg );
8562 %}
8563 
8564 // Xor Register with Memory
8565 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8566   match(Set dst (XorI dst (LoadI src)));
8567   effect(KILL cr);
8568 
8569   ins_cost(125);
8570   format %{ "XOR    $dst,$src" %}
8571   opcode(0x33);
8572   ins_encode( OpcP, RegMem(dst, src) );
8573   ins_pipe( ialu_reg_mem );
8574 %}
8575 
8576 // Xor Memory with Register
8577 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8578   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8579   effect(KILL cr);
8580 
8581   ins_cost(150);
8582   format %{ "XOR    $dst,$src" %}
8583   opcode(0x31);  /* Opcode 31 /r */
8584   ins_encode( OpcP, RegMem( src, dst ) );
8585   ins_pipe( ialu_mem_reg );
8586 %}
8587 
8588 // Xor Memory with Immediate
8589 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8590   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8591   effect(KILL cr);
8592 
8593   ins_cost(125);
8594   format %{ "XOR    $dst,$src" %}
8595   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8596   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8597   ins_pipe( ialu_mem_imm );
8598 %}
8599 
8600 //----------Convert Int to Boolean---------------------------------------------
8601 
8602 instruct movI_nocopy(rRegI dst, rRegI src) %{
8603   effect( DEF dst, USE src );
8604   format %{ "MOV    $dst,$src" %}
8605   ins_encode( enc_Copy( dst, src) );
8606   ins_pipe( ialu_reg_reg );
8607 %}
8608 
8609 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
       // Helper without a match rule of its own; convI2B's expand block
       // instantiates it after dst has been loaded with a copy of src.
       // With dst == src on entry, NEG leaves dst = -src and sets the carry flag
       // exactly when the value was non-zero (x86 NEG semantics). ADC then
       // computes -src + src + CF, i.e. 1 for a non-zero src and 0 otherwise.
8610   effect( USE_DEF dst, USE src, KILL cr );
8611 
       // Two 2-byte instructions.
8612   size(4);
8613   format %{ "NEG    $dst\n\t"
8614             "ADC    $dst,$src" %}
8615   ins_encode( neg_reg(dst),
8616               OpcRegReg(0x13,dst,src) );
8617   ins_pipe( ialu_reg_reg_long );
8618 %}
8619 
8620 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
       // Conv2B of an int register, built from two helper instructs:
       // movI_nocopy puts src into dst, then ci2b (NEG + ADC) reduces dst to
       // 0 when src was zero and 1 otherwise.
8621   match(Set dst (Conv2B src));
8622 
8623   expand %{
8624     movI_nocopy(dst,src);
8625     ci2b(dst,src,cr);
8626   %}
8627 %}
8628 
8629 instruct movP_nocopy(rRegI dst, eRegP src) %{
8630   effect( DEF dst, USE src );
8631   format %{ "MOV    $dst,$src" %}
8632   ins_encode( enc_Copy( dst, src) );
8633   ins_pipe( ialu_reg_reg );
8634 %}
8635 
8636 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
       // Pointer-source counterpart of ci2b; convP2B's expand block instantiates
       // it after dst has been loaded with a copy of src.
       // With dst == src on entry, NEG leaves dst = -src and sets the carry flag
       // exactly when the value was non-zero (x86 NEG semantics). ADC then
       // computes -src + src + CF, i.e. 1 for a non-null src and 0 otherwise.
8637   effect( USE_DEF dst, USE src, KILL cr );
8638   format %{ "NEG    $dst\n\t"
8639             "ADC    $dst,$src" %}
8640   ins_encode( neg_reg(dst),
8641               OpcRegReg(0x13,dst,src) );
8642   ins_pipe( ialu_reg_reg_long );
8643 %}
8644 
8645 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
       // Conv2B of a pointer register, built from two helper instructs:
       // movP_nocopy puts src into dst, then cp2b (NEG + ADC) reduces dst to
       // 0 when src was zero and 1 otherwise.
8646   match(Set dst (Conv2B src));
8647 
8648   expand %{
8649     movP_nocopy(dst,src);
8650     cp2b(dst,src,cr);
8651   %}
8652 %}
8653 
8654 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
       // Materializes CmpLTMask: dst becomes all ones (-1) when p < q as signed
       // ints and 0 otherwise. Branch-free: XOR / CMP / SETlt / NEG.
       // NOTE(review): p and q are of class ncxRegI while dst is eCXRegI,
       // presumably so that zeroing dst before the compare cannot clobber an
       // input -- confirm against the register class definitions.
8655   match(Set dst (CmpLTMask p q));
8656   effect(KILL cr);
8657   ins_cost(400);
8658 
8659   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8660   format %{ "XOR    $dst,$dst\n\t"
8661             "CMP    $p,$q\n\t"
8662             "SETlt  $dst\n\t"
8663             "NEG    $dst" %}
8664   ins_encode %{
8665     Register Rp = $p$$Register;
8666     Register Rq = $q$$Register;
8667     Register Rd = $dst$$Register;
8668     // Clear dst before the compare: XOR rewrites the flags and SETcc only writes the low byte.
8669     __ xorl(Rd, Rd);
8670     __ cmpl(Rp, Rq);
         // dst = (p < q) ? 1 : 0, then negate to turn 1 into the all-ones mask.
8671     __ setb(Assembler::less, Rd);
8672     __ negl(Rd);
8673   %}
8674 
8675   ins_pipe(pipe_slow);
8676 %}
8677 
8678 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
       // CmpLTMask against the immI0 operand, computed in place: an arithmetic
       // shift right by 31 replicates the sign bit of dst across the register,
       // giving -1 for a negative dst and 0 otherwise.
8679   match(Set dst (CmpLTMask dst zero));
8680   effect(DEF dst, KILL cr);
8681   ins_cost(100);
8682 
8683   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8684   ins_encode %{
8685   __ sarl($dst$$Register, 31);
8686   %}
8687   ins_pipe(ialu_reg);
8688 %}
8689 
8690 /* Uses a short branch instead of materializing the mask, which saves a register. */
8691 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8692   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8693   effect(KILL cr);
8694   ins_cost(400);
8695   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8696             "JGE    done\n\t"
8697             "ADD    $p,$y\n"
8698             "done:  " %}
8699   ins_encode %{
8700     // p -= q; the flags left by the SUB decide whether y is added back.
8701     Label Lskip;
8702     __ subl($p$$Register, $q$$Register);
8703     // Signed p >= q: the CmpLTMask term is zero, so skip the ADD.
8704     __ jccb(Assembler::greaterEqual, Lskip);
8705     // Signed p < q: the mask is all ones, so (mask & y) is y itself.
8706     __ addl($p$$Register, $y$$Register);
8707     __ bind(Lskip);
8708   %}
8709 
8710   ins_pipe(pipe_cmplt);
8711 %}
8712 
8713 /* Uses a short branch instead of materializing the mask, which saves a register. */
8714 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8715   match(Set y (AndI (CmpLTMask p q) y));
8716   effect(KILL cr);
8717 
8718   ins_cost(300);
8719 
8720   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8721             "JLT      done\n\t"
8722             "XORL     $y, $y\n"
8723             "done:  " %}
8724   ins_encode %{
8725     // y is kept when p < q (signed) and cleared otherwise.
8726     Label Lkeep;
8727     __ cmpl($p$$Register, $q$$Register);
8728     // p < q: the mask would be all ones, so y stays as it is.
8729     __ jccb(Assembler::less, Lkeep);
8730     // p >= q: the mask would be zero, so the AND result is zero.
8731     __ xorl($y$$Register, $y$$Register);
8732     __ bind(Lkeep);
8733   %}
8734 
8735   ins_pipe(pipe_cmplt);
8736 %}
8737 
8738 /* If I enable this, I encourage spilling in the inner loop of compress.
8739 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8740   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8741 */
8742 //----------Overflow Math Instructions-----------------------------------------
8743 
8744 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8745 %{
       // Overflow check for int addition: the flags register is the only result
       // (Set cr ...). The ADD is really executed and overwrites op1, which is
       // why op1 is declared USE_KILL while op2 is a plain USE.
8746   match(Set cr (OverflowAddI op1 op2));
8747   effect(DEF cr, USE_KILL op1, USE op2);
8748 
8749   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8750 
8751   ins_encode %{
8752     __ addl($op1$$Register, $op2$$Register);
8753   %}
8754   ins_pipe(ialu_reg_reg);
8755 %}
8756 
8757 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8758 %{
8759   match(Set cr (OverflowAddI op1 op2));
8760   effect(DEF cr, USE_KILL op1, USE op2);
8761 
8762   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8763 
8764   ins_encode %{
8765     __ addl($op1$$Register, $op2$$constant);
8766   %}
8767   ins_pipe(ialu_reg_reg);
8768 %}
8769 
8770 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8771 %{
       // Overflow check for int subtraction: the flags register is the only
       // result (Set cr ...). CMP sets the flags of op1 - op2 without writing a
       // register, so no effect clause is declared for the operands.
8772   match(Set cr (OverflowSubI op1 op2));
8773 
8774   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8775   ins_encode %{
8776     __ cmpl($op1$$Register, $op2$$Register);
8777   %}
8778   ins_pipe(ialu_reg_reg);
8779 %}
8780 
8781 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8782 %{
8783   match(Set cr (OverflowSubI op1 op2));
8784 
8785   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8786   ins_encode %{
8787     __ cmpl($op1$$Register, $op2$$constant);
8788   %}
8789   ins_pipe(ialu_reg_reg);
8790 %}
8791 
8792 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8793 %{
8794   match(Set cr (OverflowSubI zero op2));
8795   effect(DEF cr, USE_KILL op2);
8796 
8797   format %{ "NEG    $op2\t# overflow check int" %}
8798   ins_encode %{
8799     __ negl($op2$$Register);
8800   %}
8801   ins_pipe(ialu_reg_reg);
8802 %}
8803 
8804 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8805 %{
8806   match(Set cr (OverflowMulI op1 op2));
8807   effect(DEF cr, USE_KILL op1, USE op2);
8808 
8809   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8810   ins_encode %{
8811     __ imull($op1$$Register, $op2$$Register);
8812   %}
8813   ins_pipe(ialu_reg_reg_alu0);
8814 %}
8815 
8816 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8817 %{
       // Overflow check for int multiplication by a constant: the flags register
       // is the only result (Set cr ...). The three-operand IMUL writes its
       // product into the scratch register tmp, so op1 is only read (USE).
8818   match(Set cr (OverflowMulI op1 op2));
8819   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8820 
8821   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8822   ins_encode %{
8823     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8824   %}
8825   ins_pipe(ialu_reg_reg_alu0);
8826 %}
8827 
8828 //----------Long Instructions------------------------------------------------
8829 // Add Long Register with Register
8830 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8831   match(Set dst (AddL dst src));
8832   effect(KILL cr);
8833   ins_cost(200);
8834   format %{ "ADD    $dst.lo,$src.lo\n\t"
8835             "ADC    $dst.hi,$src.hi" %}
8836   opcode(0x03, 0x13);
8837   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8838   ins_pipe( ialu_reg_reg_long );
8839 %}
8840 
8841 // Add Long Register with Immediate
8842 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8843   match(Set dst (AddL dst src));
8844   effect(KILL cr);
8845   format %{ "ADD    $dst.lo,$src.lo\n\t"
8846             "ADC    $dst.hi,$src.hi" %}
8847   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8848   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8849   ins_pipe( ialu_reg_long );
8850 %}
8851 
8852 // Add Long Register with Memory
8853 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8854   match(Set dst (AddL dst (LoadL mem)));
8855   effect(KILL cr);
8856   ins_cost(125);
8857   format %{ "ADD    $dst.lo,$mem\n\t"
8858             "ADC    $dst.hi,$mem+4" %}
8859   opcode(0x03, 0x13);
8860   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8861   ins_pipe( ialu_reg_long_mem );
8862 %}
8863 
8864 // Subtract Long Register with Register.
8865 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8866   match(Set dst (SubL dst src));
8867   effect(KILL cr);
8868   ins_cost(200);
8869   format %{ "SUB    $dst.lo,$src.lo\n\t"
8870             "SBB    $dst.hi,$src.hi" %}
8871   opcode(0x2B, 0x1B);
8872   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8873   ins_pipe( ialu_reg_reg_long );
8874 %}
8875 
8876 // Subtract Long Register with Immediate
8877 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8878   match(Set dst (SubL dst src));
8879   effect(KILL cr);
8880   format %{ "SUB    $dst.lo,$src.lo\n\t"
8881             "SBB    $dst.hi,$src.hi" %}
8882   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8883   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8884   ins_pipe( ialu_reg_long );
8885 %}
8886 
8887 // Subtract Long Register with Memory
8888 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8889   match(Set dst (SubL dst (LoadL mem)));
8890   effect(KILL cr);
8891   ins_cost(125);
8892   format %{ "SUB    $dst.lo,$mem\n\t"
8893             "SBB    $dst.hi,$mem+4" %}
8894   opcode(0x2B, 0x1B);
8895   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8896   ins_pipe( ialu_reg_long_mem );
8897 %}
8898 
8899 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
       // 64-bit negate in place, matched as (0 - dst). Per the format string:
       // NEG the high word, NEG the low word (which sets carry when the low word
       // was non-zero), then SBB hi,0 to take that borrow out of the high word.
       // The bytes themselves come from the neg_long enc_class defined elsewhere.
8900   match(Set dst (SubL zero dst));
8901   effect(KILL cr);
8902   ins_cost(300);
8903   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8904   ins_encode( neg_long(dst) );
8905   ins_pipe( ialu_reg_reg_long );
8906 %}
8907 
8908 // And Long Register with Register: dst = dst & src, done independently on the
// low and high 32-bit halves with AND (0x23).  EFLAGS is declared killed.
8909 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8910   match(Set dst (AndL dst src));
8911   effect(KILL cr);
8912   format %{ "AND    $dst.lo,$src.lo\n\t"
8913             "AND    $dst.hi,$src.hi" %}
8914   opcode(0x23,0x23);
8915   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8916   ins_pipe( ialu_reg_reg_long );
8917 %}
8918 
8919 // And Long Register with Immediate: dst = dst & src for a 64-bit constant.
// Each 32-bit half of the constant is ANDed into the matching half of dst
// using the 81 /4 form.  EFLAGS is declared killed.
8920 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8921   match(Set dst (AndL dst src));
8922   effect(KILL cr);
8923   format %{ "AND    $dst.lo,$src.lo\n\t"
8924             "AND    $dst.hi,$src.hi" %}
8925   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8926   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8927   ins_pipe( ialu_reg_long );
8928 %}
8929 
8930 // And Long Register with Memory: dst = dst & LoadL(mem).
// The low word at $mem and the high word at $mem+4 are each ANDed (0x23) into
// the matching half of dst.  EFLAGS is declared killed.
8931 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8932   match(Set dst (AndL dst (LoadL mem)));
8933   effect(KILL cr);
8934   ins_cost(125);
8935   format %{ "AND    $dst.lo,$mem\n\t"
8936             "AND    $dst.hi,$mem+4" %}
8937   opcode(0x23, 0x23);
8938   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8939   ins_pipe( ialu_reg_long_mem );
8940 %}
8941 
8942 // BMI1 instructions
// Long and-not, register form: dst = (src1 ^ -1) & src2, i.e. ~src1 & src2.
// Only selected when UseBMI1Instructions is set.  One 32-bit ANDN is emitted for
// the low halves and one for the high halves.
// dst is declared TEMP in addition to being the result, and EFLAGS is killed.
8943 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8944   match(Set dst (AndL (XorL src1 minus_1) src2));
8945   predicate(UseBMI1Instructions);
8946   effect(KILL cr, TEMP dst);
8947 
8948   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8949             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8950          %}
8951 
8952   ins_encode %{
8953     Register Rdst = $dst$$Register;
8954     Register Rsrc1 = $src1$$Register;
8955     Register Rsrc2 = $src2$$Register;
    // Low halves first, then the high halves obtained via HIGH_FROM_LOW.
8956     __ andnl(Rdst, Rsrc1, Rsrc2);
8957     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8958   %}
8959   ins_pipe(ialu_reg_reg_long);
8960 %}
8961 
// Long and-not, memory form: dst = ~src1 & LoadL(src2).
// Only selected when UseBMI1Instructions is set.  The low word is read from the
// matched address and the high word from the same address with disp + 4.
// NOTE(review): the other long-with-memory instructs in this section take a
// load_long_memory operand, while this one takes plain memory and rebuilds the
// high-word address with relocInfo::none -- confirm that is intended.
8962 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8963   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8964   predicate(UseBMI1Instructions);
8965   effect(KILL cr, TEMP dst);
8966 
8967   ins_cost(125);
8968   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8969             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8970          %}
8971 
8972   ins_encode %{
8973     Register Rdst = $dst$$Register;
8974     Register Rsrc1 = $src1$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
8975     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8976 
8977     __ andnl(Rdst, Rsrc1, $src2$$Address);
8978     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8979   %}
8980   ins_pipe(ialu_reg_mem);
8981 %}
8982 
// Long "isolate lowest set bit", register form: dst = (0 - src) & src.
// Only selected when UseBMI1Instructions is set.
// dst.hi is cleared first, then BLSI runs on the low half.  If that result is
// non-zero the answer lies entirely in the low word and the high word stays 0;
// otherwise BLSI is run on the high half as well.
// dst is declared TEMP and EFLAGS is killed.
8983 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8984   match(Set dst (AndL (SubL imm_zero src) src));
8985   predicate(UseBMI1Instructions);
8986   effect(KILL cr, TEMP dst);
8987 
8988   format %{ "MOVL   $dst.hi, 0\n\t"
8989             "BLSIL  $dst.lo, $src.lo\n\t"
8990             "JNZ    done\n\t"
8991             "BLSIL  $dst.hi, $src.hi\n"
8992             "done:"
8993          %}
8994 
8995   ins_encode %{
8996     Label done;
8997     Register Rdst = $dst$$Register;
8998     Register Rsrc = $src$$Register;
8999     __ movl(HIGH_FROM_LOW(Rdst), 0);
9000     __ blsil(Rdst, Rsrc);
    // Non-zero low result: skip the high-half BLSI.
9001     __ jccb(Assembler::notZero, done);
9002     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9003     __ bind(done);
9004   %}
9005   ins_pipe(ialu_reg);
9006 %}
9007 
// Long "isolate lowest set bit", memory form: dst = (0 - LoadL(src)) & LoadL(src).
// Only selected when UseBMI1Instructions is set.  Same sequence as the register
// form, reading the low word at the matched address and the high word at disp + 4.
// NOTE(review): uses a plain memory operand (not load_long_memory) and builds the
// high-word address with relocInfo::none -- confirm that is intended.
9008 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9009   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9010   predicate(UseBMI1Instructions);
9011   effect(KILL cr, TEMP dst);
9012 
9013   ins_cost(125);
9014   format %{ "MOVL   $dst.hi, 0\n\t"
9015             "BLSIL  $dst.lo, $src\n\t"
9016             "JNZ    done\n\t"
9017             "BLSIL  $dst.hi, $src+4\n"
9018             "done:"
9019          %}
9020 
9021   ins_encode %{
9022     Label done;
9023     Register Rdst = $dst$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
9024     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9025 
9026     __ movl(HIGH_FROM_LOW(Rdst), 0);
9027     __ blsil(Rdst, $src$$Address);
    // Non-zero low result: the high word stays 0.
9028     __ jccb(Assembler::notZero, done);
9029     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9030     __ bind(done);
9031   %}
9032   ins_pipe(ialu_reg_mem);
9033 %}
9034 
// Long "mask up to lowest set bit", register form: dst = (src + -1) ^ src.
// Only selected when UseBMI1Instructions is set.
// dst.hi is cleared, then BLSMSK runs on the low half.  If carry is clear after
// that, the sequence is finished and the high word stays 0; otherwise BLSMSK is
// also applied to the high half.
// dst is declared TEMP and EFLAGS is killed.
9035 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9036 %{
9037   match(Set dst (XorL (AddL src minus_1) src));
9038   predicate(UseBMI1Instructions);
9039   effect(KILL cr, TEMP dst);
9040 
9041   format %{ "MOVL    $dst.hi, 0\n\t"
9042             "BLSMSKL $dst.lo, $src.lo\n\t"
9043             "JNC     done\n\t"
9044             "BLSMSKL $dst.hi, $src.hi\n"
9045             "done:"
9046          %}
9047 
9048   ins_encode %{
9049     Label done;
9050     Register Rdst = $dst$$Register;
9051     Register Rsrc = $src$$Register;
9052     __ movl(HIGH_FROM_LOW(Rdst), 0);
9053     __ blsmskl(Rdst, Rsrc);
    // Carry clear: the low half was sufficient, keep dst.hi == 0.
9054     __ jccb(Assembler::carryClear, done);
9055     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9056     __ bind(done);
9057   %}
9058 
9059   ins_pipe(ialu_reg);
9060 %}
9061 
// Long "mask up to lowest set bit", memory form:
// dst = (LoadL(src) + -1) ^ LoadL(src).
// Only selected when UseBMI1Instructions is set.  Same sequence as the register
// form, reading the low word at the matched address and the high word at disp + 4.
// NOTE(review): uses a plain memory operand (not load_long_memory) and builds the
// high-word address with relocInfo::none -- confirm that is intended.
9062 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9063 %{
9064   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9065   predicate(UseBMI1Instructions);
9066   effect(KILL cr, TEMP dst);
9067 
9068   ins_cost(125);
9069   format %{ "MOVL    $dst.hi, 0\n\t"
9070             "BLSMSKL $dst.lo, $src\n\t"
9071             "JNC     done\n\t"
9072             "BLSMSKL $dst.hi, $src+4\n"
9073             "done:"
9074          %}
9075 
9076   ins_encode %{
9077     Label done;
9078     Register Rdst = $dst$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
9079     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9080 
9081     __ movl(HIGH_FROM_LOW(Rdst), 0);
9082     __ blsmskl(Rdst, $src$$Address);
    // Carry clear: the low half was sufficient, keep dst.hi == 0.
9083     __ jccb(Assembler::carryClear, done);
9084     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9085     __ bind(done);
9086   %}
9087 
9088   ins_pipe(ialu_reg_mem);
9089 %}
9090 
// Long "reset lowest set bit", register form: dst = (src + -1) & src.
// Only selected when UseBMI1Instructions is set.
// dst.hi starts as a copy of src.hi, then BLSR runs on the low half.  If carry
// is clear after that, the high word is left as the unmodified copy; otherwise
// BLSR is also applied to the high half.
// dst is declared TEMP and EFLAGS is killed.
9091 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9092 %{
9093   match(Set dst (AndL (AddL src minus_1) src) );
9094   predicate(UseBMI1Instructions);
9095   effect(KILL cr, TEMP dst);
9096 
9097   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9098             "BLSRL  $dst.lo, $src.lo\n\t"
9099             "JNC    done\n\t"
9100             "BLSRL  $dst.hi, $src.hi\n"
9101             "done:"
9102   %}
9103 
9104   ins_encode %{
9105     Label done;
9106     Register Rdst = $dst$$Register;
9107     Register Rsrc = $src$$Register;
9108     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9109     __ blsrl(Rdst, Rsrc);
    // Carry clear: the bit was cleared in the low half, high half is unchanged.
9110     __ jccb(Assembler::carryClear, done);
9111     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9112     __ bind(done);
9113   %}
9114 
9115   ins_pipe(ialu_reg);
9116 %}
9117 
// Long "reset lowest set bit", memory form:
// dst = (LoadL(src) + -1) & LoadL(src).
// Only selected when UseBMI1Instructions is set.  Same sequence as the register
// form; the high word is read from the matched address with disp + 4.
// NOTE(review): uses a plain memory operand (not load_long_memory) and builds the
// high-word address with relocInfo::none -- confirm that is intended.
9118 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9119 %{
9120   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9121   predicate(UseBMI1Instructions);
9122   effect(KILL cr, TEMP dst);
9123 
9124   ins_cost(125);
9125   format %{ "MOVL   $dst.hi, $src+4\n\t"
9126             "BLSRL  $dst.lo, $src\n\t"
9127             "JNC    done\n\t"
9128             "BLSRL  $dst.hi, $src+4\n"
9129             "done:"
9130   %}
9131 
9132   ins_encode %{
9133     Label done;
9134     Register Rdst = $dst$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
9135     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9136     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9137     __ blsrl(Rdst, $src$$Address);
    // Carry clear: the bit was cleared in the low half, high half is unchanged.
9138     __ jccb(Assembler::carryClear, done);
9139     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9140     __ bind(done);
9141   %}
9142 
9143   ins_pipe(ialu_reg_mem);
9144 %}
9145 
9146 // Or Long Register with Register: dst = dst | src, done independently on the
// low and high 32-bit halves with OR (0x0B).  EFLAGS is declared killed.
9147 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9148   match(Set dst (OrL dst src));
9149   effect(KILL cr);
9150   format %{ "OR     $dst.lo,$src.lo\n\t"
9151             "OR     $dst.hi,$src.hi" %}
9152   opcode(0x0B,0x0B);
9153   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9154   ins_pipe( ialu_reg_reg_long );
9155 %}
9156 
9157 // Or Long Register with Immediate: dst = dst | src for a 64-bit constant.
// Each 32-bit half of the constant is ORed into the matching half of dst using
// the 81 /1 form.  EFLAGS is declared killed.
9158 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9159   match(Set dst (OrL dst src));
9160   effect(KILL cr);
9161   format %{ "OR     $dst.lo,$src.lo\n\t"
9162             "OR     $dst.hi,$src.hi" %}
9163   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9164   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9165   ins_pipe( ialu_reg_long );
9166 %}
9167 
9168 // Or Long Register with Memory: dst = dst | LoadL(mem).
// The low word at $mem and the high word at $mem+4 are each ORed (0x0B) into the
// matching half of dst.  EFLAGS is declared killed.
9169 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9170   match(Set dst (OrL dst (LoadL mem)));
9171   effect(KILL cr);
9172   ins_cost(125);
9173   format %{ "OR     $dst.lo,$mem\n\t"
9174             "OR     $dst.hi,$mem+4" %}
9175   opcode(0x0B,0x0B);
9176   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9177   ins_pipe( ialu_reg_long_mem );
9178 %}
9179 
9180 // Xor Long Register with Register: dst = dst ^ src, done independently on the
// low and high 32-bit halves with XOR (0x33).  EFLAGS is declared killed.
9181 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9182   match(Set dst (XorL dst src));
9183   effect(KILL cr);
9184   format %{ "XOR    $dst.lo,$src.lo\n\t"
9185             "XOR    $dst.hi,$src.hi" %}
9186   opcode(0x33,0x33);
9187   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9188   ins_pipe( ialu_reg_reg_long );
9189 %}
9190 
9191 // Xor Long Register with Immediate -1: dst = dst ^ -1, i.e. bitwise complement.
// Emitted as a NOT on each 32-bit half.  Unlike the other XorL forms in this
// section, no flags register operand is declared and nothing is killed.
9192 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9193   match(Set dst (XorL dst imm));
9194   format %{ "NOT    $dst.lo\n\t"
9195             "NOT    $dst.hi" %}
9196   ins_encode %{
9197      __ notl($dst$$Register);
9198      __ notl(HIGH_FROM_LOW($dst$$Register));
9199   %}
9200   ins_pipe( ialu_reg_long );
9201 %}
9202 
9203 // Xor Long Register with Immediate: dst = dst ^ src for a 64-bit constant.
// Each 32-bit half of the constant is XORed into the matching half of dst using
// the 81 /6 form.  EFLAGS is declared killed.
9204 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9205   match(Set dst (XorL dst src));
9206   effect(KILL cr);
9207   format %{ "XOR    $dst.lo,$src.lo\n\t"
9208             "XOR    $dst.hi,$src.hi" %}
9209   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9210   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9211   ins_pipe( ialu_reg_long );
9212 %}
9213 
9214 // Xor Long Register with Memory: dst = dst ^ LoadL(mem).
// The low word at $mem and the high word at $mem+4 are each XORed (0x33) into
// the matching half of dst.  EFLAGS is declared killed.
9215 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9216   match(Set dst (XorL dst (LoadL mem)));
9217   effect(KILL cr);
9218   ins_cost(125);
9219   format %{ "XOR    $dst.lo,$mem\n\t"
9220             "XOR    $dst.hi,$mem+4" %}
9221   opcode(0x33,0x33);
9222   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9223   ins_pipe( ialu_reg_long_mem );
9224 %}
9225 
9226 // Shift Left Long by 1, implemented as dst = dst + dst.
// Only selected when UseNewLongLShift is set.  ADD doubles the low half and ADC
// doubles the high half while adding the carry out of the low half.
// EFLAGS is declared killed.
9227 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9228   predicate(UseNewLongLShift);
9229   match(Set dst (LShiftL dst cnt));
9230   effect(KILL cr);
9231   ins_cost(100);
9232   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9233             "ADC    $dst.hi,$dst.hi" %}
9234   ins_encode %{
9235     __ addl($dst$$Register,$dst$$Register);
9236     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9237   %}
9238   ins_pipe( ialu_reg_long );
9239 %}
9240 
9241 // Shift Left Long by 2, implemented as two successive 64-bit doublings.
// Only selected when UseNewLongLShift is set.  Each doubling is an ADD on the
// low half followed by an ADC on the high half.  EFLAGS is declared killed.
9242 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9243   predicate(UseNewLongLShift);
9244   match(Set dst (LShiftL dst cnt));
9245   effect(KILL cr);
9246   ins_cost(100);
9247   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9248             "ADC    $dst.hi,$dst.hi\n\t"
9249             "ADD    $dst.lo,$dst.lo\n\t"
9250             "ADC    $dst.hi,$dst.hi" %}
9251   ins_encode %{
9252     __ addl($dst$$Register,$dst$$Register);
9253     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9254     __ addl($dst$$Register,$dst$$Register);
9255     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9256   %}
9257   ins_pipe( ialu_reg_long );
9258 %}
9259 
9260 // Shift Left Long by 3, implemented as three successive 64-bit doublings.
// Only selected when UseNewLongLShift is set.  Each doubling is an ADD on the
// low half followed by an ADC on the high half.  EFLAGS is declared killed.
9261 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9262   predicate(UseNewLongLShift);
9263   match(Set dst (LShiftL dst cnt));
9264   effect(KILL cr);
9265   ins_cost(100);
9266   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9267             "ADC    $dst.hi,$dst.hi\n\t"
9268             "ADD    $dst.lo,$dst.lo\n\t"
9269             "ADC    $dst.hi,$dst.hi\n\t"
9270             "ADD    $dst.lo,$dst.lo\n\t"
9271             "ADC    $dst.hi,$dst.hi" %}
9272   ins_encode %{
9273     __ addl($dst$$Register,$dst$$Register);
9274     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9275     __ addl($dst$$Register,$dst$$Register);
9276     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9277     __ addl($dst$$Register,$dst$$Register);
9278     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9279   %}
9280   ins_pipe( ialu_reg_long );
9281 %}
9282 
9283 // Shift Left Long by a constant in 1-31.
// Per the format, SHLD shifts the high half left while pulling bits in from the
// low half, then SHL shifts the low half.  EFLAGS is declared killed.
9284 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9285   match(Set dst (LShiftL dst cnt));
9286   effect(KILL cr);
9287   ins_cost(200);
9288   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9289             "SHL    $dst.lo,$cnt" %}
9290   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9291   ins_encode( move_long_small_shift(dst,cnt) );
9292   ins_pipe( ialu_reg_long );
9293 %}
9294 
9295 // Shift Left Long by a constant in 32-63.
// Per the format, the low half is moved into the high half, shifted left by
// (cnt - 32), and the low half is then zeroed.  EFLAGS is declared killed.
9296 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9297   match(Set dst (LShiftL dst cnt));
9298   effect(KILL cr);
9299   ins_cost(300);
9300   format %{ "MOV    $dst.hi,$dst.lo\n"
9301           "\tSHL    $dst.hi,$cnt-32\n"
9302           "\tXOR    $dst.lo,$dst.lo" %}
9303   opcode(0xC1, 0x4);  /* C1 /4 ib */
9304   ins_encode( move_long_big_shift_clr(dst,cnt) );
9305   ins_pipe( ialu_reg_long );
9306 %}
9307 
9308 // Shift Left Long by a variable count held in an eCXRegI operand.
// Per the format, bit 5 of the count is tested first: if it is set, the low half
// is moved into the high half and the low half is zeroed; the SHLD/SHL pair then
// applies the count.  The encoding size is fixed at 17 bytes.
// EFLAGS is declared killed.
9309 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9310   match(Set dst (LShiftL dst shift));
9311   effect(KILL cr);
9312   ins_cost(500+200);
9313   size(17);
9314   format %{ "TEST   $shift,32\n\t"
9315             "JEQ,s  small\n\t"
9316             "MOV    $dst.hi,$dst.lo\n\t"
9317             "XOR    $dst.lo,$dst.lo\n"
9318     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9319             "SHL    $dst.lo,$shift" %}
9320   ins_encode( shift_left_long( dst, shift ) );
9321   ins_pipe( pipe_slow );
9322 %}
9323 
9324 // Shift Right logical (unsigned) Long by a constant in 1-31.
// Per the format, SHRD shifts the low half right while pulling bits in from the
// high half, then SHR shifts the high half.  EFLAGS is declared killed.
9325 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9326   match(Set dst (URShiftL dst cnt));
9327   effect(KILL cr);
9328   ins_cost(200);
9329   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9330             "SHR    $dst.hi,$cnt" %}
9331   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9332   ins_encode( move_long_small_shift(dst,cnt) );
9333   ins_pipe( ialu_reg_long );
9334 %}
9335 
9336 // Shift Right logical (unsigned) Long by a constant in 32-63.
// Per the format, the high half is moved into the low half, shifted right by
// (cnt - 32), and the high half is then zeroed.  EFLAGS is declared killed.
9337 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9338   match(Set dst (URShiftL dst cnt));
9339   effect(KILL cr);
9340   ins_cost(300);
9341   format %{ "MOV    $dst.lo,$dst.hi\n"
9342           "\tSHR    $dst.lo,$cnt-32\n"
9343           "\tXOR    $dst.hi,$dst.hi" %}
9344   opcode(0xC1, 0x5);  /* C1 /5 ib */
9345   ins_encode( move_long_big_shift_clr(dst,cnt) );
9346   ins_pipe( ialu_reg_long );
9347 %}
9348 
9349 // Shift Right logical (unsigned) Long by a variable count held in an eCXRegI operand.
// Per the format, bit 5 of the count is tested first: if it is set, the high half
// is moved into the low half and the high half is zeroed; the SHRD/SHR pair then
// applies the count.  The encoding size is fixed at 17 bytes.
// EFLAGS is declared killed.
9350 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9351   match(Set dst (URShiftL dst shift));
9352   effect(KILL cr);
9353   ins_cost(600);
9354   size(17);
9355   format %{ "TEST   $shift,32\n\t"
9356             "JEQ,s  small\n\t"
9357             "MOV    $dst.lo,$dst.hi\n\t"
9358             "XOR    $dst.hi,$dst.hi\n"
9359     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9360             "SHR    $dst.hi,$shift" %}
9361   ins_encode( shift_right_long( dst, shift ) );
9362   ins_pipe( pipe_slow );
9363 %}
9364 
9365 // Shift Right arithmetic (signed) Long by a constant in 1-31.
// Per the format, SHRD shifts the low half right while pulling bits in from the
// high half, then SAR shifts the high half keeping its sign.
// EFLAGS is declared killed.
9366 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9367   match(Set dst (RShiftL dst cnt));
9368   effect(KILL cr);
9369   ins_cost(200);
9370   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9371             "SAR    $dst.hi,$cnt" %}
9372   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9373   ins_encode( move_long_small_shift(dst,cnt) );
9374   ins_pipe( ialu_reg_long );
9375 %}
9376 
9377 // Shift Right arithmetic (signed) Long by a constant in 32-63.
// Per the format, the high half is moved into the low half and shifted right
// arithmetically by (cnt - 32); the high half is then filled with the sign bit
// (SAR hi,31).  EFLAGS is declared killed.
9378 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9379   match(Set dst (RShiftL dst cnt));
9380   effect(KILL cr);
9381   ins_cost(300);
9382   format %{ "MOV    $dst.lo,$dst.hi\n"
9383           "\tSAR    $dst.lo,$cnt-32\n"
9384           "\tSAR    $dst.hi,31" %}
9385   opcode(0xC1, 0x7);  /* C1 /7 ib */
9386   ins_encode( move_long_big_shift_sign(dst,cnt) );
9387   ins_pipe( ialu_reg_long );
9388 %}
9389 
9390 // Shift Right arithmetic Long by a variable count held in an eCXRegI operand.
// Per the format, bit 5 of the count is tested first: if it is set, the high half
// is moved into the low half and the high half is sign-filled (SAR hi,31); the
// SHRD/SAR pair then applies the count.  The encoding size is fixed at 18 bytes.
// EFLAGS is declared killed.
9391 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9392   match(Set dst (RShiftL dst shift));
9393   effect(KILL cr);
9394   ins_cost(600);
9395   size(18);
9396   format %{ "TEST   $shift,32\n\t"
9397             "JEQ,s  small\n\t"
9398             "MOV    $dst.lo,$dst.hi\n\t"
9399             "SAR    $dst.hi,31\n"
9400     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9401             "SAR    $dst.hi,$shift" %}
9402   ins_encode( shift_right_arith_long( dst, shift ) );
9403   ins_pipe( pipe_slow );
9404 %}
9405 
9406 
9407 //----------Double Instructions------------------------------------------------
9408 // Double Math
9409 
9410 // Compare & branch
9411 
9412 // P6 version of double compare (x87), sets condition codes in EFLAGS.
// Selected when CMOV is supported and UseSSE <= 1.  src1 is pushed and compared
// against src2 with FUCOMIP (DF E8+i).  Per the format, when the parity flag
// reports an unordered (NaN) result, AH is loaded with 1 and SAHF sets CF;
// that fixup is why EAX is declared killed.
9413 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9414   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9415   match(Set cr (CmpD src1 src2));
9416   effect(KILL rax);
9417   ins_cost(150);
9418   format %{ "FLD    $src1\n\t"
9419             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9420             "JNP    exit\n\t"
9421             "MOV    ah,1       // saw a NaN, set CF\n\t"
9422             "SAHF\n"
9423      "exit:\tNOP               // avoid branch to branch" %}
9424   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9425   ins_encode( Push_Reg_DPR(src1),
9426               OpcP, RegOpc(src2),
9427               cmpF_P6_fixup );
9428   ins_pipe( pipe_slow );
9429 %}
9430 
// P6 double compare (x87) producing an eFlagsRegUCF result.
// Same FLD + FUCOMIP sequence as cmpDPR_cc_P6 but without the NaN fixup, so no
// EAX operand is needed and nothing is killed.
9431 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9432   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9433   match(Set cr (CmpD src1 src2));
9434   ins_cost(150);
9435   format %{ "FLD    $src1\n\t"
9436             "FUCOMIP ST,$src2  // P6 instruction" %}
9437   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9438   ins_encode( Push_Reg_DPR(src1),
9439               OpcP, RegOpc(src2));
9440   ins_pipe( pipe_slow );
9441 %}
9442 
9443 // Compare & branch: x87 double compare that sets EFLAGS (selected when UseSSE <= 1).
// src1 is pushed and compared with FCOMP (D8 D8+i).  Per the format, the FPU
// status word is then copied to AX, an unordered result is rewritten to look like
// "less than", and SAHF moves AH into EFLAGS.  EAX is declared killed.
9444 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9445   predicate(UseSSE<=1);
9446   match(Set cr (CmpD src1 src2));
9447   effect(KILL rax);
9448   ins_cost(200);
9449   format %{ "FLD    $src1\n\t"
9450             "FCOMp  $src2\n\t"
9451             "FNSTSW AX\n\t"
9452             "TEST   AX,0x400\n\t"
9453             "JZ,s   flags\n\t"
9454             "MOV    AH,1\t# unordered treat as LT\n"
9455     "flags:\tSAHF" %}
9456   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9457   ins_encode( Push_Reg_DPR(src1),
9458               OpcP, RegOpc(src2),
9459               fpu_flags);
9460   ins_pipe( pipe_slow );
9461 %}
9462 
9463 // Compare vs zero into -1,0,1 (x87 double, selected when UseSSE <= 1).
// Matches the three-way compare CmpD3 of src1 against the constant 0.0.
// src1 is pushed, then the two opcode bytes are emitted secondary-first
// (OpcS, OpcP), i.e. D9 E4 (FTST); the pushed value is popped and
// CmpF_Result materializes the integer result in dst.
// EFLAGS and EAX are declared killed.
9464 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9465   predicate(UseSSE<=1);
9466   match(Set dst (CmpD3 src1 zero));
9467   effect(KILL cr, KILL rax);
9468   ins_cost(280);
9469   format %{ "FTSTD  $dst,$src1" %}
9470   opcode(0xE4, 0xD9);
9471   ins_encode( Push_Reg_DPR(src1),
9472               OpcS, OpcP, PopFPU,
9473               CmpF_Result(dst));
9474   ins_pipe( pipe_slow );
9475 %}
9476 
9477 // Compare into -1,0,1 (x87 double, selected when UseSSE <= 1).
// Matches the three-way compare CmpD3 of two FPU registers.  src1 is pushed and
// compared against src2 with FCOMP (D8 D8+i); CmpF_Result then materializes the
// integer result in dst.  EFLAGS and EAX are declared killed.
9478 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9479   predicate(UseSSE<=1);
9480   match(Set dst (CmpD3 src1 src2));
9481   effect(KILL cr, KILL rax);
9482   ins_cost(300);
9483   format %{ "FCMPD  $dst,$src1,$src2" %}
9484   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9485   ins_encode( Push_Reg_DPR(src1),
9486               OpcP, RegOpc(src2),
9487               CmpF_Result(dst));
9488   ins_pipe( pipe_slow );
9489 %}
9490 
9491 // Double compare in XMM registers, setting condition codes in EFLAGS (UseSSE >= 2).
// UCOMISD is followed by emit_cmpfp_fixup; per the format, that fixup only acts
// when the parity flag reports a NaN operand, in which case the saved flags are
// masked on the stack and restored.
9492 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9493   predicate(UseSSE>=2);
9494   match(Set cr (CmpD src1 src2));
9495   ins_cost(145);
9496   format %{ "UCOMISD $src1,$src2\n\t"
9497             "JNP,s   exit\n\t"
9498             "PUSHF\t# saw NaN, set CF\n\t"
9499             "AND     [rsp], #0xffffff2b\n\t"
9500             "POPF\n"
9501     "exit:" %}
9502   ins_encode %{
9503     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9504     emit_cmpfp_fixup(_masm);
9505   %}
9506   ins_pipe( pipe_slow );
9507 %}
9508 
// Double compare in XMM registers producing an eFlagsRegUCF result (UseSSE >= 2).
// A bare UCOMISD with no NaN fixup, hence the lower cost than cmpD_cc.
9509 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9510   predicate(UseSSE>=2);
9511   match(Set cr (CmpD src1 src2));
9512   ins_cost(100);
9513   format %{ "UCOMISD $src1,$src2" %}
9514   ins_encode %{
9515     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9516   %}
9517   ins_pipe( pipe_slow );
9518 %}
9519 
9520 // Double compare of an XMM register against memory, setting EFLAGS (UseSSE >= 2).
// Same as cmpD_cc, but the second operand is a LoadD folded into UCOMISD.
// The NaN fixup shown in the format is emitted by emit_cmpfp_fixup.
9521 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9522   predicate(UseSSE>=2);
9523   match(Set cr (CmpD src1 (LoadD src2)));
9524   ins_cost(145);
9525   format %{ "UCOMISD $src1,$src2\n\t"
9526             "JNP,s   exit\n\t"
9527             "PUSHF\t# saw NaN, set CF\n\t"
9528             "AND     [rsp], #0xffffff2b\n\t"
9529             "POPF\n"
9530     "exit:" %}
9531   ins_encode %{
9532     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9533     emit_cmpfp_fixup(_masm);
9534   %}
9535   ins_pipe( pipe_slow );
9536 %}
9537 
// Double compare of an XMM register against memory producing an eFlagsRegUCF
// result (UseSSE >= 2).  A bare UCOMISD with a folded LoadD and no NaN fixup.
9538 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9539   predicate(UseSSE>=2);
9540   match(Set cr (CmpD src1 (LoadD src2)));
9541   ins_cost(100);
9542   format %{ "UCOMISD $src1,$src2" %}
9543   ins_encode %{
9544     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9545   %}
9546   ins_pipe( pipe_slow );
9547 %}
9548 
9549 // Compare into -1,0,1 in XMM: three-way double compare (CmpD3), UseSSE >= 2.
// UCOMISD sets the flags and emit_cmpfp3 turns them into the integer result.
// Per the format, dst starts at -1 and is kept for the unordered (JP) and below
// (JB) cases; otherwise SETNE/MOVZB produce 0 or 1.  EFLAGS is declared killed.
9550 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9551   predicate(UseSSE>=2);
9552   match(Set dst (CmpD3 src1 src2));
9553   effect(KILL cr);
9554   ins_cost(255);
9555   format %{ "UCOMISD $src1, $src2\n\t"
9556             "MOV     $dst, #-1\n\t"
9557             "JP,s    done\n\t"
9558             "JB,s    done\n\t"
9559             "SETNE   $dst\n\t"
9560             "MOVZB   $dst, $dst\n"
9561     "done:" %}
9562   ins_encode %{
9563     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9564     emit_cmpfp3(_masm, $dst$$Register);
9565   %}
9566   ins_pipe( pipe_slow );
9567 %}
9568 
9569 // Compare into -1,0,1 in XMM and memory: three-way double compare (CmpD3) of an
// XMM register against a LoadD folded into UCOMISD, UseSSE >= 2.
// emit_cmpfp3 turns the flags into the integer result as shown in the format.
// EFLAGS is declared killed.
9570 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9571   predicate(UseSSE>=2);
9572   match(Set dst (CmpD3 src1 (LoadD src2)));
9573   effect(KILL cr);
9574   ins_cost(275);
9575   format %{ "UCOMISD $src1, $src2\n\t"
9576             "MOV     $dst, #-1\n\t"
9577             "JP,s    done\n\t"
9578             "JB,s    done\n\t"
9579             "SETNE   $dst\n\t"
9580             "MOVZB   $dst, $dst\n"
9581     "done:" %}
9582   ins_encode %{
9583     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9584     emit_cmpfp3(_masm, $dst$$Register);
9585   %}
9586   ins_pipe( pipe_slow );
9587 %}
9588 
9589 
// Subtract double, x87 register form: dst = dst - src (selected when UseSSE <= 1).
// src is pushed onto the FPU stack, then the popping subtract DE E8+i (FSUBP)
// stores dst - ST into dst and pops the pushed copy.
9590 instruct subDPR_reg(regDPR dst, regDPR src) %{
9591   predicate (UseSSE <=1);
9592   match(Set dst (SubD dst src));
9593 
9594   format %{ "FLD    $src\n\t"
9595             "DSUBp  $dst,ST" %}
9596   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9597   ins_cost(150);
9598   ins_encode( Push_Reg_DPR(src),
9599               OpcP, RegOpc(dst) );
9600   ins_pipe( fpu_reg_reg );
9601 %}
9602 
// Subtract double with rounding: dst = RoundDouble(src1 - src2), UseSSE <= 1.
// The result is stored to a stack slot (stackSlotD) with a popping store, which
// is what the format labels "D-round".
// src2 is pushed, then D8 /5 with src1 (D8 E8+i, the reversed subtract) leaves
// src1 - src2 on top of the stack before it is popped to memory.
9603 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9604   predicate (UseSSE <=1);
9605   match(Set dst (RoundDouble (SubD src1 src2)));
9606   ins_cost(250);
9607 
9608   format %{ "FLD    $src2\n\t"
9609             "DSUB   ST,$src1\n\t"
9610             "FSTP_D $dst\t# D-round" %}
9611   opcode(0xD8, 0x5);
9612   ins_encode( Push_Reg_DPR(src2),
9613               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9614   ins_pipe( fpu_mem_reg_reg );
9615 %}
9616 
9617 
// Subtract double, x87 register-memory form: dst = dst - LoadD(src), UseSSE <= 1.
// The memory operand is pushed with the tertiary opcode DD /0 (64-bit FLD), then
// the popping subtract DE /5 (DE E8+i) stores dst - ST into dst.
9618 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9619   predicate (UseSSE <=1);
9620   match(Set dst (SubD dst (LoadD src)));
9621   ins_cost(150);
9622 
9623   format %{ "FLD    $src\n\t"
9624             "DSUBp  $dst,ST" %}
9625   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9626   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9627               OpcP, RegOpc(dst) );
9628   ins_pipe( fpu_reg_mem );
9629 %}
9630 
// Absolute value of a double on the x87 stack top (selected when UseSSE <= 1).
// Both operands are regDPR1, so the operation is done in place; the two opcode
// bytes are emitted secondary-first (OpcS, OpcP), i.e. D9 E1 (FABS).
9631 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9632   predicate (UseSSE<=1);
9633   match(Set dst (AbsD src));
9634   ins_cost(100);
9635   format %{ "FABS" %}
9636   opcode(0xE1, 0xD9);
9637   ins_encode( OpcS, OpcP );
9638   ins_pipe( fpu_reg_reg );
9639 %}
9640 
// Negate a double on the x87 stack top (selected when UseSSE <= 1).
// Both operands are regDPR1, so the operation is done in place; the two opcode
// bytes are emitted secondary-first (OpcS, OpcP), i.e. D9 E0 (FCHS).
9641 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9642   predicate(UseSSE<=1);
9643   match(Set dst (NegD src));
9644   ins_cost(100);
9645   format %{ "FCHS" %}
9646   opcode(0xE0, 0xD9);
9647   ins_encode( OpcS, OpcP );
9648   ins_pipe( fpu_reg_reg );
9649 %}
9650 
// Add double, x87 register form: dst = dst + src (selected when UseSSE <= 1).
// src is pushed onto the FPU stack, then the popping add DE C0+i (FADDP) adds
// ST into dst and pops the pushed copy.  The format therefore prints the
// popping mnemonic "DADDp", matching the other DE C0+i instructs in this file.
// The encoding size is fixed at 4 bytes.
9651 instruct addDPR_reg(regDPR dst, regDPR src) %{
9652   predicate(UseSSE<=1);
9653   match(Set dst (AddD dst src));
9654   format %{ "FLD    $src\n\t"
9655             "DADDp  $dst,ST" %}
9656   size(4);
9657   ins_cost(150);
9658   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9659   ins_encode( Push_Reg_DPR(src),
9660               OpcP, RegOpc(dst) );
9661   ins_pipe( fpu_reg_reg );
9662 %}
9663 
9664 
// Add double with rounding: dst = RoundDouble(src1 + src2), UseSSE <= 1.
// src2 is pushed, src1 is added into the stack top with D8 C0+i, and the result
// is popped into the stack slot dst, which is what the format labels "D-round".
9665 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9666   predicate(UseSSE<=1);
9667   match(Set dst (RoundDouble (AddD src1 src2)));
9668   ins_cost(250);
9669 
9670   format %{ "FLD    $src2\n\t"
9671             "DADD   ST,$src1\n\t"
9672             "FSTP_D $dst\t# D-round" %}
9673   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9674   ins_encode( Push_Reg_DPR(src2),
9675               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9676   ins_pipe( fpu_mem_reg_reg );
9677 %}
9678 
9679 
// Add double, x87 register-memory form: dst = dst + LoadD(src), UseSSE <= 1.
// The memory operand is pushed with the tertiary opcode DD /0 (64-bit FLD), then
// the popping add DE C0+i adds ST into dst.
9680 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9681   predicate(UseSSE<=1);
9682   match(Set dst (AddD dst (LoadD src)));
9683   ins_cost(150);
9684 
9685   format %{ "FLD    $src\n\t"
9686             "DADDp  $dst,ST" %}
9687   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9688   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9689               OpcP, RegOpc(dst) );
9690   ins_pipe( fpu_reg_mem );
9691 %}
9692 
9693 // add-to-memory: [dst] = RoundDouble([dst] + src) for a double in memory (UseSSE <= 1).
// The memory value is pushed with DD /0, src is added into the stack top with
// D8 C0+i, and the sum is written back with DD /3.  DD /3 is the popping store
// (FSTP m64fp), so the format prints "FSTP_D" rather than "FST_D".
// set_instruction_start is emitted just before the store.
9694 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9695   predicate(UseSSE<=1);
9696   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9697   ins_cost(150);
9698 
9699   format %{ "FLD_D  $dst\n\t"
9700             "DADD   ST,$src\n\t"
9701             "FSTP_D $dst" %}
9702   opcode(0xDD, 0x0);
9703   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9704               Opcode(0xD8), RegOpc(src),
9705               set_instruction_start,
9706               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9707   ins_pipe( fpu_reg_mem );
9708 %}
9709 
// Add the double constant 1.0 (immDPR1) to an x87 register: dst = dst + 1.0.
// Selected when UseSSE <= 1.  FLD1 pushes the constant without a memory load,
// then FADDP adds it into dst and pops.
9710 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9711   predicate(UseSSE<=1);
9712   match(Set dst (AddD dst con));
9713   ins_cost(125);
9714   format %{ "FLD1\n\t"
9715             "DADDp  $dst,ST" %}
9716   ins_encode %{
9717     __ fld1();
9718     __ faddp($dst$$reg);
9719   %}
9720   ins_pipe(fpu_reg);
9721 %}
9722 
// Add a double constant to an x87 register: dst = dst + con (UseSSE <= 1).
// The predicate rejects the constants 0.0 and 1.0, so this rule only covers other
// values.  The constant is loaded from the constant table with FLD, then FADDP
// adds it into dst and pops.
9723 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9724   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9725   match(Set dst (AddD dst con));
9726   ins_cost(200);
9727   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9728             "DADDp  $dst,ST" %}
9729   ins_encode %{
9730     __ fld_d($constantaddress($con));
9731     __ faddp($dst$$reg);
9732   %}
9733   ins_pipe(fpu_reg_mem);
9734 %}
9735 
// Add a double constant with rounding: dst = RoundDouble(src + con), UseSSE <= 1.
// The predicate rejects the constants 0.0 and 1.0; the constant sits one level
// deeper in the match tree here (_kids[0]->_kids[1]) because of the RoundDouble.
// The constant is loaded from the constant table, src is added into the stack
// top, and the result is popped into the rsp-relative stack slot dst.
9736 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9737   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9738   match(Set dst (RoundDouble (AddD src con)));
9739   ins_cost(200);
9740   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9741             "DADD   ST,$src\n\t"
9742             "FSTP_D $dst\t# D-round" %}
9743   ins_encode %{
9744     __ fld_d($constantaddress($con));
9745     __ fadd($src$$reg);
9746     __ fstp_d(Address(rsp, $dst$$disp));
9747   %}
9748   ins_pipe(fpu_mem_reg_con);
9749 %}
9750 
// Multiply double, x87 register form: dst = dst * src (selected when UseSSE <= 1).
// src is pushed onto the FPU stack, then the popping multiply DE C8+i multiplies
// dst by ST and pops the pushed copy.
9751 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9752   predicate(UseSSE<=1);
9753   match(Set dst (MulD dst src));
9754   format %{ "FLD    $src\n\t"
9755             "DMULp  $dst,ST" %}
9756   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9757   ins_cost(150);
9758   ins_encode( Push_Reg_DPR(src),
9759               OpcP, RegOpc(dst) );
9760   ins_pipe( fpu_reg_reg );
9761 %}
9762 
9763 // Strict FP instruction biases argument before multiply then
9764 // biases result to avoid double rounding of subnormals.
9765 //
9766 // scale arg1 by multiplying arg1 by 2^(-15360)
9767 // load arg2
9768 // multiply scaled arg1 by arg2
9769 // rescale product by 2^(15360)
9770 //
// Only selected for UseSSE <= 1 when the current compilation has a method and
// that method is strict.  dst is pinned to regDPR1 and src to regnotDPR1.
// The cost of 1 makes this rule win over the plain mulDPR_reg rule.
9771 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9772   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9773   match(Set dst (MulD dst src));
9774   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9775 
9776   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9777             "DMULp  $dst,ST\n\t"
9778             "FLD    $src\n\t"
9779             "DMULp  $dst,ST\n\t"
9780             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9781             "DMULp  $dst,ST\n\t" %}
9782   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9783   ins_encode( strictfp_bias1(dst),
9784               Push_Reg_DPR(src),
9785               OpcP, RegOpc(dst),
9786               strictfp_bias2(dst) );
9787   ins_pipe( fpu_reg_reg );
9788 %}
9789 
// Multiply an x87 register by a double constant: dst = dst * con (UseSSE <= 1).
// The predicate rejects the constants 0.0 and 1.0.  The constant is loaded from
// the constant table with FLD, then FMULP multiplies it into dst and pops.
9790 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9791   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9792   match(Set dst (MulD dst con));
9793   ins_cost(200);
9794   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9795             "DMULp  $dst,ST" %}
9796   ins_encode %{
9797     __ fld_d($constantaddress($con));
9798     __ fmulp($dst$$reg);
9799   %}
9800   ins_pipe(fpu_reg_mem);
9801 %}
9802 
9803 
// Multiply double, x87 register-memory form: dst = dst * LoadD(src), UseSSE <= 1.
// The memory operand is pushed with the tertiary opcode DD /0 (64-bit FLD), then
// the popping multiply DE C8+i multiplies dst by ST.
9804 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9805   predicate( UseSSE<=1 );
9806   match(Set dst (MulD dst (LoadD src)));
9807   ins_cost(200);
9808   format %{ "FLD_D  $src\n\t"
9809             "DMULp  $dst,ST" %}
9810   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9811   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9812               OpcP, RegOpc(dst) );
9813   ins_pipe( fpu_reg_mem );
9814 %}
9815 
9816 //
9817 // Cisc-alternate to reg-reg multiply: dst = src * LoadD(mem), UseSSE <= 1.
// The memory operand is pushed, multiplied by src on the stack top, and the
// product is popped into dst.
// The operand is a 64-bit double (LoadD, printed as FLD_D), so the tertiary
// load opcode must be DD /0 (FLD m64fp).  D9 /0 is FLD m32fp and would read the
// memory as a 32-bit float.
9818 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9819   predicate( UseSSE<=1 );
9820   match(Set dst (MulD src (LoadD mem)));
9821   ins_cost(250);
9822   format %{ "FLD_D  $mem\n\t"
9823             "DMUL   ST,$src\n\t"
9824             "FSTP_D $dst" %}
9825   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
9826   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9827               OpcReg_FPR(src),
9828               Pop_Reg_DPR(dst) );
9829   ins_pipe( fpu_reg_reg_mem );
9830 %}
9831 
9832 
9833 // MACRO3 -- addDPR a mulDPR
9834 // This instruction is a '2-address' instruction in that the result goes
9835 // back to src2.  This eliminates a move from the macro; possibly the
9836 // register allocator will have to add it back (and maybe not).
9837 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
       // Matches src2 = (src0 * src1) + src2 as one rule; src2 is both an
       // input and the destination.
9838   predicate( UseSSE<=1 );
9839   match(Set src2 (AddD (MulD src0 src1) src2));
9840   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9841             "DMUL   ST,$src1\n\t"
9842             "DADDp  $src2,ST" %}
9843   ins_cost(250);
       // NOTE(review): the ins_encode list below does not reference OpcP, so this
       // opcode() declaration looks unused -- confirm against the enc_class bodies.
9844   opcode(0xDD); /* LoadD DD /0 */
9845   ins_encode( Push_Reg_FPR(src0),
9846               FMul_ST_reg(src1),
9847               FAddP_reg_ST(src2) );
9848   ins_pipe( fpu_reg_reg_reg );
9849 %}
9850 
9851 
9852 // MACRO3 -- subDPR a mulDPR
9853 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
       // Matches src2 = (src0 * src1) - src2 as one rule; src2 is both an
       // input and the destination.
9854   predicate( UseSSE<=1 );
9855   match(Set src2 (SubD (MulD src0 src1) src2));
9856   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9857             "DMUL   ST,$src1\n\t"
9858             "DSUBRp $src2,ST" %}
9859   ins_cost(250);
       // No opcode() clause: the final subtract is emitted from the literal
       // bytes passed to Opcode()/Opc_plus() below.
9860   ins_encode( Push_Reg_FPR(src0),
9861               FMul_ST_reg(src1),
9862               Opcode(0xDE), Opc_plus(0xE0,src2));
9863   ins_pipe( fpu_reg_reg_reg );
9864 %}
9865 
9866 
9867 instruct divDPR_reg(regDPR dst, regDPR src) %{
       // Two-address double divide: dst = dst / src.
9868   predicate( UseSSE<=1 );
9869   match(Set dst (DivD dst src));
9870 
9871   format %{ "FLD    $src\n\t"
9872             "FDIVp  $dst,ST" %}
9873   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9874   ins_cost(150);  // strictfp_divDPR_reg matches the same tree at a much lower cost
9875   ins_encode( Push_Reg_DPR(src),
9876               OpcP, RegOpc(dst) );
9877   ins_pipe( fpu_reg_reg );
9878 %}
9879 
9880 // Strict FP instruction biases argument before division then
9881 // biases result, to avoid double rounding of subnormals.
9882 //
9883 // scale dividend by multiplying dividend by 2^(-15360)
9884 // load divisor
9885 // divide scaled dividend by divisor
9886 // rescale quotient by 2^(15360)
9887 //
9888 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
       // Strict-FP double divide: dst = dst / src, bracketed by the two
       // subnormal-bias multiplies shown in the format below.
       // Exactly one predicate clause is kept; the rule must apply only to
       // strict methods, otherwise its cost of 1 would make it win over
       // divDPR_reg for every double divide.
9890   match(Set dst (DivD dst src));
9891   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9892   ins_cost(1);   // Select this instruction for all strict FP double divides
9893 
9894   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9895             "DMULp  $dst,ST\n\t"
9896             "FLD    $src\n\t"
9897             "FDIVp  $dst,ST\n\t"
9898             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9899             "DMULp  $dst,ST\n\t" %}
9900   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9901   ins_encode( strictfp_bias1(dst),
9902               Push_Reg_DPR(src),
9903               OpcP, RegOpc(dst),
9904               strictfp_bias2(dst) );
9905   ins_pipe( fpu_reg_reg );
9906 %}
9907 
9908 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
       // Divide whose result is consumed by a RoundDouble: the quotient is
       // stored to a double stack slot instead of staying in a register.
9909   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );  // not used when compiling a strict method
9910   match(Set dst (RoundDouble (DivD src1 src2)));
9911 
9912   format %{ "FLD    $src1\n\t"
9913             "FDIV   ST,$src2\n\t"
9914             "FSTP_D $dst\t# D-round" %}
9915   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9916   ins_encode( Push_Reg_DPR(src1),
9917               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9918   ins_pipe( fpu_mem_reg_reg );
9919 %}
9920 
9921 
9922 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
       // Two-address double remainder: dst = dst % src.  rax and cr appear
       // in the operand list only so they can be declared as killed.
9923   predicate(UseSSE<=1);
9924   match(Set dst (ModD dst src));
9925   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9926 
9927   format %{ "DMOD   $dst,$src" %}
9928   ins_cost(250);
9929   ins_encode(Push_Reg_Mod_DPR(dst, src),
9930               emitModDPR(),
9931               Push_Result_Mod_DPR(src),
9932               Pop_Reg_DPR(dst));
9933   ins_pipe( pipe_slow );
9934 %}
9935 
9936 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
       // Double remainder for XMM operands: dst = src0 % src1.  Per the
       // format below, both inputs are bounced through the stack onto the
       // FPU, FPREM is looped, and the result is moved back to an XMM register.
9937   predicate(UseSSE>=2);
9938   match(Set dst (ModD src0 src1));
9939   effect(KILL rax, KILL cr);  // the listed sequence uses FNSTSW AX / SAHF and SUB/ADD ESP
9940 
9941   format %{ "SUB    ESP,8\t # DMOD\n"
9942           "\tMOVSD  [ESP+0],$src1\n"
9943           "\tFLD_D  [ESP+0]\n"
9944           "\tMOVSD  [ESP+0],$src0\n"
9945           "\tFLD_D  [ESP+0]\n"
9946      "loop:\tFPREM\n"
9947           "\tFWAIT\n"
9948           "\tFNSTSW AX\n"
9949           "\tSAHF\n"
9950           "\tJP     loop\n"
9951           "\tFSTP_D [ESP+0]\n"
9952           "\tMOVSD  $dst,[ESP+0]\n"
9953           "\tADD    ESP,8\n"
9954           "\tFSTP   ST0\t # Restore FPU Stack"
9955     %}
9956   ins_cost(250);
9957   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9958   ins_pipe( pipe_slow );
9959 %}
9960 
9961 instruct atanDPR_reg(regDPR dst, regDPR src) %{
       // Two-address AtanD on FPU-stack doubles: dst = AtanD(dst, src).
9962   predicate (UseSSE<=1);
9963   match(Set dst(AtanD dst src));
9964   format %{ "DATA   $dst,$src" %}
9965   opcode(0xD9, 0xF3);  // primary and secondary bytes are emitted in that order below (OpcP, OpcS)
9966   ins_encode( Push_Reg_DPR(src),
9967               OpcP, OpcS, RegOpc(dst) );
9968   ins_pipe( pipe_slow );
9969 %}
9970 
9971 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
       // Two-address AtanD on XMM doubles: dst = AtanD(dst, src).  It reuses
       // the same opcode bytes as atanDPR_reg between Push_SrcD and Push_ResultD.
9972   predicate (UseSSE>=2);
9973   match(Set dst(AtanD dst src));
9974   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9975   format %{ "DATA   $dst,$src" %}
9976   opcode(0xD9, 0xF3);
9977   ins_encode( Push_SrcD(src),
9978               OpcP, OpcS, Push_ResultD(dst) );
9979   ins_pipe( pipe_slow );
9980 %}
9981 
9982 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
       // Double square root on the FPU stack: dst = SqrtD(src).
9983   predicate (UseSSE<=1);
9984   match(Set dst (SqrtD src));
9985   format %{ "DSQRT  $dst,$src" %}
9986   opcode(0xFA, 0xD9);  // bytes are declared reversed and emitted secondary-first (OpcS, OpcP), i.e. D9 FA
9987   ins_encode( Push_Reg_DPR(src),
9988               OpcS, OpcP, Pop_Reg_DPR(dst) );
9989   ins_pipe( pipe_slow );
9990 %}
9991 
9992 //-------------Float Instructions-------------------------------
9993 // Float Math
9994 
9995 // Code for float compare:
9996 //     fcompp();
9997 //     fwait(); fnstsw_ax();
9998 //     sahf();
9999 //     movl(dst, unordered_result);
10000 //     jcc(Assembler::parity, exit);
10001 //     movl(dst, less_result);
10002 //     jcc(Assembler::below, exit);
10003 //     movl(dst, equal_result);
10004 //     jcc(Assembler::equal, exit);
10005 //     movl(dst, greater_result);
10006 //   exit:
10007 
10008 // P6 version of float compare, sets condition codes in EFLAGS
10009 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
        // Float compare that sets EFLAGS directly, followed by a fixup that
        // turns the unordered (NaN) outcome into "less than" (see format).
10010   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10011   match(Set cr (CmpF src1 src2));
10012   effect(KILL rax);  // the fixup shown in the format writes AH
10013   ins_cost(150);
10014   format %{ "FLD    $src1\n\t"
10015             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10016             "JNP    exit\n\t"
10017             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10018             "SAHF\n"
10019      "exit:\tNOP               // avoid branch to branch" %}
10020   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10021   ins_encode( Push_Reg_DPR(src1),
10022               OpcP, RegOpc(src2),
10023               cmpF_P6_fixup );
10024   ins_pipe( pipe_slow );
10025 %}
10026 
10027 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
        // Same compare as cmpFPR_cc_P6 but producing an eFlagsRegUCF result:
        // no NaN fixup is emitted and rax is not killed, hence the lower cost.
10028   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10029   match(Set cr (CmpF src1 src2));
10030   ins_cost(100);
10031   format %{ "FLD    $src1\n\t"
10032             "FUCOMIP ST,$src2  // P6 instruction" %}
10033   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10034   ins_encode( Push_Reg_DPR(src1),
10035               OpcP, RegOpc(src2));
10036   ins_pipe( pipe_slow );
10037 %}
10038 
10039 
10040 // Compare & branch
10041 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
        // Float compare into EFLAGS without the P6 instruction: the FPU status
        // word is copied through AX, with unordered treated as "less than".
10042   predicate(UseSSE == 0);  // no supports_cmov() requirement, unlike the P6 variants
10043   match(Set cr (CmpF src1 src2));
10044   effect(KILL rax);  // FNSTSW AX in the sequence below overwrites AX
10045   ins_cost(200);
10046   format %{ "FLD    $src1\n\t"
10047             "FCOMp  $src2\n\t"
10048             "FNSTSW AX\n\t"
10049             "TEST   AX,0x400\n\t"
10050             "JZ,s   flags\n\t"
10051             "MOV    AH,1\t# unordered treat as LT\n"
10052     "flags:\tSAHF" %}
10053   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10054   ins_encode( Push_Reg_DPR(src1),
10055               OpcP, RegOpc(src2),
10056               fpu_flags);
10057   ins_pipe( pipe_slow );
10058 %}
10059 
10060 // Compare vs zero into -1,0,1
10061 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
        // Three-way compare (CmpF3) of src1 against the float constant zero,
        // producing an integer result in dst.
10062   predicate(UseSSE == 0);
10063   match(Set dst (CmpF3 src1 zero));
10064   effect(KILL cr, KILL rax);
10065   ins_cost(280);  // cheaper than the general cmpFPR_reg (300)
10066   format %{ "FTSTF  $dst,$src1" %}
10067   opcode(0xE4, 0xD9);  // emitted secondary-first below (OpcS, OpcP), i.e. D9 E4
10068   ins_encode( Push_Reg_DPR(src1),
10069               OpcS, OpcP, PopFPU,
10070               CmpF_Result(dst));
10071   ins_pipe( pipe_slow );
10072 %}
10073 
10074 // Compare into -1,0,1
10075 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
        // General three-way compare (CmpF3) of two FPU-stack floats, producing
        // an integer result in dst.
10076   predicate(UseSSE == 0);
10077   match(Set dst (CmpF3 src1 src2));
10078   effect(KILL cr, KILL rax);
10079   ins_cost(300);
10080   format %{ "FCMPF  $dst,$src1,$src2" %}
10081   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10082   ins_encode( Push_Reg_DPR(src1),
10083               OpcP, RegOpc(src2),
10084               CmpF_Result(dst));
10085   ins_pipe( pipe_slow );
10086 %}
10087 
10088 // float compare and set condition codes in EFLAGS by XMM regs
10089 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
        // XMM float compare into EFLAGS, followed by a fixup for the
        // unordered (NaN) case as listed in the format.
10090   predicate(UseSSE>=1);
10091   match(Set cr (CmpF src1 src2));
10092   ins_cost(145);
10093   format %{ "UCOMISS $src1,$src2\n\t"
10094             "JNP,s   exit\n\t"
10095             "PUSHF\t# saw NaN, set CF\n\t"
10096             "AND     [rsp], #0xffffff2b\n\t"
10097             "POPF\n"
10098     "exit:" %}
10099   ins_encode %{
10100     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10101     emit_cmpfp_fixup(_masm);  // helper defined elsewhere; presumably emits the JNP..POPF tail of the format
10102   %}
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
        // Same compare as cmpF_cc but producing an eFlagsRegUCF result, so
        // only the bare UCOMISS is emitted (no NaN fixup) at a lower cost.
10107   predicate(UseSSE>=1);
10108   match(Set cr (CmpF src1 src2));
10109   ins_cost(100);
10110   format %{ "UCOMISS $src1,$src2" %}
10111   ins_encode %{
10112     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10113   %}
10114   ins_pipe( pipe_slow );
10115 %}
10116 
10117 // float compare and set condition codes in EFLAGS by XMM regs
10118 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
        // Memory-operand form of cmpF_cc: the LoadF of the second operand is
        // folded into the UCOMISS.
10119   predicate(UseSSE>=1);
10120   match(Set cr (CmpF src1 (LoadF src2)));
10121   ins_cost(165);
10122   format %{ "UCOMISS $src1,$src2\n\t"
10123             "JNP,s   exit\n\t"
10124             "PUSHF\t# saw NaN, set CF\n\t"
10125             "AND     [rsp], #0xffffff2b\n\t"
10126             "POPF\n"
10127     "exit:" %}
10128   ins_encode %{
10129     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10130     emit_cmpfp_fixup(_masm);  // helper defined elsewhere; presumably emits the JNP..POPF tail of the format
10131   %}
10132   ins_pipe( pipe_slow );
10133 %}
10134 
10135 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
        // Memory-operand form of cmpF_ccCF: bare UCOMISS against memory with
        // an eFlagsRegUCF result and no NaN fixup.
10136   predicate(UseSSE>=1);
10137   match(Set cr (CmpF src1 (LoadF src2)));
10138   ins_cost(100);
10139   format %{ "UCOMISS $src1,$src2" %}
10140   ins_encode %{
10141     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10142   %}
10143   ins_pipe( pipe_slow );
10144 %}
10145 
10146 // Compare into -1,0,1 in XMM
10147 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
        // Three-way compare (CmpF3) of two XMM floats into an integer register.
        // Per the format, dst is -1 when the parity or below branch is taken;
        // otherwise it is the zero-extended SETNE result.
10148   predicate(UseSSE>=1);
10149   match(Set dst (CmpF3 src1 src2));
10150   effect(KILL cr);  // UCOMISS overwrites the flags
10151   ins_cost(255);
10152   format %{ "UCOMISS $src1, $src2\n\t"
10153             "MOV     $dst, #-1\n\t"
10154             "JP,s    done\n\t"
10155             "JB,s    done\n\t"
10156             "SETNE   $dst\n\t"
10157             "MOVZB   $dst, $dst\n"
10158     "done:" %}
10159   ins_encode %{
10160     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10161     emit_cmpfp3(_masm, $dst$$Register);  // helper defined elsewhere; presumably emits the MOV..MOVZB tail of the format
10162   %}
10163   ins_pipe( pipe_slow );
10164 %}
10165 
10166 // Compare into -1,0,1 in XMM and memory
10167 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
        // Memory-operand form of cmpF_reg: the LoadF of the second operand is
        // folded into the UCOMISS.
10168   predicate(UseSSE>=1);
10169   match(Set dst (CmpF3 src1 (LoadF src2)));
10170   effect(KILL cr);  // UCOMISS overwrites the flags
10171   ins_cost(275);
10172   format %{ "UCOMISS $src1, $src2\n\t"
10173             "MOV     $dst, #-1\n\t"
10174             "JP,s    done\n\t"
10175             "JB,s    done\n\t"
10176             "SETNE   $dst\n\t"
10177             "MOVZB   $dst, $dst\n"
10178     "done:" %}
10179   ins_encode %{
10180     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10181     emit_cmpfp3(_masm, $dst$$Register);  // helper defined elsewhere; presumably emits the MOV..MOVZB tail of the format
10182   %}
10183   ins_pipe( pipe_slow );
10184 %}
10185 
10186 // Spill to obtain 24-bit precision
10187 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float subtract src1 - src2 whose result is stored to a float stack
        // slot; used only when 24-bit instruction selection is on.
10188   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10189   match(Set dst (SubF src1 src2));
10190 
10191   format %{ "FSUB   $dst,$src1 - $src2" %}
10192   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10193   ins_encode( Push_Reg_FPR(src1),
10194               OpcReg_FPR(src2),
10195               Pop_Mem_FPR(dst) );
10196   ins_pipe( fpu_mem_reg_reg );
10197 %}
10198 //
10199 // This instruction does not round to 24-bits
10200 instruct subFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float subtract: dst = dst - src, result kept in a
        // register; used only when 24-bit instruction selection is off.
10201   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10202   match(Set dst (SubF dst src));
10203 
10204   format %{ "FSUB   $dst,$src" %}
10205   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10206   ins_encode( Push_Reg_FPR(src),
10207               OpcP, RegOpc(dst) );
10208   ins_pipe( fpu_reg_reg );
10209 %}
10210 
10211 // Spill to obtain 24-bit precision
10212 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float add src1 + src2 whose result is stored to a float stack slot;
        // used only when 24-bit instruction selection is on.
10213   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10214   match(Set dst (AddF src1 src2));
10215 
10216   format %{ "FADD   $dst,$src1,$src2" %}
10217   opcode(0xD8, 0x0); /* D8 C0+i */
        // src2 is pushed first and src1 is the register operand of the add.
10218   ins_encode( Push_Reg_FPR(src2),
10219               OpcReg_FPR(src1),
10220               Pop_Mem_FPR(dst) );
10221   ins_pipe( fpu_mem_reg_reg );
10222 %}
10223 //
10224 // This instruction does not round to 24-bits
10225 instruct addFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float add: dst = dst + src, result kept in a register;
        // used only when 24-bit instruction selection is off.
10226   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10227   match(Set dst (AddF dst src));
10228 
10229   format %{ "FLD    $src\n\t"
10230             "FADDp  $dst,ST" %}
10231   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10232   ins_encode( Push_Reg_FPR(src),
10233               OpcP, RegOpc(dst) );
10234   ins_pipe( fpu_reg_reg );
10235 %}
10236 
10237 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
        // Float absolute value.  Both operands use the regFPR1 class and the
        // encoding is just the two opcode bytes with no register operand.
10238   predicate(UseSSE==0);
10239   match(Set dst (AbsF src));
10240   ins_cost(100);
10241   format %{ "FABS" %}
10242   opcode(0xE1, 0xD9);  // emitted secondary-first below (OpcS, OpcP), i.e. D9 E1
10243   ins_encode( OpcS, OpcP );
10244   ins_pipe( fpu_reg_reg );
10245 %}
10246 
10247 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
        // Float negate.  Both operands use the regFPR1 class and the encoding
        // is just the two opcode bytes with no register operand.
10248   predicate(UseSSE==0);
10249   match(Set dst (NegF src));
10250   ins_cost(100);
10251   format %{ "FCHS" %}
10252   opcode(0xE0, 0xD9);  // emitted secondary-first below (OpcS, OpcP), i.e. D9 E0
10253   ins_encode( OpcS, OpcP );
10254   ins_pipe( fpu_reg_reg );
10255 %}
10256 
10257 // Cisc-alternate to addFPR_reg
10258 // Spill to obtain 24-bit precision
10259 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
        // Float add of a register and a folded LoadF, with the result stored
        // to a float stack slot; used only with 24-bit instruction selection.
10260   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10261   match(Set dst (AddF src1 (LoadF src2)));
10262 
10263   format %{ "FLD    $src2\n\t"
10264             "FADD   ST,$src1\n\t"
10265             "FSTP_S $dst" %}
10266   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10267   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10268               OpcReg_FPR(src1),
10269               Pop_Mem_FPR(dst) );
10270   ins_pipe( fpu_mem_reg_mem );
10271 %}
10272 //
10273 // Cisc-alternate to addFPR_reg
10274 // This instruction does not round to 24-bits
10275 instruct addFPR_reg_mem(regFPR dst, memory src) %{
        // Two-address float add of dst and a folded LoadF, result kept in a
        // register; used only when 24-bit instruction selection is off.
10276   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10277   match(Set dst (AddF dst (LoadF src)));
10278 
10279   format %{ "FADD   $dst,$src" %}
10280   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10281   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10282               OpcP, RegOpc(dst) );
10283   ins_pipe( fpu_reg_mem );
10284 %}
10285 
10286 // Following two instructions for _222_mpegaudio
10287 // Spill to obtain 24-bit precision
10288 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
        // Float add with a memory first operand and a register second
        // operand, result stored to a float stack slot.
        // NOTE(review): src1 is a memory operand matched directly, without the
        // LoadF wrapper the other memory rules use -- confirm this is intended.
10289   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10290   match(Set dst (AddF src1 src2));
10291 
10292   format %{ "FADD   $dst,$src1,$src2" %}
10293   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10294   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10295               OpcReg_FPR(src2),
10296               Pop_Mem_FPR(dst) );
10297   ins_pipe( fpu_mem_reg_mem );
10298 %}
10299 
10300 // Cisc-spill variant
10301 // Spill to obtain 24-bit precision
10302 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
        // Float add of two memory operands (the second under a LoadF), result
        // stored to a float stack slot.  src2 is loaded with the tertiary
        // opcode and src1 is the memory operand of the add itself.
10303   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10304   match(Set dst (AddF src1 (LoadF src2)));
10305 
10306   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10307   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10308   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10309               set_instruction_start,
10310               OpcP, RMopc_Mem(secondary,src1),
10311               Pop_Mem_FPR(dst) );
10312   ins_pipe( fpu_mem_mem_mem );
10313 %}
10314 
10315 // Spill to obtain 24-bit precision
10316 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
        // Float add of two memory operands, result stored to a float stack
        // slot.  The encoding is the same as addFPR24_mem_cisc; only the match
        // tree differs (no LoadF around src2 here).
10317   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10318   match(Set dst (AddF src1 src2));
10319 
10320   format %{ "FADD   $dst,$src1,$src2" %}
10321   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10322   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10323               set_instruction_start,
10324               OpcP, RMopc_Mem(secondary,src1),
10325               Pop_Mem_FPR(dst) );
10326   ins_pipe( fpu_mem_mem_mem );
10327 %}
10328 
10329 
10330 // Spill to obtain 24-bit precision
10331 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
        // Float add of a register and a constant-table float, with the result
        // stored to a float stack slot; used only with 24-bit selection.
10332   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10333   match(Set dst (AddF src con));
10334   format %{ "FLD    $src\n\t"
10335             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10336             "FSTP_S $dst"  %}
10337   ins_encode %{
10338     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10339     __ fadd_s($constantaddress($con));  // add the constant-table float
10340     __ fstp_s(Address(rsp, $dst$$disp));  // store to the stack slot, addressed off rsp
10341   %}
10342   ins_pipe(fpu_mem_reg_con);
10343 %}
10344 //
10345 // This instruction does not round to 24-bits
10346 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
        // Float add of a register and a constant-table float, result kept in
        // a register; used only when 24-bit instruction selection is off.
10347   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10348   match(Set dst (AddF src con));
10349   format %{ "FLD    $src\n\t"
10350             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10351             "FSTP   $dst"  %}
10352   ins_encode %{
10353     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10354     __ fadd_s($constantaddress($con));  // add the constant-table float
10355     __ fstp_d($dst$$reg);  // pop into the destination register (shown as FSTP in the format)
10356   %}
10357   ins_pipe(fpu_reg_reg_con);
10358 %}
10359 
10360 // Spill to obtain 24-bit precision
10361 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float multiply src1 * src2 whose result is stored to a float stack
        // slot; used only when 24-bit instruction selection is on.
10362   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10363   match(Set dst (MulF src1 src2));
10364 
10365   format %{ "FLD    $src1\n\t"
10366             "FMUL   $src2\n\t"
10367             "FSTP_S $dst"  %}
10368   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10369   ins_encode( Push_Reg_FPR(src1),
10370               OpcReg_FPR(src2),
10371               Pop_Mem_FPR(dst) );
10372   ins_pipe( fpu_mem_reg_reg );
10373 %}
10374 //
10375 // This instruction does not round to 24-bits
10376 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
        // Float multiply src1 * src2 with the result kept in a register; used
        // only when 24-bit instruction selection is off.
10377   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10378   match(Set dst (MulF src1 src2));
10379 
        // The listing follows the encoding below: src2 is the pushed operand,
        // src1 the register operand, and the result is popped into a register
        // (FSTP), not stored to memory as FSTP_S would suggest.
10380   format %{ "FLD    $src2\n\t"
10381             "FMUL   $src1\n\t"
10382             "FSTP   $dst"  %}
10383   opcode(0xD8, 0x1); /* D8 C8+i */
10384   ins_encode( Push_Reg_FPR(src2),
10385               OpcReg_FPR(src1),
10386               Pop_Reg_FPR(dst) );
10387   ins_pipe( fpu_reg_reg_reg );
10388 %}
10389 
10390 
10391 // Spill to obtain 24-bit precision
10392 // Cisc-alternate to reg-reg multiply
10393 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
        // Float multiply of a register and a folded LoadF, with the result
        // stored to a float stack slot; used only with 24-bit selection.
10394   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10395   match(Set dst (MulF src1 (LoadF src2)));
10396 
10397   format %{ "FLD_S  $src2\n\t"
10398             "FMUL   $src1\n\t"
10399             "FSTP_S $dst"  %}
10400   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10401   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10402               OpcReg_FPR(src1),
10403               Pop_Mem_FPR(dst) );
10404   ins_pipe( fpu_mem_reg_mem );
10405 %}
10406 //
10407 // This instruction does not round to 24-bits
10408 // Cisc-alternate to reg-reg multiply
10409 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
        // Float multiply of a register and a folded LoadF, result kept in a
        // register; used only when 24-bit instruction selection is off.
10410   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10411   match(Set dst (MulF src1 (LoadF src2)));
10412 
10413   format %{ "FMUL   $dst,$src1,$src2" %}
10414   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10415   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10416               OpcReg_FPR(src1),
10417               Pop_Reg_FPR(dst) );
10418   ins_pipe( fpu_reg_reg_mem );
10419 %}
10420 
10421 // Spill to obtain 24-bit precision
10422 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
        // Float multiply of two memory operands, result stored to a float
        // stack slot.  src2 is loaded with the tertiary opcode and src1 is the
        // memory operand of the multiply itself.
10423   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10424   match(Set dst (MulF src1 src2));
10425 
10426   format %{ "FMUL   $dst,$src1,$src2" %}
10427   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10428   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10429               set_instruction_start,
10430               OpcP, RMopc_Mem(secondary,src1),
10431               Pop_Mem_FPR(dst) );
10432   ins_pipe( fpu_mem_mem_mem );
10433 %}
10434 
10435 // Spill to obtain 24-bit precision
10436 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
        // Float multiply of a register by a constant-table float, with the
        // result stored to a float stack slot; used only with 24-bit selection.
10437   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10438   match(Set dst (MulF src con));
10439 
10440   format %{ "FLD    $src\n\t"
10441             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10442             "FSTP_S $dst"  %}
10443   ins_encode %{
10444     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10445     __ fmul_s($constantaddress($con));  // multiply by the constant-table float
10446     __ fstp_s(Address(rsp, $dst$$disp));  // store to the stack slot, addressed off rsp
10447   %}
10448   ins_pipe(fpu_mem_reg_con);
10449 %}
10450 //
10451 // This instruction does not round to 24-bits
10452 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
        // Float multiply of a register by a constant-table float, result kept
        // in a register; used only when 24-bit instruction selection is off.
10453   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10454   match(Set dst (MulF src con));
10455 
10456   format %{ "FLD    $src\n\t"
10457             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10458             "FSTP   $dst"  %}
10459   ins_encode %{
10460     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10461     __ fmul_s($constantaddress($con));  // multiply by the constant-table float
10462     __ fstp_d($dst$$reg);  // pop into the destination register (shown as FSTP in the format)
10463   %}
10464   ins_pipe(fpu_reg_reg_con);
10465 %}
10466 
10467 
10468 //
10469 // MACRO1 -- subsume unshared load into mulFPR
10470 // This instruction does not round to 24-bits
10471 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
        // Float multiply whose first operand is a folded LoadF: dst = [mem1] * src,
        // result kept in a register.
10472   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10473   match(Set dst (MulF (LoadF mem1) src));
10474 
10475   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10476             "FMUL   ST,$src\n\t"
10477             "FSTP   $dst" %}
10478   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10479   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10480               OpcReg_FPR(src),
10481               Pop_Reg_FPR(dst) );
10482   ins_pipe( fpu_reg_reg_mem );
10483 %}
10484 //
10485 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10486 // This instruction does not round to 24-bits
10487 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
        // Matches dst = ([mem1] * src1) + src2 as one rule, with the LoadF of
        // mem1 folded in.
10488   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10489   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10490   ins_cost(95);
10491 
10492   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10493             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10494             "FADD   ST,$src2\n\t"
10495             "FSTP   $dst" %}
10496   opcode(0xD9); /* LoadF D9 /0 */
        // Here the primary opcode is the load itself; the multiply and add
        // come from dedicated encoding classes.
10497   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10498               FMul_ST_reg(src1),
10499               FAdd_ST_reg(src2),
10500               Pop_Reg_FPR(dst) );
10501   ins_pipe( fpu_reg_mem_reg_reg );
10502 %}
10503 
10504 // MACRO3 -- addFPR a mulFPR
10505 // This instruction does not round to 24-bits.  It is a '2-address'
10506 // instruction in that the result goes back to src2.  This eliminates
10507 // a move from the macro; possibly the register allocator will have
10508 // to add it back (and maybe not).
10509 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
        // Matches src2 = (src0 * src1) + src2 as one rule; src2 is both an
        // input and the destination.
10510   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10511   match(Set src2 (AddF (MulF src0 src1) src2));
10512 
10513   format %{ "FLD    $src0     ===MACRO3===\n\t"
10514             "FMUL   ST,$src1\n\t"
10515             "FADDP  $src2,ST" %}
        // NOTE(review): the ins_encode list below does not reference OpcP, so this
        // opcode() declaration looks unused -- confirm against the enc_class bodies.
10516   opcode(0xD9); /* LoadF D9 /0 */
10517   ins_encode( Push_Reg_FPR(src0),
10518               FMul_ST_reg(src1),
10519               FAddP_reg_ST(src2) );
10520   ins_pipe( fpu_reg_reg_reg );
10521 %}
10522 
10523 // MACRO4 -- divFPR subFPR
10524 // This instruction does not round to 24-bits
10525 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
        // Matches dst = (src2 - src1) / src3 as one rule; note that src2 is
        // the minuend and is the operand pushed first.
10526   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10527   match(Set dst (DivF (SubF src2 src1) src3));
10528 
10529   format %{ "FLD    $src2   ===MACRO4===\n\t"
10530             "FSUB   ST,$src1\n\t"
10531             "FDIV   ST,$src3\n\t"
10532             "FSTP  $dst" %}
        // NOTE(review): OpcP is not referenced directly below; whether
        // subFPR_divFPR_encode reads this opcode() cannot be seen from here.
10533   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10534   ins_encode( Push_Reg_FPR(src2),
10535               subFPR_divFPR_encode(src1,src3),
10536               Pop_Reg_FPR(dst) );
10537   ins_pipe( fpu_reg_reg_reg_reg );
10538 %}
10539 
10540 // Spill to obtain 24-bit precision
10541 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float divide src1 / src2 whose result is stored to a float stack
        // slot; used only when 24-bit instruction selection is on.
10542   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10543   match(Set dst (DivF src1 src2));
10544 
10545   format %{ "FDIV   $dst,$src1,$src2" %}
10546   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10547   ins_encode( Push_Reg_FPR(src1),
10548               OpcReg_FPR(src2),
10549               Pop_Mem_FPR(dst) );
10550   ins_pipe( fpu_mem_reg_reg );
10551 %}
10552 //
10553 // This instruction does not round to 24-bits
10554 instruct divFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float divide: dst = dst / src, result kept in a
        // register; used only when 24-bit instruction selection is off.
10555   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556   match(Set dst (DivF dst src));
10557 
10558   format %{ "FDIV   $dst,$src" %}
10559   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10560   ins_encode( Push_Reg_FPR(src),
10561               OpcP, RegOpc(dst) );
10562   ins_pipe( fpu_reg_reg );
10563 %}
10564 
10565 
10566 // Spill to obtain 24-bit precision
10567 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
        // Float remainder src1 % src2 whose result is stored to a float stack
        // slot.  It reuses the DPR-named mod encodings also used by modDPR_reg.
10568   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10569   match(Set dst (ModF src1 src2));
10570   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10571 
10572   format %{ "FMOD   $dst,$src1,$src2" %}
10573   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10574               emitModDPR(),
10575               Push_Result_Mod_DPR(src2),
10576               Pop_Mem_FPR(dst));
10577   ins_pipe( pipe_slow );
10578 %}
10579 //
10580 // This instruction does not round to 24-bits
10581 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
        // Two-address float remainder: dst = dst % src, result kept in a
        // register.  It reuses the DPR-named mod encodings also used by modDPR_reg.
10582   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10583   match(Set dst (ModF dst src));
10584   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10585 
10586   format %{ "FMOD   $dst,$src" %}
10587   ins_encode(Push_Reg_Mod_DPR(dst, src),
10588               emitModDPR(),
10589               Push_Result_Mod_DPR(src),
10590               Pop_Reg_FPR(dst));
10591   ins_pipe( pipe_slow );
10592 %}
10593 
10594 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
        // Float remainder for XMM operands: dst = src0 % src1.  Per the format
        // below, both inputs are bounced through a 4-byte stack temp onto the
        // FPU, FPREM is looped, and the result is moved back to an XMM register.
10595   predicate(UseSSE>=1);
10596   match(Set dst (ModF src0 src1));
10597   effect(KILL rax, KILL cr);  // the listed sequence uses FNSTSW AX / SAHF and SUB/ADD ESP
10598   format %{ "SUB    ESP,4\t # FMOD\n"
10599           "\tMOVSS  [ESP+0],$src1\n"
10600           "\tFLD_S  [ESP+0]\n"
10601           "\tMOVSS  [ESP+0],$src0\n"
10602           "\tFLD_S  [ESP+0]\n"
10603      "loop:\tFPREM\n"
10604           "\tFWAIT\n"
10605           "\tFNSTSW AX\n"
10606           "\tSAHF\n"
10607           "\tJP     loop\n"
10608           "\tFSTP_S [ESP+0]\n"
10609           "\tMOVSS  $dst,[ESP+0]\n"
10610           "\tADD    ESP,4\n"
10611           "\tFSTP   ST0\t # Restore FPU Stack"
10612     %}
10613   ins_cost(250);
10614   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10615   ins_pipe( pipe_slow );
10616 %}
10617 
10618 
10619 //----------Arithmetic Conversion Instructions---------------------------------
10620 // The conversion operations are all sorted alphabetically.  Please keep it that way!
10621 
10622 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // RoundFloat implemented as a store of the FPU register to a float
        // stack slot.  Also used as the expansion of convDPR2FPR_reg.
10623   predicate(UseSSE==0);
10624   match(Set dst (RoundFloat src));
10625   ins_cost(125);
10626   format %{ "FST_S  $dst,$src\t# F-round" %}
10627   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10628   ins_pipe( fpu_mem_reg );
10629 %}
10630 
10631 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // RoundDouble implemented as a store of the FPU register to a double
        // stack slot.  Also used as the expansion of convFPR2D_reg.
10632   predicate(UseSSE<=1);
10633   match(Set dst (RoundDouble src));
10634   ins_cost(125);
10635   format %{ "FST_D  $dst,$src\t# D-round" %}
10636   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10637   ins_pipe( fpu_mem_reg );
10638 %}
10639 
10640 // Force rounding to 24-bit precision and 8-bit exponent
10641 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
        // ConvD2F on the FPU stack: no encoding of its own, it expands to
        // roundFloat_mem_reg (a store to a float stack slot).
10642   predicate(UseSSE==0);
10643   match(Set dst (ConvD2F src));
10644   format %{ "FST_S  $dst,$src\t# F-round" %}
10645   expand %{
10646     roundFloat_mem_reg(dst,src);
10647   %}
10648 %}
10649 
10650 // Force rounding to 24-bit precision and 8-bit exponent
10651 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
        // ConvD2F from an FPU-stack double to an XMM float, going through a
        // 4-byte temporary carved out of the stack.
10652   predicate(UseSSE==1);
10653   match(Set dst (ConvD2F src));
10654   effect( KILL cr );  // the SUB/ADD on ESP below modify the flags
10655   format %{ "SUB    ESP,4\n\t"
10656             "FST_S  [ESP],$src\t# F-round\n\t"
10657             "MOVSS  $dst,[ESP]\n\t"
10658             "ADD ESP,4" %}
10659   ins_encode %{
10660     __ subptr(rsp, 4);  // reserve the 4-byte temp
10661     if ($src$$reg != FPR1L_enc) {
            // Source is not the register encoded as FPR1L_enc: push a copy of
            // it, then store-and-pop so the FPU stack depth is unchanged.
10662       __ fld_s($src$$reg-1);
10663       __ fstp_s(Address(rsp, 0));
10664     } else {
            // Source is the FPR1L_enc register: store it without popping.
10665       __ fst_s(Address(rsp, 0));
10666     }
10667     __ movflt($dst$$XMMRegister, Address(rsp, 0));  // reload the rounded float into XMM
10668     __ addptr(rsp, 4);  // release the temp
10669   %}
10670   ins_pipe( pipe_slow );
10671 %}
10672 
10673 // Force rounding double precision to single precision
10674 instruct convD2F_reg(regF dst, regD src) %{
10675   predicate(UseSSE>=2);
10676   match(Set dst (ConvD2F src));
10677   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10678   ins_encode %{
10679     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10680   %}
10681   ins_pipe( pipe_slow );
10682 %}
10683 
10684 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10685   predicate(UseSSE==0);
10686   match(Set dst (ConvF2D src));
10687   format %{ "FST_S  $dst,$src\t# D-round" %}
10688   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10689   ins_pipe( fpu_reg_reg );
10690 %}
10691 
10692 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10693   predicate(UseSSE==1);
10694   match(Set dst (ConvF2D src));
10695   format %{ "FST_D  $dst,$src\t# D-round" %}
10696   expand %{
10697     roundDouble_mem_reg(dst,src);
10698   %}
10699 %}
10700 
10701 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{  // ConvF2D when UseSSE==1: XMM float source, regDPR result, via a stack temp
10702   predicate(UseSSE==1);
10703   match(Set dst (ConvF2D src));
10704   effect( KILL cr );  // SUB/ADD on ESP below write EFLAGS
10705   format %{ "SUB    ESP,4\n\t"
10706             "MOVSS  [ESP],$src\n\t"
10707             "FLD_S  [ESP]\n\t"
10708             "ADD    ESP,4\n\t"
10709             "FSTP   $dst\t# D-round" %}
10710   ins_encode %{
10711     __ subptr(rsp, 4);  // reserve a 4-byte scratch slot on the CPU stack
10712     __ movflt(Address(rsp, 0), $src$$XMMRegister);  // spill the XMM float to the slot
10713     __ fld_s(Address(rsp, 0));  // push it onto the x87 stack
10714     __ addptr(rsp, 4);  // release the scratch slot
10715     __ fstp_d($dst$$reg);  // pop the pushed value into the destination FPU register
10716   %}
10717   ins_pipe( pipe_slow );
10718 %}
10719 
10720 instruct convF2D_reg(regD dst, regF src) %{
10721   predicate(UseSSE>=2);
10722   match(Set dst (ConvF2D src));
10723   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10724   ins_encode %{
10725     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10726   %}
10727   ins_pipe( pipe_slow );
10728 %}
10729 
10730 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10731 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10732   predicate(UseSSE<=1);
10733   match(Set dst (ConvD2I src));
10734   effect( KILL tmp, KILL cr );
10735   format %{ "FLD    $src\t# Convert double to int \n\t"
10736             "FLDCW  trunc mode\n\t"
10737             "SUB    ESP,4\n\t"
10738             "FISTp  [ESP + #0]\n\t"
10739             "FLDCW  std/24-bit mode\n\t"
10740             "POP    EAX\n\t"
10741             "CMP    EAX,0x80000000\n\t"
10742             "JNE,s  fast\n\t"
10743             "FLD_D  $src\n\t"
10744             "CALL   d2i_wrapper\n"
10745       "fast:" %}
10746   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10747   ins_pipe( pipe_slow );
10748 %}
10749 
10750 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10751 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{  // ConvD2I from an XMM double (UseSSE>=2) with a stub fallback
10752   predicate(UseSSE>=2);
10753   match(Set dst (ConvD2I src));
10754   effect( KILL tmp, KILL cr );  // tmp (EDX) is never written in this block; presumably clobbered by d2i_wrapper -- confirm
10755   format %{ "CVTTSD2SI $dst, $src\n\t"
10756             "CMP    $dst,0x80000000\n\t"
10757             "JNE,s  fast\n\t"
10758             "SUB    ESP, 8\n\t"
10759             "MOVSD  [ESP], $src\n\t"
10760             "FLD_D  [ESP]\n\t"
10761             "ADD    ESP, 8\n\t"
10762             "CALL   d2i_wrapper\n"
10763       "fast:" %}
10764   ins_encode %{
10765     Label fast;
10766     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);  // truncating double -> int32 conversion
10767     __ cmpl($dst$$Register, 0x80000000);  // 0x80000000 is what CVTTSD2SI yields for NaN / out-of-range input
10768     __ jccb(Assembler::notEqual, fast);  // any other result is already final
10769     __ subptr(rsp, 8);  // slow path: hand the original double to the stub on the x87 stack
10770     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // spill src to an 8-byte scratch slot
10771     __ fld_d(Address(rsp, 0));  // push it onto the FPU stack
10772     __ addptr(rsp, 8);  // release the scratch slot before the call
10773     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));  // stub resolves the corner case
10774     __ bind(fast);
10775   %}
10776   ins_pipe( pipe_slow );
10777 %}
10778 
10779 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10780   predicate(UseSSE<=1);
10781   match(Set dst (ConvD2L src));
10782   effect( KILL cr );
10783   format %{ "FLD    $src\t# Convert double to long\n\t"
10784             "FLDCW  trunc mode\n\t"
10785             "SUB    ESP,8\n\t"
10786             "FISTp  [ESP + #0]\n\t"
10787             "FLDCW  std/24-bit mode\n\t"
10788             "POP    EAX\n\t"
10789             "POP    EDX\n\t"
10790             "CMP    EDX,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "TEST   EAX,EAX\n\t"
10793             "JNE,s  fast\n\t"
10794             "FLD    $src\n\t"
10795             "CALL   d2l_wrapper\n"
10796       "fast:" %}
10797   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10802 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{  // ConvD2L from an XMM double (UseSSE>=2), performed on the x87 stack
10803   predicate (UseSSE>=2);
10804   match(Set dst (ConvD2L src));
10805   effect( KILL cr );  // SUB/ADD/CMP/TEST in the encoding write EFLAGS
10806   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10807             "MOVSD  [ESP],$src\n\t"
10808             "FLD_D  [ESP]\n\t"
10809             "FLDCW  trunc mode\n\t"
10810             "FISTp  [ESP + #0]\n\t"
10811             "FLDCW  std/24-bit mode\n\t"
10812             "POP    EAX\n\t"
10813             "POP    EDX\n\t"
10814             "CMP    EDX,0x80000000\n\t"
10815             "JNE,s  fast\n\t"
10816             "TEST   EAX,EAX\n\t"
10817             "JNE,s  fast\n\t"
10818             "SUB    ESP,8\n\t"
10819             "MOVSD  [ESP],$src\n\t"
10820             "FLD_D  [ESP]\n\t"
10821             "ADD    ESP,8\n\t"
10822             "CALL   d2l_wrapper\n"
10823       "fast:" %}
10824   ins_encode %{
10825     Label fast;
10826     __ subptr(rsp, 8);  // 8-byte scratch slot: holds the double, then the 64-bit integer result
10827     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // spill src
10828     __ fld_d(Address(rsp, 0));  // push it onto the FPU stack
10829     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));  // switch the FPU control word to the truncating one
10830     __ fistp_d(Address(rsp, 0));  // store as 64-bit integer into the slot and pop
10831     // Restore the rounding mode, mask the exception
10832     if (Compile::current()->in_24_bit_fp_mode()) {  // put back whichever control word this compilation runs under
10833       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10834     } else {
10835       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10836     }
10837     // Load the converted long, adjust CPU stack
10838     __ pop(rax);  // low word of the result
10839     __ pop(rdx);  // high word; the two pops also release the scratch slot
10840     __ cmpl(rdx, 0x80000000);  // result 0x80000000:00000000 is what FISTP stores for NaN / overflow
10841     __ jccb(Assembler::notEqual, fast);
10842     __ testl(rax, rax);
10843     __ jccb(Assembler::notEqual, fast);
10844     __ subptr(rsp, 8);  // slow path: reload the original double onto the FPU stack for the stub
10845     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10846     __ fld_d(Address(rsp, 0));
10847     __ addptr(rsp, 8);
10848     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));  // stub resolves the corner case
10849     __ bind(fast);
10850   %}
10851   ins_pipe( pipe_slow );
10852 %}
10853 
10854 // Convert a float to an int.  Java semantics require we do complex
10855 // manglations in the corner cases.  So we set the rounding mode to
10856 // 'zero', store the darned value down as an int, and reset the
10857 // rounding mode to 'nearest'.  The hardware stores a flag value down
10858 // if we would overflow or converted a NAN; we check for this
10859 // and go the slow path if needed.
10860 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10861   predicate(UseSSE==0);
10862   match(Set dst (ConvF2I src));
10863   effect( KILL tmp, KILL cr );
10864   format %{ "FLD    $src\t# Convert float to int \n\t"
10865             "FLDCW  trunc mode\n\t"
10866             "SUB    ESP,4\n\t"
10867             "FISTp  [ESP + #0]\n\t"
10868             "FLDCW  std/24-bit mode\n\t"
10869             "POP    EAX\n\t"
10870             "CMP    EAX,0x80000000\n\t"
10871             "JNE,s  fast\n\t"
10872             "FLD    $src\n\t"
10873             "CALL   d2i_wrapper\n"
10874       "fast:" %}
10875   // DPR2I_encoding works for FPR2I
10876   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10877   ins_pipe( pipe_slow );
10878 %}
10879 
10880 // Convert a float in xmm to an int reg.
10881 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{  // ConvF2I from an XMM float (UseSSE>=1) with a stub fallback
10882   predicate(UseSSE>=1);
10883   match(Set dst (ConvF2I src));
10884   effect( KILL tmp, KILL cr );  // tmp (EDX) is never written in this block; presumably clobbered by d2i_wrapper -- confirm
10885   format %{ "CVTTSS2SI $dst, $src\n\t"
10886             "CMP    $dst,0x80000000\n\t"
10887             "JNE,s  fast\n\t"
10888             "SUB    ESP, 4\n\t"
10889             "MOVSS  [ESP], $src\n\t"
10890             "FLD    [ESP]\n\t"
10891             "ADD    ESP, 4\n\t"
10892             "CALL   d2i_wrapper\n"
10893       "fast:" %}
10894   ins_encode %{
10895     Label fast;
10896     __ cvttss2sil($dst$$Register, $src$$XMMRegister);  // truncating float -> int32 conversion
10897     __ cmpl($dst$$Register, 0x80000000);  // 0x80000000 is what CVTTSS2SI yields for NaN / out-of-range input
10898     __ jccb(Assembler::notEqual, fast);  // any other result is already final
10899     __ subptr(rsp, 4);  // slow path: hand the original float to the stub on the x87 stack
10900     __ movflt(Address(rsp, 0), $src$$XMMRegister);  // spill src to a 4-byte scratch slot
10901     __ fld_s(Address(rsp, 0));  // push it onto the FPU stack
10902     __ addptr(rsp, 4);  // release the scratch slot before the call
10903     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));  // the d2i stub is reused for float input
10904     __ bind(fast);
10905   %}
10906   ins_pipe( pipe_slow );
10907 %}
10908 
10909 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10910   predicate(UseSSE==0);
10911   match(Set dst (ConvF2L src));
10912   effect( KILL cr );
10913   format %{ "FLD    $src\t# Convert float to long\n\t"
10914             "FLDCW  trunc mode\n\t"
10915             "SUB    ESP,8\n\t"
10916             "FISTp  [ESP + #0]\n\t"
10917             "FLDCW  std/24-bit mode\n\t"
10918             "POP    EAX\n\t"
10919             "POP    EDX\n\t"
10920             "CMP    EDX,0x80000000\n\t"
10921             "JNE,s  fast\n\t"
10922             "TEST   EAX,EAX\n\t"
10923             "JNE,s  fast\n\t"
10924             "FLD    $src\n\t"
10925             "CALL   d2l_wrapper\n"
10926       "fast:" %}
10927   // DPR2L_encoding works for FPR2L
10928   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10929   ins_pipe( pipe_slow );
10930 %}
10931 
10932 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10933 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{  // ConvF2L from an XMM float (UseSSE>=1), performed on the x87 stack
10934   predicate (UseSSE>=1);
10935   match(Set dst (ConvF2L src));
10936   effect( KILL cr );  // SUB/ADD/CMP/TEST in the encoding write EFLAGS
10937   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10938             "MOVSS  [ESP],$src\n\t"
10939             "FLD_S  [ESP]\n\t"
10940             "FLDCW  trunc mode\n\t"
10941             "FISTp  [ESP + #0]\n\t"
10942             "FLDCW  std/24-bit mode\n\t"
10943             "POP    EAX\n\t"
10944             "POP    EDX\n\t"
10945             "CMP    EDX,0x80000000\n\t"
10946             "JNE,s  fast\n\t"
10947             "TEST   EAX,EAX\n\t"
10948             "JNE,s  fast\n\t"
10949             "SUB    ESP,4\n\t"
10950             "MOVSS  [ESP],$src\n\t"
10951             "FLD_S  [ESP]\n\t"
10952             "ADD    ESP,4\n\t"
10953             "CALL   d2l_wrapper\n"
10954       "fast:" %}
10955   ins_encode %{
10956     Label fast;
10957     __ subptr(rsp, 8);  // 8 bytes: only 4 are used for the float, but the 64-bit result needs all 8
10958     __ movflt(Address(rsp, 0), $src$$XMMRegister);  // spill src
10959     __ fld_s(Address(rsp, 0));  // push it onto the FPU stack
10960     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));  // switch the FPU control word to the truncating one
10961     __ fistp_d(Address(rsp, 0));  // store as 64-bit integer into the slot and pop
10962     // Restore the rounding mode, mask the exception
10963     if (Compile::current()->in_24_bit_fp_mode()) {  // put back whichever control word this compilation runs under
10964       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10965     } else {
10966       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10967     }
10968     // Load the converted long, adjust CPU stack
10969     __ pop(rax);  // low word of the result
10970     __ pop(rdx);  // high word; the two pops also release the scratch slot
10971     __ cmpl(rdx, 0x80000000);  // result 0x80000000:00000000 is what FISTP stores for NaN / overflow
10972     __ jccb(Assembler::notEqual, fast);
10973     __ testl(rax, rax);
10974     __ jccb(Assembler::notEqual, fast);
10975     __ subptr(rsp, 4);  // slow path: reload the original float onto the FPU stack for the stub
10976     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10977     __ fld_s(Address(rsp, 0));
10978     __ addptr(rsp, 4);
10979     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));  // the d2l stub is reused for float input
10980     __ bind(fast);
10981   %}
10982   ins_pipe( pipe_slow );
10983 %}
10984 
10985 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10986   predicate( UseSSE<=1 );
10987   match(Set dst (ConvI2D src));
10988   format %{ "FILD   $src\n\t"
10989             "FSTP   $dst" %}
10990   opcode(0xDB, 0x0);  /* DB /0 */
10991   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10992   ins_pipe( fpu_reg_mem );
10993 %}
10994 
10995 instruct convI2D_reg(regD dst, rRegI src) %{
10996   predicate( UseSSE>=2 && !UseXmmI2D );
10997   match(Set dst (ConvI2D src));
10998   format %{ "CVTSI2SD $dst,$src" %}
10999   ins_encode %{
11000     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11001   %}
11002   ins_pipe( pipe_slow );
11003 %}
11004 
11005 instruct convI2D_mem(regD dst, memory mem) %{
11006   predicate( UseSSE>=2 );
11007   match(Set dst (ConvI2D (LoadI mem)));
11008   format %{ "CVTSI2SD $dst,$mem" %}
11009   ins_encode %{
11010     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11011   %}
11012   ins_pipe( pipe_slow );
11013 %}
11014 
11015 instruct convXI2D_reg(regD dst, rRegI src)  // ConvI2D variant chosen when UseXmmI2D is set (convI2D_reg covers !UseXmmI2D)
11016 %{
11017   predicate( UseSSE>=2 && UseXmmI2D );
11018   match(Set dst (ConvI2D src));
11019 
11020   format %{ "MOVD  $dst,$src\n\t"
11021             "CVTDQ2PD $dst,$dst\t# i2d" %}
11022   ins_encode %{
11023     __ movdl($dst$$XMMRegister, $src$$Register);  // move the 32-bit int into the XMM register
11024     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);  // packed int32 -> double conversion, done in place on dst
11025   %}
11026   ins_pipe(pipe_slow); // XXX
11027 %}
11028 
11029 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11030   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11031   match(Set dst (ConvI2D (LoadI mem)));
11032   format %{ "FILD   $mem\n\t"
11033             "FSTP   $dst" %}
11034   opcode(0xDB);      /* DB /0 */
11035   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11036               Pop_Reg_DPR(dst));
11037   ins_pipe( fpu_reg_mem );
11038 %}
11039 
11040 // Convert a byte to a float; no rounding step needed.
11041 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{  // ConvI2F special case for inputs already masked to a byte
11042   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );  // input must be an AndI whose second operand is the constant 255
11043   match(Set dst (ConvI2F src));
11044   format %{ "FILD   $src\n\t"
11045             "FSTP   $dst" %}
11046 
11047   opcode(0xDB, 0x0);  /* DB /0 */
11048   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));  // enc_classes defined outside this section; per the format: FILD from the slot, FSTP into dst
11049   ins_pipe( fpu_reg_mem );
11050 %}
11051 
11052 // In 24-bit mode, force exponent rounding by storing back out
11053 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11054   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11055   match(Set dst (ConvI2F src));
11056   ins_cost(200);
11057   format %{ "FILD   $src\n\t"
11058             "FSTP_S $dst" %}
11059   opcode(0xDB, 0x0);  /* DB /0 */
11060   ins_encode( Push_Mem_I(src),
11061               Pop_Mem_FPR(dst));
11062   ins_pipe( fpu_mem_mem );
11063 %}
11064 
11065 // In 24-bit mode, force exponent rounding by storing back out
11066 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11067   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11068   match(Set dst (ConvI2F (LoadI mem)));
11069   ins_cost(200);
11070   format %{ "FILD   $mem\n\t"
11071             "FSTP_S $dst" %}
11072   opcode(0xDB);  /* DB /0 */
11073   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11074               Pop_Mem_FPR(dst));
11075   ins_pipe( fpu_mem_mem );
11076 %}
11077 
11078 // This instruction does not round to 24-bits
11079 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11080   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11081   match(Set dst (ConvI2F src));
11082   format %{ "FILD   $src\n\t"
11083             "FSTP   $dst" %}
11084   opcode(0xDB, 0x0);  /* DB /0 */
11085   ins_encode( Push_Mem_I(src),
11086               Pop_Reg_FPR(dst));
11087   ins_pipe( fpu_reg_mem );
11088 %}
11089 
11090 // This instruction does not round to 24-bits
11091 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11092   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11093   match(Set dst (ConvI2F (LoadI mem)));
11094   format %{ "FILD   $mem\n\t"
11095             "FSTP   $dst" %}
11096   opcode(0xDB);      /* DB /0 */
11097   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11098               Pop_Reg_FPR(dst));
11099   ins_pipe( fpu_reg_mem );
11100 %}
11101 
11102 // Convert an int to a float in xmm; no rounding step needed.
11103 instruct convI2F_reg(regF dst, rRegI src) %{  // ConvI2F into an XMM float using CVTSI2SS
11104   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );  // explicit grouping; with UseSSE>=2 and UseXmmI2F set, convXI2F_reg applies instead
11105   match(Set dst (ConvI2F src));
11106   format %{ "CVTSI2SS $dst, $src" %}
11107   ins_encode %{
11108     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);  // scalar int32 -> float conversion
11109   %}
11110   ins_pipe( pipe_slow );
11111 %}
11112 
11113  instruct convXI2F_reg(regF dst, rRegI src)
11114 %{
11115   predicate( UseSSE>=2 && UseXmmI2F );
11116   match(Set dst (ConvI2F src));
11117 
11118   format %{ "MOVD  $dst,$src\n\t"
11119             "CVTDQ2PS $dst,$dst\t# i2f" %}
11120   ins_encode %{
11121     __ movdl($dst$$XMMRegister, $src$$Register);
11122     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11123   %}
11124   ins_pipe(pipe_slow); // XXX
11125 %}
11126 
11127 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11128   match(Set dst (ConvI2L src));
11129   effect(KILL cr);
11130   ins_cost(375);
11131   format %{ "MOV    $dst.lo,$src\n\t"
11132             "MOV    $dst.hi,$src\n\t"
11133             "SAR    $dst.hi,31" %}
11134   ins_encode(convert_int_long(dst,src));
11135   ins_pipe( ialu_reg_reg_long );
11136 %}
11137 
11138 // Zero-extend convert int to long
11139 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11140   match(Set dst (AndL (ConvI2L src) mask) );
11141   effect( KILL flags );
11142   ins_cost(250);
11143   format %{ "MOV    $dst.lo,$src\n\t"
11144             "XOR    $dst.hi,$dst.hi" %}
11145   opcode(0x33); // XOR
11146   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11147   ins_pipe( ialu_reg_reg_long );
11148 %}
11149 
11150 // Zero-extend long
11151 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{  // AndL with an immL_32bits mask (presumably 0xFFFFFFFF -- confirm against the operand definition)
11152   match(Set dst (AndL src mask) );
11153   effect( KILL flags );  // the XOR emitted below writes EFLAGS
11154   ins_cost(250);
11155   format %{ "MOV    $dst.lo,$src.lo\n\t"
11156             "XOR    $dst.hi,$dst.hi" %}
11157   opcode(0x33); // XOR
11158   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );  // per the format: copy the low word, then XOR the high word with itself to zero it
11159   ins_pipe( ialu_reg_reg_long );
11160 %}
11161 
11162 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11163   predicate (UseSSE<=1);
11164   match(Set dst (ConvL2D src));
11165   effect( KILL cr );
11166   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11167             "PUSH   $src.lo\n\t"
11168             "FILD   ST,[ESP + #0]\n\t"
11169             "ADD    ESP,8\n\t"
11170             "FSTP_D $dst\t# D-round" %}
11171   opcode(0xDF, 0x5);  /* DF /5 */
11172   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11173   ins_pipe( pipe_slow );
11174 %}
11175 
11176 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11177   predicate (UseSSE>=2);
11178   match(Set dst (ConvL2D src));
11179   effect( KILL cr );
11180   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11181             "PUSH   $src.lo\n\t"
11182             "FILD_D [ESP]\n\t"
11183             "FSTP_D [ESP]\n\t"
11184             "MOVSD  $dst,[ESP]\n\t"
11185             "ADD    ESP,8" %}
11186   opcode(0xDF, 0x5);  /* DF /5 */
11187   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11188   ins_pipe( pipe_slow );
11189 %}
11190 
11191 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11192   predicate (UseSSE>=1);
11193   match(Set dst (ConvL2F src));
11194   effect( KILL cr );
11195   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11196             "PUSH   $src.lo\n\t"
11197             "FILD_D [ESP]\n\t"
11198             "FSTP_S [ESP]\n\t"
11199             "MOVSS  $dst,[ESP]\n\t"
11200             "ADD    ESP,8" %}
11201   opcode(0xDF, 0x5);  /* DF /5 */
11202   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11203   ins_pipe( pipe_slow );
11204 %}
11205 
11206 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11207   match(Set dst (ConvL2F src));
11208   effect( KILL cr );
11209   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11210             "PUSH   $src.lo\n\t"
11211             "FILD   ST,[ESP + #0]\n\t"
11212             "ADD    ESP,8\n\t"
11213             "FSTP_S $dst\t# F-round" %}
11214   opcode(0xDF, 0x5);  /* DF /5 */
11215   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11216   ins_pipe( pipe_slow );
11217 %}
11218 
11219 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11220   match(Set dst (ConvL2I src));
11221   effect( DEF dst, USE src );
11222   format %{ "MOV    $dst,$src.lo" %}
11223   ins_encode(enc_CopyL_Lo(dst,src));
11224   ins_pipe( ialu_reg_reg );
11225 %}
11226 
11227 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11228   match(Set dst (MoveF2I src));
11229   effect( DEF dst, USE src );
11230   ins_cost(100);
11231   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11232   ins_encode %{
11233     __ movl($dst$$Register, Address(rsp, $src$$disp));
11234   %}
11235   ins_pipe( ialu_reg_mem );
11236 %}
11237 
11238 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11239   predicate(UseSSE==0);
11240   match(Set dst (MoveF2I src));
11241   effect( DEF dst, USE src );
11242 
11243   ins_cost(125);
11244   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11245   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11246   ins_pipe( fpu_mem_reg );
11247 %}
11248 
11249 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11250   predicate(UseSSE>=1);
11251   match(Set dst (MoveF2I src));
11252   effect( DEF dst, USE src );
11253 
11254   ins_cost(95);
11255   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11256   ins_encode %{
11257     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11258   %}
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11263   predicate(UseSSE>=2);
11264   match(Set dst (MoveF2I src));
11265   effect( DEF dst, USE src );
11266   ins_cost(85);
11267   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11268   ins_encode %{
11269     __ movdl($dst$$Register, $src$$XMMRegister);
11270   %}
11271   ins_pipe( pipe_slow );
11272 %}
11273 
11274 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11275   match(Set dst (MoveI2F src));
11276   effect( DEF dst, USE src );
11277 
11278   ins_cost(100);
11279   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11280   ins_encode %{
11281     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11282   %}
11283   ins_pipe( ialu_mem_reg );
11284 %}
11285 
11286 
11287 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11288   predicate(UseSSE==0);
11289   match(Set dst (MoveI2F src));
11290   effect(DEF dst, USE src);
11291 
11292   ins_cost(125);
11293   format %{ "FLD_S  $src\n\t"
11294             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11295   opcode(0xD9);               /* D9 /0, FLD m32real */
11296   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11297               Pop_Reg_FPR(dst) );
11298   ins_pipe( fpu_reg_mem );
11299 %}
11300 
11301 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11302   predicate(UseSSE>=1);
11303   match(Set dst (MoveI2F src));
11304   effect( DEF dst, USE src );
11305 
11306   ins_cost(95);
11307   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11308   ins_encode %{
11309     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
11314 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11315   predicate(UseSSE>=2);
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(85);
11320   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11321   ins_encode %{
11322     __ movdl($dst$$XMMRegister, $src$$Register);
11323   %}
11324   ins_pipe( pipe_slow );
11325 %}
11326 
11327 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11328   match(Set dst (MoveD2L src));
11329   effect(DEF dst, USE src);
11330 
11331   ins_cost(250);
11332   format %{ "MOV    $dst.lo,$src\n\t"
11333             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11334   opcode(0x8B, 0x8B);
11335   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11336   ins_pipe( ialu_mem_long_reg );
11337 %}
11338 
11339 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11340   predicate(UseSSE<=1);
11341   match(Set dst (MoveD2L src));
11342   effect(DEF dst, USE src);
11343 
11344   ins_cost(125);
11345   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11346   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11347   ins_pipe( fpu_mem_reg );
11348 %}
11349 
11350 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11351   predicate(UseSSE>=2);
11352   match(Set dst (MoveD2L src));
11353   effect(DEF dst, USE src);
11354   ins_cost(95);
11355   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11356   ins_encode %{
11357     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11358   %}
11359   ins_pipe( pipe_slow );
11360 %}
11361 
11362 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{  // MoveD2L: copy the raw 64 bits of an XMM double into a long register pair
11363   predicate(UseSSE>=2);
11364   match(Set dst (MoveD2L src));
11365   effect(DEF dst, USE src, TEMP tmp);  // tmp receives the shuffled copy so src itself is left unmodified
11366   ins_cost(85);
11367   format %{ "MOVD   $dst.lo,$src\n\t"
11368             "PSHUFLW $tmp,$src,0x4E\n\t"
11369             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11370   ins_encode %{
11371     __ movdl($dst$$Register, $src$$XMMRegister);  // low 32 bits of src -> low half of dst
11372     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);  // 0x4E swaps the two dwords of the low quadword, so bits 63:32 end up in tmp[31:0]
11373     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);  // former high 32 bits -> high half of dst
11374   %}
11375   ins_pipe( pipe_slow );
11376 %}
11377 
11378 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11379   match(Set dst (MoveL2D src));
11380   effect(DEF dst, USE src);
11381 
11382   ins_cost(200);
11383   format %{ "MOV    $dst,$src.lo\n\t"
11384             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11385   opcode(0x89, 0x89);
11386   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11387   ins_pipe( ialu_mem_long_reg );
11388 %}
11389 
11390 
11391 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11392   predicate(UseSSE<=1);
11393   match(Set dst (MoveL2D src));
11394   effect(DEF dst, USE src);
11395   ins_cost(125);
11396 
11397   format %{ "FLD_D  $src\n\t"
11398             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11399   opcode(0xDD);               /* DD /0, FLD m64real */
11400   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11401               Pop_Reg_DPR(dst) );
11402   ins_pipe( fpu_reg_mem );
11403 %}
11404 
11405 
11406 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11407   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(95);
11412   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11413   ins_encode %{
11414     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{  // MoveL2D from a long stack slot into an XMM double; counterpart of MoveL2D_stack_reg_sse
11420   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);  // complementary to MoveL2D_stack_reg_sse, which requires UseXmmLoadAndClearUpper
11421   match(Set dst (MoveL2D src));
11422   effect(DEF dst, USE src);
11423 
11424   ins_cost(95);
11425   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11426   ins_encode %{
11427     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // same call as the non-partial rule; presumably movdbl picks MOVLPD under this flag setting -- confirm in MacroAssembler
11428   %}
11429   ins_pipe( pipe_slow );
11430 %}
11431 
11432 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{  // MoveL2D: assemble the raw 64 bits of a long register pair inside an XMM double
11433   predicate(UseSSE>=2);
11434   match(Set dst (MoveL2D src));
11435   effect(TEMP dst, USE src, TEMP tmp);  // dst is written before src.hi is read, hence TEMP rather than DEF (presumed rationale -- confirm)
11436   ins_cost(85);
11437   format %{ "MOVD   $dst,$src.lo\n\t"
11438             "MOVD   $tmp,$src.hi\n\t"
11439             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11440   ins_encode %{
11441     __ movdl($dst$$XMMRegister, $src$$Register);  // low half of src -> dst[31:0]
11442     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));  // high half of src -> tmp[31:0]
11443     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);  // interleave low dwords: dst[63:0] = tmp[31:0]:dst[31:0]
11444   %}
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 
11449 // =======================================================================
11450 // fast clearing of an array
11451 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{  // ClearArray for nodes not flagged as large
11452   predicate(!((ClearArrayNode*)n)->is_large());  // the is_large() case is handled by rep_stos_large
11453   match(Set dummy (ClearArray cnt base));
11454   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);  // cnt and base are consumed and destroyed; zero and the flags are scratch
11455 
11456   format %{ $$template
11457     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11458     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11459     $$emit$$"JG     LARGE\n\t"
11460     $$emit$$"SHL    ECX, 1\n\t"
11461     $$emit$$"DEC    ECX\n\t"
11462     $$emit$$"JS     DONE\t# Zero length\n\t"
11463     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11464     $$emit$$"DEC    ECX\n\t"
11465     $$emit$$"JGE    LOOP\n\t"
11466     $$emit$$"JMP    DONE\n\t"
11467     $$emit$$"# LARGE:\n\t"
11468     if (UseFastStosb) {
11469        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11470        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11471     } else {
11472        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11473        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11474     }
11475     $$emit$$"# DONE"
11476   %}
11477   ins_encode %{
11478     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);  // final argument false mirrors the !is_large() predicate; the format above describes the expected code shape
11479   %}
11480   ins_pipe( pipe_slow );
11481 %}
11482 
11483 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{  // ClearArray for nodes flagged as large
11484   predicate(((ClearArrayNode*)n)->is_large());  // the complementary case is handled by rep_stos
11485   match(Set dummy (ClearArray cnt base));
11486   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);  // cnt and base are consumed and destroyed; zero and the flags are scratch
11487   format %{ $$template
11488     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11489     if (UseFastStosb) {
11490        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11491        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11492     } else {
11493        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11494        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11495     }
11496     $$emit$$"# DONE"
11497   %}
11498   ins_encode %{
11499     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);  // final argument true mirrors the is_large() predicate; per the format, no short-length loop is emitted
11500   %}
11501   ins_pipe( pipe_slow );
11502 %}
11503 
11504 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11505                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11506   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11507   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11508   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11509 
11510   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11511   ins_encode %{
11512     __ string_compare($str1$$Register, $str2$$Register,
11513                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11514                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11515   %}
11516   ins_pipe( pipe_slow );
11517 %}
11518 
11519 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11520                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
        // StrComp rule for nodes whose encoding is StrIntrinsicNode::UU.
        // Both strings and both counts are USE_KILL, tmp1 is a TEMP XMM
        // register and the flags are killed.
11521   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11522   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11523   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11524 
11525   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11526   ins_encode %{
          // Delegates to the macro assembler, passing the encoding through.
11527     __ string_compare($str1$$Register, $str2$$Register,
11528                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11529                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11535                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
        // StrComp rule for nodes whose encoding is StrIntrinsicNode::LU.
        // Both strings and both counts are USE_KILL, tmp1 is a TEMP XMM
        // register and the flags are killed.
11536   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11537   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11538   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11539 
11540   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11541   ins_encode %{
          // Delegates to the macro assembler, passing the encoding through.
11542     __ string_compare($str1$$Register, $str2$$Register,
11543                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11544                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11545   %}
11546   ins_pipe( pipe_slow );
11547 %}
11548 
11549 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11550                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
        // StrComp rule for nodes whose encoding is StrIntrinsicNode::UL.
        // Compared with the LL/UU/LU rules, the operand classes of str1/str2
        // and cnt1/cnt2 are exchanged here, and the encode block hands the
        // second string to string_compare first.
11551   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11552   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11553   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11554 
11555   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11556   ins_encode %{
          // Note the swapped argument order: (str2, str1, cnt2, cnt1).
11557     __ string_compare($str2$$Register, $str1$$Register,
11558                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11559                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11560   %}
11561   ins_pipe( pipe_slow );
11562 %}
11563 
11564 // fast string equals
      // Matches StrEquals on two strings and a shared count. str1, str2 and cnt
      // are USE_KILL; tmp1/tmp2 are TEMP XMM registers; tmp3 and flags are killed.
11565 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11566                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11567   match(Set result (StrEquals (Binary str1 str2) cnt));
11568   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11569 
11570   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11571   ins_encode %{
          // Shares arrays_equals with the AryEq rules; the leading 'false' is
          // 'true' there. NOTE(review): presumably an "is array" flag -- confirm.
11572     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11573                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11574                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11575   %}
11576 
11577   ins_pipe( pipe_slow );
11578 %}
11579 
11580 // fast search of substring with known size.
      // LL-encoded StrIndexOf whose substring length is the immediate int_cnt2.
      // Requires UseSSE42Intrinsics. cnt2 is not an input here; it is only a
      // killed scratch register, as is tmp.
11581 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11582                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11583   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11584   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11585   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11586 
11587   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11588   ins_encode %{
          // The constant length picks the helper at code-emission time.
11589     int icnt2 = (int)$int_cnt2$$constant;
11590     if (icnt2 >= 16) {
11591       // IndexOf for constant substrings with size >= 16 elements
11592       // which don't need to be loaded through stack.
11593       __ string_indexofC8($str1$$Register, $str2$$Register,
11594                           $cnt1$$Register, $cnt2$$Register,
11595                           icnt2, $result$$Register,
11596                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11597     } else {
11598       // Small strings are loaded through stack if they cross page boundary.
11599       __ string_indexof($str1$$Register, $str2$$Register,
11600                         $cnt1$$Register, $cnt2$$Register,
11601                         icnt2, $result$$Register,
11602                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11603     }
11604   %}
11605   ins_pipe( pipe_slow );
11606 %}
11607 
11608 // fast search of substring with known size.
      // UU-encoded StrIndexOf whose substring length is the immediate int_cnt2.
      // Requires UseSSE42Intrinsics. cnt2 is not an input here; it is only a
      // killed scratch register, as is tmp.
11609 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11610                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11611   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11612   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11613   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11614 
11615   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11616   ins_encode %{
          // The constant length picks the helper at code-emission time.
11617     int icnt2 = (int)$int_cnt2$$constant;
11618     if (icnt2 >= 8) {
11619       // IndexOf for constant substrings with size >= 8 elements
11620       // which don't need to be loaded through stack.
11621       __ string_indexofC8($str1$$Register, $str2$$Register,
11622                           $cnt1$$Register, $cnt2$$Register,
11623                           icnt2, $result$$Register,
11624                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11625     } else {
11626       // Small strings are loaded through stack if they cross page boundary.
11627       __ string_indexof($str1$$Register, $str2$$Register,
11628                         $cnt1$$Register, $cnt2$$Register,
11629                         icnt2, $result$$Register,
11630                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11631     }
11632   %}
11633   ins_pipe( pipe_slow );
11634 %}
11635 
11636 // fast search of substring with known size.
      // UL-encoded StrIndexOf whose substring length is the immediate int_cnt2.
      // Requires UseSSE42Intrinsics. cnt2 is not an input here; it is only a
      // killed scratch register, as is tmp.
11637 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11638                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11639   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11640   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11641   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11642 
11643   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11644   ins_encode %{
          // The constant length picks the helper at code-emission time.
11645     int icnt2 = (int)$int_cnt2$$constant;
11646     if (icnt2 >= 8) {
11647       // IndexOf for constant substrings with size >= 8 elements
11648       // which don't need to be loaded through stack.
11649       __ string_indexofC8($str1$$Register, $str2$$Register,
11650                           $cnt1$$Register, $cnt2$$Register,
11651                           icnt2, $result$$Register,
11652                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11653     } else {
11654       // Small strings are loaded through stack if they cross page boundary.
11655       __ string_indexof($str1$$Register, $str2$$Register,
11656                         $cnt1$$Register, $cnt2$$Register,
11657                         icnt2, $result$$Register,
11658                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11659     }
11660   %}
11661   ins_pipe( pipe_slow );
11662 %}
11663 
11664 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11665                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
        // LL-encoded StrIndexOf where the substring length cnt2 is a register
        // input rather than an immediate. Requires UseSSE42Intrinsics.
11666   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11667   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11668   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11669 
11670   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11671   ins_encode %{
          // (-1) is passed where the constant-length rules pass icnt2.
11672     __ string_indexof($str1$$Register, $str2$$Register,
11673                       $cnt1$$Register, $cnt2$$Register,
11674                       (-1), $result$$Register,
11675                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11676   %}
11677   ins_pipe( pipe_slow );
11678 %}
11679 
11680 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11681                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
        // UU-encoded StrIndexOf where the substring length cnt2 is a register
        // input rather than an immediate. Requires UseSSE42Intrinsics.
11682   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11683   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11684   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11685 
11686   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11687   ins_encode %{
          // (-1) is passed where the constant-length rules pass icnt2.
11688     __ string_indexof($str1$$Register, $str2$$Register,
11689                       $cnt1$$Register, $cnt2$$Register,
11690                       (-1), $result$$Register,
11691                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11692   %}
11693   ins_pipe( pipe_slow );
11694 %}
11695 
11696 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11697                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
        // UL-encoded StrIndexOf where the substring length cnt2 is a register
        // input rather than an immediate. Requires UseSSE42Intrinsics.
11698   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11699   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11700   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11701 
11702   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11703   ins_encode %{
          // (-1) is passed where the constant-length rules pass icnt2.
11704     __ string_indexof($str1$$Register, $str2$$Register,
11705                       $cnt1$$Register, $cnt2$$Register,
11706                       (-1), $result$$Register,
11707                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11708   %}
11709   ins_pipe( pipe_slow );
11710 %}
11711 
11712 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11713                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar: search for the single value ch in str1 of length cnt1.
        // Requires UseSSE42Intrinsics. Uses three TEMP XMM registers; unlike the
        // substring rules, tmp is declared TEMP rather than KILL.
11714   predicate(UseSSE42Intrinsics);
11715   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11716   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11717   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11718   ins_encode %{
11719     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11720                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11721   %}
11722   ins_pipe( pipe_slow );
11723 %}
11724 
11725 // fast array equals
      // AryEq rule for nodes whose encoding is StrIntrinsicNode::LL (byte[]).
      // ary1/ary2 are USE_KILL; tmp1/tmp2 are TEMP XMM registers; tmp3, tmp4
      // and the flags are killed.
11726 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11727                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11728 %{
11729   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11730   match(Set result (AryEq ary1 ary2));
11731   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11732   //ins_cost(300);
11733 
11734   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11735   ins_encode %{
          // tmp3 fills the slot where string_equals passes its count register.
11736     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11737                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11738                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11739   %}
11740   ins_pipe( pipe_slow );
11741 %}
11742 
11743 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11744                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11745 %{
        // AryEq rule for nodes whose encoding is StrIntrinsicNode::UU (char[]).
        // Identical to array_equalsB except that the final arrays_equals
        // argument is true.
11746   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11747   match(Set result (AryEq ary1 ary2));
11748   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11749   //ins_cost(300);
11750 
11751   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11752   ins_encode %{
11753     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11754                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11755                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11756   %}
11757   ins_pipe( pipe_slow );
11758 %}
11759 
11760 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11761                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11762 %{
        // HasNegatives over the array ary1 of length len. ary1 and len are
        // USE_KILL; tmp1/tmp2 are TEMP XMM registers; tmp3 and flags are killed.
11763   match(Set result (HasNegatives ary1 len));
11764   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11765 
11766   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11767   ins_encode %{
11768     __ has_negatives($ary1$$Register, $len$$Register,
11769                      $result$$Register, $tmp3$$Register,
11770                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11771   %}
11772   ins_pipe( pipe_slow );
11773 %}
11774 
11775 // fast char[] to byte[] compression
      // Matches StrCompressedCopy. src, dst and len are USE_KILL; tmp1..tmp4 are
      // TEMP XMM registers; tmp5 and the flags are killed. The format string
      // lists the killed temporaries by operand name, since this 32-bit
      // description has no RAX/RCX/RDX and the result register is defined,
      // not killed.
11776 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11777                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11778   match(Set result (StrCompressedCopy src (Binary dst len)));
11779   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11780 
11781   format %{ "String Compress $src,$dst -> $result    // KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
11782   ins_encode %{
11783     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11784                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11785                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11786   %}
11787   ins_pipe( pipe_slow );
11788 %}
11789 
11790 // fast byte[] to char[] inflation
      // Matches StrInflatedCopy; the rule produces only the placeholder 'dummy'.
      // src, dst and len are USE_KILL; tmp1 (XMM) and tmp2 are TEMPs; flags
      // are killed.
11791 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11792                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11793   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11794   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11795 
11796   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11797   ins_encode %{
11798     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11799                           $tmp1$$XMMRegister, $tmp2$$Register);
11800   %}
11801   ins_pipe( pipe_slow );
11802 %}
11803 
11804 // encode char[] to byte[] in ISO_8859_1
      // Matches EncodeISOArray. src, dst and len are USE_KILL; tmp1..tmp4 are
      // TEMP XMM registers; tmp5 and the flags are killed.
11805 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11806                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11807                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11808   match(Set result (EncodeISOArray src (Binary dst len)));
11809   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11810 
11811   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11812   ins_encode %{
11813     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11814                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11815                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11816   %}
11817   ins_pipe( pipe_slow );
11818 %}
11819 
11820 
11821 //----------Control Flow Instructions------------------------------------------
11822 // Signed compare Instructions
      // Register-register signed int compare: sets cr from CmpI op1 op2.
      // Encoded as opcode 0x3B /r with both operands in registers.
11823 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11824   match(Set cr (CmpI op1 op2));
11825   effect( DEF cr, USE op1, USE op2 );
11826   format %{ "CMP    $op1,$op2" %}
11827   opcode(0x3B);  /* Opcode 3B /r */
11828   ins_encode( OpcP, RegReg( op1, op2) );
11829   ins_pipe( ialu_cr_reg_reg );
11830 %}
11831 
11832 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
        // Signed int compare of a register against an immediate (opcode 81 /7).
        // The immediate is emitted through Con8or32, paired with OpcSErm.
11833   match(Set cr (CmpI op1 op2));
11834   effect( DEF cr, USE op1 );
11835   format %{ "CMP    $op1,$op2" %}
11836   opcode(0x81,0x07);  /* Opcode 81 /7 */
11837   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11838   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11839   ins_pipe( ialu_cr_reg_imm );
11840 %}
11841 
11842 // Cisc-spilled version of cmpI_eReg
      // Compares register op1 against an int loaded from memory (LoadI op2)
      // using opcode 0x3B /r; carries an explicit ins_cost of 500.
11843 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11844   match(Set cr (CmpI op1 (LoadI op2)));
11845 
11846   format %{ "CMP    $op1,$op2" %}
11847   ins_cost(500);
11848   opcode(0x3B);  /* Opcode 3B /r */
11849   ins_encode( OpcP, RegMem( op1, op2) );
11850   ins_pipe( ialu_cr_reg_mem );
11851 %}
11852 
11853 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
        // Compare of an int register against constant zero, emitted as
        // TEST src,src (opcode 0x85) instead of a CMP with an immediate.
11854   match(Set cr (CmpI src zero));
11855   effect( DEF cr, USE src );
11856 
11857   format %{ "TEST   $src,$src" %}
11858   opcode(0x85);
11859   ins_encode( OpcP, RegReg( src, src ) );
11860   ins_pipe( ialu_cr_reg_imm );
11861 %}
11862 
11863 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
        // Folds (src & con) compared with zero into one TEST src,con
        // (opcode F7 /0 followed by a 32-bit immediate).
11864   match(Set cr (CmpI (AndI src con) zero));
11865 
11866   format %{ "TEST   $src,$con" %}
11867   opcode(0xF7,0x00);
11868   ins_encode( OpcP, RegOpc(src), Con32(con) );
11869   ins_pipe( ialu_cr_reg_imm );
11870 %}
11871 
11872 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
        // Folds (src & mem) compared with zero into TEST src,mem (opcode 0x85).
        // NOTE(review): unlike compI_eReg_mem, the memory operand is not wrapped
        // in LoadI in the match rule -- confirm this rule can actually match.
11873   match(Set cr (CmpI (AndI src mem) zero));
11874 
11875   format %{ "TEST   $src,$mem" %}
11876   opcode(0x85);
11877   ins_encode( OpcP, RegMem( src, mem ) );
11878   ins_pipe( ialu_cr_reg_mem );
11879 %}
11880 
11881 // Unsigned compare Instructions; really, same as signed except they
11882 // produce an eFlagsRegU instead of eFlagsReg.
      // Register-register form: CmpU op1 op2, encoded as opcode 0x3B /r.
11883 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11884   match(Set cr (CmpU op1 op2));
11885 
11886   format %{ "CMPu   $op1,$op2" %}
11887   opcode(0x3B);  /* Opcode 3B /r */
11888   ins_encode( OpcP, RegReg( op1, op2) );
11889   ins_pipe( ialu_cr_reg_reg );
11890 %}
11891 
11892 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
        // Unsigned compare of a register against an immediate (opcode 81 /7),
        // using the same encoding classes as compI_eReg_imm.
11893   match(Set cr (CmpU op1 op2));
11894 
11895   format %{ "CMPu   $op1,$op2" %}
11896   opcode(0x81,0x07);  /* Opcode 81 /7 */
11897   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11898   ins_pipe( ialu_cr_reg_imm );
11899 %}
11900 
11901 // Cisc-spilled version of compU_eReg
      // Unsigned compare of register op1 against an int loaded from memory
      // (LoadI op2), opcode 0x3B /r, with an explicit ins_cost of 500.
11902 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11903   match(Set cr (CmpU op1 (LoadI op2)));
11904 
11905   format %{ "CMPu   $op1,$op2" %}
11906   ins_cost(500);
11907   opcode(0x3B);  /* Opcode 3B /r */
11908   ins_encode( OpcP, RegMem( op1, op2) );
11909   ins_pipe( ialu_cr_reg_mem );
11910 %}
11911 
11912 // Disabled: Cisc-spilled version of compU_eReg with the memory operand first
11913 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11914 //  match(Set cr (CmpU (LoadI op1) op2));
11915 //
11916 //  format %{ "CMPu   $op1,$op2" %}
11917 //  ins_cost(500);
11918 //  opcode(0x39);  /* Opcode 39 /r */
11919 //  ins_encode( OpcP, RegMem( op1, op2) );
11920 //%}
11921 
11922 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
        // Unsigned compare of a register against constant zero, emitted as
        // TEST src,src (opcode 0x85).
11923   match(Set cr (CmpU src zero));
11924 
11925   format %{ "TESTu  $src,$src" %}
11926   opcode(0x85);
11927   ins_encode( OpcP, RegReg( src, src ) );
11928   ins_pipe( ialu_cr_reg_imm );
11929 %}
11930 
11931 // Unsigned pointer compare Instructions
      // Register-register pointer compare: CmpP op1 op2 into an unsigned flags
      // register, encoded as opcode 0x3B /r.
11932 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11933   match(Set cr (CmpP op1 op2));
11934 
11935   format %{ "CMPu   $op1,$op2" %}
11936   opcode(0x3B);  /* Opcode 3B /r */
11937   ins_encode( OpcP, RegReg( op1, op2) );
11938   ins_pipe( ialu_cr_reg_reg );
11939 %}
11940 
11941 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register against a pointer immediate
        // (opcode 81 /7), using the same encoding classes as the int forms.
11942   match(Set cr (CmpP op1 op2));
11943 
11944   format %{ "CMPu   $op1,$op2" %}
11945   opcode(0x81,0x07);  /* Opcode 81 /7 */
11946   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11947   ins_pipe( ialu_cr_reg_imm );
11948 %}
11949 
11950 // Cisc-spilled version of compP_eReg
      // Pointer compare of register op1 against a pointer loaded from memory
      // (LoadP op2), opcode 0x3B /r, with an explicit ins_cost of 500.
11951 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11952   match(Set cr (CmpP op1 (LoadP op2)));
11953 
11954   format %{ "CMPu   $op1,$op2" %}
11955   ins_cost(500);
11956   opcode(0x3B);  /* Opcode 3B /r */
11957   ins_encode( OpcP, RegMem( op1, op2) );
11958   ins_pipe( ialu_cr_reg_mem );
11959 %}
11960 
11961 // Disabled: Cisc-spilled version of compP_eReg with the memory operand first
11962 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11963 //  match(Set cr (CmpP (LoadP op1) op2));
11964 //
11965 //  format %{ "CMPu   $op1,$op2" %}
11966 //  ins_cost(500);
11967 //  opcode(0x39);  /* Opcode 39 /r */
11968 //  ins_encode( OpcP, RegMem( op1, op2) );
11969 //%}
11970 
11971 // Compare raw pointer (used in out-of-heap check).
11972 // Only works because non-oop pointers must be raw pointers
11973 // and raw pointers have no anti-dependencies.
      // Same match tree and encoding as compP_eReg_mem, but guarded by a
      // predicate requiring the loaded value's type to carry no relocation,
      // and declared without an explicit ins_cost.
11974 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11975   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11976   match(Set cr (CmpP op1 (LoadP op2)));
11977 
11978   format %{ "CMPu   $op1,$op2" %}
11979   opcode(0x3B);  /* Opcode 3B /r */
11980   ins_encode( OpcP, RegMem( op1, op2) );
11981   ins_pipe( ialu_cr_reg_mem );
11982 %}
11983 
11984 //
11985 // This will generate a signed flags result. This should be ok
11986 // since any compare to a zero should be eq/neq.
      // Pointer compare against the null constant (immP0), emitted as
      // TEST src,src (opcode 0x85).
11987 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11988   match(Set cr (CmpP src zero));
11989 
11990   format %{ "TEST   $src,$src" %}
11991   opcode(0x85);
11992   ins_encode( OpcP, RegReg( src, src ) );
11993   ins_pipe( ialu_cr_reg_imm );
11994 %}
11995 
11996 // Cisc-spilled version of testP_reg
11997 // This will generate a signed flags result. This should be ok
11998 // since any compare to a zero should be eq/neq.
      // Tests a pointer in memory against zero with TEST [op],0xFFFFFFFF
      // (opcode F7 /0 plus a 32-bit all-ones immediate).
      // NOTE(review): 'zero' is declared immI0 although it is the second input
      // of CmpP; testP_reg uses immP0 -- confirm which operand class is intended.
11999 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12000   match(Set cr (CmpP (LoadP op) zero));
12001 
12002   format %{ "TEST   $op,0xFFFFFFFF" %}
12003   ins_cost(500);
12004   opcode(0xF7);               /* Opcode F7 /0 */
12005   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12006   ins_pipe( ialu_cr_reg_imm );
12007 %}
12008 
12009 // Yanked all unsigned pointer compare operations.
12010 // Pointer compares are done with CmpP which is already unsigned.
12011 
12012 //----------Max and Min--------------------------------------------------------
12013 // Min Instructions
12014 ////
12015 //   *** Min and Max using the conditional move are slower than the
12016 //   *** branch version on a Pentium III.
12017 // // Conditional move for min
12018 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12019 //  effect( USE_DEF op2, USE op1, USE cr );
12020 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12021 //  opcode(0x4C,0x0F);
12022 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12023 //  ins_pipe( pipe_cmov_reg );
12024 //%}
12025 //
12026 //// Min Register with Register (P6 version)
12027 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12028 //  predicate(VM_Version::supports_cmov() );
12029 //  match(Set op2 (MinI op1 op2));
12030 //  ins_cost(200);
12031 //  expand %{
12032 //    eFlagsReg cr;
12033 //    compI_eReg(cr,op1,op2);
12034 //    cmovI_reg_lt(op2,op1,cr);
12035 //  %}
12036 //%}
12037 
12038 // Min Register with Register (generic version)
      // dst = MinI(dst, src); dst is both input and result. Flags are killed.
      // The emitted code comes from the min_enc encoding class.
12039 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12040   match(Set dst (MinI dst src));
12041   effect(KILL flags);
12042   ins_cost(300);
12043 
12044   format %{ "MIN    $dst,$src" %}
12045   opcode(0xCC);
12046   ins_encode( min_enc(dst,src) );
12047   ins_pipe( pipe_slow );
12048 %}
12049 
12050 // Max Register with Register
12051 //   *** Min and Max using the conditional move are slower than the
12052 //   *** branch version on a Pentium III.
12053 // // Conditional move for max
12054 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12055 //  effect( USE_DEF op2, USE op1, USE cr );
12056 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12057 //  opcode(0x4F,0x0F);
12058 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12059 //  ins_pipe( pipe_cmov_reg );
12060 //%}
12061 //
12062 // // Max Register with Register (P6 version)
12063 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12064 //  predicate(VM_Version::supports_cmov() );
12065 //  match(Set op2 (MaxI op1 op2));
12066 //  ins_cost(200);
12067 //  expand %{
12068 //    eFlagsReg cr;
12069 //    compI_eReg(cr,op1,op2);
12070 //    cmovI_reg_gt(op2,op1,cr);
12071 //  %}
12072 //%}
12073 
12074 // Max Register with Register (generic version)
      // dst = MaxI(dst, src); dst is both input and result. Flags are killed.
      // The emitted code comes from the max_enc encoding class.
12075 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12076   match(Set dst (MaxI dst src));
12077   effect(KILL flags);
12078   ins_cost(300);
12079 
12080   format %{ "MAX    $dst,$src" %}
12081   opcode(0xCC);
12082   ins_encode( max_enc(dst,src) );
12083   ins_pipe( pipe_slow );
12084 %}
12085 
12086 // ============================================================================
12087 // Counted Loop limit node which represents exact final iterator value.
12088 // Note: the resulting value should fit into integer range since
12089 // counted loops have limit check on overflow.
      // limit is both an input and the result; limit_hi and tmp are TEMPs and
      // the flags are killed. stride is an immediate that the encode block
      // asserts is neither 1 nor -1.
12090 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12091   match(Set limit (LoopLimit (Binary init limit) stride));
12092   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12093   ins_cost(300);
12094 
12095   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12096   ins_encode %{
12097     int strd = (int)$stride$$constant;
12098     assert(strd != 1 && strd != -1, "sanity");
12100     // Convert limit to long (EAX:EDX)
12101     __ cdql();
12102     // Convert init to long (init:tmp)
12103     __ movl($tmp$$Register, $init$$Register);
12104     __ sarl($tmp$$Register, 31);
12105     // $limit - $init
12106     __ subl($limit$$Register, $init$$Register);
12107     __ sbbl($limit_hi$$Register, $tmp$$Register);
12108     // + ($stride - 1)
12109     if (strd > 0) {
12110       __ addl($limit$$Register, (strd - 1));
12111       __ adcl($limit_hi$$Register, 0);
12112       __ movl($tmp$$Register, strd);
12113     } else {
            // Negative stride: add (stride + 1), negate the 64-bit value and
            // divide by the positive magnitude of the stride.
12114       __ addl($limit$$Register, (strd + 1));
12115       __ adcl($limit_hi$$Register, -1);
12116       __ lneg($limit_hi$$Register, $limit$$Register);
12117       __ movl($tmp$$Register, -strd);
12118     }
12119     // signed division: (EAX:EDX) / pos_stride
12120     __ idivl($tmp$$Register);
12121     if (strd < 0) {
12122       // restore sign
12123       __ negl($tmp$$Register);
12124     }
12125     // (EAX) * stride
12126     __ mull($tmp$$Register);
12127     // + init (ignore upper bits)
12128     __ addl($limit$$Register, $init$$Register);
12129   %}
12130   ins_pipe( pipe_slow );
12131 %}
12132 
12133 // ============================================================================
12134 // Branch Instructions
12135 // Jump Table
      // Matches the Jump node: an indirect jump through the constant table,
      // using switch_val with scale 1 as the index into it.
12136 instruct jumpXtnd(rRegI switch_val) %{
12137   match(Jump switch_val);
12138   ins_cost(350);
12139   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12140   ins_encode %{
12141     // Jump to Address(table_base + switch_reg)
12142     Address index(noreg, $switch_val$$Register, Address::times_1);
12143     __ jump(ArrayAddress($constantaddress, index));
12144   %}
12145   ins_pipe(pipe_jmp);
12146 %}
12147 
12148 // Jump Direct - Label defines a relative address from JMP+1
      // Matches Goto. The size is declared as 5 bytes, and the encode block
      // always requests the long jump form to match it.
12149 instruct jmpDir(label labl) %{
12150   match(Goto);
12151   effect(USE labl);
12152 
12153   ins_cost(300);
12154   format %{ "JMP    $labl" %}
12155   size(5);
12156   ins_encode %{
12157     Label* L = $labl$$label;
12158     __ jmp(*L, false); // Always long jump
12159   %}
12160   ins_pipe( pipe_jmp );
12161 %}
12162 
12163 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Matches If on a signed flags register. The size is declared as 6 bytes,
      // and the encode block always requests the long Jcc form to match it.
12164 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12165   match(If cop cr);
12166   effect(USE labl);
12167 
12168   ins_cost(300);
12169   format %{ "J$cop    $labl" %}
12170   size(6);
12171   ins_encode %{
12172     Label* L = $labl$$label;
12173     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12174   %}
12175   ins_pipe( pipe_jcc );
12176 %}
12177 
12178 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Matches CountedLoopEnd on signed flags when the node does not have the
      // vector mask set; jmpLoopEnd_and_restoreMask covers the other case.
12179 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12180   predicate(!n->has_vector_mask_set());
12181   match(CountedLoopEnd cop cr);
12182   effect(USE labl);
12183 
12184   ins_cost(300);
12185   format %{ "J$cop    $labl\t# Loop end" %}
12186   size(6);
12187   ins_encode %{
12188     Label* L = $labl$$label;
12189     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12190   %}
12191   ins_pipe( pipe_jcc );
12192 %}
12193 
12194 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Matches CountedLoopEnd on unsigned flags (cmpOpU / eFlagsRegU) when the
      // node does not have the vector mask set.
12195 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12196   predicate(!n->has_vector_mask_set());
12197   match(CountedLoopEnd cop cmp);
12198   effect(USE labl);
12199 
12200   ins_cost(300);
12201   format %{ "J$cop,u  $labl\t# Loop end" %}
12202   size(6);
12203   ins_encode %{
12204     Label* L = $labl$$label;
12205     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12206   %}
12207   ins_pipe( pipe_jcc );
12208 %}
12209 
12210 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Matches CountedLoopEnd for the cmpOpUCF / eFlagsRegUCF operand pair
        // when the node does not have the vector mask set. Its ins_cost is
        // 200, against 300 for jmpLoopEndU.
12211   predicate(!n->has_vector_mask_set());
12212   match(CountedLoopEnd cop cmp);
12213   effect(USE labl);
12214 
12215   ins_cost(200);
12216   format %{ "J$cop,u  $labl\t# Loop end" %}
12217   size(6);
12218   ins_encode %{
12219     Label* L = $labl$$label;
12220     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12221   %}
12222   ins_pipe( pipe_jcc );
12223 %}
12224 
12225 // mask version
12226 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Matches CountedLoopEnd on signed flags when the node has the vector
      // mask set. Emits the long Jcc followed by restorevectmask(); the
      // declared size of 10 bytes covers both.
12227 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12228   predicate(n->has_vector_mask_set());
12229   match(CountedLoopEnd cop cr);
12230   effect(USE labl);
12231 
12232   ins_cost(400);
12233   format %{ "J$cop    $labl\t# Loop end\n\t"
12234             "restorevectmask \t# vector mask restore for loops" %}
12235   size(10);
12236   ins_encode %{
12237     Label* L = $labl$$label;
12238     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12239     __ restorevectmask();
12240   %}
12241   ins_pipe( pipe_jcc );
12242 %}
12243 
12244 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Matches CountedLoopEnd on unsigned flags when the node has the vector
      // mask set. Emits the long Jcc followed by restorevectmask(); the
      // declared size of 10 bytes covers both.
12245 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12246   predicate(n->has_vector_mask_set());
12247   match(CountedLoopEnd cop cmp);
12248   effect(USE labl);
12249 
12250   ins_cost(400);
12251   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12252             "restorevectmask \t# vector mask restore for loops" %}
12253   size(10);
12254   ins_encode %{
12255     Label* L = $labl$$label;
12256     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12257     __ restorevectmask();
12258   %}
12259   ins_pipe( pipe_jcc );
12260 %}
12261 
12262 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // cmpOpUCF / eFlagsRegUCF CountedLoopEnd branch for nodes that have a
        // vector mask set.  Emits the long-form Jcc followed by
        // restorevectmask(), which therefore runs only on fall-through.
12263   predicate(n->has_vector_mask_set());
12264   match(CountedLoopEnd cop cmp);
12265   effect(USE labl);
12266 
        // NOTE(review): cost is 300 here but 400 in jmpLoopEnd_and_restoreMask
        // and jmpLoopEndU_and_restoreMask -- confirm this is intentional.
12267   ins_cost(300);
12268   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12269             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size covers both the Jcc and the mask restore.
12270   size(10);
12271   ins_encode %{
12272     Label* L = $labl$$label;
12273     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12274     __ restorevectmask();
12275   %}
12276   ins_pipe( pipe_jcc );
12277 %}
12278 
12279 // Jump Direct Conditional - using unsigned comparison
12280 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Long-form conditional branch for an If node whose flags come from an
        // unsigned compare (cmpOpU / eFlagsRegU).  jmpConU_short is the
        // short-offset counterpart.
12281   match(If cop cmp);
12282   effect(USE labl);
12283 
12284   ins_cost(300);
12285   format %{ "J$cop,u  $labl" %}
        // Declared size of the single long-form Jcc emitted below.
12286   size(6);
12287   ins_encode %{
12288     Label* L = $labl$$label;
12289     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12290   %}
12291   ins_pipe(pipe_jcc);
12292 %}
12293 
12294 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Long-form conditional branch for an If node using the
        // cmpOpUCF / eFlagsRegUCF operand pair.  Same encoding as jmpConU but
        // with a lower cost (200 vs 300).
12295   match(If cop cmp);
12296   effect(USE labl);
12297 
12298   ins_cost(200);
12299   format %{ "J$cop,u  $labl" %}
        // Declared size of the single long-form Jcc emitted below.
12300   size(6);
12301   ins_encode %{
12302     Label* L = $labl$$label;
12303     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12304   %}
12305   ins_pipe(pipe_jcc);
12306 %}
12307 
12308 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for an If node with a cmpOpUCF2 condition.  Only
        // equal / notEqual are supported; each needs an extra test of the
        // parity flag (presumably the "unordered" result of a floating-point
        // compare -- confirm against the cmpOpUCF2 operand definition).
        // No size() is declared: the two encodings emit different instruction
        // mixes (two long Jcc vs. one short Jcc plus one long Jcc).
12309   match(If cop cmp);
12310   effect(USE labl);
12311 
12312   ins_cost(200);
12313   format %{ $$template
12314     if ($cop$$cmpcode == Assembler::notEqual) {
12315       $$emit$$"JP,u   $labl\n\t"
12316       $$emit$$"J$cop,u   $labl"
12317     } else {
12318       $$emit$$"JP,u   done\n\t"
12319       $$emit$$"J$cop,u   $labl\n\t"
12320       $$emit$$"done:"
12321     }
12322   %}
12323   ins_encode %{
12324     Label* l = $labl$$label;
12325     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target if parity is set OR not-equal.
12326       __ jcc(Assembler::parity, *l, false);
12327       __ jcc(Assembler::notEqual, *l, false);
12328     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set skips the branch via a local label; otherwise
            // branch to the target when equal.
12329       Label done;
12330       __ jccb(Assembler::parity, done);
12331       __ jcc(Assembler::equal, *l, false);
12332       __ bind(done);
12333     } else {
            // Any other condition code is not expected for this rule.
12334        ShouldNotReachHere();
12335     }
12336   %}
12337   ins_pipe(pipe_jcc);
12338 %}
12339 
12340 // ============================================================================
12341 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12342 // array for an instance of the superklass.  Set a hidden internal cache on a
12343 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12344 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12345 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Value-producing form of the partial subtype check: $result receives
        // the outcome (per the format text, zero on a hit and non-zero on a
        // miss).  Operands are pinned to specific register classes
        // (eDIRegP/eSIRegP/eAXRegP/eCXRegI); rcx and the flags are clobbered.
        // The actual code is emitted by enc_PartialSubtypeCheck(), which is
        // defined elsewhere in this file.
12346   match(Set result (PartialSubtypeCheck sub super));
12347   effect( KILL rcx, KILL cr );
12348 
12349   ins_cost(1100);  // slightly larger than the next version
12350   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12351             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12352             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12353             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12354             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12355             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12356             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12357      "miss:\t" %}
12358 
        // The opcode value is used as a flag by the shared encoding class.
12359   opcode(0x1); // Force a XOR of EDI
12360   ins_encode( enc_PartialSubtypeCheck() );
12361   ins_pipe( pipe_slow );
12362 %}
12363 
12364 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Flags-producing form: matches a PartialSubtypeCheck whose result is
        // immediately compared against the immP0 operand, so only the flags
        // are produced (per the format text, Z on a hit and NZ on a miss).
        // rcx and the result register are clobbered.  Cheaper (1000) than the
        // value-producing partialSubtypeCheck rule (1100).
12365   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12366   effect( KILL rcx, KILL result );
12367 
12368   ins_cost(1000);
12369   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12370             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12371             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12372             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12373             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12374             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12375      "miss:\t" %}
12376 
        // The opcode value is used as a flag by the shared encoding class.
12377   opcode(0x0);  // No need to XOR EDI
12378   ins_encode( enc_PartialSubtypeCheck() );
12379   ins_pipe( pipe_slow );
12380 %}
12381 
12382 // ============================================================================
12383 // Branch Instructions -- short offset versions
12384 //
12385 // These instructions are used to replace jumps of a long offset (the default
12386 // match) with jumps of a shorter offset.  These instructions are all tagged
12387 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12388 // match rules in general matching.  Instead, the ADLC generates a conversion
12389 // method in the MachNode which can be used to do in-place replacement of the
12390 // long variant with the shorter variant.  The compiler will determine if a
12391 // branch can be taken by the is_short_branch_offset() predicate in the machine
12392 // specific code section of the file.
12393 
12394 // Jump Direct - Label defines a relative address from JMP+1
12395 instruct jmpDir_short(label labl) %{
        // Short-offset unconditional branch for Goto nodes, emitted with
        // jmpb().  Tagged ins_short_branch(1): as the section comment above
        // explains, such rules are not used in general matching but replace
        // the long variant in place when the offset allows it.
12396   match(Goto);
12397   effect(USE labl);
12398 
12399   ins_cost(300);
12400   format %{ "JMP,s  $labl" %}
        // Declared size of the short-form JMP.
12401   size(2);
12402   ins_encode %{
12403     Label* L = $labl$$label;
12404     __ jmpb(*L);
12405   %}
12406   ins_pipe( pipe_jmp );
12407   ins_short_branch(1);
12408 %}
12409 
12410 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12411 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
        // Short-offset conditional branch for If nodes with cmpOp / eFlagsReg
        // operands, emitted with jccb().  Tagged ins_short_branch(1), so it is
        // only used as an in-place replacement for the long variant.
12412   match(If cop cr);
12413   effect(USE labl);
12414 
12415   ins_cost(300);
12416   format %{ "J$cop,s  $labl" %}
        // Declared size of the short-form Jcc.
12417   size(2);
12418   ins_encode %{
12419     Label* L = $labl$$label;
12420     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12421   %}
12422   ins_pipe( pipe_jcc );
12423   ins_short_branch(1);
12424 %}
12425 
12426 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12427 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
        // Short-offset CountedLoopEnd branch for cmpOp / eFlagsReg operands,
        // emitted with jccb().  Tagged ins_short_branch(1), so it is only used
        // as an in-place replacement for the long variant.
12428   match(CountedLoopEnd cop cr);
12429   effect(USE labl);
12430 
12431   ins_cost(300);
12432   format %{ "J$cop,s  $labl\t# Loop end" %}
        // Declared size of the short-form Jcc.
12433   size(2);
12434   ins_encode %{
12435     Label* L = $labl$$label;
12436     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12437   %}
12438   ins_pipe( pipe_jcc );
12439   ins_short_branch(1);
12440 %}
12441 
12442 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12443 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Short-offset CountedLoopEnd branch for unsigned-compare operands
        // (cmpOpU / eFlagsRegU), emitted with jccb().  Tagged
        // ins_short_branch(1), so it is only used as an in-place replacement
        // for the long variant.
12444   match(CountedLoopEnd cop cmp);
12445   effect(USE labl);
12446 
12447   ins_cost(300);
12448   format %{ "J$cop,us $labl\t# Loop end" %}
        // Declared size of the short-form Jcc.
12449   size(2);
12450   ins_encode %{
12451     Label* L = $labl$$label;
12452     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12453   %}
12454   ins_pipe( pipe_jcc );
12455   ins_short_branch(1);
12456 %}
12457 
12458 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset CountedLoopEnd branch for cmpOpUCF / eFlagsRegUCF
        // operands, emitted with jccb().  Tagged ins_short_branch(1), so it is
        // only used as an in-place replacement for the long variant.
12459   match(CountedLoopEnd cop cmp);
12460   effect(USE labl);
12461 
12462   ins_cost(300);
12463   format %{ "J$cop,us $labl\t# Loop end" %}
        // Declared size of the short-form Jcc.
12464   size(2);
12465   ins_encode %{
12466     Label* L = $labl$$label;
12467     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12468   %}
12469   ins_pipe( pipe_jcc );
12470   ins_short_branch(1);
12471 %}
12472 
12473 // Jump Direct Conditional - using unsigned comparison
12474 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Short-offset counterpart of jmpConU: conditional branch for If nodes
        // with unsigned-compare operands, emitted with jccb().  Tagged
        // ins_short_branch(1), so it is only used as an in-place replacement.
12475   match(If cop cmp);
12476   effect(USE labl);
12477 
12478   ins_cost(300);
12479   format %{ "J$cop,us $labl" %}
        // Declared size of the short-form Jcc.
12480   size(2);
12481   ins_encode %{
12482     Label* L = $labl$$label;
12483     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12484   %}
12485   ins_pipe( pipe_jcc );
12486   ins_short_branch(1);
12487 %}
12488 
12489 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset counterpart of jmpConUCF: conditional branch for If
        // nodes with cmpOpUCF / eFlagsRegUCF operands, emitted with jccb().
        // Tagged ins_short_branch(1), so it is only used as an in-place
        // replacement.
12490   match(If cop cmp);
12491   effect(USE labl);
12492 
12493   ins_cost(300);
12494   format %{ "J$cop,us $labl" %}
        // Declared size of the short-form Jcc.
12495   size(2);
12496   ins_encode %{
12497     Label* L = $labl$$label;
12498     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12499   %}
12500   ins_pipe( pipe_jcc );
12501   ins_short_branch(1);
12502 %}
12503 
12504 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset counterpart of jmpConUCF2.  Same equal / notEqual logic
        // with the extra parity-flag test, but every jump is the short form,
        // so both encodings emit exactly two short Jcc instructions and a
        // fixed size can be declared.  Tagged ins_short_branch(1), so it is
        // only used as an in-place replacement.
12505   match(If cop cmp);
12506   effect(USE labl);
12507 
12508   ins_cost(300);
12509   format %{ $$template
12510     if ($cop$$cmpcode == Assembler::notEqual) {
12511       $$emit$$"JP,u,s   $labl\n\t"
12512       $$emit$$"J$cop,u,s   $labl"
12513     } else {
12514       $$emit$$"JP,u,s   done\n\t"
12515       $$emit$$"J$cop,u,s  $labl\n\t"
12516       $$emit$$"done:"
12517     }
12518   %}
        // Declared size of the two short-form Jcc instructions.
12519   size(4);
12520   ins_encode %{
12521     Label* l = $labl$$label;
12522     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target if parity is set OR not-equal.
12523       __ jccb(Assembler::parity, *l);
12524       __ jccb(Assembler::notEqual, *l);
12525     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set skips the branch via a local label; otherwise
            // branch to the target when equal.
12526       Label done;
12527       __ jccb(Assembler::parity, done);
12528       __ jccb(Assembler::equal, *l);
12529       __ bind(done);
12530     } else {
            // Any other condition code is not expected for this rule.
12531        ShouldNotReachHere();
12532     }
12533   %}
12534   ins_pipe(pipe_jcc);
12535   ins_short_branch(1);
12536 %}
12537 
12538 // ============================================================================
12539 // Long Compare
12540 //
12541 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12542 // is tricky.  The flavor of compare used depends on whether we are testing
12543 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12544 // The GE test is the negated LT test.  The LE test can be had by commuting
12545 // the operands (yielding a GE test) and then negating; negate again for the
12546 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12547 // NE test is negated from that.
12548 
12549 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12550 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12551 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12552 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12553 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12554 // foo match ends up with the wrong leaf.  One fix is to not match both
12555 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12556 // both forms beat the trinary form of long-compare and both are very useful
12557 // on Intel which has so few registers.
12558 
12559 // Manifest a CmpL result in an integer register.  Very painful.
12560 // This is the test to avoid.
12561 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
        // Three-way compare of two longs held in register pairs (CmpL3).
        // Leaves -1, 0 or +1 in $dst for src1 <, == or > src2 and clobbers the
        // flags.  High halves are compared as signed values; only when they
        // are equal are the low halves compared, as unsigned values.
12562   match(Set dst (CmpL3 src1 src2));
12563   effect( KILL flags );
12564   ins_cost(1000);
12565   format %{ "XOR    $dst,$dst\n\t"
12566             "CMP    $src1.hi,$src2.hi\n\t"
12567             "JLT,s  m_one\n\t"
12568             "JGT,s  p_one\n\t"
12569             "CMP    $src1.lo,$src2.lo\n\t"
12570             "JB,s   m_one\n\t"
12571             "JEQ,s  done\n"
12572     "p_one:\tINC    $dst\n\t"
12573             "JMP,s  done\n"
12574     "m_one:\tDEC    $dst\n"
12575      "done:" %}
12576   ins_encode %{
12577     Label p_one, m_one, done;
          // Start from 0; the paths below adjust it to +1 or -1 as needed.
12578     __ xorptr($dst$$Register, $dst$$Register);
          // Signed compare of the high words decides unless they are equal.
12579     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12580     __ jccb(Assembler::less,    m_one);
12581     __ jccb(Assembler::greater, p_one);
          // High words equal: unsigned compare of the low words.
12582     __ cmpl($src1$$Register, $src2$$Register);
12583     __ jccb(Assembler::below,   m_one);
12584     __ jccb(Assembler::equal,   done);
          // Falling through here means src1 > src2.
12585     __ bind(p_one);
12586     __ incrementl($dst$$Register);
12587     __ jmpb(done);
12588     __ bind(m_one);
12589     __ decrementl($dst$$Register);
12590     __ bind(done);
12591   %}
12592   ins_pipe( pipe_slow );
12593 %}
12594 
12595 //======
12596 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12597 // compares.  Can be used for LE or GT compares by reversing arguments.
12598 // NOT GOOD FOR EQ/NE tests.
12599 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
        // Long compare against constant zero for LT/GE consumers only: the
        // result is produced in the flagsReg_long_LTGE operand class by a
        // TEST of the high half against itself, as the format shows.
12600   match( Set flags (CmpL src zero ));
12601   ins_cost(100);
12602   format %{ "TEST   $src.hi,$src.hi" %}
12603   opcode(0x85);
        // RegReg_Hi2 encodes the high registers of both (identical) operands.
12604   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12605   ins_pipe( ialu_cr_reg_reg );
12606 %}
12607 
12608 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12609 // compares.  Can be used for LE or GT compares by reversing arguments.
12610 // NOT GOOD FOR EQ/NE tests.
12611 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
        // Long register-register compare for LT/GE consumers only.  Per the
        // format: compare the low halves, then subtract-with-borrow the high
        // halves in a scratch register so the flags reflect the full 64-bit
        // difference.  $tmp is a TEMP and is overwritten.
12612   match( Set flags (CmpL src1 src2 ));
12613   effect( TEMP tmp );
12614   ins_cost(300);
12615   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12616             "MOV    $tmp,$src1.hi\n\t"
12617             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
        // long_cmp_flags2 is an encoding class defined elsewhere in this file.
12618   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12619   ins_pipe( ialu_cr_reg_reg );
12620 %}
12621 
12622 // Long compares reg < zero/reg OR reg >= zero/reg.
12623 // Just a wrapper for a normal branch, plus the predicate test.
12624 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
        // Branch on a long compare whose flags were produced in the
        // flagsReg_long_LTGE class.  The predicate restricts the rule to Bool
        // tests lt and ge; _kids[0] is the matcher state for the cmp operand.
        // Emits nothing of its own: it expands to the ordinary jmpCon rule.
12625   match(If cmp flags);
12626   effect(USE labl);
12627   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12628   expand %{
12629     jmpCon(cmp,flags,labl);    // JLT or JGE...
12630   %}
12631 %}
12632 
12633 // Compare 2 longs and CMOVE longs.
12634 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
        // Conditional move of a long (register pair) driven by a long compare
        // with an lt/ge test.  Requires CMOV support.  Two CMOVs are encoded,
        // one for the low halves and one for the high halves.
12635   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12636   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12637   ins_cost(400);
12638   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12639             "CMOV$cmp $dst.hi,$src.hi" %}
12640   opcode(0x0F,0x40);
12641   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12642   ins_pipe( pipe_cmov_reg_long );
12643 %}
12644 
12645 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
        // Conditional move of a long loaded from memory (LoadL folded into the
        // rule), driven by a long compare with an lt/ge test.  Requires CMOV
        // support.  Two CMOVs are encoded: RegMem for the low half and
        // RegMem_Hi for the high half.
12646   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12647   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12648   ins_cost(500);
12649   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12650             "CMOV$cmp $dst.hi,$src.hi" %}
12651   opcode(0x0F,0x40);
12652   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12653   ins_pipe( pipe_cmov_reg_long );
12654 %}
12655 
12656 // Compare 2 longs and CMOVE ints.
12657 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
        // Conditional move of an int register, selected by a long compare with
        // an lt/ge test.  Requires CMOV support; encodes a single CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12658   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12659   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12660   ins_cost(200);
12661   format %{ "CMOV$cmp $dst,$src" %}
12662   opcode(0x0F,0x40);
12663   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12664   ins_pipe( pipe_cmov_reg );
12665 %}
12666 
12667 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory (LoadI folded into the
        // rule), selected by a long compare with an lt/ge test.  Requires CMOV
        // support; encodes a single CMOV with a memory source.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12668   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12669   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12670   ins_cost(250);
12671   format %{ "CMOV$cmp $dst,$src" %}
12672   opcode(0x0F,0x40);
12673   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12674   ins_pipe( pipe_cmov_mem );
12675 %}
12676 
12677 // Compare 2 longs and CMOVE ptrs.
12678 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
        // Conditional move of a pointer register (CMoveP), selected by a long
        // compare with an lt/ge test.  Requires CMOV support; encodes a single
        // CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12679   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12680   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12681   ins_cost(200);
12682   format %{ "CMOV$cmp $dst,$src" %}
12683   opcode(0x0F,0x40);
12684   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12685   ins_pipe( pipe_cmov_reg );
12686 %}
12687 
12688 // Compare 2 longs and CMOVE doubles
12689 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
        // Conditional move of a double in regDPR registers, selected by a long
        // compare with an lt/ge test.  Only valid when UseSSE<=1.  Emits
        // nothing of its own: it expands to fcmovDPR_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ge test would make
        // the rule eligible regardless of UseSSE.
12690   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12691   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12692   ins_cost(200);
12693   expand %{
12694     fcmovDPR_regS(cmp,flags,dst,src);
12695   %}
12696 %}
12697 
12698 // Compare 2 longs and CMOVE doubles
12699 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
        // Conditional move of a double in regD registers, selected by a long
        // compare with an lt/ge test.  Only valid when UseSSE>=2.  Emits
        // nothing of its own: it expands to fcmovD_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ge test would make
        // the rule eligible regardless of UseSSE.
12700   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12701   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12702   ins_cost(200);
12703   expand %{
12704     fcmovD_regS(cmp,flags,dst,src);
12705   %}
12706 %}
12707 
12708 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
        // Conditional move of a float in regFPR registers, selected by a long
        // compare with an lt/ge test.  Only valid when UseSSE==0.  Emits
        // nothing of its own: it expands to fcmovFPR_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ge test would make
        // the rule eligible regardless of UseSSE.
12709   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12710   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12711   ins_cost(200);
12712   expand %{
12713     fcmovFPR_regS(cmp,flags,dst,src);
12714   %}
12715 %}
12716 
12717 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
        // Conditional move of a float in regF registers, selected by a long
        // compare with an lt/ge test.  Only valid when UseSSE>=1.  Emits
        // nothing of its own: it expands to fcmovF_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ge test would make
        // the rule eligible regardless of UseSSE.
12718   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12719   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12720   ins_cost(200);
12721   expand %{
12722     fcmovF_regS(cmp,flags,dst,src);
12723   %}
12724 %}
12725 
12726 //======
12727 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12728 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
        // Long compare against constant zero for EQ/NE consumers only.  Per
        // the format: OR the two halves together in a scratch register so the
        // zero flag tells whether the whole long is zero.  $tmp is a TEMP and
        // is overwritten.
12729   match( Set flags (CmpL src zero ));
12730   effect(TEMP tmp);
12731   ins_cost(200);
12732   format %{ "MOV    $tmp,$src.lo\n\t"
12733             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
        // long_cmp_flags0 is an encoding class defined elsewhere in this file.
12734   ins_encode( long_cmp_flags0( src, tmp ) );
12735   ins_pipe( ialu_reg_reg_long );
12736 %}
12737 
12738 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12739 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
        // Long register-register compare for EQ/NE consumers only.  Per the
        // format: compare the low halves and, only if they are equal, compare
        // the high halves; the final flags are "equal" only when both match.
12740   match( Set flags (CmpL src1 src2 ));
12741   ins_cost(200+300);
12742   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12743             "JNE,s  skip\n\t"
12744             "CMP    $src1.hi,$src2.hi\n\t"
12745      "skip:\t" %}
        // long_cmp_flags1 is an encoding class defined elsewhere in this file.
12746   ins_encode( long_cmp_flags1( src1, src2 ) );
12747   ins_pipe( ialu_cr_reg_reg );
12748 %}
12749 
12750 // Long compare reg == zero/reg OR reg != zero/reg
12751 // Just a wrapper for a normal branch, plus the predicate test.
12752 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
        // Branch on a long compare whose flags were produced in the
        // flagsReg_long_EQNE class.  The predicate restricts the rule to Bool
        // tests eq and ne; _kids[0] is the matcher state for the cmp operand.
        // Emits nothing of its own: it expands to the ordinary jmpCon rule.
12753   match(If cmp flags);
12754   effect(USE labl);
12755   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12756   expand %{
12757     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12758   %}
12759 %}
12760 
12761 // Compare 2 longs and CMOVE longs.
12762 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
        // Conditional move of a long (register pair) driven by a long compare
        // with an eq/ne test.  Requires CMOV support.  Two CMOVs are encoded,
        // one for the low halves and one for the high halves.
12763   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12764   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12765   ins_cost(400);
12766   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12767             "CMOV$cmp $dst.hi,$src.hi" %}
12768   opcode(0x0F,0x40);
12769   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12770   ins_pipe( pipe_cmov_reg_long );
12771 %}
12772 
12773 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
        // Conditional move of a long loaded from memory (LoadL folded into the
        // rule), driven by a long compare with an eq/ne test.  Requires CMOV
        // support.  Two CMOVs are encoded: RegMem for the low half and
        // RegMem_Hi for the high half.
12774   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12775   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12776   ins_cost(500);
12777   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12778             "CMOV$cmp $dst.hi,$src.hi" %}
12779   opcode(0x0F,0x40);
12780   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12781   ins_pipe( pipe_cmov_reg_long );
12782 %}
12783 
12784 // Compare 2 longs and CMOVE ints.
12785 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
        // Conditional move of an int register, selected by a long compare with
        // an eq/ne test.  Requires CMOV support; encodes a single CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12786   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12787   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12788   ins_cost(200);
12789   format %{ "CMOV$cmp $dst,$src" %}
12790   opcode(0x0F,0x40);
12791   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12792   ins_pipe( pipe_cmov_reg );
12793 %}
12794 
12795 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory (LoadI folded into the
        // rule), selected by a long compare with an eq/ne test.  Requires CMOV
        // support; encodes a single CMOV with a memory source.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12796   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12797   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12798   ins_cost(250);
12799   format %{ "CMOV$cmp $dst,$src" %}
12800   opcode(0x0F,0x40);
12801   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12802   ins_pipe( pipe_cmov_mem );
12803 %}
12804 
12805 // Compare 2 longs and CMOVE ptrs.
12806 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
        // Conditional move of a pointer register (CMoveP), selected by a long
        // compare with an eq/ne test.  Requires CMOV support; encodes a single
        // CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12807   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12808   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12809   ins_cost(200);
12810   format %{ "CMOV$cmp $dst,$src" %}
12811   opcode(0x0F,0x40);
12812   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12813   ins_pipe( pipe_cmov_reg );
12814 %}
12815 
12816 // Compare 2 longs and CMOVE doubles
12817 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
        // Conditional move of a double in regDPR registers, selected by a long
        // compare with an eq/ne test.  Only valid when UseSSE<=1.  Emits
        // nothing of its own: it expands to fcmovDPR_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ne test would make
        // the rule eligible regardless of UseSSE.
12818   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12819   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12820   ins_cost(200);
12821   expand %{
12822     fcmovDPR_regS(cmp,flags,dst,src);
12823   %}
12824 %}
12825 
12826 // Compare 2 longs and CMOVE doubles
12827 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
        // Conditional move of a double in regD registers, selected by a long
        // compare with an eq/ne test.  Only valid when UseSSE>=2.  Emits
        // nothing of its own: it expands to fcmovD_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ne test would make
        // the rule eligible regardless of UseSSE.
12828   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12829   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12830   ins_cost(200);
12831   expand %{
12832     fcmovD_regS(cmp,flags,dst,src);
12833   %}
12834 %}
12835 
12836 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
        // Conditional move of a float in regFPR registers, selected by a long
        // compare with an eq/ne test.  Only valid when UseSSE==0.  Emits
        // nothing of its own: it expands to fcmovFPR_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ne test would make
        // the rule eligible regardless of UseSSE.
12837   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12838   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12839   ins_cost(200);
12840   expand %{
12841     fcmovFPR_regS(cmp,flags,dst,src);
12842   %}
12843 %}
12844 
12845 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
        // Conditional move of a float in regF registers, selected by a long
        // compare with an eq/ne test.  Only valid when UseSSE>=1.  Emits
        // nothing of its own: it expands to fcmovF_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the ne test would make
        // the rule eligible regardless of UseSSE.
12846   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12847   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12848   ins_cost(200);
12849   expand %{
12850     fcmovF_regS(cmp,flags,dst,src);
12851   %}
12852 %}
12853 
12854 //======
12855 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12856 // Same as cmpL_reg_flags_LEGT except must negate src
12857 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
        // Long compare against constant zero for LE/GT consumers only.  Per
        // the format: a zeroed scratch register is compared against the low
        // half and then has the high half subtracted with borrow, i.e. the
        // flags describe (0 - src).  Consumers must therefore use the commuted
        // test.  $tmp is a TEMP and is overwritten.
12858   match( Set flags (CmpL src zero ));
12859   effect( TEMP tmp );
12860   ins_cost(300);
12861   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12862             "CMP    $tmp,$src.lo\n\t"
12863             "SBB    $tmp,$src.hi\n\t" %}
        // long_cmp_flags3 is an encoding class defined elsewhere in this file.
12864   ins_encode( long_cmp_flags3(src, tmp) );
12865   ins_pipe( ialu_reg_reg_long );
12866 %}
12867 
12868 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12869 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12870 // requires a commuted test to get the same result.
12871 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
        // Long register-register compare for LE/GT consumers only.  Uses the
        // same long_cmp_flags2 encoding as cmpL_reg_flags_LTGE but passes the
        // operands swapped (src2, src1), so consumers must use the commuted
        // test.  $tmp is a TEMP and is overwritten.
12872   match( Set flags (CmpL src1 src2 ));
12873   effect( TEMP tmp );
12874   ins_cost(300);
12875   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12876             "MOV    $tmp,$src2.hi\n\t"
12877             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12878   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12879   ins_pipe( ialu_cr_reg_reg );
12880 %}
12881 
12882 // Long compares reg <= zero/reg OR reg > zero/reg.
12883 // Just a wrapper for a normal branch, plus the predicate test
12884 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
        // Branch on a long compare whose flags were produced in the
        // flagsReg_long_LEGT class.  The predicate restricts the rule to Bool
        // tests gt and le.  The condition operand is cmpOp_commute rather than
        // cmpOp because the LEGT flag producers compare with swapped operands.
        // Emits nothing of its own: it expands to the ordinary jmpCon rule.
12885   match(If cmp flags);
12886   effect(USE labl);
12887   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12888   ins_cost(300);
12889   expand %{
12890     jmpCon(cmp,flags,labl);    // JGT or JLE...
12891   %}
12892 %}
12893 
12894 // Compare 2 longs and CMOVE longs.
12895 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
        // Conditional move of a long (register pair) driven by a long compare
        // with an le/gt test (commuted condition operand).  Requires CMOV
        // support.  Two CMOVs are encoded, one for the low halves and one for
        // the high halves.
12896   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12897   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12898   ins_cost(400);
12899   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12900             "CMOV$cmp $dst.hi,$src.hi" %}
12901   opcode(0x0F,0x40);
12902   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12903   ins_pipe( pipe_cmov_reg_long );
12904 %}
12905 
12906 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
        // Conditional move of a long loaded from memory (LoadL folded into the
        // rule), driven by a long compare with an le/gt test (commuted
        // condition operand).  Requires CMOV support.  Two CMOVs are encoded:
        // RegMem for the low half and RegMem_Hi for the high half.
        // NOTE(review): the format text prints "$src.hi+4" here while the
        // LTGE/EQNE siblings print "$src.hi" -- cosmetic only, confirm which
        // spelling is intended.
12907   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12908   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12909   ins_cost(500);
12910   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12911             "CMOV$cmp $dst.hi,$src.hi+4" %}
12912   opcode(0x0F,0x40);
12913   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12914   ins_pipe( pipe_cmov_reg_long );
12915 %}
12916 
12917 // Compare 2 longs and CMOVE ints.
12918 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
        // Conditional move of an int register, selected by a long compare with
        // an le/gt test (commuted condition operand).  Requires CMOV support;
        // encodes a single CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12919   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12920   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12921   ins_cost(200);
12922   format %{ "CMOV$cmp $dst,$src" %}
12923   opcode(0x0F,0x40);
12924   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12925   ins_pipe( pipe_cmov_reg );
12926 %}
12927 
12928 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory (LoadI folded into the
        // rule), selected by a long compare with an le/gt test (commuted
        // condition operand).  Requires CMOV support; encodes a single CMOV
        // with a memory source.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12929   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12930   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12931   ins_cost(250);
12932   format %{ "CMOV$cmp $dst,$src" %}
12933   opcode(0x0F,0x40);
12934   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12935   ins_pipe( pipe_cmov_mem );
12936 %}
12937 
12938 // Compare 2 longs and CMOVE ptrs.
12939 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
        // Conditional move of a pointer register (CMoveP), selected by a long
        // compare with an le/gt test (commuted condition operand).  Requires
        // CMOV support; encodes a single CMOV.
        // _kids[0]->_kids[0] reaches the Bool node under (Binary cmp flags).
12940   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12941   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12942   ins_cost(200);
12943   format %{ "CMOV$cmp $dst,$src" %}
12944   opcode(0x0F,0x40);
12945   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12946   ins_pipe( pipe_cmov_reg );
12947 %}
12948 
12949 // Compare 2 longs and CMOVE doubles
12950 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
        // Conditional move of a double in regDPR registers, selected by a long
        // compare with an le/gt test (commuted condition operand).  Only valid
        // when UseSSE<=1.  Emits nothing of its own: it expands to
        // fcmovDPR_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the gt test would make
        // the rule eligible regardless of UseSSE.
12951   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12952   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12953   ins_cost(200);
12954   expand %{
12955     fcmovDPR_regS(cmp,flags,dst,src);
12956   %}
12957 %}
12958 
12959 // Compare 2 longs and CMOVE doubles
12960 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
        // Conditional move of a double in regD registers, selected by a long
        // compare with an le/gt test (commuted condition operand).  Only valid
        // when UseSSE>=2.  Emits nothing of its own: it expands to
        // fcmovD_regS.
        // The two Bool-test alternatives must be parenthesised: && binds
        // tighter than ||, so without the parentheses the gt test would make
        // the rule eligible regardless of UseSSE.
12961   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12962   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12963   ins_cost(200);
12964   expand %{
12965     fcmovD_regS(cmp,flags,dst,src);
12966   %}
12967 %}
12968 
12969 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
        // Conditional move of a float (CMoveF) on regFPR operands, driven by
        // flagsReg_long_LEGT flags and guarded by UseSSE==0.  Expands to
        // fcmovFPR_regS with the same operands.
        //
        // The le/gt alternatives are grouped in parentheses on purpose.  '&&'
        // binds tighter than '||', so an unparenthesized
        //   UseSSE==0 && test == le || test == gt
        // would accept every BoolTest::gt test regardless of UseSSE.  The
        // grouping mirrors the predicate of cmovPP_reg_LEGT.
12970   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12971   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12972   ins_cost(200);
12973   expand %{
12974     fcmovFPR_regS(cmp,flags,dst,src);
12975   %}
12976 %}
12977 
12978 
12979 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
        // Conditional move of a float (CMoveF) on regF operands, driven by
        // flagsReg_long_LEGT flags and guarded by UseSSE>=1.  Expands to
        // fcmovF_regS with the same operands.
        //
        // The le/gt alternatives are grouped in parentheses on purpose.  '&&'
        // binds tighter than '||', so an unparenthesized
        //   UseSSE>=1 && test == le || test == gt
        // would accept every BoolTest::gt test regardless of UseSSE.  The
        // grouping mirrors the predicate of cmovPP_reg_LEGT.
12980   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12981   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12982   ins_cost(200);
12983   expand %{
12984     fcmovF_regS(cmp,flags,dst,src);
12985   %}
12986 %}
12987 
12988 
12989 // ============================================================================
12990 // Procedure Call/Return Instructions
12991 // Call Java Static Instruction
      // Matches the CallStaticJava ideal node; the 'meth' operand is only read
      // (USE effect).
12992 // Note: If this code changes, the corresponding ret_addr_offset() and
12993 //       compute_padding() functions will have to be adjusted.
12994 instruct CallStaticJavaDirect(method meth) %{
12995   match(CallStaticJava);
12996   effect(USE meth);
12997 
12998   ins_cost(300);
12999   format %{ "CALL,static " %}
13000   opcode(0xE8); /* E8 cd */
        // Encoding classes are emitted in the listed order: pre_call_resets,
        // then the call itself, then call_epilog and post_call_FPU.
13001   ins_encode( pre_call_resets,
13002               Java_Static_Call( meth ),
13003               call_epilog,
13004               post_call_FPU );
13005   ins_pipe( pipe_slow );
        // NOTE(review): alignment request of 4; presumably tied to the
        // compute_padding() mentioned in the note above - confirm.
13006   ins_alignment(4);
13007 %}
13008 
13009 // Call Java Dynamic Instruction
      // Matches the CallDynamicJava ideal node; the 'meth' operand is only read
      // (USE effect).
13010 // Note: If this code changes, the corresponding ret_addr_offset() and
13011 //       compute_padding() functions will have to be adjusted.
13012 instruct CallDynamicJavaDirect(method meth) %{
13013   match(CallDynamicJava);
13014   effect(USE meth);
13015 
13016   ins_cost(300);
        // The format shows a "MOV EAX,(oop)-1" ahead of the call; presumably
        // that MOV is emitted by the Java_Dynamic_Call encoding class - confirm
        // against its definition.
13017   format %{ "MOV    EAX,(oop)-1\n\t"
13018             "CALL,dynamic" %}
13019   opcode(0xE8); /* E8 cd */
        // Encoding classes are emitted in the listed order: pre_call_resets,
        // then the call itself, then call_epilog and post_call_FPU.
13020   ins_encode( pre_call_resets,
13021               Java_Dynamic_Call( meth ),
13022               call_epilog,
13023               post_call_FPU );
13024   ins_pipe( pipe_slow );
        // NOTE(review): alignment request of 4; presumably tied to the
        // compute_padding() mentioned in the note above - confirm.
13025   ins_alignment(4);
13026 %}
13027 
13028 // Call Runtime Instruction
      // Matches the CallRuntime ideal node; the 'meth' operand is only read
      // (USE effect).  Unlike the Java call rules this one has no call_epilog
      // and no ins_alignment.
13029 instruct CallRuntimeDirect(method meth) %{
13030   match(CallRuntime );
13031   effect(USE meth);
13032 
13033   ins_cost(300);
13034   format %{ "CALL,runtime " %}
13035   opcode(0xE8); /* E8 cd */
13036   // Use FFREEs to clear entries in float stack
        // Emission order: pre_call_resets, FFree_Float_Stack_All, the call
        // (Java_To_Runtime), then post_call_FPU.
13037   ins_encode( pre_call_resets,
13038               FFree_Float_Stack_All,
13039               Java_To_Runtime( meth ),
13040               post_call_FPU );
13041   ins_pipe( pipe_slow );
13042 %}
13043 
13044 // Call runtime without safepoint
      // Matches the CallLeaf ideal node; the 'meth' operand is only read (USE
      // effect).  Same encoding sequence as CallRuntimeDirect, with an extra
      // Verify_FPU_For_Leaf step between the call and post_call_FPU.
13045 instruct CallLeafDirect(method meth) %{
13046   match(CallLeaf);
13047   effect(USE meth);
13048 
13049   ins_cost(300);
13050   format %{ "CALL_LEAF,runtime " %}
13051   opcode(0xE8); /* E8 cd */
13052   ins_encode( pre_call_resets,
13053               FFree_Float_Stack_All,
13054               Java_To_Runtime( meth ),
13055               Verify_FPU_For_Leaf, post_call_FPU );
13056   ins_pipe( pipe_slow );
13057 %}
13058 
13059 instruct CallLeafNoFPDirect(method meth) %{
        // Matches the CallLeafNoFP ideal node; the 'meth' operand is only read
        // (USE effect).
13060   match(CallLeafNoFP);
13061   effect(USE meth);
13062 
13063   ins_cost(300);
13064   format %{ "CALL_LEAF_NOFP,runtime " %}
13065   opcode(0xE8); /* E8 cd */
        // Only the call itself is encoded: none of the pre_call_resets,
        // FFree_Float_Stack_All or post_call_FPU steps used by CallLeafDirect.
13066   ins_encode(Java_To_Runtime(meth));
13067   ins_pipe( pipe_slow );
13068 %}
13069 
13070 
13071 // Return Instruction
13072 // Remove the return address & jump to it.
      // Matches the Return ideal node and takes no operands; the encoding is
      // just the primary opcode 0xC3 (the one-byte RET shown in the format).
13073 instruct Ret() %{
13074   match(Return);
13075   format %{ "RET" %}
13076   opcode(0xC3);
13077   ins_encode(OpcP);
13078   ins_pipe( pipe_jmp );
13079 %}
13080 
13081 // Tail Call; Jump from runtime stub to Java code.
13082 // Also known as an 'interprocedural jump'.
13083 // Target of jump will eventually return to caller.
13084 // TailJump below removes the return address.
      // Operands: jump_target is any pointer register of class eRegP_no_EBP;
      // method_oop is constrained to the eBXRegP class (EBX per the format
      // text) and is not referenced by the encoding.
13085 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13086   match(TailCall jump_target method_oop );
13087   ins_cost(300);
13088   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13089   opcode(0xFF, 0x4);  /* Opcode FF /4 */
        // Primary opcode byte followed by a register form using the secondary
        // opcode (/4) with jump_target as the register operand.
13090   ins_encode( OpcP, RegOpc(jump_target) );
13091   ins_pipe( pipe_jmp );
13092 %}
13093 
13094 
13095 // Tail Jump; remove the return address; jump to target.
13096 // TailCall above leaves the return address around.
      // Operands: jump_target is any pointer register of class eRegP_no_EBP;
      // ex_oop is constrained to the eAXRegP class and is not referenced by the
      // encoding.
13097 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13098   match( TailJump jump_target ex_oop );
13099   ins_cost(300);
13100   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13101             "JMP    $jump_target " %}
13102   opcode(0xFF, 0x4);  /* Opcode FF /4 */
        // enc_pop_rdx discards the return address (the POP EDX of the format),
        // then the same FF /4 indirect jump as TailCalljmpInd is emitted.
13103   ins_encode( enc_pop_rdx,
13104               OpcP, RegOpc(jump_target) );
13105   ins_pipe( pipe_jmp );
13106 %}
13107 
13108 // Create exception oop: created by stack-crawling runtime code.
13109 // Created exception is now available to this handler, and is setup
13110 // just prior to jumping to this handler.  No code emitted.
      // The rule only tells the matcher that the CreateEx result lives in the
      // eAXRegP operand class: size is declared as 0 and the encoding is empty.
13111 instruct CreateException( eAXRegP ex_oop )
13112 %{
13113   match(Set ex_oop (CreateEx));
13114 
13115   size(0);
13116   // use the following format syntax
13117   format %{ "# exception oop is in EAX; no code emitted" %}
13118   ins_encode();
13119   ins_pipe( empty );
13120 %}
13121 
13122 
13123 // Rethrow exception:
13124 // The exception oop will come in the first argument position.
13125 // Then JUMP (not call) to the rethrow stub code.
      // Matches the Rethrow ideal node and takes no operands; all emission is
      // delegated to the enc_rethrow encoding class.
13126 instruct RethrowException()
13127 %{
13128   match(Rethrow);
13129 
13130   // use the following format syntax
13131   format %{ "JMP    rethrow_stub" %}
13132   ins_encode(enc_rethrow);
13133   ins_pipe( pipe_jmp );
13134 %}
13135 
13136 // inlined locking and unlocking
13137 
13138 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
        // FastLock rule used when the current compilation has RTM enabled
        // (Compile::current()->use_rtm()); cmpFastLock covers the opposite case.
        // The result is produced in the flags register 'cr'.
13139   predicate(Compile::current()->use_rtm());
13140   match(Set cr (FastLock object box));
        // tmp, scr, cx1 and cx2 are scratch registers; box is read and then
        // clobbered (USE_KILL).
13141   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13142   ins_cost(300);
13143   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13144   ins_encode %{
          // Passes both extra scratch registers, the RTM counters and the
          // compiled method's method_data().  The trailing arguments are 'true'
          // and ra_->C->profile_rtm() (presumably the use_rtm / profile_rtm
          // flags - confirm against MacroAssembler::fast_lock).
13145     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13146                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13147                  _counters, _rtm_counters, _stack_rtm_counters,
13148                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13149                  true, ra_->C->profile_rtm());
13150   %}
13151   ins_pipe(pipe_slow);
13152 %}
13153 
13154 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
        // FastLock rule used when the current compilation does not use RTM;
        // cmpFastLockRTM covers the RTM case.  The result is produced in the
        // flags register 'cr'.
13155   predicate(!Compile::current()->use_rtm());
13156   match(Set cr (FastLock object box));
        // tmp and scr are scratch registers; box is read and then clobbered
        // (USE_KILL).
13157   effect(TEMP tmp, TEMP scr, USE_KILL box);
13158   ins_cost(300);
13159   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13160   ins_encode %{
          // Same fast_lock entry point as the RTM rule, but with noreg for the
          // two extra scratch registers, NULL for the RTM counters and method
          // data, and false for both trailing flags.
13161     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13162                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13163   %}
13164   ins_pipe(pipe_slow);
13165 %}
13166 
13167 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
        // Single FastUnlock rule (no predicate): whether RTM is in use is passed
        // to fast_unlock at emission time instead of selecting a separate rule.
        // The result is produced in the flags register 'cr'.
13168   match(Set cr (FastUnlock object box));
        // tmp is a scratch register; box is read and then clobbered (USE_KILL).
13169   effect(TEMP tmp, USE_KILL box);
13170   ins_cost(300);
13171   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13172   ins_encode %{
13173     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13174   %}
13175   ins_pipe(pipe_slow);
13176 %}
13177 
13178 
13179 
13180 // ============================================================================
13181 // Safepoint Instruction
      // Matches the SafePoint ideal node.  The only operand is the flags
      // register, which the poll clobbers (KILL cr).
13182 instruct safePoint_poll(eFlagsReg cr) %{
13183   match(SafePoint);
13184   effect(KILL cr);
13185 
13186   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13187   // On SPARC that might be acceptable as we can generate the address with
13188   // just a sethi, saving an or.  By polling at offset 0 we can end up
13189   // putting additional pressure on the index-0 in the D$.  Because of
13190   // alignment (just like the situation at hand) the lower indices tend
13191   // to see more traffic.  It'd be better to change the polling address
13192   // to offset 0 of the last $line in the polling page.
13193 
13194   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13195   ins_cost(125);
        // Declared instruction size; must stay in sync with what the
        // Safepoint_Poll encoding class actually emits.
13196   size(6) ;
13197   ins_encode( Safepoint_Poll() );
13198   ins_pipe( ialu_reg_mem );
13199 %}
13200 
13201 
13202 // ============================================================================
13203 // This name is KNOWN by the ADLC and cannot be changed.
13204 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13205 // for this guy.
      // Loads the current thread pointer (ThreadLocal ideal node) into $dst by
      // calling the macro assembler's get_thread().  The flags register is
      // declared killed.
13206 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13207   match(Set dst (ThreadLocal));
13208   effect(DEF dst, KILL cr);
13209 
13210   format %{ "MOV    $dst, Thread::current()" %}
13211   ins_encode %{
          // Turn the allocated register encoding of $dst into a Register.
13212     Register dstReg = as_Register($dst$$reg);
13213     __ get_thread(dstReg);
13214   %}
13215   ins_pipe( ialu_reg_fat );
13216 %}
13217 
13218 
13219 
13220 //----------PEEPHOLE RULES-----------------------------------------------------
13221 // These must follow all instruction definitions as they use the names
13222 // defined in the instruction definitions.
13223 //
13224 // peepmatch ( root_instr_name [preceding_instruction]* );
13225 //
13226 // peepconstraint %{
13227 // (instruction_number.operand_name relational_op instruction_number.operand_name
13228 //  [, ...] );
13229 // // instruction numbers are zero-based using left to right order in peepmatch
13230 //
13231 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13232 // // provide an instruction_number.operand_name for each operand that appears
13233 // // in the replacement instruction's match rule
13234 //
13235 // ---------VM FLAGS---------------------------------------------------------
13236 //
13237 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13238 //
13239 // Each peephole rule is given an identifying number starting with zero and
13240 // increasing by one in the order seen by the parser.  An individual peephole
13241 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13242 // on the command-line.
13243 //
13244 // ---------CURRENT LIMITATIONS----------------------------------------------
13245 //
13246 // Only match adjacent instructions in same basic block
13247 // Only equality constraints
13248 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13249 // Only one replacement instruction
13250 //
13251 // ---------EXAMPLE----------------------------------------------------------
13252 //
13253 // // pertinent parts of existing instructions in architecture description
13254 // instruct movI(rRegI dst, rRegI src) %{
13255 //   match(Set dst (CopyI src));
13256 // %}
13257 //
13258 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13259 //   match(Set dst (AddI dst src));
13260 //   effect(KILL cr);
13261 // %}
13262 //
13263 // // Change (inc mov) to lea
13264 // peephole %{
13265 //   // increment preceded by register-register move
13266 //   peepmatch ( incI_eReg movI );
13267 //   // require that the destination register of the increment
13268 //   // match the destination register of the move
13269 //   peepconstraint ( 0.dst == 1.dst );
13270 //   // construct a replacement instruction that sets
13271 //   // the destination to ( move's source register + one )
13272 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13273 // %}
13274 //
13275 // Implementation no longer uses movX instructions since
13276 // machine-independent system no longer uses CopyX nodes.
13277 //
13278 // peephole %{
13279 //   peepmatch ( incI_eReg movI );
13280 //   peepconstraint ( 0.dst == 1.dst );
13281 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13282 // %}
13283 //
13284 // peephole %{
13285 //   peepmatch ( decI_eReg movI );
13286 //   peepconstraint ( 0.dst == 1.dst );
13287 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13288 // %}
13289 //
13290 // peephole %{
13291 //   peepmatch ( addI_eReg_imm movI );
13292 //   peepconstraint ( 0.dst == 1.dst );
13293 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13294 // %}
13295 //
13296 // peephole %{
13297 //   peepmatch ( addP_eReg_imm movP );
13298 //   peepconstraint ( 0.dst == 1.dst );
13299 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13300 // %}
13301 
13302 // // Change load of spilled value to only a spill
13303 // instruct storeI(memory mem, rRegI src) %{
13304 //   match(Set mem (StoreI mem src));
13305 // %}
13306 //
13307 // instruct loadI(rRegI dst, memory mem) %{
13308 //   match(Set dst (LoadI mem));
13309 // %}
13310 //
13311 peephole %{
        // Instruction numbers are zero-based, left to right in peepmatch:
        // 0 is the root loadI, 1 is the storeI that precedes it.
13312   peepmatch ( loadI storeI );
        // Applies only when the load reads back exactly what was just stored:
        // the store's source register is the load's destination, and both use
        // the same memory operand.
13313   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
        // Replace the pair with the store alone, built from the store's operands.
13314   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13315 %}
13316 
13317 //----------SMARTSPILL RULES---------------------------------------------------
13318 // These must follow all instruction definitions as they use the names
13319 // defined in the instructions definitions.