1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
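//
// For example, the definition
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// below reads: register save type SOC, C convention save type SOE, spilled
// with LoadI/StoreI (Op_RegI), and hardware encoding 3 in emitted opcodes.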
  61 
  62 // General Registers
// Previously, EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code because of the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode.  During emission the FPU stack is
// pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint.  This same numbering weirdness forces the instruction
// encoding to play games with the register encode to correct for the 0/1
// issue; see MachSpillCopyNode::implementation, where it does flt->flt
// moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
// Class of integer register pairs that align with the calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
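// For the long pairs named above (EDX:EAX, EBX:ECX, EDI:EBP) the high half's
// hardware encoding is always the low half's encoding plus 2; e.g. starting
// from EAX's encoding 0 this yields 2, which is EDX.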
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit-aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
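// These masks are meant to be applied with packed logical ops: for example,
// AbsF/AbsD can AND with the signmask pools to clear the sign bit, while
// NegF/NegD can XOR with the signflip pools to flip it.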
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
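// (The FLDCW of a 32-bit absolute memory operand encodes in 6 bytes and
// VZEROUPPER in 3 bytes, which is where the constants above come from.)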
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 has absolute addressing it doesn't need it when the global polling
// page is used; with thread-local polling it does.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw and vzeroupper, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw and vzeroupper, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
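// For example, emit_rm(cbuf, 0x3, dst_enc, src_enc) emits the single ModRM byte
// 0xC0 | (dst_enc << 3) | src_enc, i.e. a register-direct (mod == 3) operand pair.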
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // 8-bit displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // 32-bit displacement
 412   }
 413 }
 414 
// Encode a register operand against a memory operand (rRegI ereg, memory mem) -- emit_reg_mem
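// For example, a simple [EBX + 8] operand with no index takes the SIB-less path
// below: encode_RegMem(cbuf, reg, EBX_enc, 0x4, 0, 8, relocInfo::none) emits a
// mod=01 ModRM byte followed by the 8-bit displacement.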
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
 481 
 482 
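// Register-to-register copy; a self-copy emits nothing.  For example,
// encode_Copy(cbuf, EAX_enc, ECX_enc) emits the bytes 8B C1, i.e. MOV EAX,ECX.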
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
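// Materialize a three-way FP compare result in dst: -1 if the compare was
// 'below' or unordered (NaN sets parity), 0 if equal, and 1 otherwise.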
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before the MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
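  // Encoded size: opcode + ModRM + SIB (an ESP base always needs a SIB byte),
  // plus 0, 1 or 4 displacement bytes.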
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single-byte displacement.
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
// The next two methods are shared by the 32- and 64-bit VMs.  They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return MachNode::size(ra_);
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
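  // LEA reg,[ESP+offset] always needs a SIB byte because the base is ESP:
  //   disp32 form: opcode(1) + ModRM(1) + SIB(1) + disp32(4) = 7 bytes
  //   disp8  form: opcode(1) + ModRM(1) + SIB(1) + disp8(1)  = 4 bytes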
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
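  // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 is 6 bytes, plus 2 or 3 one-byte NOPs.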
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// This query originated on UltraSparc; returning true just means l2f conversion is fast.
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1406   offset -= br_size;
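  // For example, a 2-byte short branch (br_size == 2) whose target lies 100
  // bytes past the branch address arrives here with offset == 100; after the
  // adjustment the rel8 displacement is 98, which fits in [-128, 127].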
1407 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
// Needs two CMOVs for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
// If the CPU can load and store misaligned doubles directly then no fixup is
// needed.  Else we split the double into two integer pieces and move it
// piece-by-piece.  This only happens when passing doubles into C code, as the
// Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
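  // Walk the operands, summing each operand's leaf (input edge) count, until
  // we find the operand that owns input edge 'idx'.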
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
    // These do not use EBP as the address register; they use { EDX, EBX, EDI, ESI }
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86_32 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
// Returns true if the high 32 bits of the value are known to be zero.
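// For example, (AndL src (ConL 0x00000000FFFFFFFF)) qualifies, as does a ConL
// whose value already fits in the low 32 bits.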
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
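//
// For example, a register-register 32-bit ADD rule might be written roughly as
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                          // primary opcode only
//     ins_encode( OpcP, RegReg(dst,src) );   // emit 0x03, then the ModRM byte
//   %}
// (illustrative sketch only; see the INSTRUCTIONS section for the real rules).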
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
    // input : eax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // done:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
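    // e.g. with a primary opcode of 0x81 (ALU r/m32,imm32), an immediate of 5
    // selects 0x83 (sign-extended imm8) while 1000 keeps 0x81 and a full imm32.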
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
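    // Byte-reversing a 64-bit value == byte-reverse each 32-bit half, then
    // exchange the halves.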
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
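    // 0x70+cc is Jcc rel8 and cc^1 is the inverted condition (e.g. JE 0x74 <->
    // JNE 0x75), so the MOV is skipped exactly when the CMOV would not move.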
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
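      // The C calling convention returns float/double results in ST(0); in
      // SSE2+ mode compiled code expects them in XMM0, so bounce the value
      // through the stack (or just pop it if the result is unused).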
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
  // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8b instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
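  //       (CMPXCHG8B compares EDX:EAX with the 8-byte operand; if equal it
  //       stores ECX:EBX there, otherwise it loads the operand into EDX:EAX.)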
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
    // XCHG  ebx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8B [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit mode
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148     int res_encoding = $res$$reg;
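    // res := 1 if ZF is set, 0 otherwise.  MOV r32,imm32 is 5 bytes (B8+r then
    // imm32), which is exactly what the short JNE below skips.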
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     int reg_encoding = $ereg$$reg;
2168     int base  = $mem$$base;
2169     int index = $mem$$index;
2170     int scale = $mem$$scale;
2171     int displace = $mem$$disp;
2172     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178     int base  = $mem$$base;
2179     int index = $mem$$index;
2180     int scale = $mem$$scale;
2181     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187     int r1, r2;
2188     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190     emit_opcode(cbuf,0x0F);
2191     emit_opcode(cbuf,$tertiary);
2192     emit_rm(cbuf, 0x3, r1, r2);
2193     emit_d8(cbuf,$cnt$$constant);
2194     emit_d8(cbuf,$primary);
2195     emit_rm(cbuf, 0x3, $secondary, r1);
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200     emit_opcode( cbuf, 0x8B ); // Move
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_d8(cbuf,$primary);
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     int r1, r2;
2214     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216 
2217     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218     emit_rm(cbuf, 0x3, r1, r2);
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_opcode(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, r1);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_opcode(cbuf,0x33);  // XOR r2,r2
2225     emit_rm(cbuf, 0x3, r2, r2);
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     emit_opcode(cbuf,$opcode$$constant);
2232     int reg_encoding = $rm_reg$$reg;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp + $disp_for_half$$constant;
2237     relocInfo::relocType disp_reloc = relocInfo::none;
2238     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     int rm_byte_opcode = $rm_opcode$$constant;
2248     int base     = $mem$$base;
2249     int index    = $mem$$index;
2250     int scale    = $mem$$scale;
2251     int displace = $mem$$disp;
2252     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     int rm_byte_opcode = $rm_opcode$$constant;
2258     int base     = $mem$$base;
2259     int index    = $mem$$index;
2260     int scale    = $mem$$scale;
2261     int displace = $mem$$disp;
2262     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
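    // dst = MIN(dst,src): the JL,s +2 skips the 2-byte reg-reg MOV (8B /r)
    // when dst is already the smaller signed value.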
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jmp dst < src around move
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // jmp dst > src around move
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
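    // Branch-free sequence: SBB tmp,tmp leaves tmp = -1 if the SUB borrowed
    // and 0 otherwise, so the AND/ADD fold in $y only in the borrow case.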
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
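    // For shift counts of 32..63 the low word is first moved into the high
    // word and the low word cleared; SHLD/SHL then apply count mod 32, since
    // the hardware only looks at the low 5 bits of CL.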
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2368     // SHL    $dst.lo,$shift"
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392     // SHR    $dst.hi,$shift"
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2417     // SAR    $dst.hi,$shift"
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
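    // FPREM leaves C2 set in the FPU status word while the reduction is
    // incomplete; after FNSTSW AX / SAHF, C2 shows up as PF, so JP loops
    // until the remainder is final.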
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
2652     // jp  ::loop
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
2709 // less_result     =  1;
2710 // greater_result  = -1;
2711 // equal_result    = 0;
2712 // nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
2800 
  // This version omits the trailing ADD ESP,8 that pops the pushed operands.
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the double down as an int, and reset the rounding mode
  // to 'nearest'.  On overflow or NaN the hardware stores the integer
  // indefinite value 0x80000000; we test for that below and, if we see it,
  // fix the result up with a call to the runtime d2i wrapper.
2983   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
    // Load the converted long; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050     emit_d8    (cbuf,0xFA);       // rdx
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,ST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top)
3090       // FSUB   ST,$src1
3091       emit_opcode(cbuf, 0xD8);
3092       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094       // FDIV
3095       emit_opcode(cbuf, 0xD8);
3096       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
    // FMUL  ST,src2  /* D8 C8+i */
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top)
3113     // FADD   ST,$src  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x05;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);
3143     int rm_byte_opcode = 0x07;
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150   %}
3151 
  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition
  // code in the process.
  // We currently use TESTL [spp],EDI.
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
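  // Rough sketch (not emitted code) of the mechanism: the TESTL below is just
  //   test dword ptr [polling_page], EDI
  // When the VM wants a safepoint it protects the polling page, the read
  // faults, and the signal handler uses the poll relocation recorded here to
  // bring the thread to a safepoint.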
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160     emit_opcode(cbuf,0x85);
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163   %}
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
3225   // These three registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted java to compiled java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for
  // the return address and one for rbp (we must save rbp).
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262   // Otherwise, it is above the locks and verification slot and alignment word
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
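  // Worked example (illustrative numbers only): with a 16-byte stack
  // alignment (4 slots), in_preserve_stack_slots() == 2 and no fixed slots,
  // align_up(2 + 0, 4) == 4, so the return address is located at
  // STACK - 1 + 4, i.e. three slots above OptoReg::stack0().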
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
    // No difference between incoming and outgoing, so just pass false
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating
3281   // arguments either in registers or in stack slots.  Passed an array
3282   // of ideal registers called "sig" and a "length" count.  Stack-slot
3283   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284   // arguments for a CALLEE.  Incoming stack arguments are
3285   // automatically biased by the preserve_stack_slots field above.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307   // Location of return values
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                 // non-matching short branch variant of some
                                // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
// Long Immediate minus one
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127.
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
// Long Immediate: 32-bit signed value
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
// Double Immediate zero
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero
3596 operand immD0() %{
3597   // Do additional (and counter-intuitive) test against NaN to work around VC++
3598   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599   // compare equal to -0.0.
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
// Constants for sign-extending shifts
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));
3773   match(RegI);
3774   match(eCXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));
3785   match(RegI);
3786   match(eBXRegI);
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));
3797   match(RegI);
3798   match(eAXRegI);
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // On windows95, EBP is not safe to use for implicit null tests.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));
3873   match(RegP);
3874   match(eCXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));
3885   match(RegP);
3886   match(eBXRegP);
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));
3937   match(RegL);
3938   match(eADXRegL);
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990   predicate(false);
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Float register operands
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operands
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operands
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // Float register operands
4100 operand vlRegF() %{
4101    constraint(ALLOC_IN_RC(float_reg_vl));
4102    match(RegF);
4103 
4104    format %{ %}
4105    interface(REG_INTER);
4106 %}
4107 
4108 // XMM Double register operands
4109 operand regD() %{
4110   predicate( UseSSE>=2 );
4111   constraint(ALLOC_IN_RC(double_reg_legacy));
4112   match(RegD);
4113   format %{ %}
4114   interface(REG_INTER);
4115 %}
4116 
4117 // Double register operands
4118 operand vlRegD() %{
4119    constraint(ALLOC_IN_RC(double_reg_vl));
4120    match(RegD);
4121 
4122    format %{ %}
4123    interface(REG_INTER);
4124 %}
4125 
// Vectors: note, we use legacy registers here to avoid the extra runtime
// code generation via reg_class_dynamic, which is unneeded in the 32-bit VM.
4128 operand vecS() %{
4129   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4130   match(VecS);
4131 
4132   format %{ %}
4133   interface(REG_INTER);
4134 %}
4135 
4136 operand legVecS() %{
4137   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4138   match(VecS);
4139 
4140   format %{ %}
4141   interface(REG_INTER);
4142 %}
4143 
4144 operand vecD() %{
4145   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4146   match(VecD);
4147 
4148   format %{ %}
4149   interface(REG_INTER);
4150 %}
4151 
4152 operand legVecD() %{
4153   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4154   match(VecD);
4155 
4156   format %{ %}
4157   interface(REG_INTER);
4158 %}
4159 
4160 operand vecX() %{
4161   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4162   match(VecX);
4163 
4164   format %{ %}
4165   interface(REG_INTER);
4166 %}
4167 
4168 operand legVecX() %{
4169   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4170   match(VecX);
4171 
4172   format %{ %}
4173   interface(REG_INTER);
4174 %}
4175 
4176 operand vecY() %{
4177   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4178   match(VecY);
4179 
4180   format %{ %}
4181   interface(REG_INTER);
4182 %}
4183 
4184 operand legVecY() %{
4185   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186   match(VecY);
4187 
4188   format %{ %}
4189   interface(REG_INTER);
4190 %}
4191 
4192 //----------Memory Operands----------------------------------------------------
4193 // Direct Memory Operand
4194 operand direct(immP addr) %{
4195   match(addr);
4196 
4197   format %{ "[$addr]" %}
4198   interface(MEMORY_INTER) %{
4199     base(0xFFFFFFFF);
4200     index(0x4);
4201     scale(0x0);
4202     disp($addr);
4203   %}
4204 %}
4205 
4206 // Indirect Memory Operand
4207 operand indirect(eRegP reg) %{
4208   constraint(ALLOC_IN_RC(int_reg));
4209   match(reg);
4210 
4211   format %{ "[$reg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index(0x4);
4215     scale(0x0);
4216     disp(0x0);
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Short Offset Operand
4221 operand indOffset8(eRegP reg, immI8 off) %{
4222   match(AddP reg off);
4223 
4224   format %{ "[$reg + $off]" %}
4225   interface(MEMORY_INTER) %{
4226     base($reg);
4227     index(0x4);
4228     scale(0x0);
4229     disp($off);
4230   %}
4231 %}
4232 
4233 // Indirect Memory Plus Long Offset Operand
4234 operand indOffset32(eRegP reg, immI off) %{
4235   match(AddP reg off);
4236 
4237   format %{ "[$reg + $off]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index(0x4);
4241     scale(0x0);
4242     disp($off);
4243   %}
4244 %}
4245 
4246 // Indirect Memory Plus Long Offset Operand
4247 operand indOffset32X(rRegI reg, immP off) %{
4248   match(AddP off reg);
4249 
4250   format %{ "[$reg + $off]" %}
4251   interface(MEMORY_INTER) %{
4252     base($reg);
4253     index(0x4);
4254     scale(0x0);
4255     disp($off);
4256   %}
4257 %}
4258 
4259 // Indirect Memory Plus Index Register Plus Offset Operand
4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261   match(AddP (AddP reg ireg) off);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $off + $ireg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($ireg);
4268     scale(0x0);
4269     disp($off);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Index Register Plus Offset Operand
4274 operand indIndex(eRegP reg, rRegI ireg) %{
4275   match(AddP reg ireg);
4276 
4277   op_cost(10);
4278   format %{"[$reg + $ireg]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index($ireg);
4282     scale(0x0);
4283     disp(0x0);
4284   %}
4285 %}
4286 
4287 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4289 // // -------------------------------------------------------------------------
4290 // // Scaled Memory Operands
4291 // // Indirect Memory Times Scale Plus Offset Operand
4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293 //   match(AddP off (LShiftI ireg scale));
4294 //
4295 //   op_cost(10);
4296 //   format %{"[$off + $ireg << $scale]" %}
4297 //   interface(MEMORY_INTER) %{
4298 //     base(0x4);
4299 //     index($ireg);
4300 //     scale($scale);
4301 //     disp($off);
4302 //   %}
4303 // %}
4304 
4305 // Indirect Memory Times Scale Plus Index Register
4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307   match(AddP reg (LShiftI ireg scale));
4308 
4309   op_cost(10);
4310   format %{"[$reg + $ireg << $scale]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index($ireg);
4314     scale($scale);
4315     disp(0x0);
4316   %}
4317 %}
4318 
4319 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321   match(AddP (AddP reg (LShiftI ireg scale)) off);
4322 
4323   op_cost(10);
4324   format %{"[$reg + $off + $ireg << $scale]" %}
4325   interface(MEMORY_INTER) %{
4326     base($reg);
4327     index($ireg);
4328     scale($scale);
4329     disp($off);
4330   %}
4331 %}
4332 
4333 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4335 // the first word of the long.  If the load-long destination overlaps with
4336 // registers used in the addressing expression, the 2nd half will be loaded
4337 // from a clobbered address.  Fix this by requiring that load-long use
4338 // address registers that do not overlap with the load-long target.
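// Illustrative hazard (not real operands): if the long destination could
// overlap the address register, the two 32-bit halves might be loaded as
//   mov EAX, [EAX]      ; low half clobbers the base register
//   mov EDX, [EAX+4]    ; high half now comes from the wrong address
// Constraining the address to ESI below keeps it disjoint from the
// destination pair.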
4339 
4340 // load-long support
4341 operand load_long_RegP() %{
4342   constraint(ALLOC_IN_RC(esi_reg));
4343   match(RegP);
4344   match(eSIRegP);
4345   op_cost(100);
4346   format %{  %}
4347   interface(REG_INTER);
4348 %}
4349 
4350 // Indirect Memory Operand Long
4351 operand load_long_indirect(load_long_RegP reg) %{
4352   constraint(ALLOC_IN_RC(esi_reg));
4353   match(reg);
4354 
4355   format %{ "[$reg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);
4358     index(0x4);
4359     scale(0x0);
4360     disp(0x0);
4361   %}
4362 %}
4363 
4364 // Indirect Memory Plus Long Offset Operand
4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366   match(AddP reg off);
4367 
4368   format %{ "[$reg + $off]" %}
4369   interface(MEMORY_INTER) %{
4370     base($reg);
4371     index(0x4);
4372     scale(0x0);
4373     disp($off);
4374   %}
4375 %}
4376 
4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4378 
4379 
4380 //----------Special Memory Operands--------------------------------------------
4381 // Stack Slot Operand - This operand is used for loading and storing temporary
4382 //                      values on the stack where a match requires a value to
4383 //                      flow through memory.
4384 operand stackSlotP(sRegP reg) %{
4385   constraint(ALLOC_IN_RC(stack_slots));
4386   // No match rule because this operand is only generated in matching
4387   format %{ "[$reg]" %}
4388   interface(MEMORY_INTER) %{
4389     base(0x4);   // ESP
4390     index(0x4);  // No Index
4391     scale(0x0);  // No Scale
4392     disp($reg);  // Stack Offset
4393   %}
4394 %}
4395 
4396 operand stackSlotI(sRegI reg) %{
4397   constraint(ALLOC_IN_RC(stack_slots));
4398   // No match rule because this operand is only generated in matching
4399   format %{ "[$reg]" %}
4400   interface(MEMORY_INTER) %{
4401     base(0x4);   // ESP
4402     index(0x4);  // No Index
4403     scale(0x0);  // No Scale
4404     disp($reg);  // Stack Offset
4405   %}
4406 %}
4407 
4408 operand stackSlotF(sRegF reg) %{
4409   constraint(ALLOC_IN_RC(stack_slots));
4410   // No match rule because this operand is only generated in matching
4411   format %{ "[$reg]" %}
4412   interface(MEMORY_INTER) %{
4413     base(0x4);   // ESP
4414     index(0x4);  // No Index
4415     scale(0x0);  // No Scale
4416     disp($reg);  // Stack Offset
4417   %}
4418 %}
4419 
4420 operand stackSlotD(sRegD reg) %{
4421   constraint(ALLOC_IN_RC(stack_slots));
4422   // No match rule because this operand is only generated in matching
4423   format %{ "[$reg]" %}
4424   interface(MEMORY_INTER) %{
4425     base(0x4);   // ESP
4426     index(0x4);  // No Index
4427     scale(0x0);  // No Scale
4428     disp($reg);  // Stack Offset
4429   %}
4430 %}
4431 
4432 operand stackSlotL(sRegL reg) %{
4433   constraint(ALLOC_IN_RC(stack_slots));
4434   // No match rule because this operand is only generated in matching
4435   format %{ "[$reg]" %}
4436   interface(MEMORY_INTER) %{
4437     base(0x4);   // ESP
4438     index(0x4);  // No Index
4439     scale(0x0);  // No Scale
4440     disp($reg);  // Stack Offset
4441   %}
4442 %}
4443 
4444 //----------Memory Operands - Win95 Implicit Null Variants----------------
4445 // Indirect Memory Operand
4446 operand indirect_win95_safe(eRegP_no_EBP reg)
4447 %{
4448   constraint(ALLOC_IN_RC(int_reg));
4449   match(reg);
4450 
4451   op_cost(100);
4452   format %{ "[$reg]" %}
4453   interface(MEMORY_INTER) %{
4454     base($reg);
4455     index(0x4);
4456     scale(0x0);
4457     disp(0x0);
4458   %}
4459 %}
4460 
4461 // Indirect Memory Plus Short Offset Operand
4462 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4463 %{
4464   match(AddP reg off);
4465 
4466   op_cost(100);
4467   format %{ "[$reg + $off]" %}
4468   interface(MEMORY_INTER) %{
4469     base($reg);
4470     index(0x4);
4471     scale(0x0);
4472     disp($off);
4473   %}
4474 %}
4475 
4476 // Indirect Memory Plus Long Offset Operand
4477 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4478 %{
4479   match(AddP reg off);
4480 
4481   op_cost(100);
4482   format %{ "[$reg + $off]" %}
4483   interface(MEMORY_INTER) %{
4484     base($reg);
4485     index(0x4);
4486     scale(0x0);
4487     disp($off);
4488   %}
4489 %}
4490 
4491 // Indirect Memory Plus Index Register Plus Offset Operand
4492 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4493 %{
4494   match(AddP (AddP reg ireg) off);
4495 
4496   op_cost(100);
4497   format %{"[$reg + $off + $ireg]" %}
4498   interface(MEMORY_INTER) %{
4499     base($reg);
4500     index($ireg);
4501     scale(0x0);
4502     disp($off);
4503   %}
4504 %}
4505 
4506 // Indirect Memory Times Scale Plus Index Register
4507 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4508 %{
4509   match(AddP reg (LShiftI ireg scale));
4510 
4511   op_cost(100);
4512   format %{"[$reg + $ireg << $scale]" %}
4513   interface(MEMORY_INTER) %{
4514     base($reg);
4515     index($ireg);
4516     scale($scale);
4517     disp(0x0);
4518   %}
4519 %}
4520 
4521 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4522 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4523 %{
4524   match(AddP (AddP reg (LShiftI ireg scale)) off);
4525 
4526   op_cost(100);
4527   format %{"[$reg + $off + $ireg << $scale]" %}
4528   interface(MEMORY_INTER) %{
4529     base($reg);
4530     index($ireg);
4531     scale($scale);
4532     disp($off);
4533   %}
4534 %}
4535 
4536 //----------Conditional Branch Operands----------------------------------------
4537 // Comparison Op  - This is the operation of the comparison, and is limited to
4538 //                  the following set of codes:
4539 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4540 //
4541 // Other attributes of the comparison, such as unsignedness, are specified
4542 // by the comparison instruction that sets a condition code flags register.
4543 // That result is represented by a flags operand whose subtype is appropriate
4544 // to the unsignedness (etc.) of the comparison.
4545 //
4546 // Later, the instruction which matches both the Comparison Op (a Bool) and
4547 // the flags (produced by the Cmp) specifies the coding of the comparison op
4548 // by matching a specific subtype of Bool operand below, such as cmpOpU.
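//
// As an illustrative sketch (kept inside a comment, not an operative
// definition; the name is hypothetical), a conditional branch would tie the
// pieces together roughly like this, with the cmpOp operand supplying the
// condition-code encoding and the flags operand carrying the compare result:
//
//   instruct jmpCon_sketch(cmpOp cop, eFlagsReg cr, label labl) %{
//     match(If cop cr);              // Bool (cop) applied to the flags (cr)
//     format %{ "J$cop   $labl" %}
//     // ins_encode emits Jcc using the condition bits defined by cmpOp below
//   %}
//
// The actual jump instructions later in this file follow this shape.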
4549 
// Comparison Code
4551 operand cmpOp() %{
4552   match(Bool);
4553 
4554   format %{ "" %}
4555   interface(COND_INTER) %{
4556     equal(0x4, "e");
4557     not_equal(0x5, "ne");
4558     less(0xC, "l");
4559     greater_equal(0xD, "ge");
4560     less_equal(0xE, "le");
4561     greater(0xF, "g");
4562     overflow(0x0, "o");
4563     no_overflow(0x1, "no");
4564   %}
4565 %}
4566 
4567 // Comparison Code, unsigned compare.  Used by FP also, with
4568 // C2 (unordered) turned into GT or LT already.  The other bits
4569 // C0 and C3 are turned into Carry & Zero flags.
4570 operand cmpOpU() %{
4571   match(Bool);
4572 
4573   format %{ "" %}
4574   interface(COND_INTER) %{
4575     equal(0x4, "e");
4576     not_equal(0x5, "ne");
4577     less(0x2, "b");
4578     greater_equal(0x3, "nb");
4579     less_equal(0x6, "be");
4580     greater(0x7, "nbe");
4581     overflow(0x0, "o");
4582     no_overflow(0x1, "no");
4583   %}
4584 %}
4585 
4586 // Floating comparisons that don't require any fixup for the unordered case
4587 operand cmpOpUCF() %{
4588   match(Bool);
4589   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4590             n->as_Bool()->_test._test == BoolTest::ge ||
4591             n->as_Bool()->_test._test == BoolTest::le ||
4592             n->as_Bool()->_test._test == BoolTest::gt);
4593   format %{ "" %}
4594   interface(COND_INTER) %{
4595     equal(0x4, "e");
4596     not_equal(0x5, "ne");
4597     less(0x2, "b");
4598     greater_equal(0x3, "nb");
4599     less_equal(0x6, "be");
4600     greater(0x7, "nbe");
4601     overflow(0x0, "o");
4602     no_overflow(0x1, "no");
4603   %}
4604 %}
4605 
4606 
4607 // Floating comparisons that can be fixed up with extra conditional jumps
4608 operand cmpOpUCF2() %{
4609   match(Bool);
4610   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4611             n->as_Bool()->_test._test == BoolTest::eq);
4612   format %{ "" %}
4613   interface(COND_INTER) %{
4614     equal(0x4, "e");
4615     not_equal(0x5, "ne");
4616     less(0x2, "b");
4617     greater_equal(0x3, "nb");
4618     less_equal(0x6, "be");
4619     greater(0x7, "nbe");
4620     overflow(0x0, "o");
4621     no_overflow(0x1, "no");
4622   %}
4623 %}
4624 
4625 // Comparison Code for FP conditional move
4626 operand cmpOp_fcmov() %{
4627   match(Bool);
4628 
4629   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4630             n->as_Bool()->_test._test != BoolTest::no_overflow);
4631   format %{ "" %}
4632   interface(COND_INTER) %{
4633     equal        (0x0C8);
4634     not_equal    (0x1C8);
4635     less         (0x0C0);
4636     greater_equal(0x1C0);
4637     less_equal   (0x0D0);
4638     greater      (0x1D0);
4639     overflow(0x0, "o"); // not really supported by the instruction
4640     no_overflow(0x1, "no"); // not really supported by the instruction
4641   %}
4642 %}
4643 
4644 // Comparison Code used in long compares
4645 operand cmpOp_commute() %{
4646   match(Bool);
4647 
4648   format %{ "" %}
4649   interface(COND_INTER) %{
4650     equal(0x4, "e");
4651     not_equal(0x5, "ne");
4652     less(0xF, "g");
4653     greater_equal(0xE, "le");
4654     less_equal(0xD, "ge");
4655     greater(0xC, "l");
4656     overflow(0x0, "o");
4657     no_overflow(0x1, "no");
4658   %}
4659 %}
4660 
4661 // Comparison Code used in unsigned long compares
4662 operand cmpOpU_commute() %{
4663   match(Bool);
4664 
4665   format %{ "" %}
4666   interface(COND_INTER) %{
4667     equal(0x4, "e");
4668     not_equal(0x5, "ne");
4669     less(0x7, "nbe");
4670     greater_equal(0x6, "be");
4671     less_equal(0x3, "nb");
4672     greater(0x2, "b");
4673     overflow(0x0, "o");
4674     no_overflow(0x1, "no");
4675   %}
4676 %}
4677 
4678 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4680 // instruction definitions by not requiring the AD writer to specify separate
4681 // instructions for every form of operand when the instruction accepts
4682 // multiple operand types with the same basic encoding and format.  The classic
4683 // case of this is memory operands.
4684 
4685 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4686                indIndex, indIndexScale, indIndexScaleOffset);
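
// For example (an illustrative note, not a new definition): an instruction
// declared with a "memory mem" operand, such as loadI further below, matches
// any of the addressing forms listed above, and its encoding reads the parts
// back as $mem$$base, $mem$$index, $mem$$scale and $mem$$disp.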
4687 
4688 // Long memory operations are encoded in 2 instructions and a +4 offset.
4689 // This means some kind of offset is always required and you cannot use
// an oop as the offset (as is done when working on static globals).
4691 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4692                     indIndex, indIndexScale, indIndexScaleOffset);
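//
// As a sketch of that +4 convention (it is what the long load/store
// instructions below, e.g. loadL, actually do), the two 32-bit halves of a
// long are addressed from the same matched operand:
//
//   Address lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
//   Address hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
//
// which is why the displacement must remain a plain integer offset rather
// than an oop.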
4693 
4694 
4695 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4697 pipeline %{
4698 
4699 //----------ATTRIBUTES---------------------------------------------------------
4700 attributes %{
  variable_size_instructions;        // Variable-sized instructions
4702   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction unit is 1 byte
4704   instruction_fetch_unit_size = 16;  // The processor fetches one line
4705   instruction_fetch_units = 1;       // of 16 bytes
4706 
4707   // List of nop instructions
4708   nops( MachNop );
4709 %}
4710 
4711 //----------RESOURCES----------------------------------------------------------
4712 // Resources are the functional units available to the machine
4713 
4714 // Generic P2/P3 pipeline
4715 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4716 // 3 instructions decoded per cycle.
4717 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops; only ALU0 handles mul/div instructions.
4719 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4720            MS0, MS1, MEM = MS0 | MS1,
4721            BR, FPU,
4722            ALU0, ALU1, ALU = ALU0 | ALU1 );
4723 
4724 //----------PIPELINE DESCRIPTION-----------------------------------------------
4725 // Pipeline Description specifies the stages in the machine's pipeline
4726 
4727 // Generic P2/P3 pipeline
4728 pipe_desc(S0, S1, S2, S3, S4, S5);
4729 
4730 //----------PIPELINE CLASSES---------------------------------------------------
4731 // Pipeline Classes describe the stages in which input and output are
4732 // referenced by the hardware pipeline.
4733 
4734 // Naming convention: ialu or fpu
4735 // Then: _reg
4736 // Then: _reg if there is a 2nd register
4737 // Then: _long if it's a pair of instructions implementing a long
4738 // Then: _fat if it requires the big decoder
4739 //   Or: _mem if it requires the big decoder and a memory unit.
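//
// Reading a name under this convention: "ialu_reg_mem" is an integer ALU op
// with a register destination and a memory source, so it needs the big
// decoder (D0) plus a memory unit, while "fpu_reg_reg" is a two-register FPU
// op handled by the generic decoders.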
4740 
4741 // Integer ALU reg operation
4742 pipe_class ialu_reg(rRegI dst) %{
4743     single_instruction;
4744     dst    : S4(write);
4745     dst    : S3(read);
4746     DECODE : S0;        // any decoder
4747     ALU    : S3;        // any alu
4748 %}
4749 
4750 // Long ALU reg operation
4751 pipe_class ialu_reg_long(eRegL dst) %{
4752     instruction_count(2);
4753     dst    : S4(write);
4754     dst    : S3(read);
4755     DECODE : S0(2);     // any 2 decoders
4756     ALU    : S3(2);     // both alus
4757 %}
4758 
4759 // Integer ALU reg operation using big decoder
4760 pipe_class ialu_reg_fat(rRegI dst) %{
4761     single_instruction;
4762     dst    : S4(write);
4763     dst    : S3(read);
4764     D0     : S0;        // big decoder only
4765     ALU    : S3;        // any alu
4766 %}
4767 
4768 // Long ALU reg operation using big decoder
4769 pipe_class ialu_reg_long_fat(eRegL dst) %{
4770     instruction_count(2);
4771     dst    : S4(write);
4772     dst    : S3(read);
4773     D0     : S0(2);     // big decoder only; twice
4774     ALU    : S3(2);     // any 2 alus
4775 %}
4776 
4777 // Integer ALU reg-reg operation
4778 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4779     single_instruction;
4780     dst    : S4(write);
4781     src    : S3(read);
4782     DECODE : S0;        // any decoder
4783     ALU    : S3;        // any alu
4784 %}
4785 
4786 // Long ALU reg-reg operation
4787 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4788     instruction_count(2);
4789     dst    : S4(write);
4790     src    : S3(read);
4791     DECODE : S0(2);     // any 2 decoders
4792     ALU    : S3(2);     // both alus
4793 %}
4794 
4795 // Integer ALU reg-reg operation
4796 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4797     single_instruction;
4798     dst    : S4(write);
4799     src    : S3(read);
4800     D0     : S0;        // big decoder only
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Long ALU reg-reg operation
4805 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4806     instruction_count(2);
4807     dst    : S4(write);
4808     src    : S3(read);
4809     D0     : S0(2);     // big decoder only; twice
4810     ALU    : S3(2);     // both alus
4811 %}
4812 
4813 // Integer ALU reg-mem operation
4814 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4815     single_instruction;
4816     dst    : S5(write);
4817     mem    : S3(read);
4818     D0     : S0;        // big decoder only
4819     ALU    : S4;        // any alu
4820     MEM    : S3;        // any mem
4821 %}
4822 
4823 // Long ALU reg-mem operation
4824 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4825     instruction_count(2);
4826     dst    : S5(write);
4827     mem    : S3(read);
4828     D0     : S0(2);     // big decoder only; twice
4829     ALU    : S4(2);     // any 2 alus
4830     MEM    : S3(2);     // both mems
4831 %}
4832 
4833 // Integer mem operation (prefetch)
4834 pipe_class ialu_mem(memory mem)
4835 %{
4836     single_instruction;
4837     mem    : S3(read);
4838     D0     : S0;        // big decoder only
4839     MEM    : S3;        // any mem
4840 %}
4841 
4842 // Integer Store to Memory
4843 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4844     single_instruction;
4845     mem    : S3(read);
4846     src    : S5(read);
4847     D0     : S0;        // big decoder only
4848     ALU    : S4;        // any alu
4849     MEM    : S3;
4850 %}
4851 
4852 // Long Store to Memory
4853 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4854     instruction_count(2);
4855     mem    : S3(read);
4856     src    : S5(read);
4857     D0     : S0(2);     // big decoder only; twice
4858     ALU    : S4(2);     // any 2 alus
4859     MEM    : S3(2);     // Both mems
4860 %}
4861 
4862 // Integer Store to Memory
4863 pipe_class ialu_mem_imm(memory mem) %{
4864     single_instruction;
4865     mem    : S3(read);
4866     D0     : S0;        // big decoder only
4867     ALU    : S4;        // any alu
4868     MEM    : S3;
4869 %}
4870 
4871 // Integer ALU0 reg-reg operation
4872 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4873     single_instruction;
4874     dst    : S4(write);
4875     src    : S3(read);
4876     D0     : S0;        // Big decoder only
4877     ALU0   : S3;        // only alu0
4878 %}
4879 
4880 // Integer ALU0 reg-mem operation
4881 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4882     single_instruction;
4883     dst    : S5(write);
4884     mem    : S3(read);
4885     D0     : S0;        // big decoder only
4886     ALU0   : S4;        // ALU0 only
4887     MEM    : S3;        // any mem
4888 %}
4889 
4890 // Integer ALU reg-reg operation
4891 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4892     single_instruction;
4893     cr     : S4(write);
4894     src1   : S3(read);
4895     src2   : S3(read);
4896     DECODE : S0;        // any decoder
4897     ALU    : S3;        // any alu
4898 %}
4899 
4900 // Integer ALU reg-imm operation
4901 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4902     single_instruction;
4903     cr     : S4(write);
4904     src1   : S3(read);
4905     DECODE : S0;        // any decoder
4906     ALU    : S3;        // any alu
4907 %}
4908 
4909 // Integer ALU reg-mem operation
4910 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4911     single_instruction;
4912     cr     : S4(write);
4913     src1   : S3(read);
4914     src2   : S3(read);
4915     D0     : S0;        // big decoder only
4916     ALU    : S4;        // any alu
4917     MEM    : S3;
4918 %}
4919 
4920 // Conditional move reg-reg
4921 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4922     instruction_count(4);
4923     y      : S4(read);
4924     q      : S3(read);
4925     p      : S3(read);
4926     DECODE : S0(4);     // any decoder
4927 %}
4928 
4929 // Conditional move reg-reg
4930 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4931     single_instruction;
4932     dst    : S4(write);
4933     src    : S3(read);
4934     cr     : S3(read);
4935     DECODE : S0;        // any decoder
4936 %}
4937 
4938 // Conditional move reg-mem
4939 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4940     single_instruction;
4941     dst    : S4(write);
4942     src    : S3(read);
4943     cr     : S3(read);
4944     DECODE : S0;        // any decoder
4945     MEM    : S3;
4946 %}
4947 
4948 // Conditional move reg-reg long
4949 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4950     single_instruction;
4951     dst    : S4(write);
4952     src    : S3(read);
4953     cr     : S3(read);
4954     DECODE : S0(2);     // any 2 decoders
4955 %}
4956 
4957 // Conditional move double reg-reg
4958 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4959     single_instruction;
4960     dst    : S4(write);
4961     src    : S3(read);
4962     cr     : S3(read);
4963     DECODE : S0;        // any decoder
4964 %}
4965 
4966 // Float reg-reg operation
4967 pipe_class fpu_reg(regDPR dst) %{
4968     instruction_count(2);
4969     dst    : S3(read);
4970     DECODE : S0(2);     // any 2 decoders
4971     FPU    : S3;
4972 %}
4973 
4974 // Float reg-reg operation
4975 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4976     instruction_count(2);
4977     dst    : S4(write);
4978     src    : S3(read);
4979     DECODE : S0(2);     // any 2 decoders
4980     FPU    : S3;
4981 %}
4982 
4983 // Float reg-reg operation
4984 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4985     instruction_count(3);
4986     dst    : S4(write);
4987     src1   : S3(read);
4988     src2   : S3(read);
4989     DECODE : S0(3);     // any 3 decoders
4990     FPU    : S3(2);
4991 %}
4992 
4993 // Float reg-reg operation
4994 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4995     instruction_count(4);
4996     dst    : S4(write);
4997     src1   : S3(read);
4998     src2   : S3(read);
4999     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoder slots
5001     FPU    : S3(2);
5002 %}
5003 
5004 // Float reg-reg operation
5005 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5006     instruction_count(4);
5007     dst    : S4(write);
5008     src1   : S3(read);
5009     src2   : S3(read);
5010     src3   : S3(read);
5011     DECODE : S1(3);     // any 3 decoders
5012     D0     : S0;        // Big decoder only
5013     FPU    : S3(2);
5014     MEM    : S3;
5015 %}
5016 
5017 // Float reg-mem operation
5018 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5019     instruction_count(2);
5020     dst    : S5(write);
5021     mem    : S3(read);
5022     D0     : S0;        // big decoder only
5023     DECODE : S1;        // any decoder for FPU POP
5024     FPU    : S4;
5025     MEM    : S3;        // any mem
5026 %}
5027 
5028 // Float reg-mem operation
5029 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5030     instruction_count(3);
5031     dst    : S5(write);
5032     src1   : S3(read);
5033     mem    : S3(read);
5034     D0     : S0;        // big decoder only
5035     DECODE : S1(2);     // any decoder for FPU POP
5036     FPU    : S4;
5037     MEM    : S3;        // any mem
5038 %}
5039 
5040 // Float mem-reg operation
5041 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5042     instruction_count(2);
5043     src    : S5(read);
5044     mem    : S3(read);
5045     DECODE : S0;        // any decoder for FPU PUSH
5046     D0     : S1;        // big decoder only
5047     FPU    : S4;
5048     MEM    : S3;        // any mem
5049 %}
5050 
5051 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5052     instruction_count(3);
5053     src1   : S3(read);
5054     src2   : S3(read);
5055     mem    : S3(read);
5056     DECODE : S0(2);     // any decoder for FPU PUSH
5057     D0     : S1;        // big decoder only
5058     FPU    : S4;
5059     MEM    : S3;        // any mem
5060 %}
5061 
5062 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5063     instruction_count(3);
5064     src1   : S3(read);
5065     src2   : S3(read);
5066     mem    : S4(read);
5067     DECODE : S0;        // any decoder for FPU PUSH
5068     D0     : S0(2);     // big decoder only
5069     FPU    : S4;
5070     MEM    : S3(2);     // any mem
5071 %}
5072 
5073 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5074     instruction_count(2);
5075     src1   : S3(read);
5076     dst    : S4(read);
5077     D0     : S0(2);     // big decoder only
5078     MEM    : S3(2);     // any mem
5079 %}
5080 
5081 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5082     instruction_count(3);
5083     src1   : S3(read);
5084     src2   : S3(read);
5085     dst    : S4(read);
5086     D0     : S0(3);     // big decoder only
5087     FPU    : S4;
5088     MEM    : S3(3);     // any mem
5089 %}
5090 
5091 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5092     instruction_count(3);
5093     src1   : S4(read);
5094     mem    : S4(read);
5095     DECODE : S0;        // any decoder for FPU PUSH
5096     D0     : S0(2);     // big decoder only
5097     FPU    : S4;
5098     MEM    : S3(2);     // any mem
5099 %}
5100 
5101 // Float load constant
5102 pipe_class fpu_reg_con(regDPR dst) %{
5103     instruction_count(2);
5104     dst    : S5(write);
5105     D0     : S0;        // big decoder only for the load
5106     DECODE : S1;        // any decoder for FPU POP
5107     FPU    : S4;
5108     MEM    : S3;        // any mem
5109 %}
5110 
5111 // Float load constant
5112 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5113     instruction_count(3);
5114     dst    : S5(write);
5115     src    : S3(read);
5116     D0     : S0;        // big decoder only for the load
5117     DECODE : S1(2);     // any decoder for FPU POP
5118     FPU    : S4;
5119     MEM    : S3;        // any mem
5120 %}
5121 
5122 // UnConditional branch
5123 pipe_class pipe_jmp( label labl ) %{
5124     single_instruction;
5125     BR   : S3;
5126 %}
5127 
5128 // Conditional branch
5129 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5130     single_instruction;
5131     cr    : S1(read);
5132     BR    : S3;
5133 %}
5134 
5135 // Allocation idiom
5136 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5137     instruction_count(1); force_serialization;
5138     fixed_latency(6);
5139     heap_ptr : S3(read);
5140     DECODE   : S0(3);
5141     D0       : S2;
5142     MEM      : S3;
5143     ALU      : S3(2);
5144     dst      : S5(write);
5145     BR       : S5;
5146 %}
5147 
5148 // Generic big/slow expanded idiom
5149 pipe_class pipe_slow(  ) %{
5150     instruction_count(10); multiple_bundles; force_serialization;
5151     fixed_latency(100);
5152     D0  : S0(2);
5153     MEM : S3(2);
5154 %}
5155 
5156 // The real do-nothing guy
5157 pipe_class empty( ) %{
5158     instruction_count(0);
5159 %}
5160 
5161 // Define the class for the Nop node
5162 define %{
5163    MachNop = empty;
5164 %}
5165 
5166 %}
5167 
5168 //----------INSTRUCTIONS-------------------------------------------------------
5169 //
5170 // match      -- States which machine-independent subtree may be replaced
5171 //               by this instruction.
5172 // ins_cost   -- The estimated cost of this instruction is used by instruction
5173 //               selection to identify a minimum cost tree of machine
5174 //               instructions that matches a tree of machine-independent
5175 //               instructions.
5176 // format     -- A string providing the disassembly for this instruction.
5177 //               The value of an instruction's operand may be inserted
5178 //               by referring to it with a '$' prefix.
5179 // opcode     -- Three instruction opcodes may be provided.  These are referred
5180 //               to within an encode class as $primary, $secondary, and $tertiary
5181 //               respectively.  The primary opcode is commonly used to
5182 //               indicate the type of machine instruction, while secondary
5183 //               and tertiary are often used for prefix options or addressing
5184 //               modes.
5185 // ins_encode -- A list of encode classes with parameters. The encode class
5186 //               name must have been defined in an 'enc_class' specification
5187 //               in the encode section of the architecture description.
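//
// As a sketch of how the pieces fit together (illustrative comment only; the
// name below is hypothetical, and bytes_reverse_int is the first real
// instruction that follows):
//
//   instruct addI_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));       // ideal subtree this instruction replaces
//     effect(KILL cr);                     // also clobbers the flags register
//     ins_cost(125);                       // relative cost seen by the matcher
//     format %{ "ADD    $dst,$src" %}      // disassembly string
//     opcode(0x03);                        // $primary opcode
//     ins_encode( OpcP, RegReg(dst,src) ); // encode classes from the encode section
//     ins_pipe( ialu_reg_reg );            // pipeline class defined above
//   %}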
5188 
5189 //----------BSWAP-Instruction--------------------------------------------------
5190 instruct bytes_reverse_int(rRegI dst) %{
5191   match(Set dst (ReverseBytesI dst));
5192 
5193   format %{ "BSWAP  $dst" %}
5194   opcode(0x0F, 0xC8);
5195   ins_encode( OpcP, OpcSReg(dst) );
5196   ins_pipe( ialu_reg );
5197 %}
5198 
5199 instruct bytes_reverse_long(eRegL dst) %{
5200   match(Set dst (ReverseBytesL dst));
5201 
5202   format %{ "BSWAP  $dst.lo\n\t"
5203             "BSWAP  $dst.hi\n\t"
5204             "XCHG   $dst.lo $dst.hi" %}
5205 
5206   ins_cost(125);
5207   ins_encode( bswap_long_bytes(dst) );
5208   ins_pipe( ialu_reg_reg);
5209 %}
5210 
5211 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5212   match(Set dst (ReverseBytesUS dst));
5213   effect(KILL cr);
5214 
5215   format %{ "BSWAP  $dst\n\t"
5216             "SHR    $dst,16\n\t" %}
5217   ins_encode %{
5218     __ bswapl($dst$$Register);
5219     __ shrl($dst$$Register, 16);
5220   %}
5221   ins_pipe( ialu_reg );
5222 %}
5223 
5224 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5225   match(Set dst (ReverseBytesS dst));
5226   effect(KILL cr);
5227 
5228   format %{ "BSWAP  $dst\n\t"
5229             "SAR    $dst,16\n\t" %}
5230   ins_encode %{
5231     __ bswapl($dst$$Register);
5232     __ sarl($dst$$Register, 16);
5233   %}
5234   ins_pipe( ialu_reg );
5235 %}
5236 
5237 
5238 //---------- Zeros Count Instructions ------------------------------------------
5239 
5240 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5241   predicate(UseCountLeadingZerosInstruction);
5242   match(Set dst (CountLeadingZerosI src));
5243   effect(KILL cr);
5244 
5245   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5246   ins_encode %{
5247     __ lzcntl($dst$$Register, $src$$Register);
5248   %}
5249   ins_pipe(ialu_reg);
5250 %}
5251 
5252 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5253   predicate(!UseCountLeadingZerosInstruction);
5254   match(Set dst (CountLeadingZerosI src));
5255   effect(KILL cr);
5256 
5257   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5258             "JNZ    skip\n\t"
5259             "MOV    $dst, -1\n"
5260       "skip:\n\t"
5261             "NEG    $dst\n\t"
5262             "ADD    $dst, 31" %}
5263   ins_encode %{
5264     Register Rdst = $dst$$Register;
5265     Register Rsrc = $src$$Register;
5266     Label skip;
5267     __ bsrl(Rdst, Rsrc);
5268     __ jccb(Assembler::notZero, skip);
5269     __ movl(Rdst, -1);
5270     __ bind(skip);
5271     __ negl(Rdst);
5272     __ addl(Rdst, BitsPerInt - 1);
5273   %}
5274   ins_pipe(ialu_reg);
5275 %}
5276 
5277 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5278   predicate(UseCountLeadingZerosInstruction);
5279   match(Set dst (CountLeadingZerosL src));
5280   effect(TEMP dst, KILL cr);
5281 
5282   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5283             "JNC    done\n\t"
5284             "LZCNT  $dst, $src.lo\n\t"
5285             "ADD    $dst, 32\n"
5286       "done:" %}
5287   ins_encode %{
5288     Register Rdst = $dst$$Register;
5289     Register Rsrc = $src$$Register;
5290     Label done;
5291     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5292     __ jccb(Assembler::carryClear, done);
5293     __ lzcntl(Rdst, Rsrc);
5294     __ addl(Rdst, BitsPerInt);
5295     __ bind(done);
5296   %}
5297   ins_pipe(ialu_reg);
5298 %}
5299 
5300 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5301   predicate(!UseCountLeadingZerosInstruction);
5302   match(Set dst (CountLeadingZerosL src));
5303   effect(TEMP dst, KILL cr);
5304 
5305   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5306             "JZ     msw_is_zero\n\t"
5307             "ADD    $dst, 32\n\t"
5308             "JMP    not_zero\n"
5309       "msw_is_zero:\n\t"
5310             "BSR    $dst, $src.lo\n\t"
5311             "JNZ    not_zero\n\t"
5312             "MOV    $dst, -1\n"
5313       "not_zero:\n\t"
5314             "NEG    $dst\n\t"
5315             "ADD    $dst, 63\n" %}
5316  ins_encode %{
5317     Register Rdst = $dst$$Register;
5318     Register Rsrc = $src$$Register;
5319     Label msw_is_zero;
5320     Label not_zero;
5321     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5322     __ jccb(Assembler::zero, msw_is_zero);
5323     __ addl(Rdst, BitsPerInt);
5324     __ jmpb(not_zero);
5325     __ bind(msw_is_zero);
5326     __ bsrl(Rdst, Rsrc);
5327     __ jccb(Assembler::notZero, not_zero);
5328     __ movl(Rdst, -1);
5329     __ bind(not_zero);
5330     __ negl(Rdst);
5331     __ addl(Rdst, BitsPerLong - 1);
5332   %}
5333   ins_pipe(ialu_reg);
5334 %}
5335 
5336 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5337   predicate(UseCountTrailingZerosInstruction);
5338   match(Set dst (CountTrailingZerosI src));
5339   effect(KILL cr);
5340 
5341   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5342   ins_encode %{
5343     __ tzcntl($dst$$Register, $src$$Register);
5344   %}
5345   ins_pipe(ialu_reg);
5346 %}
5347 
5348 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5349   predicate(!UseCountTrailingZerosInstruction);
5350   match(Set dst (CountTrailingZerosI src));
5351   effect(KILL cr);
5352 
5353   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5354             "JNZ    done\n\t"
5355             "MOV    $dst, 32\n"
5356       "done:" %}
5357   ins_encode %{
5358     Register Rdst = $dst$$Register;
5359     Label done;
5360     __ bsfl(Rdst, $src$$Register);
5361     __ jccb(Assembler::notZero, done);
5362     __ movl(Rdst, BitsPerInt);
5363     __ bind(done);
5364   %}
5365   ins_pipe(ialu_reg);
5366 %}
5367 
5368 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5369   predicate(UseCountTrailingZerosInstruction);
5370   match(Set dst (CountTrailingZerosL src));
5371   effect(TEMP dst, KILL cr);
5372 
5373   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5374             "JNC    done\n\t"
5375             "TZCNT  $dst, $src.hi\n\t"
5376             "ADD    $dst, 32\n"
5377             "done:" %}
5378   ins_encode %{
5379     Register Rdst = $dst$$Register;
5380     Register Rsrc = $src$$Register;
5381     Label done;
5382     __ tzcntl(Rdst, Rsrc);
5383     __ jccb(Assembler::carryClear, done);
5384     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5385     __ addl(Rdst, BitsPerInt);
5386     __ bind(done);
5387   %}
5388   ins_pipe(ialu_reg);
5389 %}
5390 
5391 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5392   predicate(!UseCountTrailingZerosInstruction);
5393   match(Set dst (CountTrailingZerosL src));
5394   effect(TEMP dst, KILL cr);
5395 
5396   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5397             "JNZ    done\n\t"
5398             "BSF    $dst, $src.hi\n\t"
5399             "JNZ    msw_not_zero\n\t"
5400             "MOV    $dst, 32\n"
5401       "msw_not_zero:\n\t"
5402             "ADD    $dst, 32\n"
5403       "done:" %}
5404   ins_encode %{
5405     Register Rdst = $dst$$Register;
5406     Register Rsrc = $src$$Register;
5407     Label msw_not_zero;
5408     Label done;
5409     __ bsfl(Rdst, Rsrc);
5410     __ jccb(Assembler::notZero, done);
5411     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5412     __ jccb(Assembler::notZero, msw_not_zero);
5413     __ movl(Rdst, BitsPerInt);
5414     __ bind(msw_not_zero);
5415     __ addl(Rdst, BitsPerInt);
5416     __ bind(done);
5417   %}
5418   ins_pipe(ialu_reg);
5419 %}
5420 
5421 
5422 //---------- Population Count Instructions -------------------------------------
5423 
5424 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5425   predicate(UsePopCountInstruction);
5426   match(Set dst (PopCountI src));
5427   effect(KILL cr);
5428 
5429   format %{ "POPCNT $dst, $src" %}
5430   ins_encode %{
5431     __ popcntl($dst$$Register, $src$$Register);
5432   %}
5433   ins_pipe(ialu_reg);
5434 %}
5435 
5436 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5437   predicate(UsePopCountInstruction);
5438   match(Set dst (PopCountI (LoadI mem)));
5439   effect(KILL cr);
5440 
5441   format %{ "POPCNT $dst, $mem" %}
5442   ins_encode %{
5443     __ popcntl($dst$$Register, $mem$$Address);
5444   %}
5445   ins_pipe(ialu_reg);
5446 %}
5447 
5448 // Note: Long.bitCount(long) returns an int.
5449 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5450   predicate(UsePopCountInstruction);
5451   match(Set dst (PopCountL src));
5452   effect(KILL cr, TEMP tmp, TEMP dst);
5453 
5454   format %{ "POPCNT $dst, $src.lo\n\t"
5455             "POPCNT $tmp, $src.hi\n\t"
5456             "ADD    $dst, $tmp" %}
5457   ins_encode %{
5458     __ popcntl($dst$$Register, $src$$Register);
5459     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5460     __ addl($dst$$Register, $tmp$$Register);
5461   %}
5462   ins_pipe(ialu_reg);
5463 %}
5464 
5465 // Note: Long.bitCount(long) returns an int.
5466 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5467   predicate(UsePopCountInstruction);
5468   match(Set dst (PopCountL (LoadL mem)));
5469   effect(KILL cr, TEMP tmp, TEMP dst);
5470 
5471   format %{ "POPCNT $dst, $mem\n\t"
5472             "POPCNT $tmp, $mem+4\n\t"
5473             "ADD    $dst, $tmp" %}
5474   ins_encode %{
5475     //__ popcntl($dst$$Register, $mem$$Address$$first);
5476     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5477     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5478     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5479     __ addl($dst$$Register, $tmp$$Register);
5480   %}
5481   ins_pipe(ialu_reg);
5482 %}
5483 
5484 
5485 //----------Load/Store/Move Instructions---------------------------------------
5486 //----------Load Instructions--------------------------------------------------
5487 // Load Byte (8bit signed)
5488 instruct loadB(xRegI dst, memory mem) %{
5489   match(Set dst (LoadB mem));
5490 
5491   ins_cost(125);
5492   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5493 
5494   ins_encode %{
5495     __ movsbl($dst$$Register, $mem$$Address);
5496   %}
5497 
5498   ins_pipe(ialu_reg_mem);
5499 %}
5500 
5501 // Load Byte (8bit signed) into Long Register
5502 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5503   match(Set dst (ConvI2L (LoadB mem)));
5504   effect(KILL cr);
5505 
5506   ins_cost(375);
5507   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5508             "MOV    $dst.hi,$dst.lo\n\t"
5509             "SAR    $dst.hi,7" %}
5510 
5511   ins_encode %{
5512     __ movsbl($dst$$Register, $mem$$Address);
5513     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5515   %}
5516 
5517   ins_pipe(ialu_reg_mem);
5518 %}
5519 
5520 // Load Unsigned Byte (8bit UNsigned)
5521 instruct loadUB(xRegI dst, memory mem) %{
5522   match(Set dst (LoadUB mem));
5523 
5524   ins_cost(125);
5525   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5526 
5527   ins_encode %{
5528     __ movzbl($dst$$Register, $mem$$Address);
5529   %}
5530 
5531   ins_pipe(ialu_reg_mem);
5532 %}
5533 
5534 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5535 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5536   match(Set dst (ConvI2L (LoadUB mem)));
5537   effect(KILL cr);
5538 
5539   ins_cost(250);
5540   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5541             "XOR    $dst.hi,$dst.hi" %}
5542 
5543   ins_encode %{
5544     Register Rdst = $dst$$Register;
5545     __ movzbl(Rdst, $mem$$Address);
5546     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5547   %}
5548 
5549   ins_pipe(ialu_reg_mem);
5550 %}
5551 
5552 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5553 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5554   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5555   effect(KILL cr);
5556 
5557   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5558             "XOR    $dst.hi,$dst.hi\n\t"
5559             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5560   ins_encode %{
5561     Register Rdst = $dst$$Register;
5562     __ movzbl(Rdst, $mem$$Address);
5563     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5564     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5565   %}
5566   ins_pipe(ialu_reg_mem);
5567 %}
5568 
5569 // Load Short (16bit signed)
5570 instruct loadS(rRegI dst, memory mem) %{
5571   match(Set dst (LoadS mem));
5572 
5573   ins_cost(125);
5574   format %{ "MOVSX  $dst,$mem\t# short" %}
5575 
5576   ins_encode %{
5577     __ movswl($dst$$Register, $mem$$Address);
5578   %}
5579 
5580   ins_pipe(ialu_reg_mem);
5581 %}
5582 
5583 // Load Short (16 bit signed) to Byte (8 bit signed)
5584 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5585   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5586 
5587   ins_cost(125);
5588   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5589   ins_encode %{
5590     __ movsbl($dst$$Register, $mem$$Address);
5591   %}
5592   ins_pipe(ialu_reg_mem);
5593 %}
5594 
5595 // Load Short (16bit signed) into Long Register
5596 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5597   match(Set dst (ConvI2L (LoadS mem)));
5598   effect(KILL cr);
5599 
5600   ins_cost(375);
5601   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5602             "MOV    $dst.hi,$dst.lo\n\t"
5603             "SAR    $dst.hi,15" %}
5604 
5605   ins_encode %{
5606     __ movswl($dst$$Register, $mem$$Address);
5607     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5609   %}
5610 
5611   ins_pipe(ialu_reg_mem);
5612 %}
5613 
5614 // Load Unsigned Short/Char (16bit unsigned)
5615 instruct loadUS(rRegI dst, memory mem) %{
5616   match(Set dst (LoadUS mem));
5617 
5618   ins_cost(125);
5619   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5620 
5621   ins_encode %{
5622     __ movzwl($dst$$Register, $mem$$Address);
5623   %}
5624 
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5629 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5630   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5631 
5632   ins_cost(125);
5633   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5634   ins_encode %{
5635     __ movsbl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5641 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5642   match(Set dst (ConvI2L (LoadUS mem)));
5643   effect(KILL cr);
5644 
5645   ins_cost(250);
5646   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5647             "XOR    $dst.hi,$dst.hi" %}
5648 
5649   ins_encode %{
5650     __ movzwl($dst$$Register, $mem$$Address);
5651     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5652   %}
5653 
5654   ins_pipe(ialu_reg_mem);
5655 %}
5656 
5657 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5658 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5659   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5660   effect(KILL cr);
5661 
5662   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5663             "XOR    $dst.hi,$dst.hi" %}
5664   ins_encode %{
5665     Register Rdst = $dst$$Register;
5666     __ movzbl(Rdst, $mem$$Address);
5667     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5668   %}
5669   ins_pipe(ialu_reg_mem);
5670 %}
5671 
5672 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5673 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5674   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5675   effect(KILL cr);
5676 
5677   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5678             "XOR    $dst.hi,$dst.hi\n\t"
5679             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5680   ins_encode %{
5681     Register Rdst = $dst$$Register;
5682     __ movzwl(Rdst, $mem$$Address);
5683     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5684     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5685   %}
5686   ins_pipe(ialu_reg_mem);
5687 %}
5688 
5689 // Load Integer
5690 instruct loadI(rRegI dst, memory mem) %{
5691   match(Set dst (LoadI mem));
5692 
5693   ins_cost(125);
5694   format %{ "MOV    $dst,$mem\t# int" %}
5695 
5696   ins_encode %{
5697     __ movl($dst$$Register, $mem$$Address);
5698   %}
5699 
5700   ins_pipe(ialu_reg_mem);
5701 %}
5702 
5703 // Load Integer (32 bit signed) to Byte (8 bit signed)
5704 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5705   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5706 
5707   ins_cost(125);
5708   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5709   ins_encode %{
5710     __ movsbl($dst$$Register, $mem$$Address);
5711   %}
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5716 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5717   match(Set dst (AndI (LoadI mem) mask));
5718 
5719   ins_cost(125);
5720   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5721   ins_encode %{
5722     __ movzbl($dst$$Register, $mem$$Address);
5723   %}
5724   ins_pipe(ialu_reg_mem);
5725 %}
5726 
5727 // Load Integer (32 bit signed) to Short (16 bit signed)
5728 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5729   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5730 
5731   ins_cost(125);
5732   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5733   ins_encode %{
5734     __ movswl($dst$$Register, $mem$$Address);
5735   %}
5736   ins_pipe(ialu_reg_mem);
5737 %}
5738 
5739 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5740 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5741   match(Set dst (AndI (LoadI mem) mask));
5742 
5743   ins_cost(125);
5744   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5745   ins_encode %{
5746     __ movzwl($dst$$Register, $mem$$Address);
5747   %}
5748   ins_pipe(ialu_reg_mem);
5749 %}
5750 
5751 // Load Integer into Long Register
5752 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5753   match(Set dst (ConvI2L (LoadI mem)));
5754   effect(KILL cr);
5755 
5756   ins_cost(375);
5757   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5758             "MOV    $dst.hi,$dst.lo\n\t"
5759             "SAR    $dst.hi,31" %}
5760 
5761   ins_encode %{
5762     __ movl($dst$$Register, $mem$$Address);
5763     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5764     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5765   %}
5766 
5767   ins_pipe(ialu_reg_mem);
5768 %}
5769 
5770 // Load Integer with mask 0xFF into Long Register
5771 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5772   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5773   effect(KILL cr);
5774 
5775   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5776             "XOR    $dst.hi,$dst.hi" %}
5777   ins_encode %{
5778     Register Rdst = $dst$$Register;
5779     __ movzbl(Rdst, $mem$$Address);
5780     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5781   %}
5782   ins_pipe(ialu_reg_mem);
5783 %}
5784 
5785 // Load Integer with mask 0xFFFF into Long Register
5786 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5787   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5788   effect(KILL cr);
5789 
5790   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5791             "XOR    $dst.hi,$dst.hi" %}
5792   ins_encode %{
5793     Register Rdst = $dst$$Register;
5794     __ movzwl(Rdst, $mem$$Address);
5795     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5796   %}
5797   ins_pipe(ialu_reg_mem);
5798 %}
5799 
5800 // Load Integer with 31-bit mask into Long Register
5801 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5802   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5803   effect(KILL cr);
5804 
5805   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5806             "XOR    $dst.hi,$dst.hi\n\t"
5807             "AND    $dst.lo,$mask" %}
5808   ins_encode %{
5809     Register Rdst = $dst$$Register;
5810     __ movl(Rdst, $mem$$Address);
5811     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5812     __ andl(Rdst, $mask$$constant);
5813   %}
5814   ins_pipe(ialu_reg_mem);
5815 %}
5816 
5817 // Load Unsigned Integer into Long Register
5818 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5819   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5820   effect(KILL cr);
5821 
5822   ins_cost(250);
5823   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5824             "XOR    $dst.hi,$dst.hi" %}
5825 
5826   ins_encode %{
5827     __ movl($dst$$Register, $mem$$Address);
5828     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5829   %}
5830 
5831   ins_pipe(ialu_reg_mem);
5832 %}
5833 
5834 // Load Long.  Cannot clobber address while loading, so restrict address
5835 // register to ESI
5836 instruct loadL(eRegL dst, load_long_memory mem) %{
5837   predicate(!((LoadLNode*)n)->require_atomic_access());
5838   match(Set dst (LoadL mem));
5839 
5840   ins_cost(250);
5841   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5842             "MOV    $dst.hi,$mem+4" %}
5843 
5844   ins_encode %{
5845     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5846     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5847     __ movl($dst$$Register, Amemlo);
5848     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5849   %}
5850 
5851   ins_pipe(ialu_reg_long_mem);
5852 %}
5853 
5854 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5855 // then store it down to the stack and reload on the int
5856 // side.
5857 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5858   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5859   match(Set dst (LoadL mem));
5860 
5861   ins_cost(200);
5862   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5863             "FISTp  $dst" %}
5864   ins_encode(enc_loadL_volatile(mem,dst));
5865   ins_pipe( fpu_reg_mem );
5866 %}
5867 
5868 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5869   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5870   match(Set dst (LoadL mem));
5871   effect(TEMP tmp);
5872   ins_cost(180);
5873   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5874             "MOVSD  $dst,$tmp" %}
5875   ins_encode %{
5876     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5877     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5878   %}
5879   ins_pipe( pipe_slow );
5880 %}
5881 
5882 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5883   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5884   match(Set dst (LoadL mem));
5885   effect(TEMP tmp);
5886   ins_cost(160);
5887   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5888             "MOVD   $dst.lo,$tmp\n\t"
5889             "PSRLQ  $tmp,32\n\t"
5890             "MOVD   $dst.hi,$tmp" %}
5891   ins_encode %{
5892     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5893     __ movdl($dst$$Register, $tmp$$XMMRegister);
5894     __ psrlq($tmp$$XMMRegister, 32);
5895     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5896   %}
5897   ins_pipe( pipe_slow );
5898 %}
5899 
5900 // Load Range
5901 instruct loadRange(rRegI dst, memory mem) %{
5902   match(Set dst (LoadRange mem));
5903 
5904   ins_cost(125);
5905   format %{ "MOV    $dst,$mem" %}
5906   opcode(0x8B);
5907   ins_encode( OpcP, RegMem(dst,mem));
5908   ins_pipe( ialu_reg_mem );
5909 %}
5910 
5911 
5912 // Load Pointer
5913 instruct loadP(eRegP dst, memory mem) %{
5914   match(Set dst (LoadP mem));
5915 
5916   ins_cost(125);
5917   format %{ "MOV    $dst,$mem" %}
5918   opcode(0x8B);
5919   ins_encode( OpcP, RegMem(dst,mem));
5920   ins_pipe( ialu_reg_mem );
5921 %}
5922 
5923 // Load Klass Pointer
5924 instruct loadKlass(eRegP dst, memory mem) %{
5925   match(Set dst (LoadKlass mem));
5926 
5927   ins_cost(125);
5928   format %{ "MOV    $dst,$mem" %}
5929   opcode(0x8B);
5930   ins_encode( OpcP, RegMem(dst,mem));
5931   ins_pipe( ialu_reg_mem );
5932 %}
5933 
5934 // Load Double
5935 instruct loadDPR(regDPR dst, memory mem) %{
5936   predicate(UseSSE<=1);
5937   match(Set dst (LoadD mem));
5938 
5939   ins_cost(150);
5940   format %{ "FLD_D  ST,$mem\n\t"
5941             "FSTP   $dst" %}
5942   opcode(0xDD);               /* DD /0 */
5943   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5944               Pop_Reg_DPR(dst) );
5945   ins_pipe( fpu_reg_mem );
5946 %}
5947 
5948 // Load Double to XMM
5949 instruct loadD(regD dst, memory mem) %{
5950   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5951   match(Set dst (LoadD mem));
5952   ins_cost(145);
5953   format %{ "MOVSD  $dst,$mem" %}
5954   ins_encode %{
5955     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5956   %}
5957   ins_pipe( pipe_slow );
5958 %}
5959 
5960 instruct loadD_partial(regD dst, memory mem) %{
5961   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5962   match(Set dst (LoadD mem));
5963   ins_cost(145);
5964   format %{ "MOVLPD $dst,$mem" %}
5965   ins_encode %{
5966     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5967   %}
5968   ins_pipe( pipe_slow );
5969 %}
5970 
5971 // Load to XMM register (single-precision floating point)
5972 // MOVSS instruction
5973 instruct loadF(regF dst, memory mem) %{
5974   predicate(UseSSE>=1);
5975   match(Set dst (LoadF mem));
5976   ins_cost(145);
5977   format %{ "MOVSS  $dst,$mem" %}
5978   ins_encode %{
5979     __ movflt ($dst$$XMMRegister, $mem$$Address);
5980   %}
5981   ins_pipe( pipe_slow );
5982 %}
5983 
5984 // Load Float
5985 instruct loadFPR(regFPR dst, memory mem) %{
5986   predicate(UseSSE==0);
5987   match(Set dst (LoadF mem));
5988 
5989   ins_cost(150);
5990   format %{ "FLD_S  ST,$mem\n\t"
5991             "FSTP   $dst" %}
5992   opcode(0xD9);               /* D9 /0 */
5993   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5994               Pop_Reg_FPR(dst) );
5995   ins_pipe( fpu_reg_mem );
5996 %}
5997 
5998 // Load Effective Address
5999 instruct leaP8(eRegP dst, indOffset8 mem) %{
6000   match(Set dst mem);
6001 
6002   ins_cost(110);
6003   format %{ "LEA    $dst,$mem" %}
6004   opcode(0x8D);
6005   ins_encode( OpcP, RegMem(dst,mem));
6006   ins_pipe( ialu_reg_reg_fat );
6007 %}
6008 
6009 instruct leaP32(eRegP dst, indOffset32 mem) %{
6010   match(Set dst mem);
6011 
6012   ins_cost(110);
6013   format %{ "LEA    $dst,$mem" %}
6014   opcode(0x8D);
6015   ins_encode( OpcP, RegMem(dst,mem));
6016   ins_pipe( ialu_reg_reg_fat );
6017 %}
6018 
6019 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6020   match(Set dst mem);
6021 
6022   ins_cost(110);
6023   format %{ "LEA    $dst,$mem" %}
6024   opcode(0x8D);
6025   ins_encode( OpcP, RegMem(dst,mem));
6026   ins_pipe( ialu_reg_reg_fat );
6027 %}
6028 
6029 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6030   match(Set dst mem);
6031 
6032   ins_cost(110);
6033   format %{ "LEA    $dst,$mem" %}
6034   opcode(0x8D);
6035   ins_encode( OpcP, RegMem(dst,mem));
6036   ins_pipe( ialu_reg_reg_fat );
6037 %}
6038 
6039 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6040   match(Set dst mem);
6041 
6042   ins_cost(110);
6043   format %{ "LEA    $dst,$mem" %}
6044   opcode(0x8D);
6045   ins_encode( OpcP, RegMem(dst,mem));
6046   ins_pipe( ialu_reg_reg_fat );
6047 %}
6048 
6049 // Load Constant
6050 instruct loadConI(rRegI dst, immI src) %{
6051   match(Set dst src);
6052 
6053   format %{ "MOV    $dst,$src" %}
6054   ins_encode( LdImmI(dst, src) );
6055   ins_pipe( ialu_reg_fat );
6056 %}
6057 
6058 // Load Constant zero
6059 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6060   match(Set dst src);
6061   effect(KILL cr);
6062 
6063   ins_cost(50);
6064   format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* XOR r,r/m */
6066   ins_encode( OpcP, RegReg( dst, dst ) );
6067   ins_pipe( ialu_reg );
6068 %}
6069 
6070 instruct loadConP(eRegP dst, immP src) %{
6071   match(Set dst src);
6072 
6073   format %{ "MOV    $dst,$src" %}
6074   opcode(0xB8);  /* + rd */
6075   ins_encode( LdImmP(dst, src) );
6076   ins_pipe( ialu_reg_fat );
6077 %}
6078 
6079 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6080   match(Set dst src);
6081   effect(KILL cr);
6082   ins_cost(200);
6083   format %{ "MOV    $dst.lo,$src.lo\n\t"
6084             "MOV    $dst.hi,$src.hi" %}
6085   opcode(0xB8);
6086   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6087   ins_pipe( ialu_reg_long_fat );
6088 %}
6089 
6090 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6091   match(Set dst src);
6092   effect(KILL cr);
6093   ins_cost(150);
6094   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6095             "XOR    $dst.hi,$dst.hi" %}
6096   opcode(0x33,0x33);
6097   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6098   ins_pipe( ialu_reg_long );
6099 %}
6100 
6101 // The instruction usage is guarded by predicate in operand immFPR().
6102 instruct loadConFPR(regFPR dst, immFPR con) %{
6103   match(Set dst con);
6104   ins_cost(125);
6105   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6106             "FSTP   $dst" %}
6107   ins_encode %{
6108     __ fld_s($constantaddress($con));
6109     __ fstp_d($dst$$reg);
6110   %}
6111   ins_pipe(fpu_reg_con);
6112 %}
6113 
6114 // The instruction usage is guarded by predicate in operand immFPR0().
6115 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6116   match(Set dst con);
6117   ins_cost(125);
6118   format %{ "FLDZ   ST\n\t"
6119             "FSTP   $dst" %}
6120   ins_encode %{
6121     __ fldz();
6122     __ fstp_d($dst$$reg);
6123   %}
6124   ins_pipe(fpu_reg_con);
6125 %}
6126 
6127 // The instruction usage is guarded by predicate in operand immFPR1().
6128 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6129   match(Set dst con);
6130   ins_cost(125);
6131   format %{ "FLD1   ST\n\t"
6132             "FSTP   $dst" %}
6133   ins_encode %{
6134     __ fld1();
6135     __ fstp_d($dst$$reg);
6136   %}
6137   ins_pipe(fpu_reg_con);
6138 %}
6139 
6140 // The instruction usage is guarded by predicate in operand immF().
6141 instruct loadConF(regF dst, immF con) %{
6142   match(Set dst con);
6143   ins_cost(125);
6144   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6145   ins_encode %{
6146     __ movflt($dst$$XMMRegister, $constantaddress($con));
6147   %}
6148   ins_pipe(pipe_slow);
6149 %}
6150 
6151 // The instruction usage is guarded by predicate in operand immF0().
6152 instruct loadConF0(regF dst, immF0 src) %{
6153   match(Set dst src);
6154   ins_cost(100);
6155   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6156   ins_encode %{
6157     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6158   %}
6159   ins_pipe(pipe_slow);
6160 %}
6161 
6162 // The instruction usage is guarded by predicate in operand immDPR().
6163 instruct loadConDPR(regDPR dst, immDPR con) %{
6164   match(Set dst con);
6165   ins_cost(125);
6166 
6167   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6168             "FSTP   $dst" %}
6169   ins_encode %{
6170     __ fld_d($constantaddress($con));
6171     __ fstp_d($dst$$reg);
6172   %}
6173   ins_pipe(fpu_reg_con);
6174 %}
6175 
6176 // The instruction usage is guarded by predicate in operand immDPR0().
6177 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6178   match(Set dst con);
6179   ins_cost(125);
6180 
6181   format %{ "FLDZ   ST\n\t"
6182             "FSTP   $dst" %}
6183   ins_encode %{
6184     __ fldz();
6185     __ fstp_d($dst$$reg);
6186   %}
6187   ins_pipe(fpu_reg_con);
6188 %}
6189 
6190 // The instruction usage is guarded by predicate in operand immDPR1().
6191 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6192   match(Set dst con);
6193   ins_cost(125);
6194 
6195   format %{ "FLD1   ST\n\t"
6196             "FSTP   $dst" %}
6197   ins_encode %{
6198     __ fld1();
6199     __ fstp_d($dst$$reg);
6200   %}
6201   ins_pipe(fpu_reg_con);
6202 %}
6203 
6204 // The instruction usage is guarded by predicate in operand immD().
6205 instruct loadConD(regD dst, immD con) %{
6206   match(Set dst con);
6207   ins_cost(125);
6208   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6209   ins_encode %{
6210     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6211   %}
6212   ins_pipe(pipe_slow);
6213 %}
6214 
6215 // The instruction usage is guarded by predicate in operand immD0().
6216 instruct loadConD0(regD dst, immD0 src) %{
6217   match(Set dst src);
6218   ins_cost(100);
6219   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6220   ins_encode %{
6221     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6222   %}
6223   ins_pipe( pipe_slow );
6224 %}
6225 
6226 // Load Stack Slot
6227 instruct loadSSI(rRegI dst, stackSlotI src) %{
6228   match(Set dst src);
6229   ins_cost(125);
6230 
6231   format %{ "MOV    $dst,$src" %}
6232   opcode(0x8B);
6233   ins_encode( OpcP, RegMem(dst,src));
6234   ins_pipe( ialu_reg_mem );
6235 %}
6236 
6237 instruct loadSSL(eRegL dst, stackSlotL src) %{
6238   match(Set dst src);
6239 
6240   ins_cost(200);
6241   format %{ "MOV    $dst,$src.lo\n\t"
6242             "MOV    $dst+4,$src.hi" %}
6243   opcode(0x8B, 0x8B);
6244   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6245   ins_pipe( ialu_mem_long_reg );
6246 %}
6247 
6248 // Load Stack Slot
6249 instruct loadSSP(eRegP dst, stackSlotP src) %{
6250   match(Set dst src);
6251   ins_cost(125);
6252 
6253   format %{ "MOV    $dst,$src" %}
6254   opcode(0x8B);
6255   ins_encode( OpcP, RegMem(dst,src));
6256   ins_pipe( ialu_reg_mem );
6257 %}
6258 
6259 // Load Stack Slot
6260 instruct loadSSF(regFPR dst, stackSlotF src) %{
6261   match(Set dst src);
6262   ins_cost(125);
6263 
6264   format %{ "FLD_S  $src\n\t"
6265             "FSTP   $dst" %}
6266   opcode(0xD9);               /* D9 /0, FLD m32real */
6267   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6268               Pop_Reg_FPR(dst) );
6269   ins_pipe( fpu_reg_mem );
6270 %}
6271 
6272 // Load Stack Slot
6273 instruct loadSSD(regDPR dst, stackSlotD src) %{
6274   match(Set dst src);
6275   ins_cost(125);
6276 
6277   format %{ "FLD_D  $src\n\t"
6278             "FSTP   $dst" %}
6279   opcode(0xDD);               /* DD /0, FLD m64real */
6280   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6281               Pop_Reg_DPR(dst) );
6282   ins_pipe( fpu_reg_mem );
6283 %}
6284 
6285 // Prefetch instructions for allocation.
6286 // Must be safe to execute with invalid address (cannot fault).
6287 
6288 instruct prefetchAlloc0( memory mem ) %{
6289   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6290   match(PrefetchAllocation mem);
6291   ins_cost(0);
6292   size(0);
6293   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6294   ins_encode();
6295   ins_pipe(empty);
6296 %}
6297 
6298 instruct prefetchAlloc( memory mem ) %{
6299   predicate(AllocatePrefetchInstr==3);
6300   match( PrefetchAllocation mem );
6301   ins_cost(100);
6302 
6303   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6304   ins_encode %{
6305     __ prefetchw($mem$$Address);
6306   %}
6307   ins_pipe(ialu_mem);
6308 %}
6309 
6310 instruct prefetchAllocNTA( memory mem ) %{
6311   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6312   match(PrefetchAllocation mem);
6313   ins_cost(100);
6314 
6315   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6316   ins_encode %{
6317     __ prefetchnta($mem$$Address);
6318   %}
6319   ins_pipe(ialu_mem);
6320 %}
6321 
6322 instruct prefetchAllocT0( memory mem ) %{
6323   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6324   match(PrefetchAllocation mem);
6325   ins_cost(100);
6326 
6327   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6328   ins_encode %{
6329     __ prefetcht0($mem$$Address);
6330   %}
6331   ins_pipe(ialu_mem);
6332 %}
6333 
6334 instruct prefetchAllocT2( memory mem ) %{
6335   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6336   match(PrefetchAllocation mem);
6337   ins_cost(100);
6338 
6339   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6340   ins_encode %{
6341     __ prefetcht2($mem$$Address);
6342   %}
6343   ins_pipe(ialu_mem);
6344 %}
6345 
6346 //----------Store Instructions-------------------------------------------------
6347 
6348 // Store Byte
6349 instruct storeB(memory mem, xRegI src) %{
6350   match(Set mem (StoreB mem src));
6351 
6352   ins_cost(125);
6353   format %{ "MOV8   $mem,$src" %}
6354   opcode(0x88);
6355   ins_encode( OpcP, RegMem( src, mem ) );
6356   ins_pipe( ialu_mem_reg );
6357 %}
6358 
6359 // Store Char/Short
6360 instruct storeC(memory mem, rRegI src) %{
6361   match(Set mem (StoreC mem src));
6362 
6363   ins_cost(125);
6364   format %{ "MOV16  $mem,$src" %}
6365   opcode(0x89, 0x66);
6366   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6367   ins_pipe( ialu_mem_reg );
6368 %}
6369 
6370 // Store Integer
6371 instruct storeI(memory mem, rRegI src) %{
6372   match(Set mem (StoreI mem src));
6373 
6374   ins_cost(125);
6375   format %{ "MOV    $mem,$src" %}
6376   opcode(0x89);
6377   ins_encode( OpcP, RegMem( src, mem ) );
6378   ins_pipe( ialu_mem_reg );
6379 %}
6380 
6381 // Store Long
6382 instruct storeL(long_memory mem, eRegL src) %{
6383   predicate(!((StoreLNode*)n)->require_atomic_access());
6384   match(Set mem (StoreL mem src));
6385 
6386   ins_cost(200);
6387   format %{ "MOV    $mem,$src.lo\n\t"
6388             "MOV    $mem+4,$src.hi" %}
6389   opcode(0x89, 0x89);
6390   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6391   ins_pipe( ialu_mem_long_reg );
6392 %}
6393 
6394 // Store Long to Integer
6395 instruct storeL2I(memory mem, eRegL src) %{
6396   match(Set mem (StoreI mem (ConvL2I src)));
6397 
6398   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6399   ins_encode %{
6400     __ movl($mem$$Address, $src$$Register);
6401   %}
6402   ins_pipe(ialu_mem_reg);
6403 %}
6404 
6405 // Volatile Store Long.  Must be atomic, so move it into
6406 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6407 // target address before the store (for null-ptr checks)
6408 // so the memory operand is used twice in the encoding.
6409 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6410   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6411   match(Set mem (StoreL mem src));
6412   effect( KILL cr );
6413   ins_cost(400);
6414   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6415             "FILD   $src\n\t"
6416             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6417   opcode(0x3B);
6418   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6419   ins_pipe( fpu_reg_mem );
6420 %}
6421 
6422 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6423   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6424   match(Set mem (StoreL mem src));
6425   effect( TEMP tmp, KILL cr );
6426   ins_cost(380);
6427   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6428             "MOVSD  $tmp,$src\n\t"
6429             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6430   ins_encode %{
6431     __ cmpl(rax, $mem$$Address);
6432     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6433     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6434   %}
6435   ins_pipe( pipe_slow );
6436 %}
6437 
6438 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6439   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6440   match(Set mem (StoreL mem src));
6441   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6442   ins_cost(360);
6443   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6444             "MOVD   $tmp,$src.lo\n\t"
6445             "MOVD   $tmp2,$src.hi\n\t"
6446             "PUNPCKLDQ $tmp,$tmp2\n\t"
6447             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6448   ins_encode %{
6449     __ cmpl(rax, $mem$$Address);
6450     __ movdl($tmp$$XMMRegister, $src$$Register);
6451     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6452     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6453     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6454   %}
6455   ins_pipe( pipe_slow );
6456 %}
6457 
6458 // Store Pointer; for storing unknown oops and raw pointers
6459 instruct storeP(memory mem, anyRegP src) %{
6460   match(Set mem (StoreP mem src));
6461 
6462   ins_cost(125);
6463   format %{ "MOV    $mem,$src" %}
6464   opcode(0x89);
6465   ins_encode( OpcP, RegMem( src, mem ) );
6466   ins_pipe( ialu_mem_reg );
6467 %}
6468 
6469 // Store Integer Immediate
6470 instruct storeImmI(memory mem, immI src) %{
6471   match(Set mem (StoreI mem src));
6472 
6473   ins_cost(150);
6474   format %{ "MOV    $mem,$src" %}
6475   opcode(0xC7);               /* C7 /0 */
6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6477   ins_pipe( ialu_mem_imm );
6478 %}
6479 
6480 // Store Short/Char Immediate
6481 instruct storeImmI16(memory mem, immI16 src) %{
6482   predicate(UseStoreImmI16);
6483   match(Set mem (StoreC mem src));
6484 
6485   ins_cost(150);
6486   format %{ "MOV16  $mem,$src" %}
6487   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6488   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6489   ins_pipe( ialu_mem_imm );
6490 %}
6491 
6492 // Store Pointer Immediate; null pointers or constant oops that do not
6493 // need card-mark barriers.
6494 instruct storeImmP(memory mem, immP src) %{
6495   match(Set mem (StoreP mem src));
6496 
6497   ins_cost(150);
6498   format %{ "MOV    $mem,$src" %}
6499   opcode(0xC7);               /* C7 /0 */
6500   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6501   ins_pipe( ialu_mem_imm );
6502 %}
6503 
6504 // Store Byte Immediate
6505 instruct storeImmB(memory mem, immI8 src) %{
6506   match(Set mem (StoreB mem src));
6507 
6508   ins_cost(150);
6509   format %{ "MOV8   $mem,$src" %}
6510   opcode(0xC6);               /* C6 /0 */
6511   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6512   ins_pipe( ialu_mem_imm );
6513 %}
6514 
6515 // Store CMS card-mark Immediate
6516 instruct storeImmCM(memory mem, immI8 src) %{
6517   match(Set mem (StoreCM mem src));
6518 
6519   ins_cost(150);
6520   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6521   opcode(0xC6);               /* C6 /0 */
6522   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6523   ins_pipe( ialu_mem_imm );
6524 %}
6525 
6526 // Store Double
6527 instruct storeDPR( memory mem, regDPR1 src) %{
6528   predicate(UseSSE<=1);
6529   match(Set mem (StoreD mem src));
6530 
6531   ins_cost(100);
6532   format %{ "FST_D  $mem,$src" %}
6533   opcode(0xDD);       /* DD /2 */
6534   ins_encode( enc_FPR_store(mem,src) );
6535   ins_pipe( fpu_mem_reg );
6536 %}
6537 
6538 // Store double does rounding on x86
6539 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6540   predicate(UseSSE<=1);
6541   match(Set mem (StoreD mem (RoundDouble src)));
6542 
6543   ins_cost(100);
6544   format %{ "FST_D  $mem,$src\t# round" %}
6545   opcode(0xDD);       /* DD /2 */
6546   ins_encode( enc_FPR_store(mem,src) );
6547   ins_pipe( fpu_mem_reg );
6548 %}
6549 
6550 // Store XMM register to memory (double-precision floating points)
6551 // MOVSD instruction
6552 instruct storeD(memory mem, regD src) %{
6553   predicate(UseSSE>=2);
6554   match(Set mem (StoreD mem src));
6555   ins_cost(95);
6556   format %{ "MOVSD  $mem,$src" %}
6557   ins_encode %{
6558     __ movdbl($mem$$Address, $src$$XMMRegister);
6559   %}
6560   ins_pipe( pipe_slow );
6561 %}
6562 
// Move double from regD to vlRegD
6564 instruct MoveD2VL(vlRegD dst, regD src) %{
6565   match(Set dst src);
6566   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6567   ins_encode %{
6568     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6569   %}
6570   ins_pipe( fpu_reg_reg );
6571 %}
6572 
// Move double from vlRegD to regD
6574 instruct MoveVL2D(regD dst, vlRegD src) %{
6575   match(Set dst src);
6576   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6577   ins_encode %{
6578     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6579   %}
6580   ins_pipe( fpu_reg_reg );
6581 %}
6582 
6583 // Store XMM register to memory (single-precision floating point)
6584 // MOVSS instruction
6585 instruct storeF(memory mem, regF src) %{
6586   predicate(UseSSE>=1);
6587   match(Set mem (StoreF mem src));
6588   ins_cost(95);
6589   format %{ "MOVSS  $mem,$src" %}
6590   ins_encode %{
6591     __ movflt($mem$$Address, $src$$XMMRegister);
6592   %}
6593   ins_pipe( pipe_slow );
6594 %}
6595 
// Move float from regF to vlRegF
6597 instruct MoveF2VL(vlRegF dst, regF src) %{
6598   match(Set dst src);
6599   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6600   ins_encode %{
6601     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6602   %}
6603   ins_pipe( fpu_reg_reg );
6604 %}
6605 
// Move float from vlRegF to regF
6607 instruct MoveVL2F(regF dst, vlRegF src) %{
6608   match(Set dst src);
6609   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6610   ins_encode %{
6611     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6612   %}
6613   ins_pipe( fpu_reg_reg );
6614 %}
6615 
6616 // Store Float
6617 instruct storeFPR( memory mem, regFPR1 src) %{
6618   predicate(UseSSE==0);
6619   match(Set mem (StoreF mem src));
6620 
6621   ins_cost(100);
6622   format %{ "FST_S  $mem,$src" %}
6623   opcode(0xD9);       /* D9 /2 */
6624   ins_encode( enc_FPR_store(mem,src) );
6625   ins_pipe( fpu_mem_reg );
6626 %}
6627 
6628 // Store Float does rounding on x86
6629 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6630   predicate(UseSSE==0);
6631   match(Set mem (StoreF mem (RoundFloat src)));
6632 
6633   ins_cost(100);
6634   format %{ "FST_S  $mem,$src\t# round" %}
6635   opcode(0xD9);       /* D9 /2 */
6636   ins_encode( enc_FPR_store(mem,src) );
6637   ins_pipe( fpu_mem_reg );
6638 %}
6639 
// Store Float from a double register; the FST_S store rounds the double to float on x86
6641 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6642   predicate(UseSSE<=1);
6643   match(Set mem (StoreF mem (ConvD2F src)));
6644 
6645   ins_cost(100);
6646   format %{ "FST_S  $mem,$src\t# D-round" %}
6647   opcode(0xD9);       /* D9 /2 */
6648   ins_encode( enc_FPR_store(mem,src) );
6649   ins_pipe( fpu_mem_reg );
6650 %}
6651 
// Store immediate Float value (this is faster than storing from an FPU register)
6653 // The instruction usage is guarded by predicate in operand immFPR().
6654 instruct storeFPR_imm( memory mem, immFPR src) %{
6655   match(Set mem (StoreF mem src));
6656 
6657   ins_cost(50);
6658   format %{ "MOV    $mem,$src\t# store float" %}
6659   opcode(0xC7);               /* C7 /0 */
6660   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6661   ins_pipe( ialu_mem_imm );
6662 %}
6663 
// Store immediate Float value (this is faster than storing from an XMM register)
6665 // The instruction usage is guarded by predicate in operand immF().
6666 instruct storeF_imm( memory mem, immF src) %{
6667   match(Set mem (StoreF mem src));
6668 
6669   ins_cost(50);
6670   format %{ "MOV    $mem,$src\t# store float" %}
6671   opcode(0xC7);               /* C7 /0 */
6672   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6673   ins_pipe( ialu_mem_imm );
6674 %}
6675 
6676 // Store Integer to stack slot
6677 instruct storeSSI(stackSlotI dst, rRegI src) %{
6678   match(Set dst src);
6679 
6680   ins_cost(100);
6681   format %{ "MOV    $dst,$src" %}
6682   opcode(0x89);
6683   ins_encode( OpcPRegSS( dst, src ) );
6684   ins_pipe( ialu_mem_reg );
6685 %}
6686 
// Store Pointer to stack slot
6688 instruct storeSSP(stackSlotP dst, eRegP src) %{
6689   match(Set dst src);
6690 
6691   ins_cost(100);
6692   format %{ "MOV    $dst,$src" %}
6693   opcode(0x89);
6694   ins_encode( OpcPRegSS( dst, src ) );
6695   ins_pipe( ialu_mem_reg );
6696 %}
6697 
6698 // Store Long to stack slot
6699 instruct storeSSL(stackSlotL dst, eRegL src) %{
6700   match(Set dst src);
6701 
6702   ins_cost(200);
6703   format %{ "MOV    $dst,$src.lo\n\t"
6704             "MOV    $dst+4,$src.hi" %}
6705   opcode(0x89, 0x89);
6706   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6707   ins_pipe( ialu_mem_long_reg );
6708 %}
6709 
6710 //----------MemBar Instructions-----------------------------------------------
6711 // Memory barrier flavors
6712 
6713 instruct membar_acquire() %{
6714   match(MemBarAcquire);
6715   match(LoadFence);
6716   ins_cost(400);
6717 
6718   size(0);
6719   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6720   ins_encode();
6721   ins_pipe(empty);
6722 %}
6723 
6724 instruct membar_acquire_lock() %{
6725   match(MemBarAcquireLock);
6726   ins_cost(0);
6727 
6728   size(0);
6729   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6730   ins_encode( );
6731   ins_pipe(empty);
6732 %}
6733 
6734 instruct membar_release() %{
6735   match(MemBarRelease);
6736   match(StoreFence);
6737   ins_cost(400);
6738 
6739   size(0);
6740   format %{ "MEMBAR-release ! (empty encoding)" %}
6741   ins_encode( );
6742   ins_pipe(empty);
6743 %}
6744 
6745 instruct membar_release_lock() %{
6746   match(MemBarReleaseLock);
6747   ins_cost(0);
6748 
6749   size(0);
6750   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6751   ins_encode( );
6752   ins_pipe(empty);
6753 %}
6754 
6755 instruct membar_volatile(eFlagsReg cr) %{
6756   match(MemBarVolatile);
6757   effect(KILL cr);
6758   ins_cost(400);
6759 
6760   format %{
6761     $$template
6762     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6763   %}
6764   ins_encode %{
6765     __ membar(Assembler::StoreLoad);
6766   %}
6767   ins_pipe(pipe_slow);
6768 %}
6769 
6770 instruct unnecessary_membar_volatile() %{
6771   match(MemBarVolatile);
6772   predicate(Matcher::post_store_load_barrier(n));
6773   ins_cost(0);
6774 
6775   size(0);
6776   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6777   ins_encode( );
6778   ins_pipe(empty);
6779 %}
6780 
6781 instruct membar_storestore() %{
6782   match(MemBarStoreStore);
6783   ins_cost(0);
6784 
6785   size(0);
6786   format %{ "MEMBAR-storestore (empty encoding)" %}
6787   ins_encode( );
6788   ins_pipe(empty);
6789 %}
6790 
6791 //----------Move Instructions--------------------------------------------------
6792 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6793   match(Set dst (CastX2P src));
6794   format %{ "# X2P  $dst, $src" %}
6795   ins_encode( /*empty encoding*/ );
6796   ins_cost(0);
6797   ins_pipe(empty);
6798 %}
6799 
6800 instruct castP2X(rRegI dst, eRegP src ) %{
6801   match(Set dst (CastP2X src));
6802   ins_cost(50);
6803   format %{ "MOV    $dst, $src\t# CastP2X" %}
6804   ins_encode( enc_Copy( dst, src) );
6805   ins_pipe( ialu_reg_reg );
6806 %}
6807 
6808 //----------Conditional Move---------------------------------------------------
6809 // Conditional move
6810 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6811   predicate(!VM_Version::supports_cmov() );
6812   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6813   ins_cost(200);
6814   format %{ "J$cop,us skip\t# signed cmove\n\t"
6815             "MOV    $dst,$src\n"
6816       "skip:" %}
6817   ins_encode %{
6818     Label Lskip;
6819     // Invert sense of branch from sense of CMOV
6820     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6821     __ movl($dst$$Register, $src$$Register);
6822     __ bind(Lskip);
6823   %}
6824   ins_pipe( pipe_cmov_reg );
6825 %}
6826 
6827 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6828   predicate(!VM_Version::supports_cmov() );
6829   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6830   ins_cost(200);
6831   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6832             "MOV    $dst,$src\n"
6833       "skip:" %}
6834   ins_encode %{
6835     Label Lskip;
6836     // Invert sense of branch from sense of CMOV
6837     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6838     __ movl($dst$$Register, $src$$Register);
6839     __ bind(Lskip);
6840   %}
6841   ins_pipe( pipe_cmov_reg );
6842 %}
6843 
6844 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6845   predicate(VM_Version::supports_cmov() );
6846   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6847   ins_cost(200);
6848   format %{ "CMOV$cop $dst,$src" %}
6849   opcode(0x0F,0x40);
6850   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6851   ins_pipe( pipe_cmov_reg );
6852 %}
6853 
6854 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6855   predicate(VM_Version::supports_cmov() );
6856   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   format %{ "CMOV$cop $dst,$src" %}
6859   opcode(0x0F,0x40);
6860   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6861   ins_pipe( pipe_cmov_reg );
6862 %}
6863 
6864 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6865   predicate(VM_Version::supports_cmov() );
6866   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6867   ins_cost(200);
6868   expand %{
6869     cmovI_regU(cop, cr, dst, src);
6870   %}
6871 %}
6872 
6873 // Conditional move
6874 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6875   predicate(VM_Version::supports_cmov() );
6876   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6877   ins_cost(250);
6878   format %{ "CMOV$cop $dst,$src" %}
6879   opcode(0x0F,0x40);
6880   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6881   ins_pipe( pipe_cmov_mem );
6882 %}
6883 
6884 // Conditional move
6885 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6886   predicate(VM_Version::supports_cmov() );
6887   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6888   ins_cost(250);
6889   format %{ "CMOV$cop $dst,$src" %}
6890   opcode(0x0F,0x40);
6891   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6892   ins_pipe( pipe_cmov_mem );
6893 %}
6894 
6895 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6896   predicate(VM_Version::supports_cmov() );
6897   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6898   ins_cost(250);
6899   expand %{
6900     cmovI_memU(cop, cr, dst, src);
6901   %}
6902 %}
6903 
6904 // Conditional move
6905 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6906   predicate(VM_Version::supports_cmov() );
6907   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6908   ins_cost(200);
6909   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6910   opcode(0x0F,0x40);
6911   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6912   ins_pipe( pipe_cmov_reg );
6913 %}
6914 
6915 // Conditional move (non-P6 version)
6916 // Note:  a CMoveP is generated for  stubs and native wrappers
6917 //        regardless of whether we are on a P6, so we
6918 //        emulate a cmov here
6919 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6920   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6921   ins_cost(300);
6922   format %{ "Jn$cop   skip\n\t"
6923           "MOV    $dst,$src\t# pointer\n"
6924       "skip:" %}
6925   opcode(0x8b);
6926   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6927   ins_pipe( pipe_cmov_reg );
6928 %}
6929 
6930 // Conditional move
6931 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6932   predicate(VM_Version::supports_cmov() );
6933   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6934   ins_cost(200);
6935   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6936   opcode(0x0F,0x40);
6937   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6938   ins_pipe( pipe_cmov_reg );
6939 %}
6940 
6941 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6942   predicate(VM_Version::supports_cmov() );
6943   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   expand %{
6946     cmovP_regU(cop, cr, dst, src);
6947   %}
6948 %}
6949 
6950 // DISABLED: Requires the ADLC to emit a bottom_type call that
6951 // correctly meets the two pointer arguments; one is an incoming
6952 // register but the other is a memory operand.  ALSO appears to
6953 // be buggy with implicit null checks.
6954 //
6955 //// Conditional move
6956 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6957 //  predicate(VM_Version::supports_cmov() );
6958 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6959 //  ins_cost(250);
6960 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6961 //  opcode(0x0F,0x40);
6962 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6963 //  ins_pipe( pipe_cmov_mem );
6964 //%}
6965 //
6966 //// Conditional move
6967 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6968 //  predicate(VM_Version::supports_cmov() );
6969 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6970 //  ins_cost(250);
6971 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6972 //  opcode(0x0F,0x40);
6973 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6974 //  ins_pipe( pipe_cmov_mem );
6975 //%}
6976 
6977 // Conditional move
6978 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6979   predicate(UseSSE<=1);
6980   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6981   ins_cost(200);
6982   format %{ "FCMOV$cop $dst,$src\t# double" %}
6983   opcode(0xDA);
6984   ins_encode( enc_cmov_dpr(cop,src) );
6985   ins_pipe( pipe_cmovDPR_reg );
6986 %}
6987 
6988 // Conditional move
6989 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6990   predicate(UseSSE==0);
6991   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   format %{ "FCMOV$cop $dst,$src\t# float" %}
6994   opcode(0xDA);
6995   ins_encode( enc_cmov_dpr(cop,src) );
6996   ins_pipe( pipe_cmovDPR_reg );
6997 %}
6998 
6999 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7000 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7001   predicate(UseSSE<=1);
7002   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "Jn$cop   skip\n\t"
7005             "MOV    $dst,$src\t# double\n"
7006       "skip:" %}
  opcode(0xDD, 0x3);      /* DD D8+i or DD /3 */
7008   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7009   ins_pipe( pipe_cmovDPR_reg );
7010 %}
7011 
7012 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7013 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7014   predicate(UseSSE==0);
7015   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7016   ins_cost(200);
7017   format %{ "Jn$cop    skip\n\t"
7018             "MOV    $dst,$src\t# float\n"
7019       "skip:" %}
  opcode(0xDD, 0x3);      /* DD D8+i or DD /3 */
7021   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7022   ins_pipe( pipe_cmovDPR_reg );
7023 %}
7024 
7025 // No CMOVE with SSE/SSE2
7026 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7027   predicate (UseSSE>=1);
7028   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7029   ins_cost(200);
7030   format %{ "Jn$cop   skip\n\t"
7031             "MOVSS  $dst,$src\t# float\n"
7032       "skip:" %}
7033   ins_encode %{
7034     Label skip;
7035     // Invert sense of branch from sense of CMOV
7036     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7037     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7038     __ bind(skip);
7039   %}
7040   ins_pipe( pipe_slow );
7041 %}
7042 
7043 // No CMOVE with SSE/SSE2
7044 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7045   predicate (UseSSE>=2);
7046   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7047   ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
7050       "skip:" %}
7051   ins_encode %{
7052     Label skip;
7053     // Invert sense of branch from sense of CMOV
7054     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7055     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7056     __ bind(skip);
7057   %}
7058   ins_pipe( pipe_slow );
7059 %}
7060 
7061 // unsigned version
7062 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7063   predicate (UseSSE>=1);
7064   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7065   ins_cost(200);
7066   format %{ "Jn$cop   skip\n\t"
7067             "MOVSS  $dst,$src\t# float\n"
7068       "skip:" %}
7069   ins_encode %{
7070     Label skip;
7071     // Invert sense of branch from sense of CMOV
7072     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7073     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7074     __ bind(skip);
7075   %}
7076   ins_pipe( pipe_slow );
7077 %}
7078 
7079 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7080   predicate (UseSSE>=1);
7081   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7082   ins_cost(200);
7083   expand %{
7084     fcmovF_regU(cop, cr, dst, src);
7085   %}
7086 %}
7087 
7088 // unsigned version
7089 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7090   predicate (UseSSE>=2);
7091   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7092   ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
7095       "skip:" %}
7096   ins_encode %{
7097     Label skip;
7098     // Invert sense of branch from sense of CMOV
7099     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7100     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7101     __ bind(skip);
7102   %}
7103   ins_pipe( pipe_slow );
7104 %}
7105 
7106 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7107   predicate (UseSSE>=2);
7108   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7109   ins_cost(200);
7110   expand %{
7111     fcmovD_regU(cop, cr, dst, src);
7112   %}
7113 %}
7114 
7115 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7116   predicate(VM_Version::supports_cmov() );
7117   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7118   ins_cost(200);
7119   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7120             "CMOV$cop $dst.hi,$src.hi" %}
7121   opcode(0x0F,0x40);
7122   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7123   ins_pipe( pipe_cmov_reg_long );
7124 %}
7125 
7126 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7127   predicate(VM_Version::supports_cmov() );
7128   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7129   ins_cost(200);
7130   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7131             "CMOV$cop $dst.hi,$src.hi" %}
7132   opcode(0x0F,0x40);
7133   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7134   ins_pipe( pipe_cmov_reg_long );
7135 %}
7136 
7137 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7138   predicate(VM_Version::supports_cmov() );
7139   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7140   ins_cost(200);
7141   expand %{
7142     cmovL_regU(cop, cr, dst, src);
7143   %}
7144 %}
7145 
7146 //----------Arithmetic Instructions--------------------------------------------
7147 //----------Addition Instructions----------------------------------------------
7148 
7149 // Integer Addition Instructions
7150 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7151   match(Set dst (AddI dst src));
7152   effect(KILL cr);
7153 
7154   size(2);
7155   format %{ "ADD    $dst,$src" %}
7156   opcode(0x03);
7157   ins_encode( OpcP, RegReg( dst, src) );
7158   ins_pipe( ialu_reg_reg );
7159 %}
7160 
7161 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7162   match(Set dst (AddI dst src));
7163   effect(KILL cr);
7164 
7165   format %{ "ADD    $dst,$src" %}
7166   opcode(0x81, 0x00); /* /0 id */
7167   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7168   ins_pipe( ialu_reg );
7169 %}
7170 
7171 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7172   predicate(UseIncDec);
7173   match(Set dst (AddI dst src));
7174   effect(KILL cr);
7175 
7176   size(1);
7177   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd: INC r32 */
7179   ins_encode( Opc_plus( primary, dst ) );
7180   ins_pipe( ialu_reg );
7181 %}
7182 
7183 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7184   match(Set dst (AddI src0 src1));
7185   ins_cost(110);
7186 
7187   format %{ "LEA    $dst,[$src0 + $src1]" %}
7188   opcode(0x8D); /* 0x8D /r */
7189   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7190   ins_pipe( ialu_reg_reg );
7191 %}
7192 
7193 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7194   match(Set dst (AddP src0 src1));
7195   ins_cost(110);
7196 
7197   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7198   opcode(0x8D); /* 0x8D /r */
7199   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7200   ins_pipe( ialu_reg_reg );
7201 %}
7202 
7203 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7204   predicate(UseIncDec);
7205   match(Set dst (AddI dst src));
7206   effect(KILL cr);
7207 
7208   size(1);
7209   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd: DEC r32 */
7211   ins_encode( Opc_plus( primary, dst ) );
7212   ins_pipe( ialu_reg );
7213 %}
7214 
7215 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7216   match(Set dst (AddP dst src));
7217   effect(KILL cr);
7218 
7219   size(2);
7220   format %{ "ADD    $dst,$src" %}
7221   opcode(0x03);
7222   ins_encode( OpcP, RegReg( dst, src) );
7223   ins_pipe( ialu_reg_reg );
7224 %}
7225 
7226 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7227   match(Set dst (AddP dst src));
7228   effect(KILL cr);
7229 
7230   format %{ "ADD    $dst,$src" %}
7231   opcode(0x81,0x00); /* Opcode 81 /0 id */
7232   // ins_encode( RegImm( dst, src) );
7233   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7234   ins_pipe( ialu_reg );
7235 %}
7236 
7237 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7238   match(Set dst (AddI dst (LoadI src)));
7239   effect(KILL cr);
7240 
7241   ins_cost(125);
7242   format %{ "ADD    $dst,$src" %}
7243   opcode(0x03);
7244   ins_encode( OpcP, RegMem( dst, src) );
7245   ins_pipe( ialu_reg_mem );
7246 %}
7247 
7248 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7249   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7250   effect(KILL cr);
7251 
7252   ins_cost(150);
7253   format %{ "ADD    $dst,$src" %}
7254   opcode(0x01);  /* Opcode 01 /r */
7255   ins_encode( OpcP, RegMem( src, dst ) );
7256   ins_pipe( ialu_mem_reg );
7257 %}
7258 
7259 // Add Memory with Immediate
7260 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7261   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7262   effect(KILL cr);
7263 
7264   ins_cost(125);
7265   format %{ "ADD    $dst,$src" %}
7266   opcode(0x81);               /* Opcode 81 /0 id */
7267   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7268   ins_pipe( ialu_mem_imm );
7269 %}
7270 
7271 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7272   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7273   effect(KILL cr);
7274 
7275   ins_cost(125);
7276   format %{ "INC    $dst" %}
7277   opcode(0xFF);               /* Opcode FF /0 */
7278   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7279   ins_pipe( ialu_mem_imm );
7280 %}
7281 
7282 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7283   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7284   effect(KILL cr);
7285 
7286   ins_cost(125);
7287   format %{ "DEC    $dst" %}
7288   opcode(0xFF);               /* Opcode FF /1 */
7289   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7290   ins_pipe( ialu_mem_imm );
7291 %}
7292 
7293 
7294 instruct checkCastPP( eRegP dst ) %{
7295   match(Set dst (CheckCastPP dst));
7296 
7297   size(0);
7298   format %{ "#checkcastPP of $dst" %}
7299   ins_encode( /*empty encoding*/ );
7300   ins_pipe( empty );
7301 %}
7302 
7303 instruct castPP( eRegP dst ) %{
7304   match(Set dst (CastPP dst));
7305   format %{ "#castPP of $dst" %}
7306   ins_encode( /*empty encoding*/ );
7307   ins_pipe( empty );
7308 %}
7309 
7310 instruct castII( rRegI dst ) %{
7311   match(Set dst (CastII dst));
7312   format %{ "#castII of $dst" %}
7313   ins_encode( /*empty encoding*/ );
7314   ins_cost(0);
7315   ins_pipe( empty );
7316 %}
7317 
7318 
7319 // Load-locked - same as a regular pointer load when used with compare-swap
7320 instruct loadPLocked(eRegP dst, memory mem) %{
7321   match(Set dst (LoadPLocked mem));
7322 
7323   ins_cost(125);
7324   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7325   opcode(0x8B);
7326   ins_encode( OpcP, RegMem(dst,mem));
7327   ins_pipe( ialu_reg_mem );
7328 %}
7329 
7330 // Conditional-store of the updated heap-top.
7331 // Used during allocation of the shared heap.
7332 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7333 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7334   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7335   // EAX is killed if there is contention, but then it's also unused.
7336   // In the common case of no contention, EAX holds the new oop address.
7337   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7338   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7339   ins_pipe( pipe_cmpxchg );
7340 %}
7341 
7342 // Conditional-store of an int value.
7343 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7344 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7345   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7346   effect(KILL oldval);
7347   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7348   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7349   ins_pipe( pipe_cmpxchg );
7350 %}
7351 
7352 // Conditional-store of a long value.
7353 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7354 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7355   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7356   effect(KILL oldval);
7357   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7358             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7359             "XCHG   EBX,ECX"
7360   %}
7361   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high-order word of the new value to store but
    //       our register encoding uses rbx.
7366     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7367     __ lock();
7368     __ cmpxchg8($mem$$Address);
7369     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7370   %}
7371   ins_pipe( pipe_cmpxchg );
7372 %}
7373 
7374 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7375 
7376 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7377   predicate(VM_Version::supports_cx8());
7378   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7379   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7380   effect(KILL cr, KILL oldval);
7381   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7382             "MOV    $res,0\n\t"
7383             "JNE,s  fail\n\t"
7384             "MOV    $res,1\n"
7385           "fail:" %}
7386   ins_encode( enc_cmpxchg8(mem_ptr),
7387               enc_flags_ne_to_boolean(res) );
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7392   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7393   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7394   effect(KILL cr, KILL oldval);
7395   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7396             "MOV    $res,0\n\t"
7397             "JNE,s  fail\n\t"
7398             "MOV    $res,1\n"
7399           "fail:" %}
7400   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7401   ins_pipe( pipe_cmpxchg );
7402 %}
7403 
7404 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7405   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7406   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7407   effect(KILL cr, KILL oldval);
7408   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7409             "MOV    $res,0\n\t"
7410             "JNE,s  fail\n\t"
7411             "MOV    $res,1\n"
7412           "fail:" %}
7413   ins_encode( enc_cmpxchgb(mem_ptr),
7414               enc_flags_ne_to_boolean(res) );
7415   ins_pipe( pipe_cmpxchg );
7416 %}
7417 
7418 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7419   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7420   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7421   effect(KILL cr, KILL oldval);
7422   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7423             "MOV    $res,0\n\t"
7424             "JNE,s  fail\n\t"
7425             "MOV    $res,1\n"
7426           "fail:" %}
7427   ins_encode( enc_cmpxchgw(mem_ptr),
7428               enc_flags_ne_to_boolean(res) );
7429   ins_pipe( pipe_cmpxchg );
7430 %}
7431 
7432 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7433   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7434   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7435   effect(KILL cr, KILL oldval);
7436   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7437             "MOV    $res,0\n\t"
7438             "JNE,s  fail\n\t"
7439             "MOV    $res,1\n"
7440           "fail:" %}
7441   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7442   ins_pipe( pipe_cmpxchg );
7443 %}
7444 
7445 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7446   predicate(VM_Version::supports_cx8());
7447   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7448   effect(KILL cr);
7449   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7450   ins_encode( enc_cmpxchg8(mem_ptr) );
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7455   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7456   effect(KILL cr);
7457   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7458   ins_encode( enc_cmpxchg(mem_ptr) );
7459   ins_pipe( pipe_cmpxchg );
7460 %}
7461 
7462 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7463   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7464   effect(KILL cr);
7465   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7466   ins_encode( enc_cmpxchgb(mem_ptr) );
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
7470 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7471   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7472   effect(KILL cr);
7473   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7474   ins_encode( enc_cmpxchgw(mem_ptr) );
7475   ins_pipe( pipe_cmpxchg );
7476 %}
7477 
7478 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7479   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7480   effect(KILL cr);
7481   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7482   ins_encode( enc_cmpxchg(mem_ptr) );
7483   ins_pipe( pipe_cmpxchg );
7484 %}
7485 
7486 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7487   predicate(n->as_LoadStore()->result_not_used());
7488   match(Set dummy (GetAndAddB mem add));
7489   effect(KILL cr);
7490   format %{ "ADDB  [$mem],$add" %}
7491   ins_encode %{
7492     __ lock();
7493     __ addb($mem$$Address, $add$$constant);
7494   %}
7495   ins_pipe( pipe_cmpxchg );
7496 %}
7497 
7498 // Important to match to xRegI: only 8-bit regs.
7499 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7500   match(Set newval (GetAndAddB mem newval));
7501   effect(KILL cr);
7502   format %{ "XADDB  [$mem],$newval" %}
7503   ins_encode %{
7504     __ lock();
7505     __ xaddb($mem$$Address, $newval$$Register);
7506   %}
7507   ins_pipe( pipe_cmpxchg );
7508 %}
7509 
7510 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7511   predicate(n->as_LoadStore()->result_not_used());
7512   match(Set dummy (GetAndAddS mem add));
7513   effect(KILL cr);
  format %{ "ADDW  [$mem],$add" %}
7515   ins_encode %{
7516     __ lock();
7517     __ addw($mem$$Address, $add$$constant);
7518   %}
7519   ins_pipe( pipe_cmpxchg );
7520 %}
7521 
7522 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7523   match(Set newval (GetAndAddS mem newval));
7524   effect(KILL cr);
  format %{ "XADDW  [$mem],$newval" %}
7526   ins_encode %{
7527     __ lock();
7528     __ xaddw($mem$$Address, $newval$$Register);
7529   %}
7530   ins_pipe( pipe_cmpxchg );
7531 %}
7532 
7533 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7534   predicate(n->as_LoadStore()->result_not_used());
7535   match(Set dummy (GetAndAddI mem add));
7536   effect(KILL cr);
7537   format %{ "ADDL  [$mem],$add" %}
7538   ins_encode %{
7539     __ lock();
7540     __ addl($mem$$Address, $add$$constant);
7541   %}
7542   ins_pipe( pipe_cmpxchg );
7543 %}
7544 
7545 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7546   match(Set newval (GetAndAddI mem newval));
7547   effect(KILL cr);
7548   format %{ "XADDL  [$mem],$newval" %}
7549   ins_encode %{
7550     __ lock();
7551     __ xaddl($mem$$Address, $newval$$Register);
7552   %}
7553   ins_pipe( pipe_cmpxchg );
7554 %}
7555 
7556 // Important to match to xRegI: only 8-bit regs.
7557 instruct xchgB( memory mem, xRegI newval) %{
7558   match(Set newval (GetAndSetB mem newval));
7559   format %{ "XCHGB  $newval,[$mem]" %}
7560   ins_encode %{
7561     __ xchgb($newval$$Register, $mem$$Address);
7562   %}
7563   ins_pipe( pipe_cmpxchg );
7564 %}
7565 
7566 instruct xchgS( memory mem, rRegI newval) %{
7567   match(Set newval (GetAndSetS mem newval));
7568   format %{ "XCHGW  $newval,[$mem]" %}
7569   ins_encode %{
7570     __ xchgw($newval$$Register, $mem$$Address);
7571   %}
7572   ins_pipe( pipe_cmpxchg );
7573 %}
7574 
7575 instruct xchgI( memory mem, rRegI newval) %{
7576   match(Set newval (GetAndSetI mem newval));
7577   format %{ "XCHGL  $newval,[$mem]" %}
7578   ins_encode %{
7579     __ xchgl($newval$$Register, $mem$$Address);
7580   %}
7581   ins_pipe( pipe_cmpxchg );
7582 %}
7583 
7584 instruct xchgP( memory mem, pRegP newval) %{
7585   match(Set newval (GetAndSetP mem newval));
7586   format %{ "XCHGL  $newval,[$mem]" %}
7587   ins_encode %{
7588     __ xchgl($newval$$Register, $mem$$Address);
7589   %}
7590   ins_pipe( pipe_cmpxchg );
7591 %}
7592 
7593 //----------Subtraction Instructions-------------------------------------------
7594 
7595 // Integer Subtraction Instructions
7596 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7597   match(Set dst (SubI dst src));
7598   effect(KILL cr);
7599 
7600   size(2);
7601   format %{ "SUB    $dst,$src" %}
7602   opcode(0x2B);
7603   ins_encode( OpcP, RegReg( dst, src) );
7604   ins_pipe( ialu_reg_reg );
7605 %}
7606 
7607 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7608   match(Set dst (SubI dst src));
7609   effect(KILL cr);
7610 
7611   format %{ "SUB    $dst,$src" %}
7612   opcode(0x81,0x05);  /* Opcode 81 /5 */
7613   // ins_encode( RegImm( dst, src) );
7614   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7615   ins_pipe( ialu_reg );
7616 %}
7617 
7618 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7619   match(Set dst (SubI dst (LoadI src)));
7620   effect(KILL cr);
7621 
7622   ins_cost(125);
7623   format %{ "SUB    $dst,$src" %}
7624   opcode(0x2B);
7625   ins_encode( OpcP, RegMem( dst, src) );
7626   ins_pipe( ialu_reg_mem );
7627 %}
7628 
7629 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7630   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7631   effect(KILL cr);
7632 
7633   ins_cost(150);
7634   format %{ "SUB    $dst,$src" %}
7635   opcode(0x29);  /* Opcode 29 /r */
7636   ins_encode( OpcP, RegMem( src, dst ) );
7637   ins_pipe( ialu_mem_reg );
7638 %}
7639 
7640 // Subtract from a pointer
7641 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7642   match(Set dst (AddP dst (SubI zero src)));
7643   effect(KILL cr);
7644 
7645   size(2);
7646   format %{ "SUB    $dst,$src" %}
7647   opcode(0x2B);
7648   ins_encode( OpcP, RegReg( dst, src) );
7649   ins_pipe( ialu_reg_reg );
7650 %}
7651 
7652 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7653   match(Set dst (SubI zero dst));
7654   effect(KILL cr);
7655 
7656   size(2);
7657   format %{ "NEG    $dst" %}
7658   opcode(0xF7,0x03);  // Opcode F7 /3
7659   ins_encode( OpcP, RegOpc( dst ) );
7660   ins_pipe( ialu_reg );
7661 %}
7662 
7663 //----------Multiplication/Division Instructions-------------------------------
7664 // Integer Multiplication Instructions
7665 // Multiply Register
7666 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7667   match(Set dst (MulI dst src));
7668   effect(KILL cr);
7669 
7670   size(3);
7671   ins_cost(300);
7672   format %{ "IMUL   $dst,$src" %}
7673   opcode(0xAF, 0x0F);
7674   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7675   ins_pipe( ialu_reg_reg_alu0 );
7676 %}
7677 
7678 // Multiply 32-bit Immediate
7679 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7680   match(Set dst (MulI src imm));
7681   effect(KILL cr);
7682 
7683   ins_cost(300);
7684   format %{ "IMUL   $dst,$src,$imm" %}
7685   opcode(0x69);  /* 69 /r id */
7686   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7687   ins_pipe( ialu_reg_reg_alu0 );
7688 %}
7689 
7690 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7691   match(Set dst src);
7692   effect(KILL cr);
7693 
7694   // Note that this is artificially increased to make it more expensive than loadConL
7695   ins_cost(250);
7696   format %{ "MOV    EAX,$src\t// low word only" %}
7697   opcode(0xB8);
7698   ins_encode( LdImmL_Lo(dst, src) );
7699   ins_pipe( ialu_reg_fat );
7700 %}
7701 
7702 // Multiply by 32-bit Immediate, taking the shifted high order results
7703 //  (special case for shift by 32)
7704 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7705   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7706   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7707              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7708              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7709   effect(USE src1, KILL cr);
7710 
7711   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7712   ins_cost(0*100 + 1*400 - 150);
7713   format %{ "IMUL   EDX:EAX,$src1" %}
7714   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7715   ins_pipe( pipe_slow );
7716 %}
7717 
7718 // Multiply by 32-bit Immediate, taking the shifted high order results
7719 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7720   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7721   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7722              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7723              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7724   effect(USE src1, KILL cr);
7725 
7726   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7727   ins_cost(1*100 + 1*400 - 150);
7728   format %{ "IMUL   EDX:EAX,$src1\n\t"
7729             "SAR    EDX,$cnt-32" %}
7730   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7731   ins_pipe( pipe_slow );
7732 %}
7733 
7734 // Multiply Memory 32-bit Immediate
7735 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7736   match(Set dst (MulI (LoadI src) imm));
7737   effect(KILL cr);
7738 
7739   ins_cost(300);
7740   format %{ "IMUL   $dst,$src,$imm" %}
7741   opcode(0x69);  /* 69 /r id */
7742   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7743   ins_pipe( ialu_reg_mem_alu0 );
7744 %}
7745 
7746 // Multiply Memory
7747 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7748   match(Set dst (MulI dst (LoadI src)));
7749   effect(KILL cr);
7750 
7751   ins_cost(350);
7752   format %{ "IMUL   $dst,$src" %}
7753   opcode(0xAF, 0x0F);
7754   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7755   ins_pipe( ialu_reg_mem_alu0 );
7756 %}
7757 
7758 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7759 %{
7760   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7761   effect(KILL cr, KILL src2);
7762 
  expand %{
    mulI_eReg(dst, src1, cr);
    mulI_eReg(src2, src3, cr);
    addI_eReg(dst, src2, cr);
  %}
7766 %}
7767 
7768 // Multiply Register Int to Long
7769 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7770   // Basic Idea: long = (long)int * (long)int
7771   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7772   effect(DEF dst, USE src, USE src1, KILL flags);
7773 
7774   ins_cost(300);
7775   format %{ "IMUL   $dst,$src1" %}
7776 
7777   ins_encode( long_int_multiply( dst, src1 ) );
7778   ins_pipe( ialu_reg_reg_alu0 );
7779 %}
7780 
7781 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7782   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7783   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7784   effect(KILL flags);
7785 
7786   ins_cost(300);
7787   format %{ "MUL    $dst,$src1" %}
7788 
7789   ins_encode( long_uint_multiply(dst, src1) );
7790   ins_pipe( ialu_reg_reg_alu0 );
7791 %}
7792 
7793 // Multiply Register Long
7794 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7795   match(Set dst (MulL dst src));
7796   effect(KILL cr, TEMP tmp);
7797   ins_cost(4*100+3*400);
7798 // Basic idea: lo(result) = lo(x_lo * y_lo)
7799 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7800   format %{ "MOV    $tmp,$src.lo\n\t"
7801             "IMUL   $tmp,EDX\n\t"
7802             "MOV    EDX,$src.hi\n\t"
7803             "IMUL   EDX,EAX\n\t"
7804             "ADD    $tmp,EDX\n\t"
7805             "MUL    EDX:EAX,$src.lo\n\t"
7806             "ADD    EDX,$tmp" %}
7807   ins_encode( long_multiply( dst, src, tmp ) );
7808   ins_pipe( pipe_slow );
7809 %}
7810 
7811 // Multiply Register Long where the left operand's high 32 bits are zero
7812 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7813   predicate(is_operand_hi32_zero(n->in(1)));
7814   match(Set dst (MulL dst src));
7815   effect(KILL cr, TEMP tmp);
7816   ins_cost(2*100+2*400);
7817 // Basic idea: lo(result) = lo(x_lo * y_lo)
7818 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7819   format %{ "MOV    $tmp,$src.hi\n\t"
7820             "IMUL   $tmp,EAX\n\t"
7821             "MUL    EDX:EAX,$src.lo\n\t"
7822             "ADD    EDX,$tmp" %}
7823   ins_encode %{
7824     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7825     __ imull($tmp$$Register, rax);
7826     __ mull($src$$Register);
7827     __ addl(rdx, $tmp$$Register);
7828   %}
7829   ins_pipe( pipe_slow );
7830 %}
7831 
7832 // Multiply Register Long where the right operand's high 32 bits are zero
7833 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7834   predicate(is_operand_hi32_zero(n->in(2)));
7835   match(Set dst (MulL dst src));
7836   effect(KILL cr, TEMP tmp);
7837   ins_cost(2*100+2*400);
7838 // Basic idea: lo(result) = lo(x_lo * y_lo)
7839 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7840   format %{ "MOV    $tmp,$src.lo\n\t"
7841             "IMUL   $tmp,EDX\n\t"
7842             "MUL    EDX:EAX,$src.lo\n\t"
7843             "ADD    EDX,$tmp" %}
7844   ins_encode %{
7845     __ movl($tmp$$Register, $src$$Register);
7846     __ imull($tmp$$Register, rdx);
7847     __ mull($src$$Register);
7848     __ addl(rdx, $tmp$$Register);
7849   %}
7850   ins_pipe( pipe_slow );
7851 %}
7852 
7853 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7854 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7855   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7856   match(Set dst (MulL dst src));
7857   effect(KILL cr);
7858   ins_cost(1*400);
7859 // Basic idea: lo(result) = lo(x_lo * y_lo)
7860 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7861   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7862   ins_encode %{
7863     __ mull($src$$Register);
7864   %}
7865   ins_pipe( pipe_slow );
7866 %}
7867 
7868 // Multiply Register Long by small constant
7869 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7870   match(Set dst (MulL dst src));
7871   effect(KILL cr, TEMP tmp);
7872   ins_cost(2*100+2*400);
7873   size(12);
7874 // Basic idea: lo(result) = lo(src * EAX)
7875 //             hi(result) = hi(src * EAX) + lo(src * EDX)
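// Illustrative check: EDX:EAX = 2*2^32 + 3 (hi=2, lo=3), src = 5:
//   lo = lo(5*3) = 15, hi = hi(5*3) + lo(5*2) = 0 + 10, i.e. 10*2^32 + 15 = 5*(2*2^32 + 3).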
7876   format %{ "IMUL   $tmp,EDX,$src\n\t"
7877             "MOV    EDX,$src\n\t"
7878             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7879             "ADD    EDX,$tmp" %}
7880   ins_encode( long_multiply_con( dst, src, tmp ) );
7881   ins_pipe( pipe_slow );
7882 %}
7883 
7884 // Integer DIV with Register
7885 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7886   match(Set rax (DivI rax div));
7887   effect(KILL rdx, KILL cr);
7888   size(26);
7889   ins_cost(30*100+10*100);
7890   format %{ "CMP    EAX,0x80000000\n\t"
7891             "JNE,s  normal\n\t"
7892             "XOR    EDX,EDX\n\t"
7893             "CMP    ECX,-1\n\t"
7894             "JE,s   done\n"
7895     "normal: CDQ\n\t"
7896             "IDIV   $div\n\t"
7897     "done:"        %}
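  // The explicit compare guards the one case where IDIV itself would trap:
  // min_jint / -1 overflows and raises #DE, so it is special-cased to return
  // min_jint with a zero remainder (EDX cleared), as Java requires.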
7898   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7899   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7900   ins_pipe( ialu_reg_reg_alu0 );
7901 %}
7902 
7903 // Divide Register Long
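// IA-32 has no 64/64-bit divide instruction, so the general case is handed to
// the SharedRuntime::ldiv helper; only division by a 32-bit constant (further
// below) is expanded inline.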
7904 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7905   match(Set dst (DivL src1 src2));
7906   effect( KILL cr, KILL cx, KILL bx );
7907   ins_cost(10000);
7908   format %{ "PUSH   $src1.hi\n\t"
7909             "PUSH   $src1.lo\n\t"
7910             "PUSH   $src2.hi\n\t"
7911             "PUSH   $src2.lo\n\t"
7912             "CALL   SharedRuntime::ldiv\n\t"
7913             "ADD    ESP,16" %}
7914   ins_encode( long_div(src1,src2) );
7915   ins_pipe( pipe_slow );
7916 %}
7917 
7918 // Integer DIVMOD with Register, both quotient and mod results
7919 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7920   match(DivModI rax div);
7921   effect(KILL cr);
7922   size(26);
7923   ins_cost(30*100+10*100);
7924   format %{ "CMP    EAX,0x80000000\n\t"
7925             "JNE,s  normal\n\t"
7926             "XOR    EDX,EDX\n\t"
7927             "CMP    ECX,-1\n\t"
7928             "JE,s   done\n"
7929     "normal: CDQ\n\t"
7930             "IDIV   $div\n\t"
7931     "done:"        %}
7932   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7933   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7934   ins_pipe( pipe_slow );
7935 %}
7936 
7937 // Integer MOD with Register
7938 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7939   match(Set rdx (ModI rax div));
7940   effect(KILL rax, KILL cr);
7941 
7942   size(26);
7943   ins_cost(300);
7944   format %{ "CDQ\n\t"
7945             "IDIV   $div" %}
7946   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7947   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7948   ins_pipe( ialu_reg_reg_alu0 );
7949 %}
7950 
7951 // Remainder Register Long
7952 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7953   match(Set dst (ModL src1 src2));
7954   effect( KILL cr, KILL cx, KILL bx );
7955   ins_cost(10000);
7956   format %{ "PUSH   $src1.hi\n\t"
7957             "PUSH   $src1.lo\n\t"
7958             "PUSH   $src2.hi\n\t"
7959             "PUSH   $src2.lo\n\t"
7960             "CALL   SharedRuntime::lrem\n\t"
7961             "ADD    ESP,16" %}
7962   ins_encode( long_mod(src1,src2) );
7963   ins_pipe( pipe_slow );
7964 %}
7965 
7966 // Divide Register Long by a 32-bit constant (no MIN_VALUE/-1 overflow special case is needed, since the divisor is never -1 here)
7967 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7968   match(Set dst (DivL dst imm));
7969   effect( TEMP tmp, TEMP tmp2, KILL cr );
7970   ins_cost(1000);
7971   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7972             "XOR    $tmp2,$tmp2\n\t"
7973             "CMP    $tmp,EDX\n\t"
7974             "JA,s   fast\n\t"
7975             "MOV    $tmp2,EAX\n\t"
7976             "MOV    EAX,EDX\n\t"
7977             "MOV    EDX,0\n\t"
7978             "JLE,s  pos\n\t"
7979             "LNEG   EAX : $tmp2\n\t"
7980             "DIV    $tmp # unsigned division\n\t"
7981             "XCHG   EAX,$tmp2\n\t"
7982             "DIV    $tmp\n\t"
7983             "LNEG   $tmp2 : EAX\n\t"
7984             "JMP,s  done\n"
7985     "pos:\n\t"
7986             "DIV    $tmp\n\t"
7987             "XCHG   EAX,$tmp2\n"
7988     "fast:\n\t"
7989             "DIV    $tmp\n"
7990     "done:\n\t"
7991             "MOV    EDX,$tmp2\n\t"
7992             "NEG    EDX:EAX # if $imm < 0" %}
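  // Essentially schoolbook division with a 32-bit divisor: if |imm| > dst.hi
  // (unsigned) the quotient fits in 32 bits and a single unsigned DIV suffices
  // (fast path; a negative dividend has an unsigned high word >= 2^31, so it
  // never takes it).  Otherwise the high word is divided first and its
  // remainder becomes the high half of the dividend for the second DIV; the
  // two quotients form the 64-bit result.  Negative dividends are negated up
  // front and the result re-negated, and a negative immediate flips the sign
  // of the quotient at the end.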
7993   ins_encode %{
7994     int con = (int)$imm$$constant;
7995     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7996     int pcon = (con > 0) ? con : -con;
7997     Label Lfast, Lpos, Ldone;
7998 
7999     __ movl($tmp$$Register, pcon);
8000     __ xorl($tmp2$$Register,$tmp2$$Register);
8001     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8002     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8003 
8004     __ movl($tmp2$$Register, $dst$$Register); // save
8005     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8006     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8007     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8008 
8009     // Negative dividend.
8010     // convert value to positive to use unsigned division
8011     __ lneg($dst$$Register, $tmp2$$Register);
8012     __ divl($tmp$$Register);
8013     __ xchgl($dst$$Register, $tmp2$$Register);
8014     __ divl($tmp$$Register);
8015     // revert result back to negative
8016     __ lneg($tmp2$$Register, $dst$$Register);
8017     __ jmpb(Ldone);
8018 
8019     __ bind(Lpos);
8020     __ divl($tmp$$Register); // Use unsigned division
8021     __ xchgl($dst$$Register, $tmp2$$Register);
8022     // Fall through to the final divide; tmp2 holds the 32-bit hi result
8023 
8024     __ bind(Lfast);
8025     // fast path: src is positive
8026     __ divl($tmp$$Register); // Use unsigned division
8027 
8028     __ bind(Ldone);
8029     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8030     if (con < 0) {
8031       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8032     }
8033   %}
8034   ins_pipe( pipe_slow );
8035 %}
8036 
8037 // Remainder Register Long (remainder fits into 32 bits)
8038 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8039   match(Set dst (ModL dst imm));
8040   effect( TEMP tmp, TEMP tmp2, KILL cr );
8041   ins_cost(1000);
8042   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8043             "CMP    $tmp,EDX\n\t"
8044             "JA,s   fast\n\t"
8045             "MOV    $tmp2,EAX\n\t"
8046             "MOV    EAX,EDX\n\t"
8047             "MOV    EDX,0\n\t"
8048             "JLE,s  pos\n\t"
8049             "LNEG   EAX : $tmp2\n\t"
8050             "DIV    $tmp # unsigned division\n\t"
8051             "MOV    EAX,$tmp2\n\t"
8052             "DIV    $tmp\n\t"
8053             "NEG    EDX\n\t"
8054             "JMP,s  done\n"
8055     "pos:\n\t"
8056             "DIV    $tmp\n\t"
8057             "MOV    EAX,$tmp2\n"
8058     "fast:\n\t"
8059             "DIV    $tmp\n"
8060     "done:\n\t"
8061             "MOV    EAX,EDX\n\t"
8062             "SAR    EDX,31" %}
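  // The remainder's magnitude is below |imm| and therefore fits in 32 bits;
  // the final MOV/SAR pair sign-extends it into EDX:EAX, so it keeps the sign
  // of the dividend as the Java remainder must.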
8063   ins_encode %{
8064     int con = (int)$imm$$constant;
8065     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8066     int pcon = (con > 0) ? con : -con;
8067     Label  Lfast, Lpos, Ldone;
8068 
8069     __ movl($tmp$$Register, pcon);
8070     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8071     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8072 
8073     __ movl($tmp2$$Register, $dst$$Register); // save
8074     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8075     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8076     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8077 
8078     // Negative dividend.
8079     // convert value to positive to use unsigned division
8080     __ lneg($dst$$Register, $tmp2$$Register);
8081     __ divl($tmp$$Register);
8082     __ movl($dst$$Register, $tmp2$$Register);
8083     __ divl($tmp$$Register);
8084     // revert remainder back to negative
8085     __ negl(HIGH_FROM_LOW($dst$$Register));
8086     __ jmpb(Ldone);
8087 
8088     __ bind(Lpos);
8089     __ divl($tmp$$Register);
8090     __ movl($dst$$Register, $tmp2$$Register);
8091 
8092     __ bind(Lfast);
8093     // fast path: src is positive
8094     __ divl($tmp$$Register);
8095 
8096     __ bind(Ldone);
8097     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8098     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8099 
8100   %}
8101   ins_pipe( pipe_slow );
8102 %}
8103 
8104 // Integer Shift Instructions
8105 // Shift Left by one
8106 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8107   match(Set dst (LShiftI dst shift));
8108   effect(KILL cr);
8109 
8110   size(2);
8111   format %{ "SHL    $dst,$shift" %}
8112   opcode(0xD1, 0x4);  /* D1 /4 */
8113   ins_encode( OpcP, RegOpc( dst ) );
8114   ins_pipe( ialu_reg );
8115 %}
8116 
8117 // Shift Left by 8-bit immediate
8118 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8119   match(Set dst (LShiftI dst shift));
8120   effect(KILL cr);
8121 
8122   size(3);
8123   format %{ "SHL    $dst,$shift" %}
8124   opcode(0xC1, 0x4);  /* C1 /4 ib */
8125   ins_encode( RegOpcImm( dst, shift) );
8126   ins_pipe( ialu_reg );
8127 %}
8128 
8129 // Shift Left by variable
8130 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8131   match(Set dst (LShiftI dst shift));
8132   effect(KILL cr);
8133 
8134   size(2);
8135   format %{ "SHL    $dst,$shift" %}
8136   opcode(0xD3, 0x4);  /* D3 /4 */
8137   ins_encode( OpcP, RegOpc( dst ) );
8138   ins_pipe( ialu_reg_reg );
8139 %}
8140 
8141 // Arithmetic shift right by one
8142 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8143   match(Set dst (RShiftI dst shift));
8144   effect(KILL cr);
8145 
8146   size(2);
8147   format %{ "SAR    $dst,$shift" %}
8148   opcode(0xD1, 0x7);  /* D1 /7 */
8149   ins_encode( OpcP, RegOpc( dst ) );
8150   ins_pipe( ialu_reg );
8151 %}
8152 
8153 // Arithmetic shift right by one
8154 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8155   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8156   effect(KILL cr);
8157   format %{ "SAR    $dst,$shift" %}
8158   opcode(0xD1, 0x7);  /* D1 /7 */
8159   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8160   ins_pipe( ialu_mem_imm );
8161 %}
8162 
8163 // Arithmetic Shift Right by 8-bit immediate
8164 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8165   match(Set dst (RShiftI dst shift));
8166   effect(KILL cr);
8167 
8168   size(3);
8169   format %{ "SAR    $dst,$shift" %}
8170   opcode(0xC1, 0x7);  /* C1 /7 ib */
8171   ins_encode( RegOpcImm( dst, shift ) );
8172   ins_pipe( ialu_mem_imm );
8173 %}
8174 
8175 // Arithmetic Shift Right by 8-bit immediate
8176 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8177   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8178   effect(KILL cr);
8179 
8180   format %{ "SAR    $dst,$shift" %}
8181   opcode(0xC1, 0x7);  /* C1 /7 ib */
8182   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8183   ins_pipe( ialu_mem_imm );
8184 %}
8185 
8186 // Arithmetic Shift Right by variable
8187 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8188   match(Set dst (RShiftI dst shift));
8189   effect(KILL cr);
8190 
8191   size(2);
8192   format %{ "SAR    $dst,$shift" %}
8193   opcode(0xD3, 0x7);  /* D3 /7 */
8194   ins_encode( OpcP, RegOpc( dst ) );
8195   ins_pipe( ialu_reg_reg );
8196 %}
8197 
8198 // Logical shift right by one
8199 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8200   match(Set dst (URShiftI dst shift));
8201   effect(KILL cr);
8202 
8203   size(2);
8204   format %{ "SHR    $dst,$shift" %}
8205   opcode(0xD1, 0x5);  /* D1 /5 */
8206   ins_encode( OpcP, RegOpc( dst ) );
8207   ins_pipe( ialu_reg );
8208 %}
8209 
8210 // Logical Shift Right by 8-bit immediate
8211 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8212   match(Set dst (URShiftI dst shift));
8213   effect(KILL cr);
8214 
8215   size(3);
8216   format %{ "SHR    $dst,$shift" %}
8217   opcode(0xC1, 0x5);  /* C1 /5 ib */
8218   ins_encode( RegOpcImm( dst, shift) );
8219   ins_pipe( ialu_reg );
8220 %}
8221 
8222 
8223 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8224 // This idiom is used by the compiler for the i2b bytecode.
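// (x << 24) >> 24 discards everything but the low byte and sign-extends it,
// which is exactly MOVSX with a byte source -- e.g. x = 0x000001FF:
// << 24 gives 0xFF000000, arithmetic >> 24 gives 0xFFFFFFFF (-1), the same as
// sign-extending the byte 0xFF.  MOVSX needs the low byte of src to be
// addressable, hence the xRegI operand class.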
8225 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8226   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8227 
8228   size(3);
8229   format %{ "MOVSX  $dst,$src :8" %}
8230   ins_encode %{
8231     __ movsbl($dst$$Register, $src$$Register);
8232   %}
8233   ins_pipe(ialu_reg_reg);
8234 %}
8235 
8236 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8237 // This idiom is used by the compiler for the i2s bytecode.
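// As with i2b above: (x << 16) >> 16 sign-extends the low 16 bits, which is
// MOVSX with a word source.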
8238 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8239   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8240 
8241   size(3);
8242   format %{ "MOVSX  $dst,$src :16" %}
8243   ins_encode %{
8244     __ movswl($dst$$Register, $src$$Register);
8245   %}
8246   ins_pipe(ialu_reg_reg);
8247 %}
8248 
8249 
8250 // Logical Shift Right by variable
8251 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8252   match(Set dst (URShiftI dst shift));
8253   effect(KILL cr);
8254 
8255   size(2);
8256   format %{ "SHR    $dst,$shift" %}
8257   opcode(0xD3, 0x5);  /* D3 /5 */
8258   ins_encode( OpcP, RegOpc( dst ) );
8259   ins_pipe( ialu_reg_reg );
8260 %}
8261 
8262 
8263 //----------Logical Instructions-----------------------------------------------
8264 //----------Integer Logical Instructions---------------------------------------
8265 // And Instructions
8266 // And Register with Register
8267 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8268   match(Set dst (AndI dst src));
8269   effect(KILL cr);
8270 
8271   size(2);
8272   format %{ "AND    $dst,$src" %}
8273   opcode(0x23);
8274   ins_encode( OpcP, RegReg( dst, src) );
8275   ins_pipe( ialu_reg_reg );
8276 %}
8277 
8278 // And Register with Immediate
8279 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8280   match(Set dst (AndI dst src));
8281   effect(KILL cr);
8282 
8283   format %{ "AND    $dst,$src" %}
8284   opcode(0x81,0x04);  /* Opcode 81 /4 */
8285   // ins_encode( RegImm( dst, src) );
8286   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8287   ins_pipe( ialu_reg );
8288 %}
8289 
8290 // And Register with Memory
8291 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8292   match(Set dst (AndI dst (LoadI src)));
8293   effect(KILL cr);
8294 
8295   ins_cost(125);
8296   format %{ "AND    $dst,$src" %}
8297   opcode(0x23);
8298   ins_encode( OpcP, RegMem( dst, src) );
8299   ins_pipe( ialu_reg_mem );
8300 %}
8301 
8302 // And Memory with Register
8303 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8304   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8305   effect(KILL cr);
8306 
8307   ins_cost(150);
8308   format %{ "AND    $dst,$src" %}
8309   opcode(0x21);  /* Opcode 21 /r */
8310   ins_encode( OpcP, RegMem( src, dst ) );
8311   ins_pipe( ialu_mem_reg );
8312 %}
8313 
8314 // And Memory with Immediate
8315 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8316   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8317   effect(KILL cr);
8318 
8319   ins_cost(125);
8320   format %{ "AND    $dst,$src" %}
8321   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8322   // ins_encode( MemImm( dst, src) );
8323   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8324   ins_pipe( ialu_mem_imm );
8325 %}
8326 
8327 // BMI1 instructions
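// These match the scalar idioms that the BMI1 instructions compute directly:
//   ANDN   dst = ~src1 & src2
//   BLSI   dst = src & -src         (isolate lowest set bit)
//   BLSMSK dst = src ^ (src - 1)    (mask up to and including lowest set bit)
//   BLSR   dst = src & (src - 1)    (clear lowest set bit)
// e.g. src = 0b10100: BLSI -> 0b00100, BLSMSK -> 0b00111, BLSR -> 0b10000.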
8328 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8329   match(Set dst (AndI (XorI src1 minus_1) src2));
8330   predicate(UseBMI1Instructions);
8331   effect(KILL cr);
8332 
8333   format %{ "ANDNL  $dst, $src1, $src2" %}
8334 
8335   ins_encode %{
8336     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8337   %}
8338   ins_pipe(ialu_reg);
8339 %}
8340 
8341 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8342   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8343   predicate(UseBMI1Instructions);
8344   effect(KILL cr);
8345 
8346   ins_cost(125);
8347   format %{ "ANDNL  $dst, $src1, $src2" %}
8348 
8349   ins_encode %{
8350     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8351   %}
8352   ins_pipe(ialu_reg_mem);
8353 %}
8354 
8355 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8356   match(Set dst (AndI (SubI imm_zero src) src));
8357   predicate(UseBMI1Instructions);
8358   effect(KILL cr);
8359 
8360   format %{ "BLSIL  $dst, $src" %}
8361 
8362   ins_encode %{
8363     __ blsil($dst$$Register, $src$$Register);
8364   %}
8365   ins_pipe(ialu_reg);
8366 %}
8367 
8368 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8369   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8370   predicate(UseBMI1Instructions);
8371   effect(KILL cr);
8372 
8373   ins_cost(125);
8374   format %{ "BLSIL  $dst, $src" %}
8375 
8376   ins_encode %{
8377     __ blsil($dst$$Register, $src$$Address);
8378   %}
8379   ins_pipe(ialu_reg_mem);
8380 %}
8381 
8382 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8383 %{
8384   match(Set dst (XorI (AddI src minus_1) src));
8385   predicate(UseBMI1Instructions);
8386   effect(KILL cr);
8387 
8388   format %{ "BLSMSKL $dst, $src" %}
8389 
8390   ins_encode %{
8391     __ blsmskl($dst$$Register, $src$$Register);
8392   %}
8393 
8394   ins_pipe(ialu_reg);
8395 %}
8396 
8397 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8398 %{
8399   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8400   predicate(UseBMI1Instructions);
8401   effect(KILL cr);
8402 
8403   ins_cost(125);
8404   format %{ "BLSMSKL $dst, $src" %}
8405 
8406   ins_encode %{
8407     __ blsmskl($dst$$Register, $src$$Address);
8408   %}
8409 
8410   ins_pipe(ialu_reg_mem);
8411 %}
8412 
8413 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8414 %{
8415   match(Set dst (AndI (AddI src minus_1) src) );
8416   predicate(UseBMI1Instructions);
8417   effect(KILL cr);
8418 
8419   format %{ "BLSRL  $dst, $src" %}
8420 
8421   ins_encode %{
8422     __ blsrl($dst$$Register, $src$$Register);
8423   %}
8424 
8425   ins_pipe(ialu_reg);
8426 %}
8427 
8428 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8429 %{
8430   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8431   predicate(UseBMI1Instructions);
8432   effect(KILL cr);
8433 
8434   ins_cost(125);
8435   format %{ "BLSRL  $dst, $src" %}
8436 
8437   ins_encode %{
8438     __ blsrl($dst$$Register, $src$$Address);
8439   %}
8440 
8441   ins_pipe(ialu_reg_mem);
8442 %}
8443 
8444 // Or Instructions
8445 // Or Register with Register
8446 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8447   match(Set dst (OrI dst src));
8448   effect(KILL cr);
8449 
8450   size(2);
8451   format %{ "OR     $dst,$src" %}
8452   opcode(0x0B);
8453   ins_encode( OpcP, RegReg( dst, src) );
8454   ins_pipe( ialu_reg_reg );
8455 %}
8456 
8457 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8458   match(Set dst (OrI dst (CastP2X src)));
8459   effect(KILL cr);
8460 
8461   size(2);
8462   format %{ "OR     $dst,$src" %}
8463   opcode(0x0B);
8464   ins_encode( OpcP, RegReg( dst, src) );
8465   ins_pipe( ialu_reg_reg );
8466 %}
8467 
8468 
8469 // Or Register with Immediate
8470 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8471   match(Set dst (OrI dst src));
8472   effect(KILL cr);
8473 
8474   format %{ "OR     $dst,$src" %}
8475   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8476   // ins_encode( RegImm( dst, src) );
8477   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8478   ins_pipe( ialu_reg );
8479 %}
8480 
8481 // Or Register with Memory
8482 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8483   match(Set dst (OrI dst (LoadI src)));
8484   effect(KILL cr);
8485 
8486   ins_cost(125);
8487   format %{ "OR     $dst,$src" %}
8488   opcode(0x0B);
8489   ins_encode( OpcP, RegMem( dst, src) );
8490   ins_pipe( ialu_reg_mem );
8491 %}
8492 
8493 // Or Memory with Register
8494 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8495   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8496   effect(KILL cr);
8497 
8498   ins_cost(150);
8499   format %{ "OR     $dst,$src" %}
8500   opcode(0x09);  /* Opcode 09 /r */
8501   ins_encode( OpcP, RegMem( src, dst ) );
8502   ins_pipe( ialu_mem_reg );
8503 %}
8504 
8505 // Or Memory with Immediate
8506 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8507   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8508   effect(KILL cr);
8509 
8510   ins_cost(125);
8511   format %{ "OR     $dst,$src" %}
8512   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8513   // ins_encode( MemImm( dst, src) );
8514   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8515   ins_pipe( ialu_mem_imm );
8516 %}
8517 
8518 // ROL/ROR
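// The rotate patterns are matched from their shift/or idioms, e.g.
//   (x << s) | (x >>> (32 - s))  ==  rotate-left(x, s)
// For the 8-bit-immediate forms the predicate checks that the two shift counts
// sum to 0 mod 32, which is what makes the OR of the two shifts a true rotate.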
8519 // ROL expand
8520 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8521   effect(USE_DEF dst, USE shift, KILL cr);
8522 
8523   format %{ "ROL    $dst, $shift" %}
8524   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8525   ins_encode( OpcP, RegOpc( dst ));
8526   ins_pipe( ialu_reg );
8527 %}
8528 
8529 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8530   effect(USE_DEF dst, USE shift, KILL cr);
8531 
8532   format %{ "ROL    $dst, $shift" %}
8533   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8534   ins_encode( RegOpcImm(dst, shift) );
8535   ins_pipe(ialu_reg);
8536 %}
8537 
8538 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8539   effect(USE_DEF dst, USE shift, KILL cr);
8540 
8541   format %{ "ROL    $dst, $shift" %}
8542   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8543   ins_encode(OpcP, RegOpc(dst));
8544   ins_pipe( ialu_reg_reg );
8545 %}
8546 // end of ROL expand
8547 
8548 // ROL 32bit by one once
8549 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8550   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8551 
8552   expand %{
8553     rolI_eReg_imm1(dst, lshift, cr);
8554   %}
8555 %}
8556 
8557 // ROL 32bit var by imm8 once
8558 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8559   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8560   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8561 
8562   expand %{
8563     rolI_eReg_imm8(dst, lshift, cr);
8564   %}
8565 %}
8566 
8567 // ROL 32bit var by var once
8568 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8569   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8570 
8571   expand %{
8572     rolI_eReg_CL(dst, shift, cr);
8573   %}
8574 %}
8575 
8576 // ROL 32bit var by var once
8577 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8578   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8579 
8580   expand %{
8581     rolI_eReg_CL(dst, shift, cr);
8582   %}
8583 %}
8584 
8585 // ROR expand
8586 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8587   effect(USE_DEF dst, USE shift, KILL cr);
8588 
8589   format %{ "ROR    $dst, $shift" %}
8590   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8591   ins_encode( OpcP, RegOpc( dst ) );
8592   ins_pipe( ialu_reg );
8593 %}
8594 
8595 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8596   effect (USE_DEF dst, USE shift, KILL cr);
8597 
8598   format %{ "ROR    $dst, $shift" %}
8599   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8600   ins_encode( RegOpcImm(dst, shift) );
8601   ins_pipe( ialu_reg );
8602 %}
8603 
8604 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8605   effect(USE_DEF dst, USE shift, KILL cr);
8606 
8607   format %{ "ROR    $dst, $shift" %}
8608   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8609   ins_encode(OpcP, RegOpc(dst));
8610   ins_pipe( ialu_reg_reg );
8611 %}
8612 // end of ROR expand
8613 
8614 // ROR 32bit by one once
8615 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8616   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8617 
8618   expand %{
8619     rorI_eReg_imm1(dst, rshift, cr);
8620   %}
8621 %}
8622 
8623 // ROR 32bit by immI8 once
8624 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8625   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8626   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8627 
8628   expand %{
8629     rorI_eReg_imm8(dst, rshift, cr);
8630   %}
8631 %}
8632 
8633 // ROR 32bit var by var once
8634 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8635   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8636 
8637   expand %{
8638     rorI_eReg_CL(dst, shift, cr);
8639   %}
8640 %}
8641 
8642 // ROR 32bit var by var once
8643 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8644   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8645 
8646   expand %{
8647     rorI_eReg_CL(dst, shift, cr);
8648   %}
8649 %}
8650 
8651 // Xor Instructions
8652 // Xor Register with Register
8653 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8654   match(Set dst (XorI dst src));
8655   effect(KILL cr);
8656 
8657   size(2);
8658   format %{ "XOR    $dst,$src" %}
8659   opcode(0x33);
8660   ins_encode( OpcP, RegReg( dst, src) );
8661   ins_pipe( ialu_reg_reg );
8662 %}
8663 
8664 // Xor Register with Immediate -1
8665 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8666   match(Set dst (XorI dst imm));
8667 
8668   size(2);
8669   format %{ "NOT    $dst" %}
8670   ins_encode %{
8671      __ notl($dst$$Register);
8672   %}
8673   ins_pipe( ialu_reg );
8674 %}
8675 
8676 // Xor Register with Immediate
8677 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8678   match(Set dst (XorI dst src));
8679   effect(KILL cr);
8680 
8681   format %{ "XOR    $dst,$src" %}
8682   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8683   // ins_encode( RegImm( dst, src) );
8684   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8685   ins_pipe( ialu_reg );
8686 %}
8687 
8688 // Xor Register with Memory
8689 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8690   match(Set dst (XorI dst (LoadI src)));
8691   effect(KILL cr);
8692 
8693   ins_cost(125);
8694   format %{ "XOR    $dst,$src" %}
8695   opcode(0x33);
8696   ins_encode( OpcP, RegMem(dst, src) );
8697   ins_pipe( ialu_reg_mem );
8698 %}
8699 
8700 // Xor Memory with Register
8701 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8702   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8703   effect(KILL cr);
8704 
8705   ins_cost(150);
8706   format %{ "XOR    $dst,$src" %}
8707   opcode(0x31);  /* Opcode 31 /r */
8708   ins_encode( OpcP, RegMem( src, dst ) );
8709   ins_pipe( ialu_mem_reg );
8710 %}
8711 
8712 // Xor Memory with Immediate
8713 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8714   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8715   effect(KILL cr);
8716 
8717   ins_cost(125);
8718   format %{ "XOR    $dst,$src" %}
8719   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8720   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8721   ins_pipe( ialu_mem_imm );
8722 %}
8723 
8724 //----------Convert Int to Boolean---------------------------------------------
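// Conv2B turns a value into 0 (if zero) or 1 (if non-zero).  The expansion
// copies the source and then relies on NEG/ADC:
//   NEG dst      sets CF = (dst != 0) and leaves dst = -src
//   ADC dst,src  then computes -src + src + CF = CF, i.e. 0 or 1.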
8725 
8726 instruct movI_nocopy(rRegI dst, rRegI src) %{
8727   effect( DEF dst, USE src );
8728   format %{ "MOV    $dst,$src" %}
8729   ins_encode( enc_Copy( dst, src) );
8730   ins_pipe( ialu_reg_reg );
8731 %}
8732 
8733 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8734   effect( USE_DEF dst, USE src, KILL cr );
8735 
8736   size(4);
8737   format %{ "NEG    $dst\n\t"
8738             "ADC    $dst,$src" %}
8739   ins_encode( neg_reg(dst),
8740               OpcRegReg(0x13,dst,src) );
8741   ins_pipe( ialu_reg_reg_long );
8742 %}
8743 
8744 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8745   match(Set dst (Conv2B src));
8746 
8747   expand %{
8748     movI_nocopy(dst,src);
8749     ci2b(dst,src,cr);
8750   %}
8751 %}
8752 
8753 instruct movP_nocopy(rRegI dst, eRegP src) %{
8754   effect( DEF dst, USE src );
8755   format %{ "MOV    $dst,$src" %}
8756   ins_encode( enc_Copy( dst, src) );
8757   ins_pipe( ialu_reg_reg );
8758 %}
8759 
8760 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8761   effect( USE_DEF dst, USE src, KILL cr );
8762   format %{ "NEG    $dst\n\t"
8763             "ADC    $dst,$src" %}
8764   ins_encode( neg_reg(dst),
8765               OpcRegReg(0x13,dst,src) );
8766   ins_pipe( ialu_reg_reg_long );
8767 %}
8768 
8769 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8770   match(Set dst (Conv2B src));
8771 
8772   expand %{
8773     movP_nocopy(dst,src);
8774     cp2b(dst,src,cr);
8775   %}
8776 %}
8777 
8778 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8779   match(Set dst (CmpLTMask p q));
8780   effect(KILL cr);
8781   ins_cost(400);
8782 
8783   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8784   format %{ "XOR    $dst,$dst\n\t"
8785             "CMP    $p,$q\n\t"
8786             "SETlt  $dst\n\t"
8787             "NEG    $dst" %}
8788   ins_encode %{
8789     Register Rp = $p$$Register;
8790     Register Rq = $q$$Register;
8791     Register Rd = $dst$$Register;
8793     __ xorl(Rd, Rd);
8794     __ cmpl(Rp, Rq);
8795     __ setb(Assembler::less, Rd);
8796     __ negl(Rd);
8797   %}
8798 
8799   ins_pipe(pipe_slow);
8800 %}
8801 
8802 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8803   match(Set dst (CmpLTMask dst zero));
8804   effect(DEF dst, KILL cr);
8805   ins_cost(100);
8806 
8807   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8808   ins_encode %{
8809   __ sarl($dst$$Register, 31);
8810   %}
8811   ins_pipe(ialu_reg);
8812 %}
8813 
8814 /* better to save a register than avoid a branch */
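// This matches p = ((p < q) ? y : 0) + (p - q): subtract q and add y back if
// p was below q.  Rather than materializing the CmpLTMask, the encoding just
// subtracts and conditionally re-adds under a short branch.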
8815 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8816   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8817   effect(KILL cr);
8818   ins_cost(400);
8819   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8820             "JGE    done\n\t"
8821             "ADD    $p,$y\n"
8822             "done:  " %}
8823   ins_encode %{
8824     Register Rp = $p$$Register;
8825     Register Rq = $q$$Register;
8826     Register Ry = $y$$Register;
8827     Label done;
8828     __ subl(Rp, Rq);
8829     __ jccb(Assembler::greaterEqual, done);
8830     __ addl(Rp, Ry);
8831     __ bind(done);
8832   %}
8833 
8834   ins_pipe(pipe_cmplt);
8835 %}
8836 
8837 /* better to save a register than avoid a branch */
8838 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8839   match(Set y (AndI (CmpLTMask p q) y));
8840   effect(KILL cr);
8841 
8842   ins_cost(300);
8843 
8844   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8845             "JLT      done\n\t"
8846             "XORL     $y, $y\n"
8847             "done:  " %}
8848   ins_encode %{
8849     Register Rp = $p$$Register;
8850     Register Rq = $q$$Register;
8851     Register Ry = $y$$Register;
8852     Label done;
8853     __ cmpl(Rp, Rq);
8854     __ jccb(Assembler::less, done);
8855     __ xorl(Ry, Ry);
8856     __ bind(done);
8857   %}
8858 
8859   ins_pipe(pipe_cmplt);
8860 %}
8861 
8862 /* If I enable this, I encourage spilling in the inner loop of compress.
8863 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8864   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8865 */
8866 //----------Overflow Math Instructions-----------------------------------------
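// Each of these nodes produces only the flags register; the matcher pairs it
// with a branch on the overflow condition, so OF is the interesting output.
// The add, neg and reg-reg multiply forms clobber an input register to set the
// flags, while subtraction can use CMP (and the immediate multiply a TEMP) and
// leave the inputs intact.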
8867 
8868 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8869 %{
8870   match(Set cr (OverflowAddI op1 op2));
8871   effect(DEF cr, USE_KILL op1, USE op2);
8872 
8873   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8874 
8875   ins_encode %{
8876     __ addl($op1$$Register, $op2$$Register);
8877   %}
8878   ins_pipe(ialu_reg_reg);
8879 %}
8880 
8881 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8882 %{
8883   match(Set cr (OverflowAddI op1 op2));
8884   effect(DEF cr, USE_KILL op1, USE op2);
8885 
8886   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8887 
8888   ins_encode %{
8889     __ addl($op1$$Register, $op2$$constant);
8890   %}
8891   ins_pipe(ialu_reg_reg);
8892 %}
8893 
8894 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8895 %{
8896   match(Set cr (OverflowSubI op1 op2));
8897 
8898   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8899   ins_encode %{
8900     __ cmpl($op1$$Register, $op2$$Register);
8901   %}
8902   ins_pipe(ialu_reg_reg);
8903 %}
8904 
8905 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8906 %{
8907   match(Set cr (OverflowSubI op1 op2));
8908 
8909   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8910   ins_encode %{
8911     __ cmpl($op1$$Register, $op2$$constant);
8912   %}
8913   ins_pipe(ialu_reg_reg);
8914 %}
8915 
8916 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8917 %{
8918   match(Set cr (OverflowSubI zero op2));
8919   effect(DEF cr, USE_KILL op2);
8920 
8921   format %{ "NEG    $op2\t# overflow check int" %}
8922   ins_encode %{
8923     __ negl($op2$$Register);
8924   %}
8925   ins_pipe(ialu_reg_reg);
8926 %}
8927 
8928 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8929 %{
8930   match(Set cr (OverflowMulI op1 op2));
8931   effect(DEF cr, USE_KILL op1, USE op2);
8932 
8933   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8934   ins_encode %{
8935     __ imull($op1$$Register, $op2$$Register);
8936   %}
8937   ins_pipe(ialu_reg_reg_alu0);
8938 %}
8939 
8940 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8941 %{
8942   match(Set cr (OverflowMulI op1 op2));
8943   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8944 
8945   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8946   ins_encode %{
8947     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8948   %}
8949   ins_pipe(ialu_reg_reg_alu0);
8950 %}
8951 
8952 // Integer Absolute Instructions
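// Branch-free absolute value: with m = src >> 31 (0 or -1),
//   abs(src) = (src ^ m) - m
// e.g. src = -5: m = -1, src ^ m = 4, and 4 - (-1) = 5.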
8953 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8954 %{
8955   match(Set dst (AbsI src));
8956   effect(TEMP dst, TEMP tmp, KILL cr);
8957   format %{ "movl $tmp, $src\n\t"
8958             "sarl $tmp, 31\n\t"
8959             "movl $dst, $src\n\t"
8960             "xorl $dst, $tmp\n\t"
8961             "subl $dst, $tmp\n"
8962           %}
8963   ins_encode %{
8964     __ movl($tmp$$Register, $src$$Register);
8965     __ sarl($tmp$$Register, 31);
8966     __ movl($dst$$Register, $src$$Register);
8967     __ xorl($dst$$Register, $tmp$$Register);
8968     __ subl($dst$$Register, $tmp$$Register);
8969   %}
8970 
8971   ins_pipe(ialu_reg_reg);
8972 %} 
8973 
8974 //----------Long Instructions------------------------------------------------
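// 64-bit integer arithmetic is done in 32-bit halves: the low words are
// combined first and the carry/borrow is propagated into the high words
// (ADD/ADC, SUB/SBB), so the long add and subtract patterns below kill the
// flags register.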
8975 // Add Long Register with Register
8976 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8977   match(Set dst (AddL dst src));
8978   effect(KILL cr);
8979   ins_cost(200);
8980   format %{ "ADD    $dst.lo,$src.lo\n\t"
8981             "ADC    $dst.hi,$src.hi" %}
8982   opcode(0x03, 0x13);
8983   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8984   ins_pipe( ialu_reg_reg_long );
8985 %}
8986 
8987 // Add Long Register with Immediate
8988 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8989   match(Set dst (AddL dst src));
8990   effect(KILL cr);
8991   format %{ "ADD    $dst.lo,$src.lo\n\t"
8992             "ADC    $dst.hi,$src.hi" %}
8993   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8994   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8995   ins_pipe( ialu_reg_long );
8996 %}
8997 
8998 // Add Long Register with Memory
8999 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9000   match(Set dst (AddL dst (LoadL mem)));
9001   effect(KILL cr);
9002   ins_cost(125);
9003   format %{ "ADD    $dst.lo,$mem\n\t"
9004             "ADC    $dst.hi,$mem+4" %}
9005   opcode(0x03, 0x13);
9006   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9007   ins_pipe( ialu_reg_long_mem );
9008 %}
9009 
9010 // Subtract Long Register with Register.
9011 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9012   match(Set dst (SubL dst src));
9013   effect(KILL cr);
9014   ins_cost(200);
9015   format %{ "SUB    $dst.lo,$src.lo\n\t"
9016             "SBB    $dst.hi,$src.hi" %}
9017   opcode(0x2B, 0x1B);
9018   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9019   ins_pipe( ialu_reg_reg_long );
9020 %}
9021 
9022 // Subtract Long Register with Immediate
9023 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9024   match(Set dst (SubL dst src));
9025   effect(KILL cr);
9026   format %{ "SUB    $dst.lo,$src.lo\n\t"
9027             "SBB    $dst.hi,$src.hi" %}
9028   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9029   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Subtract Long Register with Memory
9034 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9035   match(Set dst (SubL dst (LoadL mem)));
9036   effect(KILL cr);
9037   ins_cost(125);
9038   format %{ "SUB    $dst.lo,$mem\n\t"
9039             "SBB    $dst.hi,$mem+4" %}
9040   opcode(0x2B, 0x1B);
9041   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9042   ins_pipe( ialu_reg_long_mem );
9043 %}
9044 
9045 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9046   match(Set dst (SubL zero dst));
9047   effect(KILL cr);
9048   ins_cost(300);
9049   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9050   ins_encode( neg_long(dst) );
9051   ins_pipe( ialu_reg_reg_long );
9052 %}
9053 
9054 // And Long Register with Register
9055 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9056   match(Set dst (AndL dst src));
9057   effect(KILL cr);
9058   format %{ "AND    $dst.lo,$src.lo\n\t"
9059             "AND    $dst.hi,$src.hi" %}
9060   opcode(0x23,0x23);
9061   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9062   ins_pipe( ialu_reg_reg_long );
9063 %}
9064 
9065 // And Long Register with Immediate
9066 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9067   match(Set dst (AndL dst src));
9068   effect(KILL cr);
9069   format %{ "AND    $dst.lo,$src.lo\n\t"
9070             "AND    $dst.hi,$src.hi" %}
9071   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9072   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9073   ins_pipe( ialu_reg_long );
9074 %}
9075 
9076 // And Long Register with Memory
9077 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9078   match(Set dst (AndL dst (LoadL mem)));
9079   effect(KILL cr);
9080   ins_cost(125);
9081   format %{ "AND    $dst.lo,$mem\n\t"
9082             "AND    $dst.hi,$mem+4" %}
9083   opcode(0x23, 0x23);
9084   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9085   ins_pipe( ialu_reg_long_mem );
9086 %}
9087 
9088 // BMI1 instructions
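// The 64-bit forms apply the 32-bit BMI1 instruction to the low word first and
// use the flags it leaves (ZF for BLSI, CF for BLSMSK/BLSR) to decide whether
// the lowest set bit was already found there; only if not is the high word
// processed, otherwise it stays at its preset value (zero for BLSI/BLSMSK,
// a copy of src.hi for BLSR).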
9089 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9090   match(Set dst (AndL (XorL src1 minus_1) src2));
9091   predicate(UseBMI1Instructions);
9092   effect(KILL cr, TEMP dst);
9093 
9094   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9095             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9096          %}
9097 
9098   ins_encode %{
9099     Register Rdst = $dst$$Register;
9100     Register Rsrc1 = $src1$$Register;
9101     Register Rsrc2 = $src2$$Register;
9102     __ andnl(Rdst, Rsrc1, Rsrc2);
9103     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9104   %}
9105   ins_pipe(ialu_reg_reg_long);
9106 %}
9107 
9108 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9109   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9110   predicate(UseBMI1Instructions);
9111   effect(KILL cr, TEMP dst);
9112 
9113   ins_cost(125);
9114   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9115             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9116          %}
9117 
9118   ins_encode %{
9119     Register Rdst = $dst$$Register;
9120     Register Rsrc1 = $src1$$Register;
9121     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9122 
9123     __ andnl(Rdst, Rsrc1, $src2$$Address);
9124     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9125   %}
9126   ins_pipe(ialu_reg_mem);
9127 %}
9128 
9129 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9130   match(Set dst (AndL (SubL imm_zero src) src));
9131   predicate(UseBMI1Instructions);
9132   effect(KILL cr, TEMP dst);
9133 
9134   format %{ "MOVL   $dst.hi, 0\n\t"
9135             "BLSIL  $dst.lo, $src.lo\n\t"
9136             "JNZ    done\n\t"
9137             "BLSIL  $dst.hi, $src.hi\n"
9138             "done:"
9139          %}
9140 
9141   ins_encode %{
9142     Label done;
9143     Register Rdst = $dst$$Register;
9144     Register Rsrc = $src$$Register;
9145     __ movl(HIGH_FROM_LOW(Rdst), 0);
9146     __ blsil(Rdst, Rsrc);
9147     __ jccb(Assembler::notZero, done);
9148     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9149     __ bind(done);
9150   %}
9151   ins_pipe(ialu_reg);
9152 %}
9153 
9154 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9155   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9156   predicate(UseBMI1Instructions);
9157   effect(KILL cr, TEMP dst);
9158 
9159   ins_cost(125);
9160   format %{ "MOVL   $dst.hi, 0\n\t"
9161             "BLSIL  $dst.lo, $src\n\t"
9162             "JNZ    done\n\t"
9163             "BLSIL  $dst.hi, $src+4\n"
9164             "done:"
9165          %}
9166 
9167   ins_encode %{
9168     Label done;
9169     Register Rdst = $dst$$Register;
9170     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9171 
9172     __ movl(HIGH_FROM_LOW(Rdst), 0);
9173     __ blsil(Rdst, $src$$Address);
9174     __ jccb(Assembler::notZero, done);
9175     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9176     __ bind(done);
9177   %}
9178   ins_pipe(ialu_reg_mem);
9179 %}
9180 
9181 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9182 %{
9183   match(Set dst (XorL (AddL src minus_1) src));
9184   predicate(UseBMI1Instructions);
9185   effect(KILL cr, TEMP dst);
9186 
9187   format %{ "MOVL    $dst.hi, 0\n\t"
9188             "BLSMSKL $dst.lo, $src.lo\n\t"
9189             "JNC     done\n\t"
9190             "BLSMSKL $dst.hi, $src.hi\n"
9191             "done:"
9192          %}
9193 
9194   ins_encode %{
9195     Label done;
9196     Register Rdst = $dst$$Register;
9197     Register Rsrc = $src$$Register;
9198     __ movl(HIGH_FROM_LOW(Rdst), 0);
9199     __ blsmskl(Rdst, Rsrc);
9200     __ jccb(Assembler::carryClear, done);
9201     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9202     __ bind(done);
9203   %}
9204 
9205   ins_pipe(ialu_reg);
9206 %}
9207 
9208 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9209 %{
9210   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9211   predicate(UseBMI1Instructions);
9212   effect(KILL cr, TEMP dst);
9213 
9214   ins_cost(125);
9215   format %{ "MOVL    $dst.hi, 0\n\t"
9216             "BLSMSKL $dst.lo, $src\n\t"
9217             "JNC     done\n\t"
9218             "BLSMSKL $dst.hi, $src+4\n"
9219             "done:"
9220          %}
9221 
9222   ins_encode %{
9223     Label done;
9224     Register Rdst = $dst$$Register;
9225     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9226 
9227     __ movl(HIGH_FROM_LOW(Rdst), 0);
9228     __ blsmskl(Rdst, $src$$Address);
9229     __ jccb(Assembler::carryClear, done);
9230     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9231     __ bind(done);
9232   %}
9233 
9234   ins_pipe(ialu_reg_mem);
9235 %}
9236 
9237 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9238 %{
9239   match(Set dst (AndL (AddL src minus_1) src) );
9240   predicate(UseBMI1Instructions);
9241   effect(KILL cr, TEMP dst);
9242 
9243   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9244             "BLSRL  $dst.lo, $src.lo\n\t"
9245             "JNC    done\n\t"
9246             "BLSRL  $dst.hi, $src.hi\n"
9247             "done:"
9248   %}
9249 
9250   ins_encode %{
9251     Label done;
9252     Register Rdst = $dst$$Register;
9253     Register Rsrc = $src$$Register;
9254     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9255     __ blsrl(Rdst, Rsrc);
9256     __ jccb(Assembler::carryClear, done);
9257     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9258     __ bind(done);
9259   %}
9260 
9261   ins_pipe(ialu_reg);
9262 %}
9263 
9264 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9265 %{
9266   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9267   predicate(UseBMI1Instructions);
9268   effect(KILL cr, TEMP dst);
9269 
9270   ins_cost(125);
9271   format %{ "MOVL   $dst.hi, $src+4\n\t"
9272             "BLSRL  $dst.lo, $src\n\t"
9273             "JNC    done\n\t"
9274             "BLSRL  $dst.hi, $src+4\n"
9275             "done:"
9276   %}
9277 
9278   ins_encode %{
9279     Label done;
9280     Register Rdst = $dst$$Register;
9281     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9282     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9283     __ blsrl(Rdst, $src$$Address);
9284     __ jccb(Assembler::carryClear, done);
9285     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9286     __ bind(done);
9287   %}
9288 
9289   ins_pipe(ialu_reg_mem);
9290 %}
9291 
9292 // Or Long Register with Register
9293 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9294   match(Set dst (OrL dst src));
9295   effect(KILL cr);
9296   format %{ "OR     $dst.lo,$src.lo\n\t"
9297             "OR     $dst.hi,$src.hi" %}
9298   opcode(0x0B,0x0B);
9299   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9300   ins_pipe( ialu_reg_reg_long );
9301 %}
9302 
9303 // Or Long Register with Immediate
9304 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9305   match(Set dst (OrL dst src));
9306   effect(KILL cr);
9307   format %{ "OR     $dst.lo,$src.lo\n\t"
9308             "OR     $dst.hi,$src.hi" %}
9309   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9310   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9311   ins_pipe( ialu_reg_long );
9312 %}
9313 
9314 // Or Long Register with Memory
9315 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9316   match(Set dst (OrL dst (LoadL mem)));
9317   effect(KILL cr);
9318   ins_cost(125);
9319   format %{ "OR     $dst.lo,$mem\n\t"
9320             "OR     $dst.hi,$mem+4" %}
9321   opcode(0x0B,0x0B);
9322   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9323   ins_pipe( ialu_reg_long_mem );
9324 %}
9325 
9326 // Xor Long Register with Register
9327 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9328   match(Set dst (XorL dst src));
9329   effect(KILL cr);
9330   format %{ "XOR    $dst.lo,$src.lo\n\t"
9331             "XOR    $dst.hi,$src.hi" %}
9332   opcode(0x33,0x33);
9333   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9334   ins_pipe( ialu_reg_reg_long );
9335 %}
9336 
9337 // Xor Long Register with Immediate -1
9338 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9339   match(Set dst (XorL dst imm));
9340   format %{ "NOT    $dst.lo\n\t"
9341             "NOT    $dst.hi" %}
9342   ins_encode %{
9343      __ notl($dst$$Register);
9344      __ notl(HIGH_FROM_LOW($dst$$Register));
9345   %}
9346   ins_pipe( ialu_reg_long );
9347 %}
9348 
9349 // Xor Long Register with Immediate
9350 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9351   match(Set dst (XorL dst src));
9352   effect(KILL cr);
9353   format %{ "XOR    $dst.lo,$src.lo\n\t"
9354             "XOR    $dst.hi,$src.hi" %}
9355   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9356   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9357   ins_pipe( ialu_reg_long );
9358 %}
9359 
9360 // Xor Long Register with Memory
9361 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9362   match(Set dst (XorL dst (LoadL mem)));
9363   effect(KILL cr);
9364   ins_cost(125);
9365   format %{ "XOR    $dst.lo,$mem\n\t"
9366             "XOR    $dst.hi,$mem+4" %}
9367   opcode(0x33,0x33);
9368   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9369   ins_pipe( ialu_reg_long_mem );
9370 %}
9371 
9372 // Shift Left Long by 1
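// A left shift by one is the same as adding the value to itself, so the
// ADD/ADC pair propagates the bit crossing the 32-bit boundary without SHLD.
// (These small-constant forms are guarded by UseNewLongLShift.)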
9373 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9374   predicate(UseNewLongLShift);
9375   match(Set dst (LShiftL dst cnt));
9376   effect(KILL cr);
9377   ins_cost(100);
9378   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9379             "ADC    $dst.hi,$dst.hi" %}
9380   ins_encode %{
9381     __ addl($dst$$Register,$dst$$Register);
9382     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9383   %}
9384   ins_pipe( ialu_reg_long );
9385 %}
9386 
9387 // Shift Left Long by 2
9388 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9389   predicate(UseNewLongLShift);
9390   match(Set dst (LShiftL dst cnt));
9391   effect(KILL cr);
9392   ins_cost(100);
9393   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9394             "ADC    $dst.hi,$dst.hi\n\t"
9395             "ADD    $dst.lo,$dst.lo\n\t"
9396             "ADC    $dst.hi,$dst.hi" %}
9397   ins_encode %{
9398     __ addl($dst$$Register,$dst$$Register);
9399     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9400     __ addl($dst$$Register,$dst$$Register);
9401     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9402   %}
9403   ins_pipe( ialu_reg_long );
9404 %}
9405 
9406 // Shift Left Long by 3
9407 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9408   predicate(UseNewLongLShift);
9409   match(Set dst (LShiftL dst cnt));
9410   effect(KILL cr);
9411   ins_cost(100);
9412   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9413             "ADC    $dst.hi,$dst.hi\n\t"
9414             "ADD    $dst.lo,$dst.lo\n\t"
9415             "ADC    $dst.hi,$dst.hi\n\t"
9416             "ADD    $dst.lo,$dst.lo\n\t"
9417             "ADC    $dst.hi,$dst.hi" %}
9418   ins_encode %{
9419     __ addl($dst$$Register,$dst$$Register);
9420     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9421     __ addl($dst$$Register,$dst$$Register);
9422     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9423     __ addl($dst$$Register,$dst$$Register);
9424     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9425   %}
9426   ins_pipe( ialu_reg_long );
9427 %}
9428 
9429 // Shift Left Long by 1-31
9430 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9431   match(Set dst (LShiftL dst cnt));
9432   effect(KILL cr);
9433   ins_cost(200);
9434   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9435             "SHL    $dst.lo,$cnt" %}
9436   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9437   ins_encode( move_long_small_shift(dst,cnt) );
9438   ins_pipe( ialu_reg_long );
9439 %}
9440 
9441 // Shift Left Long by 32-63
9442 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9443   match(Set dst (LShiftL dst cnt));
9444   effect(KILL cr);
9445   ins_cost(300);
9446   format %{ "MOV    $dst.hi,$dst.lo\n"
9447           "\tSHL    $dst.hi,$cnt-32\n"
9448           "\tXOR    $dst.lo,$dst.lo" %}
9449   opcode(0xC1, 0x4);  /* C1 /4 ib */
9450   ins_encode( move_long_big_shift_clr(dst,cnt) );
9451   ins_pipe( ialu_reg_long );
9452 %}
9453 
9454 // Shift Left Long by variable
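// SHLD/SHL (and SHRD/SHR, SAR below) mask their count to 5 bits, so a variable
// 64-bit shift first tests bit 5 of the count: for counts >= 32 the words are
// moved (or the sign spread, for the arithmetic shift) before the double-shift
// handles the remaining 0-31 bits.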
9455 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9456   match(Set dst (LShiftL dst shift));
9457   effect(KILL cr);
9458   ins_cost(500+200);
9459   size(17);
9460   format %{ "TEST   $shift,32\n\t"
9461             "JEQ,s  small\n\t"
9462             "MOV    $dst.hi,$dst.lo\n\t"
9463             "XOR    $dst.lo,$dst.lo\n"
9464     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9465             "SHL    $dst.lo,$shift" %}
9466   ins_encode( shift_left_long( dst, shift ) );
9467   ins_pipe( pipe_slow );
9468 %}
9469 
9470 // Shift Right Long by 1-31
9471 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9472   match(Set dst (URShiftL dst cnt));
9473   effect(KILL cr);
9474   ins_cost(200);
9475   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9476             "SHR    $dst.hi,$cnt" %}
9477   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9478   ins_encode( move_long_small_shift(dst,cnt) );
9479   ins_pipe( ialu_reg_long );
9480 %}
9481 
9482 // Shift Right Long by 32-63
9483 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9484   match(Set dst (URShiftL dst cnt));
9485   effect(KILL cr);
9486   ins_cost(300);
9487   format %{ "MOV    $dst.lo,$dst.hi\n"
9488           "\tSHR    $dst.lo,$cnt-32\n"
9489           "\tXOR    $dst.hi,$dst.hi" %}
9490   opcode(0xC1, 0x5);  /* C1 /5 ib */
9491   ins_encode( move_long_big_shift_clr(dst,cnt) );
9492   ins_pipe( ialu_reg_long );
9493 %}
9494 
9495 // Shift Right Long by variable
9496 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9497   match(Set dst (URShiftL dst shift));
9498   effect(KILL cr);
9499   ins_cost(600);
9500   size(17);
9501   format %{ "TEST   $shift,32\n\t"
9502             "JEQ,s  small\n\t"
9503             "MOV    $dst.lo,$dst.hi\n\t"
9504             "XOR    $dst.hi,$dst.hi\n"
9505     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9506             "SHR    $dst.hi,$shift" %}
9507   ins_encode( shift_right_long( dst, shift ) );
9508   ins_pipe( pipe_slow );
9509 %}
9510 
9511 // Shift Right arithmetic Long by 1-31
9512 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9513   match(Set dst (RShiftL dst cnt));
9514   effect(KILL cr);
9515   ins_cost(200);
9516   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9517             "SAR    $dst.hi,$cnt" %}
9518   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9519   ins_encode( move_long_small_shift(dst,cnt) );
9520   ins_pipe( ialu_reg_long );
9521 %}
9522 
9523 // Shift Right arithmetic Long by 32-63
9524 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9525   match(Set dst (RShiftL dst cnt));
9526   effect(KILL cr);
9527   ins_cost(300);
9528   format %{ "MOV    $dst.lo,$dst.hi\n"
9529           "\tSAR    $dst.lo,$cnt-32\n"
9530           "\tSAR    $dst.hi,31" %}
9531   opcode(0xC1, 0x7);  /* C1 /7 ib */
9532   ins_encode( move_long_big_shift_sign(dst,cnt) );
9533   ins_pipe( ialu_reg_long );
9534 %}
9535 
9536 // Shift Right arithmetic Long by variable
9537 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9538   match(Set dst (RShiftL dst shift));
9539   effect(KILL cr);
9540   ins_cost(600);
9541   size(18);
9542   format %{ "TEST   $shift,32\n\t"
9543             "JEQ,s  small\n\t"
9544             "MOV    $dst.lo,$dst.hi\n\t"
9545             "SAR    $dst.hi,31\n"
9546     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9547             "SAR    $dst.hi,$shift" %}
9548   ins_encode( shift_right_arith_long( dst, shift ) );
9549   ins_pipe( pipe_slow );
9550 %}
9551 
9552 
9553 //----------Double Instructions------------------------------------------------
9554 // Double Math
9555 
9556 // Compare & branch
9557 
9558 // P6 version of double compare, sets condition codes in EFLAGS
9559 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9560   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9561   match(Set cr (CmpD src1 src2));
9562   effect(KILL rax);
9563   ins_cost(150);
9564   format %{ "FLD    $src1\n\t"
9565             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9566             "JNP    exit\n\t"
9567             "MOV    ah,1       // saw a NaN, set CF\n\t"
9568             "SAHF\n"
9569      "exit:\tNOP               // avoid branch to branch" %}
9570   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9571   ins_encode( Push_Reg_DPR(src1),
9572               OpcP, RegOpc(src2),
9573               cmpF_P6_fixup );
9574   ins_pipe( pipe_slow );
9575 %}
9576 
9577 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9578   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9579   match(Set cr (CmpD src1 src2));
9580   ins_cost(150);
9581   format %{ "FLD    $src1\n\t"
9582             "FUCOMIP ST,$src2  // P6 instruction" %}
9583   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9584   ins_encode( Push_Reg_DPR(src1),
9585               OpcP, RegOpc(src2));
9586   ins_pipe( pipe_slow );
9587 %}
9588 
9589 // Compare & branch
9590 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9591   predicate(UseSSE<=1);
9592   match(Set cr (CmpD src1 src2));
9593   effect(KILL rax);
9594   ins_cost(200);
9595   format %{ "FLD    $src1\n\t"
9596             "FCOMp  $src2\n\t"
9597             "FNSTSW AX\n\t"
9598             "TEST   AX,0x400\n\t"
9599             "JZ,s   flags\n\t"
9600             "MOV    AH,1\t# unordered treat as LT\n"
9601     "flags:\tSAHF" %}
9602   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9603   ins_encode( Push_Reg_DPR(src1),
9604               OpcP, RegOpc(src2),
9605               fpu_flags);
9606   ins_pipe( pipe_slow );
9607 %}
9608 
9609 // Compare vs zero into -1,0,1
9610 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9611   predicate(UseSSE<=1);
9612   match(Set dst (CmpD3 src1 zero));
9613   effect(KILL cr, KILL rax);
9614   ins_cost(280);
9615   format %{ "FTSTD  $dst,$src1" %}
9616   opcode(0xE4, 0xD9);
9617   ins_encode( Push_Reg_DPR(src1),
9618               OpcS, OpcP, PopFPU,
9619               CmpF_Result(dst));
9620   ins_pipe( pipe_slow );
9621 %}
9622 
9623 // Compare into -1,0,1
9624 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9625   predicate(UseSSE<=1);
9626   match(Set dst (CmpD3 src1 src2));
9627   effect(KILL cr, KILL rax);
9628   ins_cost(300);
9629   format %{ "FCMPD  $dst,$src1,$src2" %}
9630   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9631   ins_encode( Push_Reg_DPR(src1),
9632               OpcP, RegOpc(src2),
9633               CmpF_Result(dst));
9634   ins_pipe( pipe_slow );
9635 %}
9636 
9637 // double compare and set condition codes in EFLAGS by XMM regs
9638 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9639   predicate(UseSSE>=2);
9640   match(Set cr (CmpD src1 src2));
9641   ins_cost(145);
9642   format %{ "UCOMISD $src1,$src2\n\t"
9643             "JNP,s   exit\n\t"
9644             "PUSHF\t# saw NaN, set CF\n\t"
9645             "AND     [rsp], #0xffffff2b\n\t"
9646             "POPF\n"
9647     "exit:" %}
9648   ins_encode %{
9649     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9650     emit_cmpfp_fixup(_masm);
9651   %}
9652   ins_pipe( pipe_slow );
9653 %}
9654 
9655 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9656   predicate(UseSSE>=2);
9657   match(Set cr (CmpD src1 src2));
9658   ins_cost(100);
9659   format %{ "UCOMISD $src1,$src2" %}
9660   ins_encode %{
9661     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9662   %}
9663   ins_pipe( pipe_slow );
9664 %}
9665 
9666 // double compare and set condition codes in EFLAGS by XMM regs
9667 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9668   predicate(UseSSE>=2);
9669   match(Set cr (CmpD src1 (LoadD src2)));
9670   ins_cost(145);
9671   format %{ "UCOMISD $src1,$src2\n\t"
9672             "JNP,s   exit\n\t"
9673             "PUSHF\t# saw NaN, set CF\n\t"
9674             "AND     [rsp], #0xffffff2b\n\t"
9675             "POPF\n"
9676     "exit:" %}
9677   ins_encode %{
9678     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9679     emit_cmpfp_fixup(_masm);
9680   %}
9681   ins_pipe( pipe_slow );
9682 %}
9683 
9684 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9685   predicate(UseSSE>=2);
9686   match(Set cr (CmpD src1 (LoadD src2)));
9687   ins_cost(100);
9688   format %{ "UCOMISD $src1,$src2" %}
9689   ins_encode %{
9690     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9691   %}
9692   ins_pipe( pipe_slow );
9693 %}
9694 
9695 // Compare into -1,0,1 in XMM
9696 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9697   predicate(UseSSE>=2);
9698   match(Set dst (CmpD3 src1 src2));
9699   effect(KILL cr);
9700   ins_cost(255);
9701   format %{ "UCOMISD $src1, $src2\n\t"
9702             "MOV     $dst, #-1\n\t"
9703             "JP,s    done\n\t"
9704             "JB,s    done\n\t"
9705             "SETNE   $dst\n\t"
9706             "MOVZB   $dst, $dst\n"
9707     "done:" %}
9708   ins_encode %{
9709     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9710     emit_cmpfp3(_masm, $dst$$Register);
9711   %}
9712   ins_pipe( pipe_slow );
9713 %}
9714 
9715 // Compare into -1,0,1 in XMM and memory
9716 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9717   predicate(UseSSE>=2);
9718   match(Set dst (CmpD3 src1 (LoadD src2)));
9719   effect(KILL cr);
9720   ins_cost(275);
9721   format %{ "UCOMISD $src1, $src2\n\t"
9722             "MOV     $dst, #-1\n\t"
9723             "JP,s    done\n\t"
9724             "JB,s    done\n\t"
9725             "SETNE   $dst\n\t"
9726             "MOVZB   $dst, $dst\n"
9727     "done:" %}
9728   ins_encode %{
9729     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9730     emit_cmpfp3(_masm, $dst$$Register);
9731   %}
9732   ins_pipe( pipe_slow );
9733 %}
9734 
9735 
9736 instruct subDPR_reg(regDPR dst, regDPR src) %{
9737   predicate (UseSSE <=1);
9738   match(Set dst (SubD dst src));
9739 
9740   format %{ "FLD    $src\n\t"
9741             "DSUBp  $dst,ST" %}
9742   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9743   ins_cost(150);
9744   ins_encode( Push_Reg_DPR(src),
9745               OpcP, RegOpc(dst) );
9746   ins_pipe( fpu_reg_reg );
9747 %}
9748 
9749 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9750   predicate (UseSSE <=1);
9751   match(Set dst (RoundDouble (SubD src1 src2)));
9752   ins_cost(250);
9753 
9754   format %{ "FLD    $src2\n\t"
9755             "DSUB   ST,$src1\n\t"
9756             "FSTP_D $dst\t# D-round" %}
9757   opcode(0xD8, 0x5);
9758   ins_encode( Push_Reg_DPR(src2),
9759               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9760   ins_pipe( fpu_mem_reg_reg );
9761 %}
9762 
9763 
9764 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9765   predicate (UseSSE <=1);
9766   match(Set dst (SubD dst (LoadD src)));
9767   ins_cost(150);
9768 
9769   format %{ "FLD    $src\n\t"
9770             "DSUBp  $dst,ST" %}
9771   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9772   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9773               OpcP, RegOpc(dst) );
9774   ins_pipe( fpu_reg_mem );
9775 %}
9776 
9777 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9778   predicate (UseSSE<=1);
9779   match(Set dst (AbsD src));
9780   ins_cost(100);
9781   format %{ "FABS" %}
9782   opcode(0xE1, 0xD9);
9783   ins_encode( OpcS, OpcP );
9784   ins_pipe( fpu_reg_reg );
9785 %}
9786 
9787 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9788   predicate(UseSSE<=1);
9789   match(Set dst (NegD src));
9790   ins_cost(100);
9791   format %{ "FCHS" %}
9792   opcode(0xE0, 0xD9);
9793   ins_encode( OpcS, OpcP );
9794   ins_pipe( fpu_reg_reg );
9795 %}
9796 
9797 instruct addDPR_reg(regDPR dst, regDPR src) %{
9798   predicate(UseSSE<=1);
9799   match(Set dst (AddD dst src));
9800   format %{ "FLD    $src\n\t"
9801             "DADD   $dst,ST" %}
9802   size(4);
9803   ins_cost(150);
9804   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9805   ins_encode( Push_Reg_DPR(src),
9806               OpcP, RegOpc(dst) );
9807   ins_pipe( fpu_reg_reg );
9808 %}
9809 
9810 
9811 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9812   predicate(UseSSE<=1);
9813   match(Set dst (RoundDouble (AddD src1 src2)));
9814   ins_cost(250);
9815 
9816   format %{ "FLD    $src2\n\t"
9817             "DADD   ST,$src1\n\t"
9818             "FSTP_D $dst\t# D-round" %}
9819   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9820   ins_encode( Push_Reg_DPR(src2),
9821               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9822   ins_pipe( fpu_mem_reg_reg );
9823 %}
9824 
9825 
9826 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9827   predicate(UseSSE<=1);
9828   match(Set dst (AddD dst (LoadD src)));
9829   ins_cost(150);
9830 
9831   format %{ "FLD    $src\n\t"
9832             "DADDp  $dst,ST" %}
9833   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9834   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9835               OpcP, RegOpc(dst) );
9836   ins_pipe( fpu_reg_mem );
9837 %}
9838 
9839 // add-to-memory
9840 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9841   predicate(UseSSE<=1);
9842   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9843   ins_cost(150);
9844 
9845   format %{ "FLD_D  $dst\n\t"
9846             "DADD   ST,$src\n\t"
9847             "FST_D  $dst" %}
9848   opcode(0xDD, 0x0);
9849   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9850               Opcode(0xD8), RegOpc(src),
9851               set_instruction_start,
9852               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9853   ins_pipe( fpu_reg_mem );
9854 %}
9855 
9856 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9857   predicate(UseSSE<=1);
9858   match(Set dst (AddD dst con));
9859   ins_cost(125);
9860   format %{ "FLD1\n\t"
9861             "DADDp  $dst,ST" %}
9862   ins_encode %{
9863     __ fld1();
9864     __ faddp($dst$$reg);
9865   %}
9866   ins_pipe(fpu_reg);
9867 %}
9868 
9869 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9870   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9871   match(Set dst (AddD dst con));
9872   ins_cost(200);
9873   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9874             "DADDp  $dst,ST" %}
9875   ins_encode %{
9876     __ fld_d($constantaddress($con));
9877     __ faddp($dst$$reg);
9878   %}
9879   ins_pipe(fpu_reg_mem);
9880 %}
9881 
9882 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9883   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9884   match(Set dst (RoundDouble (AddD src con)));
9885   ins_cost(200);
9886   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9887             "DADD   ST,$src\n\t"
9888             "FSTP_D $dst\t# D-round" %}
9889   ins_encode %{
9890     __ fld_d($constantaddress($con));
9891     __ fadd($src$$reg);
9892     __ fstp_d(Address(rsp, $dst$$disp));
9893   %}
9894   ins_pipe(fpu_mem_reg_con);
9895 %}
9896 
9897 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9898   predicate(UseSSE<=1);
9899   match(Set dst (MulD dst src));
9900   format %{ "FLD    $src\n\t"
9901             "DMULp  $dst,ST" %}
9902   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9903   ins_cost(150);
9904   ins_encode( Push_Reg_DPR(src),
9905               OpcP, RegOpc(dst) );
9906   ins_pipe( fpu_reg_reg );
9907 %}
9908 
9909 // Strict FP instruction biases argument before multiply then
9910 // biases result to avoid double rounding of subnormals.
9911 //
9912 // scale arg1 by multiplying arg1 by 2^(-15360)
9913 // load arg2
9914 // multiply scaled arg1 by arg2
9915 // rescale product by 2^(15360)
9916 //
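// A note on the bias constants (an explanatory sketch, not taken from the stub
// code itself): 15360 = 16383 - 1023, the difference between the exponent biases
// of the x87 double-extended format and the IEEE double format.  Pre-scaling by
// 2^(-15360) pushes a product that would be a double subnormal down into the
// extended-precision subnormal range, so its significand is rounded once at the
// correct bit position; the final scale by 2^(+15360) is an exact power-of-two
// rescale back into the double range.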
9917 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9918   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9919   match(Set dst (MulD dst src));
9920   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9921 
9922   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9923             "DMULp  $dst,ST\n\t"
9924             "FLD    $src\n\t"
9925             "DMULp  $dst,ST\n\t"
9926             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9927             "DMULp  $dst,ST\n\t" %}
9928   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9929   ins_encode( strictfp_bias1(dst),
9930               Push_Reg_DPR(src),
9931               OpcP, RegOpc(dst),
9932               strictfp_bias2(dst) );
9933   ins_pipe( fpu_reg_reg );
9934 %}
9935 
9936 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9937   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9938   match(Set dst (MulD dst con));
9939   ins_cost(200);
9940   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9941             "DMULp  $dst,ST" %}
9942   ins_encode %{
9943     __ fld_d($constantaddress($con));
9944     __ fmulp($dst$$reg);
9945   %}
9946   ins_pipe(fpu_reg_mem);
9947 %}
9948 
9949 
9950 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9951   predicate( UseSSE<=1 );
9952   match(Set dst (MulD dst (LoadD src)));
9953   ins_cost(200);
9954   format %{ "FLD_D  $src\n\t"
9955             "DMULp  $dst,ST" %}
9956   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9957   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9958               OpcP, RegOpc(dst) );
9959   ins_pipe( fpu_reg_mem );
9960 %}
9961 
9962 //
9963 // Cisc-alternate to reg-reg multiply
9964 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9965   predicate( UseSSE<=1 );
9966   match(Set dst (MulD src (LoadD mem)));
9967   ins_cost(250);
9968   format %{ "FLD_D  $mem\n\t"
9969             "DMUL   ST,$src\n\t"
9970             "FSTP_D $dst" %}
9971   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9972   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9973               OpcReg_FPR(src),
9974               Pop_Reg_DPR(dst) );
9975   ins_pipe( fpu_reg_reg_mem );
9976 %}
9977 
9978 
9979 // MACRO3 -- addDPR a mulDPR
9980 // This instruction is a '2-address' instruction in that the result goes
9981 // back to src2.  This eliminates a move from the macro; possibly the
9982 // register allocator will have to add it back (and maybe not).
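// For example, the Ideal subtree (AddD (MulD src0 src1) src2), i.e. d = a*b + d,
// matches as a single x87 sequence with src2 updated in place.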
9983 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9984   predicate( UseSSE<=1 );
9985   match(Set src2 (AddD (MulD src0 src1) src2));
9986   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9987             "DMUL   ST,$src1\n\t"
9988             "DADDp  $src2,ST" %}
9989   ins_cost(250);
9990   opcode(0xDD); /* LoadD DD /0 */
9991   ins_encode( Push_Reg_FPR(src0),
9992               FMul_ST_reg(src1),
9993               FAddP_reg_ST(src2) );
9994   ins_pipe( fpu_reg_reg_reg );
9995 %}
9996 
9997 
9998 // MACRO3 -- subDPR a mulDPR
9999 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10000   predicate( UseSSE<=1 );
10001   match(Set src2 (SubD (MulD src0 src1) src2));
10002   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10003             "DMUL   ST,$src1\n\t"
10004             "DSUBRp $src2,ST" %}
10005   ins_cost(250);
10006   ins_encode( Push_Reg_FPR(src0),
10007               FMul_ST_reg(src1),
10008               Opcode(0xDE), Opc_plus(0xE0,src2));
10009   ins_pipe( fpu_reg_reg_reg );
10010 %}
10011 
10012 
10013 instruct divDPR_reg(regDPR dst, regDPR src) %{
10014   predicate( UseSSE<=1 );
10015   match(Set dst (DivD dst src));
10016 
10017   format %{ "FLD    $src\n\t"
10018             "FDIVp  $dst,ST" %}
10019   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10020   ins_cost(150);
10021   ins_encode( Push_Reg_DPR(src),
10022               OpcP, RegOpc(dst) );
10023   ins_pipe( fpu_reg_reg );
10024 %}
10025 
10026 // Strict FP instruction biases argument before division then
10027 // biases result, to avoid double rounding of subnormals.
10028 //
10029 // scale dividend by multiplying dividend by 2^(-15360)
10030 // load divisor
10031 // divide scaled dividend by divisor
10032 // rescale quotient by 2^(15360)
10033 //
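// The bias constants are the same ones used by strictfp_mulDPR_reg above,
// 2^(-15360) and 2^(+15360); see the note there for why that width aligns the
// double and extended-precision subnormal thresholds.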
10034 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10035   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10036   match(Set dst (DivD dst src));
10037   ins_cost(1);   // Select this instruction for all strict FP double divides
10039 
10040   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10041             "DMULp  $dst,ST\n\t"
10042             "FLD    $src\n\t"
10043             "FDIVp  $dst,ST\n\t"
10044             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10045             "DMULp  $dst,ST\n\t" %}
10046   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10047   ins_encode( strictfp_bias1(dst),
10048               Push_Reg_DPR(src),
10049               OpcP, RegOpc(dst),
10050               strictfp_bias2(dst) );
10051   ins_pipe( fpu_reg_reg );
10052 %}
10053 
10054 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10055   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10056   match(Set dst (RoundDouble (DivD src1 src2)));
10057 
10058   format %{ "FLD    $src1\n\t"
10059             "FDIV   ST,$src2\n\t"
10060             "FSTP_D $dst\t# D-round" %}
10061   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10062   ins_encode( Push_Reg_DPR(src1),
10063               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10064   ins_pipe( fpu_mem_reg_reg );
10065 %}
10066 
10067 
10068 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10069   predicate(UseSSE<=1);
10070   match(Set dst (ModD dst src));
10071   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10072 
10073   format %{ "DMOD   $dst,$src" %}
10074   ins_cost(250);
10075   ins_encode(Push_Reg_Mod_DPR(dst, src),
10076               emitModDPR(),
10077               Push_Result_Mod_DPR(src),
10078               Pop_Reg_DPR(dst));
10079   ins_pipe( pipe_slow );
10080 %}
10081 
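// The FPREM loop below relies on FPREM producing only a partial remainder when
// the exponents are far apart: FPREM sets C2 in the FPU status word while the
// reduction is incomplete, FNSTSW AX / SAHF map C2 onto PF, and JP repeats the
// instruction until the remainder is final.  (Descriptive note only; the format
// string shows the sequence that is actually emitted.)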
10082 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10083   predicate(UseSSE>=2);
10084   match(Set dst (ModD src0 src1));
10085   effect(KILL rax, KILL cr);
10086 
10087   format %{ "SUB    ESP,8\t # DMOD\n"
10088           "\tMOVSD  [ESP+0],$src1\n"
10089           "\tFLD_D  [ESP+0]\n"
10090           "\tMOVSD  [ESP+0],$src0\n"
10091           "\tFLD_D  [ESP+0]\n"
10092      "loop:\tFPREM\n"
10093           "\tFWAIT\n"
10094           "\tFNSTSW AX\n"
10095           "\tSAHF\n"
10096           "\tJP     loop\n"
10097           "\tFSTP_D [ESP+0]\n"
10098           "\tMOVSD  $dst,[ESP+0]\n"
10099           "\tADD    ESP,8\n"
10100           "\tFSTP   ST0\t # Restore FPU Stack"
10101     %}
10102   ins_cost(250);
10103   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10104   ins_pipe( pipe_slow );
10105 %}
10106 
10107 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10108   predicate (UseSSE<=1);
10109   match(Set dst(AtanD dst src));
10110   format %{ "DATA   $dst,$src" %}
10111   opcode(0xD9, 0xF3);
10112   ins_encode( Push_Reg_DPR(src),
10113               OpcP, OpcS, RegOpc(dst) );
10114   ins_pipe( pipe_slow );
10115 %}
10116 
10117 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10118   predicate (UseSSE>=2);
10119   match(Set dst(AtanD dst src));
10120   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10121   format %{ "DATA   $dst,$src" %}
10122   opcode(0xD9, 0xF3);
10123   ins_encode( Push_SrcD(src),
10124               OpcP, OpcS, Push_ResultD(dst) );
10125   ins_pipe( pipe_slow );
10126 %}
10127 
10128 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10129   predicate (UseSSE<=1);
10130   match(Set dst (SqrtD src));
10131   format %{ "DSQRT  $dst,$src" %}
10132   opcode(0xFA, 0xD9);
10133   ins_encode( Push_Reg_DPR(src),
10134               OpcS, OpcP, Pop_Reg_DPR(dst) );
10135   ins_pipe( pipe_slow );
10136 %}
10137 
10138 //-------------Float Instructions-------------------------------
10139 // Float Math
10140 
10141 // Code for float compare:
10142 //     fcompp();
10143 //     fwait(); fnstsw_ax();
10144 //     sahf();
10145 //     movl(dst, unordered_result);
10146 //     jcc(Assembler::parity, exit);
10147 //     movl(dst, less_result);
10148 //     jcc(Assembler::below, exit);
10149 //     movl(dst, equal_result);
10150 //     jcc(Assembler::equal, exit);
10151 //     movl(dst, greater_result);
10152 //   exit:
10153 
10154 // P6 version of float compare, sets condition codes in EFLAGS
10155 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10156   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10157   match(Set cr (CmpF src1 src2));
10158   effect(KILL rax);
10159   ins_cost(150);
10160   format %{ "FLD    $src1\n\t"
10161             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10162             "JNP    exit\n\t"
10163             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10164             "SAHF\n"
10165      "exit:\tNOP               // avoid branch to branch" %}
10166   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10167   ins_encode( Push_Reg_DPR(src1),
10168               OpcP, RegOpc(src2),
10169               cmpF_P6_fixup );
10170   ins_pipe( pipe_slow );
10171 %}
10172 
10173 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10174   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10175   match(Set cr (CmpF src1 src2));
10176   ins_cost(100);
10177   format %{ "FLD    $src1\n\t"
10178             "FUCOMIP ST,$src2  // P6 instruction" %}
10179   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10180   ins_encode( Push_Reg_DPR(src1),
10181               OpcP, RegOpc(src2));
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 
10186 // Compare & branch
10187 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10188   predicate(UseSSE == 0);
10189   match(Set cr (CmpF src1 src2));
10190   effect(KILL rax);
10191   ins_cost(200);
10192   format %{ "FLD    $src1\n\t"
10193             "FCOMp  $src2\n\t"
10194             "FNSTSW AX\n\t"
10195             "TEST   AX,0x400\n\t"
10196             "JZ,s   flags\n\t"
10197             "MOV    AH,1\t# unordered treat as LT\n"
10198     "flags:\tSAHF" %}
10199   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10200   ins_encode( Push_Reg_DPR(src1),
10201               OpcP, RegOpc(src2),
10202               fpu_flags);
10203   ins_pipe( pipe_slow );
10204 %}
10205 
10206 // Compare vs zero into -1,0,1
10207 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10208   predicate(UseSSE == 0);
10209   match(Set dst (CmpF3 src1 zero));
10210   effect(KILL cr, KILL rax);
10211   ins_cost(280);
10212   format %{ "FTSTF  $dst,$src1" %}
10213   opcode(0xE4, 0xD9);
10214   ins_encode( Push_Reg_DPR(src1),
10215               OpcS, OpcP, PopFPU,
10216               CmpF_Result(dst));
10217   ins_pipe( pipe_slow );
10218 %}
10219 
10220 // Compare into -1,0,1
10221 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10222   predicate(UseSSE == 0);
10223   match(Set dst (CmpF3 src1 src2));
10224   effect(KILL cr, KILL rax);
10225   ins_cost(300);
10226   format %{ "FCMPF  $dst,$src1,$src2" %}
10227   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10228   ins_encode( Push_Reg_DPR(src1),
10229               OpcP, RegOpc(src2),
10230               CmpF_Result(dst));
10231   ins_pipe( pipe_slow );
10232 %}
10233 
10234 // float compare and set condition codes in EFLAGS by XMM regs
10235 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10236   predicate(UseSSE>=1);
10237   match(Set cr (CmpF src1 src2));
10238   ins_cost(145);
10239   format %{ "UCOMISS $src1,$src2\n\t"
10240             "JNP,s   exit\n\t"
10241             "PUSHF\t# saw NaN, set CF\n\t"
10242             "AND     [rsp], #0xffffff2b\n\t"
10243             "POPF\n"
10244     "exit:" %}
10245   ins_encode %{
10246     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10247     emit_cmpfp_fixup(_masm);
10248   %}
10249   ins_pipe( pipe_slow );
10250 %}
10251 
10252 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10253   predicate(UseSSE>=1);
10254   match(Set cr (CmpF src1 src2));
10255   ins_cost(100);
10256   format %{ "UCOMISS $src1,$src2" %}
10257   ins_encode %{
10258     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10259   %}
10260   ins_pipe( pipe_slow );
10261 %}
10262 
10263 // float compare and set condition codes in EFLAGS by XMM regs
10264 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10265   predicate(UseSSE>=1);
10266   match(Set cr (CmpF src1 (LoadF src2)));
10267   ins_cost(165);
10268   format %{ "UCOMISS $src1,$src2\n\t"
10269             "JNP,s   exit\n\t"
10270             "PUSHF\t# saw NaN, set CF\n\t"
10271             "AND     [rsp], #0xffffff2b\n\t"
10272             "POPF\n"
10273     "exit:" %}
10274   ins_encode %{
10275     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10276     emit_cmpfp_fixup(_masm);
10277   %}
10278   ins_pipe( pipe_slow );
10279 %}
10280 
10281 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10282   predicate(UseSSE>=1);
10283   match(Set cr (CmpF src1 (LoadF src2)));
10284   ins_cost(100);
10285   format %{ "UCOMISS $src1,$src2" %}
10286   ins_encode %{
10287     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10288   %}
10289   ins_pipe( pipe_slow );
10290 %}
10291 
10292 // Compare into -1,0,1 in XMM
10293 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10294   predicate(UseSSE>=1);
10295   match(Set dst (CmpF3 src1 src2));
10296   effect(KILL cr);
10297   ins_cost(255);
10298   format %{ "UCOMISS $src1, $src2\n\t"
10299             "MOV     $dst, #-1\n\t"
10300             "JP,s    done\n\t"
10301             "JB,s    done\n\t"
10302             "SETNE   $dst\n\t"
10303             "MOVZB   $dst, $dst\n"
10304     "done:" %}
10305   ins_encode %{
10306     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10307     emit_cmpfp3(_masm, $dst$$Register);
10308   %}
10309   ins_pipe( pipe_slow );
10310 %}
10311 
10312 // Compare into -1,0,1 in XMM and memory
10313 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10314   predicate(UseSSE>=1);
10315   match(Set dst (CmpF3 src1 (LoadF src2)));
10316   effect(KILL cr);
10317   ins_cost(275);
10318   format %{ "UCOMISS $src1, $src2\n\t"
10319             "MOV     $dst, #-1\n\t"
10320             "JP,s    done\n\t"
10321             "JB,s    done\n\t"
10322             "SETNE   $dst\n\t"
10323             "MOVZB   $dst, $dst\n"
10324     "done:" %}
10325   ins_encode %{
10326     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10327     emit_cmpfp3(_masm, $dst$$Register);
10328   %}
10329   ins_pipe( pipe_slow );
10330 %}
10331 
10332 // Spill to obtain 24-bit precision
10333 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10334   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10335   match(Set dst (SubF src1 src2));
10336 
10337   format %{ "FSUB   $dst,$src1 - $src2" %}
10338   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10339   ins_encode( Push_Reg_FPR(src1),
10340               OpcReg_FPR(src2),
10341               Pop_Mem_FPR(dst) );
10342   ins_pipe( fpu_mem_reg_reg );
10343 %}
10344 //
10345 // This instruction does not round to 24-bits
10346 instruct subFPR_reg(regFPR dst, regFPR src) %{
10347   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10348   match(Set dst (SubF dst src));
10349 
10350   format %{ "FSUB   $dst,$src" %}
10351   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10352   ins_encode( Push_Reg_FPR(src),
10353               OpcP, RegOpc(dst) );
10354   ins_pipe( fpu_reg_reg );
10355 %}
10356 
10357 // Spill to obtain 24-bit precision
10358 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10359   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10360   match(Set dst (AddF src1 src2));
10361 
10362   format %{ "FADD   $dst,$src1,$src2" %}
10363   opcode(0xD8, 0x0); /* D8 C0+i */
10364   ins_encode( Push_Reg_FPR(src2),
10365               OpcReg_FPR(src1),
10366               Pop_Mem_FPR(dst) );
10367   ins_pipe( fpu_mem_reg_reg );
10368 %}
10369 //
10370 // This instruction does not round to 24-bits
10371 instruct addFPR_reg(regFPR dst, regFPR src) %{
10372   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10373   match(Set dst (AddF dst src));
10374 
10375   format %{ "FLD    $src\n\t"
10376             "FADDp  $dst,ST" %}
10377   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10378   ins_encode( Push_Reg_FPR(src),
10379               OpcP, RegOpc(dst) );
10380   ins_pipe( fpu_reg_reg );
10381 %}
10382 
10383 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10384   predicate(UseSSE==0);
10385   match(Set dst (AbsF src));
10386   ins_cost(100);
10387   format %{ "FABS" %}
10388   opcode(0xE1, 0xD9);
10389   ins_encode( OpcS, OpcP );
10390   ins_pipe( fpu_reg_reg );
10391 %}
10392 
10393 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10394   predicate(UseSSE==0);
10395   match(Set dst (NegF src));
10396   ins_cost(100);
10397   format %{ "FCHS" %}
10398   opcode(0xE0, 0xD9);
10399   ins_encode( OpcS, OpcP );
10400   ins_pipe( fpu_reg_reg );
10401 %}
10402 
10403 // Cisc-alternate to addFPR_reg
10404 // Spill to obtain 24-bit precision
10405 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10406   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10407   match(Set dst (AddF src1 (LoadF src2)));
10408 
10409   format %{ "FLD    $src2\n\t"
10410             "FADD   ST,$src1\n\t"
10411             "FSTP_S $dst" %}
10412   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10413   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10414               OpcReg_FPR(src1),
10415               Pop_Mem_FPR(dst) );
10416   ins_pipe( fpu_mem_reg_mem );
10417 %}
10418 //
10419 // Cisc-alternate to addFPR_reg
10420 // This instruction does not round to 24-bits
10421 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10422   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10423   match(Set dst (AddF dst (LoadF src)));
10424 
10425   format %{ "FADD   $dst,$src" %}
10426   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10427   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10428               OpcP, RegOpc(dst) );
10429   ins_pipe( fpu_reg_mem );
10430 %}
10431 
10432 // Following two instructions for _222_mpegaudio
10433 // Spill to obtain 24-bit precision
10434 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10435   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10436   match(Set dst (AddF src1 src2));
10437 
10438   format %{ "FADD   $dst,$src1,$src2" %}
10439   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10440   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10441               OpcReg_FPR(src2),
10442               Pop_Mem_FPR(dst) );
10443   ins_pipe( fpu_mem_reg_mem );
10444 %}
10445 
10446 // Cisc-spill variant
10447 // Spill to obtain 24-bit precision
10448 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10449   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10450   match(Set dst (AddF src1 (LoadF src2)));
10451 
10452   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10453   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10454   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10455               set_instruction_start,
10456               OpcP, RMopc_Mem(secondary,src1),
10457               Pop_Mem_FPR(dst) );
10458   ins_pipe( fpu_mem_mem_mem );
10459 %}
10460 
10461 // Spill to obtain 24-bit precision
10462 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10463   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10464   match(Set dst (AddF src1 src2));
10465 
10466   format %{ "FADD   $dst,$src1,$src2" %}
10467   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10468   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10469               set_instruction_start,
10470               OpcP, RMopc_Mem(secondary,src1),
10471               Pop_Mem_FPR(dst) );
10472   ins_pipe( fpu_mem_mem_mem );
10473 %}
10474 
10475 
10476 // Spill to obtain 24-bit precision
10477 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10478   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10479   match(Set dst (AddF src con));
10480   format %{ "FLD    $src\n\t"
10481             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10482             "FSTP_S $dst"  %}
10483   ins_encode %{
10484     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10485     __ fadd_s($constantaddress($con));
10486     __ fstp_s(Address(rsp, $dst$$disp));
10487   %}
10488   ins_pipe(fpu_mem_reg_con);
10489 %}
10490 //
10491 // This instruction does not round to 24-bits
10492 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10493   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10494   match(Set dst (AddF src con));
10495   format %{ "FLD    $src\n\t"
10496             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10497             "FSTP   $dst"  %}
10498   ins_encode %{
10499     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10500     __ fadd_s($constantaddress($con));
10501     __ fstp_d($dst$$reg);
10502   %}
10503   ins_pipe(fpu_reg_reg_con);
10504 %}
10505 
10506 // Spill to obtain 24-bit precision
10507 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10508   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10509   match(Set dst (MulF src1 src2));
10510 
10511   format %{ "FLD    $src1\n\t"
10512             "FMUL   $src2\n\t"
10513             "FSTP_S $dst"  %}
10514   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10515   ins_encode( Push_Reg_FPR(src1),
10516               OpcReg_FPR(src2),
10517               Pop_Mem_FPR(dst) );
10518   ins_pipe( fpu_mem_reg_reg );
10519 %}
10520 //
10521 // This instruction does not round to 24-bits
10522 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10523   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10524   match(Set dst (MulF src1 src2));
10525 
10526   format %{ "FLD    $src1\n\t"
10527             "FMUL   $src2\n\t"
10528             "FSTP_S $dst"  %}
10529   opcode(0xD8, 0x1); /* D8 C8+i */
10530   ins_encode( Push_Reg_FPR(src2),
10531               OpcReg_FPR(src1),
10532               Pop_Reg_FPR(dst) );
10533   ins_pipe( fpu_reg_reg_reg );
10534 %}
10535 
10536 
10537 // Spill to obtain 24-bit precision
10538 // Cisc-alternate to reg-reg multiply
10539 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10540   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10541   match(Set dst (MulF src1 (LoadF src2)));
10542 
10543   format %{ "FLD_S  $src2\n\t"
10544             "FMUL   $src1\n\t"
10545             "FSTP_S $dst"  %}
10546   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10547   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10548               OpcReg_FPR(src1),
10549               Pop_Mem_FPR(dst) );
10550   ins_pipe( fpu_mem_reg_mem );
10551 %}
10552 //
10553 // This instruction does not round to 24-bits
10554 // Cisc-alternate to reg-reg multiply
10555 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10556   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10557   match(Set dst (MulF src1 (LoadF src2)));
10558 
10559   format %{ "FMUL   $dst,$src1,$src2" %}
10560   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10561   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10562               OpcReg_FPR(src1),
10563               Pop_Reg_FPR(dst) );
10564   ins_pipe( fpu_reg_reg_mem );
10565 %}
10566 
10567 // Spill to obtain 24-bit precision
10568 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10569   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10570   match(Set dst (MulF src1 src2));
10571 
10572   format %{ "FMUL   $dst,$src1,$src2" %}
10573   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10574   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10575               set_instruction_start,
10576               OpcP, RMopc_Mem(secondary,src1),
10577               Pop_Mem_FPR(dst) );
10578   ins_pipe( fpu_mem_mem_mem );
10579 %}
10580 
10581 // Spill to obtain 24-bit precision
10582 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10583   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10584   match(Set dst (MulF src con));
10585 
10586   format %{ "FLD    $src\n\t"
10587             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10588             "FSTP_S $dst"  %}
10589   ins_encode %{
10590     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10591     __ fmul_s($constantaddress($con));
10592     __ fstp_s(Address(rsp, $dst$$disp));
10593   %}
10594   ins_pipe(fpu_mem_reg_con);
10595 %}
10596 //
10597 // This instruction does not round to 24-bits
10598 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10599   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10600   match(Set dst (MulF src con));
10601 
10602   format %{ "FLD    $src\n\t"
10603             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10604             "FSTP   $dst"  %}
10605   ins_encode %{
10606     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10607     __ fmul_s($constantaddress($con));
10608     __ fstp_d($dst$$reg);
10609   %}
10610   ins_pipe(fpu_reg_reg_con);
10611 %}
10612 
10613 
10614 //
10615 // MACRO1 -- subsume unshared load into mulFPR
10616 // This instruction does not round to 24-bits
10617 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10618   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10619   match(Set dst (MulF (LoadF mem1) src));
10620 
10621   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10622             "FMUL   ST,$src\n\t"
10623             "FSTP   $dst" %}
10624   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10625   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10626               OpcReg_FPR(src),
10627               Pop_Reg_FPR(dst) );
10628   ins_pipe( fpu_reg_reg_mem );
10629 %}
10630 //
10631 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10632 // This instruction does not round to 24-bits
10633 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10634   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10635   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10636   ins_cost(95);
10637 
10638   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10639             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10640             "FADD   ST,$src2\n\t"
10641             "FSTP   $dst" %}
10642   opcode(0xD9); /* LoadF D9 /0 */
10643   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10644               FMul_ST_reg(src1),
10645               FAdd_ST_reg(src2),
10646               Pop_Reg_FPR(dst) );
10647   ins_pipe( fpu_reg_mem_reg_reg );
10648 %}
10649 
10650 // MACRO3 -- addFPR a mulFPR
10651 // This instruction does not round to 24-bits.  It is a '2-address'
10652 // instruction in that the result goes back to src2.  This eliminates
10653 // a move from the macro; possibly the register allocator will have
10654 // to add it back (and maybe not).
10655 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10656   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10657   match(Set src2 (AddF (MulF src0 src1) src2));
10658 
10659   format %{ "FLD    $src0     ===MACRO3===\n\t"
10660             "FMUL   ST,$src1\n\t"
10661             "FADDP  $src2,ST" %}
10662   opcode(0xD9); /* LoadF D9 /0 */
10663   ins_encode( Push_Reg_FPR(src0),
10664               FMul_ST_reg(src1),
10665               FAddP_reg_ST(src2) );
10666   ins_pipe( fpu_reg_reg_reg );
10667 %}
10668 
10669 // MACRO4 -- divFPR subFPR
10670 // This instruction does not round to 24-bits
10671 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10672   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10673   match(Set dst (DivF (SubF src2 src1) src3));
10674 
10675   format %{ "FLD    $src2   ===MACRO4===\n\t"
10676             "FSUB   ST,$src1\n\t"
10677             "FDIV   ST,$src3\n\t"
10678             "FSTP  $dst" %}
10679   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10680   ins_encode( Push_Reg_FPR(src2),
10681               subFPR_divFPR_encode(src1,src3),
10682               Pop_Reg_FPR(dst) );
10683   ins_pipe( fpu_reg_reg_reg_reg );
10684 %}
10685 
10686 // Spill to obtain 24-bit precision
10687 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10688   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10689   match(Set dst (DivF src1 src2));
10690 
10691   format %{ "FDIV   $dst,$src1,$src2" %}
10692   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10693   ins_encode( Push_Reg_FPR(src1),
10694               OpcReg_FPR(src2),
10695               Pop_Mem_FPR(dst) );
10696   ins_pipe( fpu_mem_reg_reg );
10697 %}
10698 //
10699 // This instruction does not round to 24-bits
10700 instruct divFPR_reg(regFPR dst, regFPR src) %{
10701   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10702   match(Set dst (DivF dst src));
10703 
10704   format %{ "FDIV   $dst,$src" %}
10705   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10706   ins_encode( Push_Reg_FPR(src),
10707               OpcP, RegOpc(dst) );
10708   ins_pipe( fpu_reg_reg );
10709 %}
10710 
10711 
10712 // Spill to obtain 24-bit precision
10713 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10714   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10715   match(Set dst (ModF src1 src2));
10716   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10717 
10718   format %{ "FMOD   $dst,$src1,$src2" %}
10719   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10720               emitModDPR(),
10721               Push_Result_Mod_DPR(src2),
10722               Pop_Mem_FPR(dst));
10723   ins_pipe( pipe_slow );
10724 %}
10725 //
10726 // This instruction does not round to 24-bits
10727 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10728   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10729   match(Set dst (ModF dst src));
10730   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10731 
10732   format %{ "FMOD   $dst,$src" %}
10733   ins_encode(Push_Reg_Mod_DPR(dst, src),
10734               emitModDPR(),
10735               Push_Result_Mod_DPR(src),
10736               Pop_Reg_FPR(dst));
10737   ins_pipe( pipe_slow );
10738 %}
10739 
10740 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10741   predicate(UseSSE>=1);
10742   match(Set dst (ModF src0 src1));
10743   effect(KILL rax, KILL cr);
10744   format %{ "SUB    ESP,4\t # FMOD\n"
10745           "\tMOVSS  [ESP+0],$src1\n"
10746           "\tFLD_S  [ESP+0]\n"
10747           "\tMOVSS  [ESP+0],$src0\n"
10748           "\tFLD_S  [ESP+0]\n"
10749      "loop:\tFPREM\n"
10750           "\tFWAIT\n"
10751           "\tFNSTSW AX\n"
10752           "\tSAHF\n"
10753           "\tJP     loop\n"
10754           "\tFSTP_S [ESP+0]\n"
10755           "\tMOVSS  $dst,[ESP+0]\n"
10756           "\tADD    ESP,4\n"
10757           "\tFSTP   ST0\t # Restore FPU Stack"
10758     %}
10759   ins_cost(250);
10760   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 
10765 //----------Arithmetic Conversion Instructions---------------------------------
10766 // The conversion operations are all Alpha sorted.  Please keep it that way!
10767 
10768 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10769   predicate(UseSSE==0);
10770   match(Set dst (RoundFloat src));
10771   ins_cost(125);
10772   format %{ "FST_S  $dst,$src\t# F-round" %}
10773   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10774   ins_pipe( fpu_mem_reg );
10775 %}
10776 
10777 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10778   predicate(UseSSE<=1);
10779   match(Set dst (RoundDouble src));
10780   ins_cost(125);
10781   format %{ "FST_D  $dst,$src\t# D-round" %}
10782   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10783   ins_pipe( fpu_mem_reg );
10784 %}
10785 
10786 // Force rounding to 24-bit precision and 8-bit exponent
10787 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10788   predicate(UseSSE==0);
10789   match(Set dst (ConvD2F src));
10790   format %{ "FST_S  $dst,$src\t# F-round" %}
10791   expand %{
10792     roundFloat_mem_reg(dst,src);
10793   %}
10794 %}
10795 
10796 // Force rounding to 24-bit precision and 8-bit exponent
10797 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10798   predicate(UseSSE==1);
10799   match(Set dst (ConvD2F src));
10800   effect( KILL cr );
10801   format %{ "SUB    ESP,4\n\t"
10802             "FST_S  [ESP],$src\t# F-round\n\t"
10803             "MOVSS  $dst,[ESP]\n\t"
10804             "ADD ESP,4" %}
10805   ins_encode %{
10806     __ subptr(rsp, 4);
10807     if ($src$$reg != FPR1L_enc) {
10808       __ fld_s($src$$reg-1);
10809       __ fstp_s(Address(rsp, 0));
10810     } else {
10811       __ fst_s(Address(rsp, 0));
10812     }
10813     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10814     __ addptr(rsp, 4);
10815   %}
10816   ins_pipe( pipe_slow );
10817 %}
10818 
10819 // Force rounding double precision to single precision
10820 instruct convD2F_reg(regF dst, regD src) %{
10821   predicate(UseSSE>=2);
10822   match(Set dst (ConvD2F src));
10823   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10824   ins_encode %{
10825     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10826   %}
10827   ins_pipe( pipe_slow );
10828 %}
10829 
10830 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10831   predicate(UseSSE==0);
10832   match(Set dst (ConvF2D src));
10833   format %{ "FST_S  $dst,$src\t# D-round" %}
10834   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10835   ins_pipe( fpu_reg_reg );
10836 %}
10837 
10838 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10839   predicate(UseSSE==1);
10840   match(Set dst (ConvF2D src));
10841   format %{ "FST_D  $dst,$src\t# D-round" %}
10842   expand %{
10843     roundDouble_mem_reg(dst,src);
10844   %}
10845 %}
10846 
10847 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10848   predicate(UseSSE==1);
10849   match(Set dst (ConvF2D src));
10850   effect( KILL cr );
10851   format %{ "SUB    ESP,4\n\t"
10852             "MOVSS  [ESP],$src\n\t"
10853             "FLD_S  [ESP]\n\t"
10854             "ADD    ESP,4\n\t"
10855             "FSTP   $dst\t# D-round" %}
10856   ins_encode %{
10857     __ subptr(rsp, 4);
10858     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10859     __ fld_s(Address(rsp, 0));
10860     __ addptr(rsp, 4);
10861     __ fstp_d($dst$$reg);
10862   %}
10863   ins_pipe( pipe_slow );
10864 %}
10865 
10866 instruct convF2D_reg(regD dst, regF src) %{
10867   predicate(UseSSE>=2);
10868   match(Set dst (ConvF2D src));
10869   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10870   ins_encode %{
10871     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10872   %}
10873   ins_pipe( pipe_slow );
10874 %}
10875 
10876 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10877 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10878   predicate(UseSSE<=1);
10879   match(Set dst (ConvD2I src));
10880   effect( KILL tmp, KILL cr );
10881   format %{ "FLD    $src\t# Convert double to int \n\t"
10882             "FLDCW  trunc mode\n\t"
10883             "SUB    ESP,4\n\t"
10884             "FISTp  [ESP + #0]\n\t"
10885             "FLDCW  std/24-bit mode\n\t"
10886             "POP    EAX\n\t"
10887             "CMP    EAX,0x80000000\n\t"
10888             "JNE,s  fast\n\t"
10889             "FLD_D  $src\n\t"
10890             "CALL   d2i_wrapper\n"
10891       "fast:" %}
10892   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10893   ins_pipe( pipe_slow );
10894 %}
10895 
10896 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10897 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10898   predicate(UseSSE>=2);
10899   match(Set dst (ConvD2I src));
10900   effect( KILL tmp, KILL cr );
10901   format %{ "CVTTSD2SI $dst, $src\n\t"
10902             "CMP    $dst,0x80000000\n\t"
10903             "JNE,s  fast\n\t"
10904             "SUB    ESP, 8\n\t"
10905             "MOVSD  [ESP], $src\n\t"
10906             "FLD_D  [ESP]\n\t"
10907             "ADD    ESP, 8\n\t"
10908             "CALL   d2i_wrapper\n"
10909       "fast:" %}
10910   ins_encode %{
10911     Label fast;
10912     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10913     __ cmpl($dst$$Register, 0x80000000);
10914     __ jccb(Assembler::notEqual, fast);
10915     __ subptr(rsp, 8);
10916     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10917     __ fld_d(Address(rsp, 0));
10918     __ addptr(rsp, 8);
10919     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10920     __ bind(fast);
10921   %}
10922   ins_pipe( pipe_slow );
10923 %}
10924 
10925 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10926   predicate(UseSSE<=1);
10927   match(Set dst (ConvD2L src));
10928   effect( KILL cr );
10929   format %{ "FLD    $src\t# Convert double to long\n\t"
10930             "FLDCW  trunc mode\n\t"
10931             "SUB    ESP,8\n\t"
10932             "FISTp  [ESP + #0]\n\t"
10933             "FLDCW  std/24-bit mode\n\t"
10934             "POP    EAX\n\t"
10935             "POP    EDX\n\t"
10936             "CMP    EDX,0x80000000\n\t"
10937             "JNE,s  fast\n\t"
10938             "TEST   EAX,EAX\n\t"
10939             "JNE,s  fast\n\t"
10940             "FLD    $src\n\t"
10941             "CALL   d2l_wrapper\n"
10942       "fast:" %}
10943   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10944   ins_pipe( pipe_slow );
10945 %}
10946 
10947 // XMM lacks a float/double->long conversion, so use the old FPU stack.
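// (In 32-bit mode CVTTSD2SI/CVTTSS2SI can only produce a 32-bit result, so the
// long conversions spill to memory and go through the x87 FISTP path.)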
10948 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10949   predicate (UseSSE>=2);
10950   match(Set dst (ConvD2L src));
10951   effect( KILL cr );
10952   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10953             "MOVSD  [ESP],$src\n\t"
10954             "FLD_D  [ESP]\n\t"
10955             "FLDCW  trunc mode\n\t"
10956             "FISTp  [ESP + #0]\n\t"
10957             "FLDCW  std/24-bit mode\n\t"
10958             "POP    EAX\n\t"
10959             "POP    EDX\n\t"
10960             "CMP    EDX,0x80000000\n\t"
10961             "JNE,s  fast\n\t"
10962             "TEST   EAX,EAX\n\t"
10963             "JNE,s  fast\n\t"
10964             "SUB    ESP,8\n\t"
10965             "MOVSD  [ESP],$src\n\t"
10966             "FLD_D  [ESP]\n\t"
10967             "ADD    ESP,8\n\t"
10968             "CALL   d2l_wrapper\n"
10969       "fast:" %}
10970   ins_encode %{
10971     Label fast;
10972     __ subptr(rsp, 8);
10973     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10974     __ fld_d(Address(rsp, 0));
10975     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10976     __ fistp_d(Address(rsp, 0));
10977     // Restore the rounding mode, mask the exception
10978     if (Compile::current()->in_24_bit_fp_mode()) {
10979       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10980     } else {
10981       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10982     }
10983     // Load the converted long, adjust CPU stack
10984     __ pop(rax);
10985     __ pop(rdx);
10986     __ cmpl(rdx, 0x80000000);
10987     __ jccb(Assembler::notEqual, fast);
10988     __ testl(rax, rax);
10989     __ jccb(Assembler::notEqual, fast);
10990     __ subptr(rsp, 8);
10991     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10992     __ fld_d(Address(rsp, 0));
10993     __ addptr(rsp, 8);
10994     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10995     __ bind(fast);
10996   %}
10997   ins_pipe( pipe_slow );
10998 %}
10999 
11000 // Convert a double to an int.  Java semantics require we do complex
11001 // manglations in the corner cases.  So we set the rounding mode to
11002 // 'zero', store the darned double down as an int, and reset the
11003 // rounding mode to 'nearest'.  The hardware stores a flag value down
11004 // if we would overflow or converted a NAN; we check for this and
11005 // go the slow path if needed.
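// (When the conversion is invalid, i.e. on overflow or NaN, FIST/FISTP store the
// x87 "integer indefinite" pattern, 0x80000000 for a 32-bit store; that is the
// flag value the encodings below compare against before calling the d2i/d2l
// wrapper stubs.)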
11006 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11007   predicate(UseSSE==0);
11008   match(Set dst (ConvF2I src));
11009   effect( KILL tmp, KILL cr );
11010   format %{ "FLD    $src\t# Convert float to int \n\t"
11011             "FLDCW  trunc mode\n\t"
11012             "SUB    ESP,4\n\t"
11013             "FISTp  [ESP + #0]\n\t"
11014             "FLDCW  std/24-bit mode\n\t"
11015             "POP    EAX\n\t"
11016             "CMP    EAX,0x80000000\n\t"
11017             "JNE,s  fast\n\t"
11018             "FLD    $src\n\t"
11019             "CALL   d2i_wrapper\n"
11020       "fast:" %}
11021   // DPR2I_encoding works for FPR2I
11022   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 // Convert a float in xmm to an int reg.
11027 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11028   predicate(UseSSE>=1);
11029   match(Set dst (ConvF2I src));
11030   effect( KILL tmp, KILL cr );
11031   format %{ "CVTTSS2SI $dst, $src\n\t"
11032             "CMP    $dst,0x80000000\n\t"
11033             "JNE,s  fast\n\t"
11034             "SUB    ESP, 4\n\t"
11035             "MOVSS  [ESP], $src\n\t"
11036             "FLD    [ESP]\n\t"
11037             "ADD    ESP, 4\n\t"
11038             "CALL   d2i_wrapper\n"
11039       "fast:" %}
11040   ins_encode %{
11041     Label fast;
11042     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11043     __ cmpl($dst$$Register, 0x80000000);
11044     __ jccb(Assembler::notEqual, fast);
11045     __ subptr(rsp, 4);
11046     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11047     __ fld_s(Address(rsp, 0));
11048     __ addptr(rsp, 4);
11049     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11050     __ bind(fast);
11051   %}
11052   ins_pipe( pipe_slow );
11053 %}
11054 
11055 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11056   predicate(UseSSE==0);
11057   match(Set dst (ConvF2L src));
11058   effect( KILL cr );
11059   format %{ "FLD    $src\t# Convert float to long\n\t"
11060             "FLDCW  trunc mode\n\t"
11061             "SUB    ESP,8\n\t"
11062             "FISTp  [ESP + #0]\n\t"
11063             "FLDCW  std/24-bit mode\n\t"
11064             "POP    EAX\n\t"
11065             "POP    EDX\n\t"
11066             "CMP    EDX,0x80000000\n\t"
11067             "JNE,s  fast\n\t"
11068             "TEST   EAX,EAX\n\t"
11069             "JNE,s  fast\n\t"
11070             "FLD    $src\n\t"
11071             "CALL   d2l_wrapper\n"
11072       "fast:" %}
11073   // DPR2L_encoding works for FPR2L
11074   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11075   ins_pipe( pipe_slow );
11076 %}
11077 
11078 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11079 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11080   predicate (UseSSE>=1);
11081   match(Set dst (ConvF2L src));
11082   effect( KILL cr );
11083   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11084             "MOVSS  [ESP],$src\n\t"
11085             "FLD_S  [ESP]\n\t"
11086             "FLDCW  trunc mode\n\t"
11087             "FISTp  [ESP + #0]\n\t"
11088             "FLDCW  std/24-bit mode\n\t"
11089             "POP    EAX\n\t"
11090             "POP    EDX\n\t"
11091             "CMP    EDX,0x80000000\n\t"
11092             "JNE,s  fast\n\t"
11093             "TEST   EAX,EAX\n\t"
11094             "JNE,s  fast\n\t"
11095             "SUB    ESP,4\t# Convert float to long\n\t"
11096             "MOVSS  [ESP],$src\n\t"
11097             "FLD_S  [ESP]\n\t"
11098             "ADD    ESP,4\n\t"
11099             "CALL   d2l_wrapper\n"
11100       "fast:" %}
11101   ins_encode %{
11102     Label fast;
11103     __ subptr(rsp, 8);
11104     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11105     __ fld_s(Address(rsp, 0));
11106     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11107     __ fistp_d(Address(rsp, 0));
11108     // Restore the rounding mode, mask the exception
11109     if (Compile::current()->in_24_bit_fp_mode()) {
11110       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11111     } else {
11112       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11113     }
11114     // Load the converted long, adjust CPU stack
11115     __ pop(rax);
11116     __ pop(rdx);
11117     __ cmpl(rdx, 0x80000000);
11118     __ jccb(Assembler::notEqual, fast);
11119     __ testl(rax, rax);
11120     __ jccb(Assembler::notEqual, fast);
11121     __ subptr(rsp, 4);
11122     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11123     __ fld_s(Address(rsp, 0));
11124     __ addptr(rsp, 4);
11125     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11126     __ bind(fast);
11127   %}
11128   ins_pipe( pipe_slow );
11129 %}
11130 
11131 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11132   predicate( UseSSE<=1 );
11133   match(Set dst (ConvI2D src));
11134   format %{ "FILD   $src\n\t"
11135             "FSTP   $dst" %}
11136   opcode(0xDB, 0x0);  /* DB /0 */
11137   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11138   ins_pipe( fpu_reg_mem );
11139 %}
11140 
11141 instruct convI2D_reg(regD dst, rRegI src) %{
11142   predicate( UseSSE>=2 && !UseXmmI2D );
11143   match(Set dst (ConvI2D src));
11144   format %{ "CVTSI2SD $dst,$src" %}
11145   ins_encode %{
11146     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11147   %}
11148   ins_pipe( pipe_slow );
11149 %}
11150 
11151 instruct convI2D_mem(regD dst, memory mem) %{
11152   predicate( UseSSE>=2 );
11153   match(Set dst (ConvI2D (LoadI mem)));
11154   format %{ "CVTSI2SD $dst,$mem" %}
11155   ins_encode %{
11156     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11157   %}
11158   ins_pipe( pipe_slow );
11159 %}
11160 
11161 instruct convXI2D_reg(regD dst, rRegI src)
11162 %{
11163   predicate( UseSSE>=2 && UseXmmI2D );
11164   match(Set dst (ConvI2D src));
11165 
11166   format %{ "MOVD  $dst,$src\n\t"
11167             "CVTDQ2PD $dst,$dst\t# i2d" %}
11168   ins_encode %{
11169     __ movdl($dst$$XMMRegister, $src$$Register);
11170     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11171   %}
11172   ins_pipe(pipe_slow); // XXX
11173 %}
11174 
11175 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11176   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11177   match(Set dst (ConvI2D (LoadI mem)));
11178   format %{ "FILD   $mem\n\t"
11179             "FSTP   $dst" %}
11180   opcode(0xDB);      /* DB /0 */
11181   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11182               Pop_Reg_DPR(dst));
11183   ins_pipe( fpu_reg_mem );
11184 %}
11185 
11186 // Convert a byte to a float; no rounding step needed.
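// (Informally: a value masked to 0..255 is exactly representable even in
// 24-bit single precision, so the store/reload rounding pass used by the
// 24-bit-mode variants below can be skipped.)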
11187 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11188   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11189   match(Set dst (ConvI2F src));
11190   format %{ "FILD   $src\n\t"
11191             "FSTP   $dst" %}
11192 
11193   opcode(0xDB, 0x0);  /* DB /0 */
11194   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11195   ins_pipe( fpu_reg_mem );
11196 %}
11197 
11198 // In 24-bit mode, force exponent rounding by storing back out
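// (Informally: an int with magnitude above 2^24 may not be exactly
// representable as a single-precision float; FILD produces the full-width
// value on the x87 stack, so storing through a 32-bit stack slot performs
// the rounding that 24-bit mode requires.)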
11199 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11200   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11201   match(Set dst (ConvI2F src));
11202   ins_cost(200);
11203   format %{ "FILD   $src\n\t"
11204             "FSTP_S $dst" %}
11205   opcode(0xDB, 0x0);  /* DB /0 */
11206   ins_encode( Push_Mem_I(src),
11207               Pop_Mem_FPR(dst));
11208   ins_pipe( fpu_mem_mem );
11209 %}
11210 
11211 // In 24-bit mode, force exponent rounding by storing back out
11212 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11213   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11214   match(Set dst (ConvI2F (LoadI mem)));
11215   ins_cost(200);
11216   format %{ "FILD   $mem\n\t"
11217             "FSTP_S $dst" %}
11218   opcode(0xDB);  /* DB /0 */
11219   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11220               Pop_Mem_FPR(dst));
11221   ins_pipe( fpu_mem_mem );
11222 %}
11223 
11224 // This instruction does not round to 24-bits
11225 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11226   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11227   match(Set dst (ConvI2F src));
11228   format %{ "FILD   $src\n\t"
11229             "FSTP   $dst" %}
11230   opcode(0xDB, 0x0);  /* DB /0 */
11231   ins_encode( Push_Mem_I(src),
11232               Pop_Reg_FPR(dst));
11233   ins_pipe( fpu_reg_mem );
11234 %}
11235 
11236 // This instruction does not round to 24-bits
11237 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11238   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11239   match(Set dst (ConvI2F (LoadI mem)));
11240   format %{ "FILD   $mem\n\t"
11241             "FSTP   $dst" %}
11242   opcode(0xDB);      /* DB /0 */
11243   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11244               Pop_Reg_FPR(dst));
11245   ins_pipe( fpu_reg_mem );
11246 %}
11247 
11248 // Convert an int to a float in xmm; no rounding step needed.
11249 instruct convI2F_reg(regF dst, rRegI src) %{
11250   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11251   match(Set dst (ConvI2F src));
11252   format %{ "CVTSI2SS $dst, $src" %}
11253   ins_encode %{
11254     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11255   %}
11256   ins_pipe( pipe_slow );
11257 %}
11258 
11259 instruct convXI2F_reg(regF dst, rRegI src)
11260 %{
11261   predicate( UseSSE>=2 && UseXmmI2F );
11262   match(Set dst (ConvI2F src));
11263 
11264   format %{ "MOVD  $dst,$src\n\t"
11265             "CVTDQ2PS $dst,$dst\t# i2f" %}
11266   ins_encode %{
11267     __ movdl($dst$$XMMRegister, $src$$Register);
11268     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11269   %}
11270   ins_pipe(pipe_slow); // XXX
11271 %}
11272 
11273 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11274   match(Set dst (ConvI2L src));
11275   effect(KILL cr);
11276   ins_cost(375);
11277   format %{ "MOV    $dst.lo,$src\n\t"
11278             "MOV    $dst.hi,$src\n\t"
11279             "SAR    $dst.hi,31" %}
11280   ins_encode(convert_int_long(dst,src));
11281   ins_pipe( ialu_reg_reg_long );
11282 %}
11283 
11284 // Zero-extend convert int to long
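// (Roughly: dst.lo = src, dst.hi = 0, i.e. the Java idiom i & 0xFFFFFFFFL;
// e.g. src = -1 produces 0x00000000FFFFFFFF, which is 4294967295L.)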
11285 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11286   match(Set dst (AndL (ConvI2L src) mask) );
11287   effect( KILL flags );
11288   ins_cost(250);
11289   format %{ "MOV    $dst.lo,$src\n\t"
11290             "XOR    $dst.hi,$dst.hi" %}
11291   opcode(0x33); // XOR
11292   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11293   ins_pipe( ialu_reg_reg_long );
11294 %}
11295 
11296 // Zero-extend long
11297 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11298   match(Set dst (AndL src mask) );
11299   effect( KILL flags );
11300   ins_cost(250);
11301   format %{ "MOV    $dst.lo,$src.lo\n\t"
11302             "XOR    $dst.hi,$dst.hi" %}
11303   opcode(0x33); // XOR
11304   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11305   ins_pipe( ialu_reg_reg_long );
11306 %}
11307 
11308 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11309   predicate (UseSSE<=1);
11310   match(Set dst (ConvL2D src));
11311   effect( KILL cr );
11312   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11313             "PUSH   $src.lo\n\t"
11314             "FILD   ST,[ESP + #0]\n\t"
11315             "ADD    ESP,8\n\t"
11316             "FSTP_D $dst\t# D-round" %}
11317   opcode(0xDF, 0x5);  /* DF /5 */
11318   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11319   ins_pipe( pipe_slow );
11320 %}
11321 
11322 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11323   predicate (UseSSE>=2);
11324   match(Set dst (ConvL2D src));
11325   effect( KILL cr );
11326   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11327             "PUSH   $src.lo\n\t"
11328             "FILD_D [ESP]\n\t"
11329             "FSTP_D [ESP]\n\t"
11330             "MOVSD  $dst,[ESP]\n\t"
11331             "ADD    ESP,8" %}
11332   opcode(0xDF, 0x5);  /* DF /5 */
11333   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11334   ins_pipe( pipe_slow );
11335 %}
11336 
11337 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11338   predicate (UseSSE>=1);
11339   match(Set dst (ConvL2F src));
11340   effect( KILL cr );
11341   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11342             "PUSH   $src.lo\n\t"
11343             "FILD_D [ESP]\n\t"
11344             "FSTP_S [ESP]\n\t"
11345             "MOVSS  $dst,[ESP]\n\t"
11346             "ADD    ESP,8" %}
11347   opcode(0xDF, 0x5);  /* DF /5 */
11348   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11349   ins_pipe( pipe_slow );
11350 %}
11351 
11352 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11353   match(Set dst (ConvL2F src));
11354   effect( KILL cr );
11355   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11356             "PUSH   $src.lo\n\t"
11357             "FILD   ST,[ESP + #0]\n\t"
11358             "ADD    ESP,8\n\t"
11359             "FSTP_S $dst\t# F-round" %}
11360   opcode(0xDF, 0x5);  /* DF /5 */
11361   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11362   ins_pipe( pipe_slow );
11363 %}
11364 
11365 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11366   match(Set dst (ConvL2I src));
11367   effect( DEF dst, USE src );
11368   format %{ "MOV    $dst,$src.lo" %}
11369   ins_encode(enc_CopyL_Lo(dst,src));
11370   ins_pipe( ialu_reg_reg );
11371 %}
11372 
11373 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11374   match(Set dst (MoveF2I src));
11375   effect( DEF dst, USE src );
11376   ins_cost(100);
11377   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11378   ins_encode %{
11379     __ movl($dst$$Register, Address(rsp, $src$$disp));
11380   %}
11381   ins_pipe( ialu_reg_mem );
11382 %}
11383 
11384 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11385   predicate(UseSSE==0);
11386   match(Set dst (MoveF2I src));
11387   effect( DEF dst, USE src );
11388 
11389   ins_cost(125);
11390   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11391   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11392   ins_pipe( fpu_mem_reg );
11393 %}
11394 
11395 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11396   predicate(UseSSE>=1);
11397   match(Set dst (MoveF2I src));
11398   effect( DEF dst, USE src );
11399 
11400   ins_cost(95);
11401   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11402   ins_encode %{
11403     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11404   %}
11405   ins_pipe( pipe_slow );
11406 %}
11407 
11408 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11409   predicate(UseSSE>=2);
11410   match(Set dst (MoveF2I src));
11411   effect( DEF dst, USE src );
11412   ins_cost(85);
11413   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11414   ins_encode %{
11415     __ movdl($dst$$Register, $src$$XMMRegister);
11416   %}
11417   ins_pipe( pipe_slow );
11418 %}
11419 
11420 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11421   match(Set dst (MoveI2F src));
11422   effect( DEF dst, USE src );
11423 
11424   ins_cost(100);
11425   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11426   ins_encode %{
11427     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11428   %}
11429   ins_pipe( ialu_mem_reg );
11430 %}
11431 
11432 
11433 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11434   predicate(UseSSE==0);
11435   match(Set dst (MoveI2F src));
11436   effect(DEF dst, USE src);
11437 
11438   ins_cost(125);
11439   format %{ "FLD_S  $src\n\t"
11440             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11441   opcode(0xD9);               /* D9 /0, FLD m32real */
11442   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11443               Pop_Reg_FPR(dst) );
11444   ins_pipe( fpu_reg_mem );
11445 %}
11446 
11447 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11448   predicate(UseSSE>=1);
11449   match(Set dst (MoveI2F src));
11450   effect( DEF dst, USE src );
11451 
11452   ins_cost(95);
11453   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11454   ins_encode %{
11455     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11461   predicate(UseSSE>=2);
11462   match(Set dst (MoveI2F src));
11463   effect( DEF dst, USE src );
11464 
11465   ins_cost(85);
11466   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11467   ins_encode %{
11468     __ movdl($dst$$XMMRegister, $src$$Register);
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11474   match(Set dst (MoveD2L src));
11475   effect(DEF dst, USE src);
11476 
11477   ins_cost(250);
11478   format %{ "MOV    $dst.lo,$src\n\t"
11479             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11480   opcode(0x8B, 0x8B);
11481   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11482   ins_pipe( ialu_mem_long_reg );
11483 %}
11484 
11485 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11486   predicate(UseSSE<=1);
11487   match(Set dst (MoveD2L src));
11488   effect(DEF dst, USE src);
11489 
11490   ins_cost(125);
11491   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11492   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11493   ins_pipe( fpu_mem_reg );
11494 %}
11495 
11496 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11497   predicate(UseSSE>=2);
11498   match(Set dst (MoveD2L src));
11499   effect(DEF dst, USE src);
11500   ins_cost(95);
11501   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11502   ins_encode %{
11503     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11504   %}
11505   ins_pipe( pipe_slow );
11506 %}
11507 
11508 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11509   predicate(UseSSE>=2);
11510   match(Set dst (MoveD2L src));
11511   effect(DEF dst, USE src, TEMP tmp);
11512   ins_cost(85);
11513   format %{ "MOVD   $dst.lo,$src\n\t"
11514             "PSHUFLW $tmp,$src,0x4E\n\t"
11515             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11516   ins_encode %{
11517     __ movdl($dst$$Register, $src$$XMMRegister);
11518     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11519     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11520   %}
11521   ins_pipe( pipe_slow );
11522 %}
11523 
11524 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11525   match(Set dst (MoveL2D src));
11526   effect(DEF dst, USE src);
11527 
11528   ins_cost(200);
11529   format %{ "MOV    $dst,$src.lo\n\t"
11530             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11531   opcode(0x89, 0x89);
11532   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11533   ins_pipe( ialu_mem_long_reg );
11534 %}
11535 
11536 
11537 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11538   predicate(UseSSE<=1);
11539   match(Set dst (MoveL2D src));
11540   effect(DEF dst, USE src);
11541   ins_cost(125);
11542 
11543   format %{ "FLD_D  $src\n\t"
11544             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11545   opcode(0xDD);               /* DD /0, FLD m64real */
11546   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11547               Pop_Reg_DPR(dst) );
11548   ins_pipe( fpu_reg_mem );
11549 %}
11550 
11551 
11552 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11553   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11554   match(Set dst (MoveL2D src));
11555   effect(DEF dst, USE src);
11556 
11557   ins_cost(95);
11558   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11559   ins_encode %{
11560     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11561   %}
11562   ins_pipe( pipe_slow );
11563 %}
11564 
11565 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11566   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11567   match(Set dst (MoveL2D src));
11568   effect(DEF dst, USE src);
11569 
11570   ins_cost(95);
11571   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11572   ins_encode %{
11573     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11574   %}
11575   ins_pipe( pipe_slow );
11576 %}
11577 
11578 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11579   predicate(UseSSE>=2);
11580   match(Set dst (MoveL2D src));
11581   effect(TEMP dst, USE src, TEMP tmp);
11582   ins_cost(85);
11583   format %{ "MOVD   $dst,$src.lo\n\t"
11584             "MOVD   $tmp,$src.hi\n\t"
11585             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11586   ins_encode %{
11587     __ movdl($dst$$XMMRegister, $src$$Register);
11588     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11589     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11590   %}
11591   ins_pipe( pipe_slow );
11592 %}
11593 
11594 
11595 // =======================================================================
11596 // fast clearing of an array
11597 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11598   predicate(!((ClearArrayNode*)n)->is_large());
11599   match(Set dummy (ClearArray cnt base));
11600   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11601 
11602   format %{ $$template
11603     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11604     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11605     $$emit$$"JG     LARGE\n\t"
11606     $$emit$$"SHL    ECX, 1\n\t"
11607     $$emit$$"DEC    ECX\n\t"
11608     $$emit$$"JS     DONE\t# Zero length\n\t"
11609     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11610     $$emit$$"DEC    ECX\n\t"
11611     $$emit$$"JGE    LOOP\n\t"
11612     $$emit$$"JMP    DONE\n\t"
11613     $$emit$$"# LARGE:\n\t"
11614     if (UseFastStosb) {
11615        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11616        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11617     } else if (UseXMMForObjInit) {
11618        $$emit$$"MOV     RDI,RAX\n\t"
11619        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11620        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11621        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11622        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11623        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11624        $$emit$$"ADD     0x40,RAX\n\t"
11625        $$emit$$"# L_zero_64_bytes:\n\t"
11626        $$emit$$"SUB     0x8,RCX\n\t"
11627        $$emit$$"JGE     L_loop\n\t"
11628        $$emit$$"ADD     0x4,RCX\n\t"
11629        $$emit$$"JL      L_tail\n\t"
11630        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11631        $$emit$$"ADD     0x20,RAX\n\t"
11632        $$emit$$"SUB     0x4,RCX\n\t"
11633        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11634        $$emit$$"ADD     0x4,RCX\n\t"
11635        $$emit$$"JLE     L_end\n\t"
11636        $$emit$$"DEC     RCX\n\t"
11637        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11638        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11639        $$emit$$"ADD     0x8,RAX\n\t"
11640        $$emit$$"DEC     RCX\n\t"
11641        $$emit$$"JGE     L_sloop\n\t"
11642        $$emit$$"# L_end:\n\t"
11643     } else {
11644        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11645        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11646     }
11647     $$emit$$"# DONE"
11648   %}
11649   ins_encode %{
11650     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11651                  $tmp$$XMMRegister, false);
11652   %}
11653   ins_pipe( pipe_slow );
11654 %}
11655 
11656 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11657   predicate(((ClearArrayNode*)n)->is_large());
11658   match(Set dummy (ClearArray cnt base));
11659   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11660   format %{ $$template
11661     if (UseFastStosb) {
11662        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11663        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11664        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11665     } else if (UseXMMForObjInit) {
11666        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11667        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11668        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11669        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11670        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11671        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11672        $$emit$$"ADD     0x40,RAX\n\t"
11673        $$emit$$"# L_zero_64_bytes:\n\t"
11674        $$emit$$"SUB     0x8,RCX\n\t"
11675        $$emit$$"JGE     L_loop\n\t"
11676        $$emit$$"ADD     0x4,RCX\n\t"
11677        $$emit$$"JL      L_tail\n\t"
11678        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11679        $$emit$$"ADD     0x20,RAX\n\t"
11680        $$emit$$"SUB     0x4,RCX\n\t"
11681        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11682        $$emit$$"ADD     0x4,RCX\n\t"
11683        $$emit$$"JLE     L_end\n\t"
11684        $$emit$$"DEC     RCX\n\t"
11685        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11686        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11687        $$emit$$"ADD     0x8,RAX\n\t"
11688        $$emit$$"DEC     RCX\n\t"
11689        $$emit$$"JGE     L_sloop\n\t"
11690        $$emit$$"# L_end:\n\t"
11691     } else {
11692        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11693        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11694        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11695     }
11696     $$emit$$"# DONE"
11697   %}
11698   ins_encode %{
11699     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11700                  $tmp$$XMMRegister, true);
11701   %}
11702   ins_pipe( pipe_slow );
11703 %}
11704 
11705 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11706                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11707   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11708   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11709   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11710 
11711   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11712   ins_encode %{
11713     __ string_compare($str1$$Register, $str2$$Register,
11714                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11715                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11716   %}
11717   ins_pipe( pipe_slow );
11718 %}
11719 
11720 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11721                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11722   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11723   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11724   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11725 
11726   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11727   ins_encode %{
11728     __ string_compare($str1$$Register, $str2$$Register,
11729                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11730                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11731   %}
11732   ins_pipe( pipe_slow );
11733 %}
11734 
11735 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11736                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11737   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11738   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11739   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11740 
11741   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11742   ins_encode %{
11743     __ string_compare($str1$$Register, $str2$$Register,
11744                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11745                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11746   %}
11747   ins_pipe( pipe_slow );
11748 %}
11749 
11750 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11751                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11752   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11753   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11754   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11755 
11756   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11757   ins_encode %{
11758     __ string_compare($str2$$Register, $str1$$Register,
11759                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11760                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11761   %}
11762   ins_pipe( pipe_slow );
11763 %}
11764 
11765 // fast string equals
11766 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11767                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11768   match(Set result (StrEquals (Binary str1 str2) cnt));
11769   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11770 
11771   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11772   ins_encode %{
11773     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11774                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11775                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11776   %}
11777 
11778   ins_pipe( pipe_slow );
11779 %}
11780 
11781 // fast search of substring with known size.
11782 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11783                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11784   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11785   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11786   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11787 
11788   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11789   ins_encode %{
11790     int icnt2 = (int)$int_cnt2$$constant;
11791     if (icnt2 >= 16) {
11792       // IndexOf for constant substrings with size >= 16 elements
11793       // which don't need to be loaded through stack.
11794       __ string_indexofC8($str1$$Register, $str2$$Register,
11795                           $cnt1$$Register, $cnt2$$Register,
11796                           icnt2, $result$$Register,
11797                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11798     } else {
11799       // Small strings are loaded through stack if they cross page boundary.
11800       __ string_indexof($str1$$Register, $str2$$Register,
11801                         $cnt1$$Register, $cnt2$$Register,
11802                         icnt2, $result$$Register,
11803                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11804     }
11805   %}
11806   ins_pipe( pipe_slow );
11807 %}
11808 
11809 // fast search of substring with known size.
11810 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11811                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11812   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11813   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11814   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11815 
11816   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11817   ins_encode %{
11818     int icnt2 = (int)$int_cnt2$$constant;
11819     if (icnt2 >= 8) {
11820       // IndexOf for constant substrings with size >= 8 elements
11821       // which don't need to be loaded through stack.
11822       __ string_indexofC8($str1$$Register, $str2$$Register,
11823                           $cnt1$$Register, $cnt2$$Register,
11824                           icnt2, $result$$Register,
11825                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11826     } else {
11827       // Small strings are loaded through stack if they cross page boundary.
11828       __ string_indexof($str1$$Register, $str2$$Register,
11829                         $cnt1$$Register, $cnt2$$Register,
11830                         icnt2, $result$$Register,
11831                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11832     }
11833   %}
11834   ins_pipe( pipe_slow );
11835 %}
11836 
11837 // fast search of substring with known size.
11838 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11839                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11840   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11841   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11842   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11843 
11844   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11845   ins_encode %{
11846     int icnt2 = (int)$int_cnt2$$constant;
11847     if (icnt2 >= 8) {
11848       // IndexOf for constant substrings with size >= 8 elements
11849       // which don't need to be loaded through stack.
11850       __ string_indexofC8($str1$$Register, $str2$$Register,
11851                           $cnt1$$Register, $cnt2$$Register,
11852                           icnt2, $result$$Register,
11853                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11854     } else {
11855       // Small strings are loaded through stack if they cross page boundary.
11856       __ string_indexof($str1$$Register, $str2$$Register,
11857                         $cnt1$$Register, $cnt2$$Register,
11858                         icnt2, $result$$Register,
11859                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11860     }
11861   %}
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11866                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11867   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11868   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11869   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11870 
11871   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11872   ins_encode %{
11873     __ string_indexof($str1$$Register, $str2$$Register,
11874                       $cnt1$$Register, $cnt2$$Register,
11875                       (-1), $result$$Register,
11876                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11877   %}
11878   ins_pipe( pipe_slow );
11879 %}
11880 
11881 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11882                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11883   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11884   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11885   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11886 
11887   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11888   ins_encode %{
11889     __ string_indexof($str1$$Register, $str2$$Register,
11890                       $cnt1$$Register, $cnt2$$Register,
11891                       (-1), $result$$Register,
11892                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11893   %}
11894   ins_pipe( pipe_slow );
11895 %}
11896 
11897 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11898                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11899   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11900   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11901   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11902 
11903   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11904   ins_encode %{
11905     __ string_indexof($str1$$Register, $str2$$Register,
11906                       $cnt1$$Register, $cnt2$$Register,
11907                       (-1), $result$$Register,
11908                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11909   %}
11910   ins_pipe( pipe_slow );
11911 %}
11912 
11913 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11914                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11915   predicate(UseSSE42Intrinsics);
11916   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11917   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11918   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11919   ins_encode %{
11920     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11921                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11922   %}
11923   ins_pipe( pipe_slow );
11924 %}
11925 
11926 // fast array equals
11927 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11928                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11929 %{
11930   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11931   match(Set result (AryEq ary1 ary2));
11932   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11933   //ins_cost(300);
11934 
11935   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11936   ins_encode %{
11937     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11938                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11939                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11940   %}
11941   ins_pipe( pipe_slow );
11942 %}
11943 
11944 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11945                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11946 %{
11947   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11948   match(Set result (AryEq ary1 ary2));
11949   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11950   //ins_cost(300);
11951 
11952   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11953   ins_encode %{
11954     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11955                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11956                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11957   %}
11958   ins_pipe( pipe_slow );
11959 %}
11960 
11961 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11962                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11963 %{
11964   match(Set result (HasNegatives ary1 len));
11965   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11966 
11967   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11968   ins_encode %{
11969     __ has_negatives($ary1$$Register, $len$$Register,
11970                      $result$$Register, $tmp3$$Register,
11971                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11972   %}
11973   ins_pipe( pipe_slow );
11974 %}
11975 
11976 // fast char[] to byte[] compression
11977 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11978                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11979   match(Set result (StrCompressedCopy src (Binary dst len)));
11980   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11981 
11982   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11983   ins_encode %{
11984     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11985                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11986                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11987   %}
11988   ins_pipe( pipe_slow );
11989 %}
11990 
11991 // fast byte[] to char[] inflation
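// (Inflation here means each Latin-1 byte is zero-extended to a 16-bit
// char, i.e. roughly dst[i] = (char)(src[i] & 0xff) for i < len.)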
11992 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11993                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11994   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11995   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11996 
11997   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11998   ins_encode %{
11999     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12000                           $tmp1$$XMMRegister, $tmp2$$Register);
12001   %}
12002   ins_pipe( pipe_slow );
12003 %}
12004 
12005 // encode char[] to byte[] in ISO_8859_1
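// (Chars above 0xFF cannot be encoded; the copy stops at the first such
// char, and $result is, roughly, the number of chars that were encoded --
// see MacroAssembler::encode_iso_array for the exact contract.)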
12006 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12007                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12008                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12009   match(Set result (EncodeISOArray src (Binary dst len)));
12010   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12011 
12012   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12013   ins_encode %{
12014     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12015                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12016                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12017   %}
12018   ins_pipe( pipe_slow );
12019 %}
12020 
12021 
12022 //----------Control Flow Instructions------------------------------------------
12023 // Signed compare Instructions
12024 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12025   match(Set cr (CmpI op1 op2));
12026   effect( DEF cr, USE op1, USE op2 );
12027   format %{ "CMP    $op1,$op2" %}
12028   opcode(0x3B);  /* Opcode 3B /r */
12029   ins_encode( OpcP, RegReg( op1, op2) );
12030   ins_pipe( ialu_cr_reg_reg );
12031 %}
12032 
12033 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12034   match(Set cr (CmpI op1 op2));
12035   effect( DEF cr, USE op1 );
12036   format %{ "CMP    $op1,$op2" %}
12037   opcode(0x81,0x07);  /* Opcode 81 /7 */
12038   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12039   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12040   ins_pipe( ialu_cr_reg_imm );
12041 %}
12042 
12043 // Cisc-spilled version of cmpI_eReg
12044 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12045   match(Set cr (CmpI op1 (LoadI op2)));
12046 
12047   format %{ "CMP    $op1,$op2" %}
12048   ins_cost(500);
12049   opcode(0x3B);  /* Opcode 3B /r */
12050   ins_encode( OpcP, RegMem( op1, op2) );
12051   ins_pipe( ialu_cr_reg_mem );
12052 %}
12053 
12054 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12055   match(Set cr (CmpI src zero));
12056   effect( DEF cr, USE src );
12057 
12058   format %{ "TEST   $src,$src" %}
12059   opcode(0x85);
12060   ins_encode( OpcP, RegReg( src, src ) );
12061   ins_pipe( ialu_cr_reg_imm );
12062 %}
12063 
12064 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12065   match(Set cr (CmpI (AndI src con) zero));
12066 
12067   format %{ "TEST   $src,$con" %}
12068   opcode(0xF7,0x00);
12069   ins_encode( OpcP, RegOpc(src), Con32(con) );
12070   ins_pipe( ialu_cr_reg_imm );
12071 %}
12072 
12073 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12074   match(Set cr (CmpI (AndI src mem) zero));
12075 
12076   format %{ "TEST   $src,$mem" %}
12077   opcode(0x85);
12078   ins_encode( OpcP, RegMem( src, mem ) );
12079   ins_pipe( ialu_cr_reg_mem );
12080 %}
12081 
12082 // Unsigned compare Instructions; really, same as signed except they
12083 // produce an eFlagsRegU instead of eFlagsReg.
12084 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12085   match(Set cr (CmpU op1 op2));
12086 
12087   format %{ "CMPu   $op1,$op2" %}
12088   opcode(0x3B);  /* Opcode 3B /r */
12089   ins_encode( OpcP, RegReg( op1, op2) );
12090   ins_pipe( ialu_cr_reg_reg );
12091 %}
12092 
12093 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12094   match(Set cr (CmpU op1 op2));
12095 
12096   format %{ "CMPu   $op1,$op2" %}
12097   opcode(0x81,0x07);  /* Opcode 81 /7 */
12098   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12099   ins_pipe( ialu_cr_reg_imm );
12100 %}
12101 
12102 // Cisc-spilled version of cmpU_eReg
12103 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12104   match(Set cr (CmpU op1 (LoadI op2)));
12105 
12106   format %{ "CMPu   $op1,$op2" %}
12107   ins_cost(500);
12108   opcode(0x3B);  /* Opcode 3B /r */
12109   ins_encode( OpcP, RegMem( op1, op2) );
12110   ins_pipe( ialu_cr_reg_mem );
12111 %}
12112 
12113 // // Cisc-spilled version of cmpU_eReg
12114 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12115 //  match(Set cr (CmpU (LoadI op1) op2));
12116 //
12117 //  format %{ "CMPu   $op1,$op2" %}
12118 //  ins_cost(500);
12119 //  opcode(0x39);  /* Opcode 39 /r */
12120 //  ins_encode( OpcP, RegMem( op1, op2) );
12121 //%}
12122 
12123 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12124   match(Set cr (CmpU src zero));
12125 
12126   format %{ "TESTu  $src,$src" %}
12127   opcode(0x85);
12128   ins_encode( OpcP, RegReg( src, src ) );
12129   ins_pipe( ialu_cr_reg_imm );
12130 %}
12131 
12132 // Unsigned pointer compare Instructions
12133 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12134   match(Set cr (CmpP op1 op2));
12135 
12136   format %{ "CMPu   $op1,$op2" %}
12137   opcode(0x3B);  /* Opcode 3B /r */
12138   ins_encode( OpcP, RegReg( op1, op2) );
12139   ins_pipe( ialu_cr_reg_reg );
12140 %}
12141 
12142 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12143   match(Set cr (CmpP op1 op2));
12144 
12145   format %{ "CMPu   $op1,$op2" %}
12146   opcode(0x81,0x07);  /* Opcode 81 /7 */
12147   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12148   ins_pipe( ialu_cr_reg_imm );
12149 %}
12150 
12151 // Cisc-spilled version of cmpP_eReg
12152 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12153   match(Set cr (CmpP op1 (LoadP op2)));
12154 
12155   format %{ "CMPu   $op1,$op2" %}
12156   ins_cost(500);
12157   opcode(0x3B);  /* Opcode 3B /r */
12158   ins_encode( OpcP, RegMem( op1, op2) );
12159   ins_pipe( ialu_cr_reg_mem );
12160 %}
12161 
12162 // // Cisc-spilled version of cmpP_eReg
12163 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12164 //  match(Set cr (CmpP (LoadP op1) op2));
12165 //
12166 //  format %{ "CMPu   $op1,$op2" %}
12167 //  ins_cost(500);
12168 //  opcode(0x39);  /* Opcode 39 /r */
12169 //  ins_encode( OpcP, RegMem( op1, op2) );
12170 //%}
12171 
12172 // Compare raw pointer (used in out-of-heap check).
12173 // Only works because non-oop pointers must be raw pointers
12174 // and raw pointers have no anti-dependencies.
12175 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12176   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12177   match(Set cr (CmpP op1 (LoadP op2)));
12178 
12179   format %{ "CMPu   $op1,$op2" %}
12180   opcode(0x3B);  /* Opcode 3B /r */
12181   ins_encode( OpcP, RegMem( op1, op2) );
12182   ins_pipe( ialu_cr_reg_mem );
12183 %}
12184 
12185 //
12186 // This will generate a signed flags result. This should be ok
12187 // since any compare to a zero should be eq/neq.
12188 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12189   match(Set cr (CmpP src zero));
12190 
12191   format %{ "TEST   $src,$src" %}
12192   opcode(0x85);
12193   ins_encode( OpcP, RegReg( src, src ) );
12194   ins_pipe( ialu_cr_reg_imm );
12195 %}
12196 
12197 // Cisc-spilled version of testP_reg
12198 // This will generate a signed flags result. This should be ok
12199 // since any compare to a zero should be eq/neq.
12200 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12201   match(Set cr (CmpP (LoadP op) zero));
12202 
12203   format %{ "TEST   $op,0xFFFFFFFF" %}
12204   ins_cost(500);
12205   opcode(0xF7);               /* Opcode F7 /0 */
12206   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12207   ins_pipe( ialu_cr_reg_imm );
12208 %}
12209 
12210 // Yanked all unsigned pointer compare operations.
12211 // Pointer compares are done with CmpP which is already unsigned.
12212 
12213 //----------Max and Min--------------------------------------------------------
12214 // Min Instructions
12215 ////
12216 //   *** Min and Max using the conditional move are slower than the
12217 //   *** branch version on a Pentium III.
12218 // // Conditional move for min
12219 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12220 //  effect( USE_DEF op2, USE op1, USE cr );
12221 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12222 //  opcode(0x4C,0x0F);
12223 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12224 //  ins_pipe( pipe_cmov_reg );
12225 //%}
12226 //
12227 //// Min Register with Register (P6 version)
12228 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12229 //  predicate(VM_Version::supports_cmov() );
12230 //  match(Set op2 (MinI op1 op2));
12231 //  ins_cost(200);
12232 //  expand %{
12233 //    eFlagsReg cr;
12234 //    compI_eReg(cr,op1,op2);
12235 //    cmovI_reg_lt(op2,op1,cr);
12236 //  %}
12237 //%}
12238 
12239 // Min Register with Register (generic version)
12240 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12241   match(Set dst (MinI dst src));
12242   effect(KILL flags);
12243   ins_cost(300);
12244 
12245   format %{ "MIN    $dst,$src" %}
12246   opcode(0xCC);
12247   ins_encode( min_enc(dst,src) );
12248   ins_pipe( pipe_slow );
12249 %}
12250 
12251 // Max Register with Register
12252 //   *** Min and Max using the conditional move are slower than the
12253 //   *** branch version on a Pentium III.
12254 // // Conditional move for max
12255 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12256 //  effect( USE_DEF op2, USE op1, USE cr );
12257 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12258 //  opcode(0x4F,0x0F);
12259 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12260 //  ins_pipe( pipe_cmov_reg );
12261 //%}
12262 //
12263 // // Max Register with Register (P6 version)
12264 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12265 //  predicate(VM_Version::supports_cmov() );
12266 //  match(Set op2 (MaxI op1 op2));
12267 //  ins_cost(200);
12268 //  expand %{
12269 //    eFlagsReg cr;
12270 //    compI_eReg(cr,op1,op2);
12271 //    cmovI_reg_gt(op2,op1,cr);
12272 //  %}
12273 //%}
12274 
12275 // Max Register with Register (generic version)
12276 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12277   match(Set dst (MaxI dst src));
12278   effect(KILL flags);
12279   ins_cost(300);
12280 
12281   format %{ "MAX    $dst,$src" %}
12282   opcode(0xCC);
12283   ins_encode( max_enc(dst,src) );
12284   ins_pipe( pipe_slow );
12285 %}
12286 
12287 // ============================================================================
12288 // Counted Loop limit node which represents exact final iterator value.
12289 // Note: the resulting value should fit into integer range since
12290 // counted loops have limit check on overflow.
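// A quick worked example of the formula in the format string below, for a
// positive stride: init = 0, limit = 10, stride = 3 gives
//   exact_limit = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// i.e. the first induction-variable value that is >= the original limit.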
12291 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12292   match(Set limit (LoopLimit (Binary init limit) stride));
12293   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12294   ins_cost(300);
12295 
12296   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12297   ins_encode %{
12298     int strd = (int)$stride$$constant;
12299     assert(strd != 1 && strd != -1, "sanity");
12300     int m1 = (strd > 0) ? 1 : -1;
12301     // Convert limit to long (EAX:EDX)
12302     __ cdql();
12303     // Convert init to long (init:tmp)
12304     __ movl($tmp$$Register, $init$$Register);
12305     __ sarl($tmp$$Register, 31);
12306     // $limit - $init
12307     __ subl($limit$$Register, $init$$Register);
12308     __ sbbl($limit_hi$$Register, $tmp$$Register);
12309     // + ($stride - 1)
12310     if (strd > 0) {
12311       __ addl($limit$$Register, (strd - 1));
12312       __ adcl($limit_hi$$Register, 0);
12313       __ movl($tmp$$Register, strd);
12314     } else {
12315       __ addl($limit$$Register, (strd + 1));
12316       __ adcl($limit_hi$$Register, -1);
12317       __ lneg($limit_hi$$Register, $limit$$Register);
12318       __ movl($tmp$$Register, -strd);
12319     }
12320     // signed division: (EAX:EDX) / pos_stride
12321     __ idivl($tmp$$Register);
12322     if (strd < 0) {
12323       // restore sign
12324       __ negl($tmp$$Register);
12325     }
12326     // (EAX) * stride
12327     __ mull($tmp$$Register);
12328     // + init (ignore upper bits)
12329     __ addl($limit$$Register, $init$$Register);
12330   %}
12331   ins_pipe( pipe_slow );
12332 %}
12333 
12334 // ============================================================================
12335 // Branch Instructions
12336 // Jump Table
12337 instruct jumpXtnd(rRegI switch_val) %{
12338   match(Jump switch_val);
12339   ins_cost(350);
12340   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12341   ins_encode %{
12342     // Jump to Address(table_base + switch_reg)
12343     Address index(noreg, $switch_val$$Register, Address::times_1);
12344     __ jump(ArrayAddress($constantaddress, index));
12345   %}
12346   ins_pipe(pipe_jmp);
12347 %}
12348 
12349 // Jump Direct - Label defines a relative address from JMP+1
12350 instruct jmpDir(label labl) %{
12351   match(Goto);
12352   effect(USE labl);
12353 
12354   ins_cost(300);
12355   format %{ "JMP    $labl" %}
12356   size(5);
12357   ins_encode %{
12358     Label* L = $labl$$label;
12359     __ jmp(*L, false); // Always long jump
12360   %}
12361   ins_pipe( pipe_jmp );
12362 %}
12363 
12364 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12365 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12366   match(If cop cr);
12367   effect(USE labl);
12368 
12369   ins_cost(300);
12370   format %{ "J$cop    $labl" %}
12371   size(6);
12372   ins_encode %{
12373     Label* L = $labl$$label;
12374     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12375   %}
12376   ins_pipe( pipe_jcc );
12377 %}
12378 
12379 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12380 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12381   predicate(!n->has_vector_mask_set());
12382   match(CountedLoopEnd cop cr);
12383   effect(USE labl);
12384 
12385   ins_cost(300);
12386   format %{ "J$cop    $labl\t# Loop end" %}
12387   size(6);
12388   ins_encode %{
12389     Label* L = $labl$$label;
12390     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12391   %}
12392   ins_pipe( pipe_jcc );
12393 %}
12394 
12395 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12396 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12397   predicate(!n->has_vector_mask_set());
12398   match(CountedLoopEnd cop cmp);
12399   effect(USE labl);
12400 
12401   ins_cost(300);
12402   format %{ "J$cop,u  $labl\t# Loop end" %}
12403   size(6);
12404   ins_encode %{
12405     Label* L = $labl$$label;
12406     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12407   %}
12408   ins_pipe( pipe_jcc );
12409 %}
12410 
12411 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12412   predicate(!n->has_vector_mask_set());
12413   match(CountedLoopEnd cop cmp);
12414   effect(USE labl);
12415 
12416   ins_cost(200);
12417   format %{ "J$cop,u  $labl\t# Loop end" %}
12418   size(6);
12419   ins_encode %{
12420     Label* L = $labl$$label;
12421     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12422   %}
12423   ins_pipe( pipe_jcc );
12424 %}
12425 
12426 // mask version
12427 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12428 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12429   predicate(n->has_vector_mask_set());
12430   match(CountedLoopEnd cop cr);
12431   effect(USE labl);
12432 
12433   ins_cost(400);
12434   format %{ "J$cop    $labl\t# Loop end\n\t"
12435             "restorevectmask \t# vector mask restore for loops" %}
12436   size(10);
12437   ins_encode %{
12438     Label* L = $labl$$label;
12439     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12440     __ restorevectmask();
12441   %}
12442   ins_pipe( pipe_jcc );
12443 %}
12444 
12445 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12446 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12447   predicate(n->has_vector_mask_set());
12448   match(CountedLoopEnd cop cmp);
12449   effect(USE labl);
12450 
12451   ins_cost(400);
12452   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12453             "restorevectmask \t# vector mask restore for loops" %}
12454   size(10);
12455   ins_encode %{
12456     Label* L = $labl$$label;
12457     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12458     __ restorevectmask();
12459   %}
12460   ins_pipe( pipe_jcc );
12461 %}
12462 
12463 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12464   predicate(n->has_vector_mask_set());
12465   match(CountedLoopEnd cop cmp);
12466   effect(USE labl);
12467 
12468   ins_cost(300);
12469   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12470             "restorevectmask \t# vector mask restore for loops" %}
12471   size(10);
12472   ins_encode %{
12473     Label* L = $labl$$label;
12474     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12475     __ restorevectmask();
12476   %}
12477   ins_pipe( pipe_jcc );
12478 %}
12479 
12480 // Jump Direct Conditional - using unsigned comparison
12481 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12482   match(If cop cmp);
12483   effect(USE labl);
12484 
12485   ins_cost(300);
12486   format %{ "J$cop,u  $labl" %}
12487   size(6);
12488   ins_encode %{
12489     Label* L = $labl$$label;
12490     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12491   %}
12492   ins_pipe(pipe_jcc);
12493 %}
12494 
12495 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12496   match(If cop cmp);
12497   effect(USE labl);
12498 
12499   ins_cost(200);
12500   format %{ "J$cop,u  $labl" %}
12501   size(6);
12502   ins_encode %{
12503     Label* L = $labl$$label;
12504     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12505   %}
12506   ins_pipe(pipe_jcc);
12507 %}
12508 
12509 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12510   match(If cop cmp);
12511   effect(USE labl);
12512 
12513   ins_cost(200);
12514   format %{ $$template
12515     if ($cop$$cmpcode == Assembler::notEqual) {
12516       $$emit$$"JP,u   $labl\n\t"
12517       $$emit$$"J$cop,u   $labl"
12518     } else {
12519       $$emit$$"JP,u   done\n\t"
12520       $$emit$$"J$cop,u   $labl\n\t"
12521       $$emit$$"done:"
12522     }
12523   %}
12524   ins_encode %{
12525     Label* l = $labl$$label;
12526     if ($cop$$cmpcode == Assembler::notEqual) {
12527       __ jcc(Assembler::parity, *l, false);
12528       __ jcc(Assembler::notEqual, *l, false);
12529     } else if ($cop$$cmpcode == Assembler::equal) {
12530       Label done;
12531       __ jccb(Assembler::parity, done);
12532       __ jcc(Assembler::equal, *l, false);
12533       __ bind(done);
12534     } else {
12535        ShouldNotReachHere();
12536     }
12537   %}
12538   ins_pipe(pipe_jcc);
12539 %}
12540 
12541 // ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's secondary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (the cache is checked with exposed code in gen_subtype_check()).  Return
12545 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
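//
// Illustrative sketch of the scan (C-like pseudocode over the fields named in
// the format strings below; this is not the emitted code):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers[i] == super) {
//       sub->secondary_super_cache = super;  // hit: update the hidden cache
//       return 0;                            // result zero, flags Z
//     }
//   }
//   return 1;                                // miss: result non-zero, flags NZ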
12546 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12547   match(Set result (PartialSubtypeCheck sub super));
12548   effect( KILL rcx, KILL cr );
12549 
12550   ins_cost(1100);  // slightly larger than the next version
12551   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12552             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12553             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12554             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12555             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12556             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12557             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12558      "miss:\t" %}
12559 
  opcode(0x1); // Force an XOR of EDI
12561   ins_encode( enc_PartialSubtypeCheck() );
12562   ins_pipe( pipe_slow );
12563 %}
12564 
12565 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12566   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12567   effect( KILL rcx, KILL result );
12568 
12569   ins_cost(1000);
12570   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12571             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12572             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12573             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12574             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12575             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12576      "miss:\t" %}
12577 
12578   opcode(0x0);  // No need to XOR EDI
12579   ins_encode( enc_PartialSubtypeCheck() );
12580   ins_pipe( pipe_slow );
12581 %}
12582 
12583 // ============================================================================
12584 // Branch Instructions -- short offset versions
12585 //
12586 // These instructions are used to replace jumps of a long offset (the default
12587 // match) with jumps of a shorter offset.  These instructions are all tagged
12588 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12589 // match rules in general matching.  Instead, the ADLC generates a conversion
12590 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
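//
// Illustrative sketch only (C++-style pseudocode, not generated code): a short
// JMP/Jcc on x86 carries an 8-bit signed displacement, so the in-place
// replacement is legal only when the branch target lies within the rel8 range
// of the shortened (2-byte) instruction:
//
//   static bool fits_in_short_branch(intptr_t offset) {  // hypothetical helper
//     return -128 <= offset && offset <= 127;            // rel8 displacement range
//   }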
12594 
12595 // Jump Direct - Label defines a relative address from JMP+1
12596 instruct jmpDir_short(label labl) %{
12597   match(Goto);
12598   effect(USE labl);
12599 
12600   ins_cost(300);
12601   format %{ "JMP,s  $labl" %}
12602   size(2);
12603   ins_encode %{
12604     Label* L = $labl$$label;
12605     __ jmpb(*L);
12606   %}
12607   ins_pipe( pipe_jmp );
12608   ins_short_branch(1);
12609 %}
12610 
12611 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12612 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12613   match(If cop cr);
12614   effect(USE labl);
12615 
12616   ins_cost(300);
12617   format %{ "J$cop,s  $labl" %}
12618   size(2);
12619   ins_encode %{
12620     Label* L = $labl$$label;
12621     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12622   %}
12623   ins_pipe( pipe_jcc );
12624   ins_short_branch(1);
12625 %}
12626 
12627 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12628 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12629   match(CountedLoopEnd cop cr);
12630   effect(USE labl);
12631 
12632   ins_cost(300);
12633   format %{ "J$cop,s  $labl\t# Loop end" %}
12634   size(2);
12635   ins_encode %{
12636     Label* L = $labl$$label;
12637     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12638   %}
12639   ins_pipe( pipe_jcc );
12640   ins_short_branch(1);
12641 %}
12642 
12643 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12644 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12645   match(CountedLoopEnd cop cmp);
12646   effect(USE labl);
12647 
12648   ins_cost(300);
12649   format %{ "J$cop,us $labl\t# Loop end" %}
12650   size(2);
12651   ins_encode %{
12652     Label* L = $labl$$label;
12653     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12654   %}
12655   ins_pipe( pipe_jcc );
12656   ins_short_branch(1);
12657 %}
12658 
12659 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12660   match(CountedLoopEnd cop cmp);
12661   effect(USE labl);
12662 
12663   ins_cost(300);
12664   format %{ "J$cop,us $labl\t# Loop end" %}
12665   size(2);
12666   ins_encode %{
12667     Label* L = $labl$$label;
12668     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12669   %}
12670   ins_pipe( pipe_jcc );
12671   ins_short_branch(1);
12672 %}
12673 
12674 // Jump Direct Conditional - using unsigned comparison
12675 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12676   match(If cop cmp);
12677   effect(USE labl);
12678 
12679   ins_cost(300);
12680   format %{ "J$cop,us $labl" %}
12681   size(2);
12682   ins_encode %{
12683     Label* L = $labl$$label;
12684     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12685   %}
12686   ins_pipe( pipe_jcc );
12687   ins_short_branch(1);
12688 %}
12689 
12690 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12691   match(If cop cmp);
12692   effect(USE labl);
12693 
12694   ins_cost(300);
12695   format %{ "J$cop,us $labl" %}
12696   size(2);
12697   ins_encode %{
12698     Label* L = $labl$$label;
12699     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12700   %}
12701   ins_pipe( pipe_jcc );
12702   ins_short_branch(1);
12703 %}
12704 
12705 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12706   match(If cop cmp);
12707   effect(USE labl);
12708 
12709   ins_cost(300);
12710   format %{ $$template
12711     if ($cop$$cmpcode == Assembler::notEqual) {
12712       $$emit$$"JP,u,s   $labl\n\t"
12713       $$emit$$"J$cop,u,s   $labl"
12714     } else {
12715       $$emit$$"JP,u,s   done\n\t"
12716       $$emit$$"J$cop,u,s  $labl\n\t"
12717       $$emit$$"done:"
12718     }
12719   %}
12720   size(4);
12721   ins_encode %{
12722     Label* l = $labl$$label;
12723     if ($cop$$cmpcode == Assembler::notEqual) {
12724       __ jccb(Assembler::parity, *l);
12725       __ jccb(Assembler::notEqual, *l);
12726     } else if ($cop$$cmpcode == Assembler::equal) {
12727       Label done;
12728       __ jccb(Assembler::parity, done);
12729       __ jccb(Assembler::equal, *l);
12730       __ bind(done);
12731     } else {
12732        ShouldNotReachHere();
12733     }
12734   %}
12735   ins_pipe(pipe_jcc);
12736   ins_short_branch(1);
12737 %}
12738 
12739 // ============================================================================
12740 // Long Compare
12741 //
12742 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12743 // is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test against zero we can check just the
// sign bit of the high half.  The GE test is the negated LT test.  The LE test
// is obtained by commuting the operands (yielding a GE test); negating that
// gives the GT test.  The EQ test is done by OR'ing the high and low halves
// (OR sets the flags), and the NE test is its negation.
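//
// Illustrative sketch of the decomposition (C-like pseudocode over {hi,lo}
// 32-bit halves; "<u" is an unsigned compare; this is not the emitted code):
//   lt0(x)   == (x.hi < 0)                                        // sign bit of the high half
//   lt(x,y)  == (x.hi < y.hi) || (x.hi == y.hi && x.lo <u y.lo)   // signed hi, unsigned lo
//   eq(x,y)  == ((x.hi ^ y.hi) | (x.lo ^ y.lo)) == 0              // combine both halves
//   ge == !lt,  le(x,y) == ge(y,x),  gt(x,y) == lt(y,x),  ne == !eq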
12749 
// A shortcoming in the ADLC causes it to mix up expressions like:
12751 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12752 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12753 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12754 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12755 // foo match ends up with the wrong leaf.  One fix is to not match both
12756 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the three-way (CmpL3) form of long-compare and both are very useful
12758 // on Intel which has so few registers.
12759 
12760 // Manifest a CmpL result in an integer register.  Very painful.
12761 // This is the test to avoid.
12762 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12763   match(Set dst (CmpL3 src1 src2));
12764   effect( KILL flags );
12765   ins_cost(1000);
12766   format %{ "XOR    $dst,$dst\n\t"
12767             "CMP    $src1.hi,$src2.hi\n\t"
12768             "JLT,s  m_one\n\t"
12769             "JGT,s  p_one\n\t"
12770             "CMP    $src1.lo,$src2.lo\n\t"
12771             "JB,s   m_one\n\t"
12772             "JEQ,s  done\n"
12773     "p_one:\tINC    $dst\n\t"
12774             "JMP,s  done\n"
12775     "m_one:\tDEC    $dst\n"
12776      "done:" %}
12777   ins_encode %{
12778     Label p_one, m_one, done;
12779     __ xorptr($dst$$Register, $dst$$Register);
12780     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12781     __ jccb(Assembler::less,    m_one);
12782     __ jccb(Assembler::greater, p_one);
12783     __ cmpl($src1$$Register, $src2$$Register);
12784     __ jccb(Assembler::below,   m_one);
12785     __ jccb(Assembler::equal,   done);
12786     __ bind(p_one);
12787     __ incrementl($dst$$Register);
12788     __ jmpb(done);
12789     __ bind(m_one);
12790     __ decrementl($dst$$Register);
12791     __ bind(done);
12792   %}
12793   ins_pipe( pipe_slow );
12794 %}
12795 
12796 //======
12797 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12798 // compares.  Can be used for LE or GT compares by reversing arguments.
12799 // NOT GOOD FOR EQ/NE tests.
12800 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12801   match( Set flags (CmpL src zero ));
12802   ins_cost(100);
12803   format %{ "TEST   $src.hi,$src.hi" %}
12804   opcode(0x85);
12805   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12806   ins_pipe( ialu_cr_reg_reg );
12807 %}
12808 
12809 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12810 // compares.  Can be used for LE or GT compares by reversing arguments.
12811 // NOT GOOD FOR EQ/NE tests.
12812 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12813   match( Set flags (CmpL src1 src2 ));
12814   effect( TEMP tmp );
12815   ins_cost(300);
12816   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12817             "MOV    $tmp,$src1.hi\n\t"
12818             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12819   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12820   ins_pipe( ialu_cr_reg_reg );
12821 %}
12822 
// Long compares reg < zero/reg OR reg >= zero/reg.
12824 // Just a wrapper for a normal branch, plus the predicate test.
12825 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12826   match(If cmp flags);
12827   effect(USE labl);
12828   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12829   expand %{
12830     jmpCon(cmp,flags,labl);    // JLT or JGE...
12831   %}
12832 %}
12833 
12834 //======
12835 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12836 // compares.  Can be used for LE or GT compares by reversing arguments.
12837 // NOT GOOD FOR EQ/NE tests.
12838 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12839   match(Set flags (CmpUL src zero));
12840   ins_cost(100);
12841   format %{ "TEST   $src.hi,$src.hi" %}
12842   opcode(0x85);
12843   ins_encode(OpcP, RegReg_Hi2(src, src));
12844   ins_pipe(ialu_cr_reg_reg);
12845 %}
12846 
12847 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12848 // compares.  Can be used for LE or GT compares by reversing arguments.
12849 // NOT GOOD FOR EQ/NE tests.
12850 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12851   match(Set flags (CmpUL src1 src2));
12852   effect(TEMP tmp);
12853   ins_cost(300);
12854   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12855             "MOV    $tmp,$src1.hi\n\t"
12856             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12857   ins_encode(long_cmp_flags2(src1, src2, tmp));
12858   ins_pipe(ialu_cr_reg_reg);
12859 %}
12860 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12862 // Just a wrapper for a normal branch, plus the predicate test.
12863 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12864   match(If cmp flags);
12865   effect(USE labl);
12866   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12867   expand %{
12868     jmpCon(cmp, flags, labl);    // JLT or JGE...
12869   %}
12870 %}
12871 
12872 // Compare 2 longs and CMOVE longs.
12873 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12874   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12875   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12876   ins_cost(400);
12877   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12878             "CMOV$cmp $dst.hi,$src.hi" %}
12879   opcode(0x0F,0x40);
12880   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12881   ins_pipe( pipe_cmov_reg_long );
12882 %}
12883 
12884 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12885   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12886   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12887   ins_cost(500);
12888   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12889             "CMOV$cmp $dst.hi,$src.hi" %}
12890   opcode(0x0F,0x40);
12891   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12892   ins_pipe( pipe_cmov_reg_long );
12893 %}
12894 
12895 // Compare 2 longs and CMOVE ints.
12896 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12897   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12898   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12899   ins_cost(200);
12900   format %{ "CMOV$cmp $dst,$src" %}
12901   opcode(0x0F,0x40);
12902   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12903   ins_pipe( pipe_cmov_reg );
12904 %}
12905 
12906 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12907   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12908   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12909   ins_cost(250);
12910   format %{ "CMOV$cmp $dst,$src" %}
12911   opcode(0x0F,0x40);
12912   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12913   ins_pipe( pipe_cmov_mem );
12914 %}
12915 
// Compare 2 longs and CMOVE ptrs.
12917 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12918   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12919   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12920   ins_cost(200);
12921   format %{ "CMOV$cmp $dst,$src" %}
12922   opcode(0x0F,0x40);
12923   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12924   ins_pipe( pipe_cmov_reg );
12925 %}
12926 
12927 // Compare 2 longs and CMOVE doubles
12928 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12930   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12931   ins_cost(200);
12932   expand %{
12933     fcmovDPR_regS(cmp,flags,dst,src);
12934   %}
12935 %}
12936 
12937 // Compare 2 longs and CMOVE doubles
12938 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12940   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12941   ins_cost(200);
12942   expand %{
12943     fcmovD_regS(cmp,flags,dst,src);
12944   %}
12945 %}
12946 
12947 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12949   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12950   ins_cost(200);
12951   expand %{
12952     fcmovFPR_regS(cmp,flags,dst,src);
12953   %}
12954 %}
12955 
12956 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12958   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12959   ins_cost(200);
12960   expand %{
12961     fcmovF_regS(cmp,flags,dst,src);
12962   %}
12963 %}
12964 
12965 //======
12966 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12967 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12968   match( Set flags (CmpL src zero ));
12969   effect(TEMP tmp);
12970   ins_cost(200);
12971   format %{ "MOV    $tmp,$src.lo\n\t"
12972             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12973   ins_encode( long_cmp_flags0( src, tmp ) );
12974   ins_pipe( ialu_reg_reg_long );
12975 %}
12976 
12977 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12978 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12979   match( Set flags (CmpL src1 src2 ));
12980   ins_cost(200+300);
12981   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12982             "JNE,s  skip\n\t"
12983             "CMP    $src1.hi,$src2.hi\n\t"
12984      "skip:\t" %}
12985   ins_encode( long_cmp_flags1( src1, src2 ) );
12986   ins_pipe( ialu_cr_reg_reg );
12987 %}
12988 
12989 // Long compare reg == zero/reg OR reg != zero/reg
12990 // Just a wrapper for a normal branch, plus the predicate test.
12991 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12992   match(If cmp flags);
12993   effect(USE labl);
12994   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12995   expand %{
12996     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12997   %}
12998 %}
12999 
13000 //======
13001 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13002 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13003   match(Set flags (CmpUL src zero));
13004   effect(TEMP tmp);
13005   ins_cost(200);
13006   format %{ "MOV    $tmp,$src.lo\n\t"
13007             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13008   ins_encode(long_cmp_flags0(src, tmp));
13009   ins_pipe(ialu_reg_reg_long);
13010 %}
13011 
13012 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13013 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13014   match(Set flags (CmpUL src1 src2));
13015   ins_cost(200+300);
13016   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13017             "JNE,s  skip\n\t"
13018             "CMP    $src1.hi,$src2.hi\n\t"
13019      "skip:\t" %}
13020   ins_encode(long_cmp_flags1(src1, src2));
13021   ins_pipe(ialu_cr_reg_reg);
13022 %}
13023 
13024 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13025 // Just a wrapper for a normal branch, plus the predicate test.
13026 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13027   match(If cmp flags);
13028   effect(USE labl);
13029   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13030   expand %{
13031     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13032   %}
13033 %}
13034 
13035 // Compare 2 longs and CMOVE longs.
13036 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13037   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13038   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13039   ins_cost(400);
13040   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13041             "CMOV$cmp $dst.hi,$src.hi" %}
13042   opcode(0x0F,0x40);
13043   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13044   ins_pipe( pipe_cmov_reg_long );
13045 %}
13046 
13047 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13048   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13049   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13050   ins_cost(500);
13051   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13052             "CMOV$cmp $dst.hi,$src.hi" %}
13053   opcode(0x0F,0x40);
13054   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13055   ins_pipe( pipe_cmov_reg_long );
13056 %}
13057 
13058 // Compare 2 longs and CMOVE ints.
13059 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13060   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13061   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13062   ins_cost(200);
13063   format %{ "CMOV$cmp $dst,$src" %}
13064   opcode(0x0F,0x40);
13065   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13066   ins_pipe( pipe_cmov_reg );
13067 %}
13068 
13069 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13070   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13071   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13072   ins_cost(250);
13073   format %{ "CMOV$cmp $dst,$src" %}
13074   opcode(0x0F,0x40);
13075   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13076   ins_pipe( pipe_cmov_mem );
13077 %}
13078 
// Compare 2 longs and CMOVE ptrs.
13080 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13081   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13082   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13083   ins_cost(200);
13084   format %{ "CMOV$cmp $dst,$src" %}
13085   opcode(0x0F,0x40);
13086   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13087   ins_pipe( pipe_cmov_reg );
13088 %}
13089 
13090 // Compare 2 longs and CMOVE doubles
13091 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13093   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13094   ins_cost(200);
13095   expand %{
13096     fcmovDPR_regS(cmp,flags,dst,src);
13097   %}
13098 %}
13099 
13100 // Compare 2 longs and CMOVE doubles
13101 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13103   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13104   ins_cost(200);
13105   expand %{
13106     fcmovD_regS(cmp,flags,dst,src);
13107   %}
13108 %}
13109 
13110 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13112   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13113   ins_cost(200);
13114   expand %{
13115     fcmovFPR_regS(cmp,flags,dst,src);
13116   %}
13117 %}
13118 
13119 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13121   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13122   ins_cost(200);
13123   expand %{
13124     fcmovF_regS(cmp,flags,dst,src);
13125   %}
13126 %}
13127 
13128 //======
13129 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13130 // Same as cmpL_reg_flags_LEGT except must negate src
13131 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13132   match( Set flags (CmpL src zero ));
13133   effect( TEMP tmp );
13134   ins_cost(300);
13135   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13136             "CMP    $tmp,$src.lo\n\t"
13137             "SBB    $tmp,$src.hi\n\t" %}
13138   ins_encode( long_cmp_flags3(src, tmp) );
13139   ins_pipe( ialu_reg_reg_long );
13140 %}
13141 
13142 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13143 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13144 // requires a commuted test to get the same result.
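// Illustrative example (not emitted code): with the operands swapped, x <= y is
// evaluated as the commuted test y >= x, and x > y as y < x.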
13145 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13146   match( Set flags (CmpL src1 src2 ));
13147   effect( TEMP tmp );
13148   ins_cost(300);
13149   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13150             "MOV    $tmp,$src2.hi\n\t"
13151             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13152   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13153   ins_pipe( ialu_cr_reg_reg );
13154 %}
13155 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
13158 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13159   match(If cmp flags);
13160   effect(USE labl);
13161   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13162   ins_cost(300);
13163   expand %{
13164     jmpCon(cmp,flags,labl);    // JGT or JLE...
13165   %}
13166 %}
13167 
13168 //======
13169 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13170 // Same as cmpUL_reg_flags_LEGT except must negate src
13171 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13172   match(Set flags (CmpUL src zero));
13173   effect(TEMP tmp);
13174   ins_cost(300);
13175   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13176             "CMP    $tmp,$src.lo\n\t"
13177             "SBB    $tmp,$src.hi\n\t" %}
13178   ins_encode(long_cmp_flags3(src, tmp));
13179   ins_pipe(ialu_reg_reg_long);
13180 %}
13181 
13182 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13183 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13184 // requires a commuted test to get the same result.
13185 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13186   match(Set flags (CmpUL src1 src2));
13187   effect(TEMP tmp);
13188   ins_cost(300);
13189   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13190             "MOV    $tmp,$src2.hi\n\t"
13191             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13192   ins_encode(long_cmp_flags2( src2, src1, tmp));
13193   ins_pipe(ialu_cr_reg_reg);
13194 %}
13195 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
13198 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13199   match(If cmp flags);
13200   effect(USE labl);
13201   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13202   ins_cost(300);
13203   expand %{
13204     jmpCon(cmp, flags, labl);    // JGT or JLE...
13205   %}
13206 %}
13207 
13208 // Compare 2 longs and CMOVE longs.
13209 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13210   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13211   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13212   ins_cost(400);
13213   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13214             "CMOV$cmp $dst.hi,$src.hi" %}
13215   opcode(0x0F,0x40);
13216   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13217   ins_pipe( pipe_cmov_reg_long );
13218 %}
13219 
13220 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13221   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13222   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13223   ins_cost(500);
13224   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13225             "CMOV$cmp $dst.hi,$src.hi+4" %}
13226   opcode(0x0F,0x40);
13227   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13228   ins_pipe( pipe_cmov_reg_long );
13229 %}
13230 
13231 // Compare 2 longs and CMOVE ints.
13232 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13233   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13234   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13235   ins_cost(200);
13236   format %{ "CMOV$cmp $dst,$src" %}
13237   opcode(0x0F,0x40);
13238   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13239   ins_pipe( pipe_cmov_reg );
13240 %}
13241 
13242 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13243   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13244   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13245   ins_cost(250);
13246   format %{ "CMOV$cmp $dst,$src" %}
13247   opcode(0x0F,0x40);
13248   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13249   ins_pipe( pipe_cmov_mem );
13250 %}
13251 
13252 // Compare 2 longs and CMOVE ptrs.
13253 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13254   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13255   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13256   ins_cost(200);
13257   format %{ "CMOV$cmp $dst,$src" %}
13258   opcode(0x0F,0x40);
13259   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13260   ins_pipe( pipe_cmov_reg );
13261 %}
13262 
13263 // Compare 2 longs and CMOVE doubles
13264 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13266   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13267   ins_cost(200);
13268   expand %{
13269     fcmovDPR_regS(cmp,flags,dst,src);
13270   %}
13271 %}
13272 
13273 // Compare 2 longs and CMOVE doubles
13274 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13276   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13277   ins_cost(200);
13278   expand %{
13279     fcmovD_regS(cmp,flags,dst,src);
13280   %}
13281 %}
13282 
13283 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13285   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13286   ins_cost(200);
13287   expand %{
13288     fcmovFPR_regS(cmp,flags,dst,src);
13289   %}
13290 %}
13291 
13292 
13293 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13295   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13296   ins_cost(200);
13297   expand %{
13298     fcmovF_regS(cmp,flags,dst,src);
13299   %}
13300 %}
13301 
13302 
13303 // ============================================================================
13304 // Procedure Call/Return Instructions
13305 // Call Java Static Instruction
13306 // Note: If this code changes, the corresponding ret_addr_offset() and
13307 //       compute_padding() functions will have to be adjusted.
13308 instruct CallStaticJavaDirect(method meth) %{
13309   match(CallStaticJava);
13310   effect(USE meth);
13311 
13312   ins_cost(300);
13313   format %{ "CALL,static " %}
13314   opcode(0xE8); /* E8 cd */
13315   ins_encode( pre_call_resets,
13316               Java_Static_Call( meth ),
13317               call_epilog,
13318               post_call_FPU );
13319   ins_pipe( pipe_slow );
13320   ins_alignment(4);
13321 %}
13322 
13323 // Call Java Dynamic Instruction
13324 // Note: If this code changes, the corresponding ret_addr_offset() and
13325 //       compute_padding() functions will have to be adjusted.
13326 instruct CallDynamicJavaDirect(method meth) %{
13327   match(CallDynamicJava);
13328   effect(USE meth);
13329 
13330   ins_cost(300);
13331   format %{ "MOV    EAX,(oop)-1\n\t"
13332             "CALL,dynamic" %}
13333   opcode(0xE8); /* E8 cd */
13334   ins_encode( pre_call_resets,
13335               Java_Dynamic_Call( meth ),
13336               call_epilog,
13337               post_call_FPU );
13338   ins_pipe( pipe_slow );
13339   ins_alignment(4);
13340 %}
13341 
13342 // Call Runtime Instruction
13343 instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
13345   effect(USE meth);
13346 
13347   ins_cost(300);
13348   format %{ "CALL,runtime " %}
13349   opcode(0xE8); /* E8 cd */
13350   // Use FFREEs to clear entries in float stack
13351   ins_encode( pre_call_resets,
13352               FFree_Float_Stack_All,
13353               Java_To_Runtime( meth ),
13354               post_call_FPU );
13355   ins_pipe( pipe_slow );
13356 %}
13357 
13358 // Call runtime without safepoint
13359 instruct CallLeafDirect(method meth) %{
13360   match(CallLeaf);
13361   effect(USE meth);
13362 
13363   ins_cost(300);
13364   format %{ "CALL_LEAF,runtime " %}
13365   opcode(0xE8); /* E8 cd */
13366   ins_encode( pre_call_resets,
13367               FFree_Float_Stack_All,
13368               Java_To_Runtime( meth ),
13369               Verify_FPU_For_Leaf, post_call_FPU );
13370   ins_pipe( pipe_slow );
13371 %}
13372 
13373 instruct CallLeafNoFPDirect(method meth) %{
13374   match(CallLeafNoFP);
13375   effect(USE meth);
13376 
13377   ins_cost(300);
13378   format %{ "CALL_LEAF_NOFP,runtime " %}
13379   opcode(0xE8); /* E8 cd */
13380   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13381   ins_pipe( pipe_slow );
13382 %}
13383 
13384 
13385 // Return Instruction
13386 // Remove the return address & jump to it.
13387 instruct Ret() %{
13388   match(Return);
13389   format %{ "RET" %}
13390   opcode(0xC3);
13391   ins_encode(OpcP);
13392   ins_pipe( pipe_jmp );
13393 %}
13394 
13395 // Tail Call; Jump from runtime stub to Java code.
13396 // Also known as an 'interprocedural jump'.
13397 // Target of jump will eventually return to caller.
13398 // TailJump below removes the return address.
13399 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13400   match(TailCall jump_target method_oop );
13401   ins_cost(300);
13402   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13403   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13404   ins_encode( OpcP, RegOpc(jump_target) );
13405   ins_pipe( pipe_jmp );
13406 %}
13407 
13408 
13409 // Tail Jump; remove the return address; jump to target.
13410 // TailCall above leaves the return address around.
13411 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13412   match( TailJump jump_target ex_oop );
13413   ins_cost(300);
13414   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13415             "JMP    $jump_target " %}
13416   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13417   ins_encode( enc_pop_rdx,
13418               OpcP, RegOpc(jump_target) );
13419   ins_pipe( pipe_jmp );
13420 %}
13421 
13422 // Create exception oop: created by stack-crawling runtime code.
13423 // Created exception is now available to this handler, and is setup
13424 // just prior to jumping to this handler.  No code emitted.
13425 instruct CreateException( eAXRegP ex_oop )
13426 %{
13427   match(Set ex_oop (CreateEx));
13428 
13429   size(0);
13430   // use the following format syntax
13431   format %{ "# exception oop is in EAX; no code emitted" %}
13432   ins_encode();
13433   ins_pipe( empty );
13434 %}
13435 
13436 
13437 // Rethrow exception:
13438 // The exception oop will come in the first argument position.
13439 // Then JUMP (not call) to the rethrow stub code.
13440 instruct RethrowException()
13441 %{
13442   match(Rethrow);
13443 
13444   // use the following format syntax
13445   format %{ "JMP    rethrow_stub" %}
13446   ins_encode(enc_rethrow);
13447   ins_pipe( pipe_jmp );
13448 %}
13449 
13450 // inlined locking and unlocking
13451 
13452 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13453   predicate(Compile::current()->use_rtm());
13454   match(Set cr (FastLock object box));
13455   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13456   ins_cost(300);
13457   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13458   ins_encode %{
13459     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13460                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13461                  _counters, _rtm_counters, _stack_rtm_counters,
13462                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13463                  true, ra_->C->profile_rtm());
13464   %}
13465   ins_pipe(pipe_slow);
13466 %}
13467 
13468 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13469   predicate(!Compile::current()->use_rtm());
13470   match(Set cr (FastLock object box));
13471   effect(TEMP tmp, TEMP scr, USE_KILL box);
13472   ins_cost(300);
13473   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13474   ins_encode %{
13475     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13476                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13477   %}
13478   ins_pipe(pipe_slow);
13479 %}
13480 
13481 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13482   match(Set cr (FastUnlock object box));
13483   effect(TEMP tmp, USE_KILL box);
13484   ins_cost(300);
13485   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13486   ins_encode %{
13487     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13488   %}
13489   ins_pipe(pipe_slow);
13490 %}
13491 
13492 
13493 
13494 // ============================================================================
13495 // Safepoint Instruction
13496 instruct safePoint_poll(eFlagsReg cr) %{
13497   predicate(SafepointMechanism::uses_global_page_poll());
13498   match(SafePoint);
13499   effect(KILL cr);
13500 
13501   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable, as the address can be generated with
  // just a sethi, saving an or.  However, polling at offset 0 puts additional
  // pressure on D$ index 0: because of alignment, the lower cache indices
  // already tend to see more traffic.  It would be better to move the polling
  // address to offset 0 of the last cache line in the polling page.
13508 
13509   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13510   ins_cost(125);
  size(6);
13512   ins_encode( Safepoint_Poll() );
13513   ins_pipe( ialu_reg_mem );
13514 %}
13515 
13516 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13517   predicate(SafepointMechanism::uses_thread_local_poll());
13518   match(SafePoint poll);
13519   effect(KILL cr, USE poll);
13520 
13521   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13522   ins_cost(125);
13523   // EBP would need size(3)
13524   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13525   ins_encode %{
13526     __ relocate(relocInfo::poll_type);
13527     address pre_pc = __ pc();
13528     __ testl(rax, Address($poll$$Register, 0));
13529     address post_pc = __ pc();
13530     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13531   %}
13532   ins_pipe(ialu_reg_mem);
13533 %}
13534 
13535 
13536 // ============================================================================
13537 // This name is KNOWN by the ADLC and cannot be changed.
13538 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
13540 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13541   match(Set dst (ThreadLocal));
13542   effect(DEF dst, KILL cr);
13543 
13544   format %{ "MOV    $dst, Thread::current()" %}
13545   ins_encode %{
13546     Register dstReg = as_Register($dst$$reg);
13547     __ get_thread(dstReg);
13548   %}
13549   ins_pipe( ialu_reg_fat );
13550 %}
13551 
13552 
13553 
13554 //----------PEEPHOLE RULES-----------------------------------------------------
13555 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13557 //
13558 // peepmatch ( root_instr_name [preceding_instruction]* );
13559 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
// // instruction numbers are zero-based using left-to-right order in peepmatch
13564 //
13565 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13566 // // provide an instruction_number.operand_name for each operand that appears
13567 // // in the replacement instruction's match rule
13568 //
13569 // ---------VM FLAGS---------------------------------------------------------
13570 //
13571 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13572 //
13573 // Each peephole rule is given an identifying number starting with zero and
13574 // increasing by one in the order seen by the parser.  An individual peephole
13575 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13576 // on the command-line.
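//
// Illustrative usage (assuming these are develop-level flags, i.e. available
// in a debug/fastdebug VM; the rule number is hypothetical):
//
//   java -XX:-OptoPeephole ...       disable all peephole rules
//   java -XX:OptoPeepholeAt=2 ...    enable only rule #2, disable the rest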
13577 //
13578 // ---------CURRENT LIMITATIONS----------------------------------------------
13579 //
13580 // Only match adjacent instructions in same basic block
13581 // Only equality constraints
13582 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13583 // Only one replacement instruction
13584 //
13585 // ---------EXAMPLE----------------------------------------------------------
13586 //
13587 // // pertinent parts of existing instructions in architecture description
13588 // instruct movI(rRegI dst, rRegI src) %{
13589 //   match(Set dst (CopyI src));
13590 // %}
13591 //
13592 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13593 //   match(Set dst (AddI dst src));
13594 //   effect(KILL cr);
13595 // %}
13596 //
13597 // // Change (inc mov) to lea
13598 // peephole %{
//   // increment preceded by register-register move
13600 //   peepmatch ( incI_eReg movI );
13601 //   // require that the destination register of the increment
13602 //   // match the destination register of the move
13603 //   peepconstraint ( 0.dst == 1.dst );
13604 //   // construct a replacement instruction that sets
13605 //   // the destination to ( move's source register + one )
13606 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13607 // %}
13608 //
// The implementation no longer uses movX instructions, since the
// machine-independent system no longer uses CopyX nodes.
13611 //
13612 // peephole %{
13613 //   peepmatch ( incI_eReg movI );
13614 //   peepconstraint ( 0.dst == 1.dst );
13615 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13616 // %}
13617 //
13618 // peephole %{
13619 //   peepmatch ( decI_eReg movI );
13620 //   peepconstraint ( 0.dst == 1.dst );
13621 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13622 // %}
13623 //
13624 // peephole %{
13625 //   peepmatch ( addI_eReg_imm movI );
13626 //   peepconstraint ( 0.dst == 1.dst );
13627 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13628 // %}
13629 //
13630 // peephole %{
13631 //   peepmatch ( addP_eReg_imm movP );
13632 //   peepconstraint ( 0.dst == 1.dst );
13633 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13634 // %}
13635 
// // Change a load of a just-spilled value into only the spill (the store alone)
13637 // instruct storeI(memory mem, rRegI src) %{
13638 //   match(Set mem (StoreI mem src));
13639 // %}
13640 //
13641 // instruct loadI(rRegI dst, memory mem) %{
13642 //   match(Set dst (LoadI mem));
13643 // %}
13644 //
13645 peephole %{
13646   peepmatch ( loadI storeI );
13647   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13648   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13649 %}
13650 
13651 //----------SMARTSPILL RULES---------------------------------------------------
13652 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.