1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
  64 // SOE was turned off in Java code due to frequent use of uncommon-traps.
  65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emission of assembly for a machnode.  During emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint.  This same weirdness with numbering forces the
  86 // instruction encoding to play games with the register
  87 // encoding to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Classes of integer register pairs that align with the calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and not EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
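     // Rough illustration of how these pools are used by the matcher rules further
     // down in this file (the exact instruction selection is made by those rules):
     //   AbsF:  ANDPS xmm, [float_signmask_pool]    -- clear the sign bit
     //   NegD:  XORPD xmm, [double_signflip_pool]   -- flip the sign bit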
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points (5-byte MOV EAX + 5-byte CALL)
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // With absolute addressing it does not; with thread-local polling it does.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM() -- pack mod/reg/rm (or scale/index/base) fields into a single ModRM/SIB byte
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC() -- combine an opcode base with a condition code and emit the byte
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
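     // Worked example of the bytes emitted above, assuming opcode 0xDB /0
     // (FILD dword) and disp = 8:
     //   store_to_stackslot(cbuf, 0xDB, 0x0, 8)  =>  DB 44 24 08   ; FILD [ESP+8]
     // where 0x44 is the ModRM byte (mod=01, reg=/0, r/m=ESP) and 0x24 is the SIB byte.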
 414 
 415 // Emit the ModRM, optional SIB, and displacement bytes for a register/memory operand.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
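     // A few example encodings produced above (with the reg field = EAX = 0):
     //   [ESI]            -> 06             (mod=00, r/m=ESI)
     //   [ESI+8]          -> 46 08          (mod=01, r/m=ESI, disp8)
     //   [ESP+8]          -> 44 24 08       (mod=01, r/m=100 -> SIB, disp8)
     //   absolute address -> 05 xx xx xx xx (mod=00, r/m=101, disp32)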
 481 
 482 
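     // Emit a two-byte reg-reg MOV (opcode 0x8B /r), or nothing at all for a self-copy.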
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
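     // The value left in dst above is the three-way compare result:
     //   -1 if less than or unordered (PF or CF set), 0 if equal, +1 if greater.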
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because code that uses the
 628     // constant table might be emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 648   // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
 725 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 726   return MachNode::size(ra_); // too many variables; just compute it
 727                               // the hard way
 728 }
 729 
 730 int MachEpilogNode::reloc() const {
 731   return 0; // a large enough number
 732 }
 733 
 734 const Pipeline * MachEpilogNode::pipeline() const {
 735   return MachNode::pipeline_class();
 736 }
 737 
 738 int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
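       // Size: opcode + ModRM + SIB (ESP-based addressing always needs a SIB byte),
       // plus 0, 1 or 4 displacement bytes.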
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
 789     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
 790     //                          it maps more cases to single byte displacement
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
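       // The ModRM reg field selects FST (/2) vs. FSTP (/3) for the D9/DD opcodes below;
       // EDX_num and EBX_num happen to encode as 2 and 3, so they are reused for that here.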
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popl    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popl    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167       } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170       }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// This hook is UltraSparc-specific; on x86 returning true just means we have a fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Needs 2 CMOV's for longs.
1424 const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
1429 // Does the CPU require late expand (see block.cpp for description of late expand)?
1430 const bool Matcher::require_postalloc_expand = false;
1431 
1432 // Do we need to mask the count passed to shift instructions or does
1433 // the cpu only look at the lower 5/6 bits anyway?
1434 const bool Matcher::need_masked_shift_count = false;
1435 
1436 bool Matcher::narrow_oop_use_complex_address() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 bool Matcher::narrow_klass_use_complex_address() {
1442   ShouldNotCallThis();
1443   return true;
1444 }
1445 
1446 bool Matcher::const_oop_prefer_decode() {
1447   ShouldNotCallThis();
1448   return true;
1449 }
1450 
1451 bool Matcher::const_klass_prefer_decode() {
1452   ShouldNotCallThis();
1453   return true;
1454 }
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
1510     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86-32 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // the multiply-based code, and only when the constant divisor fits
  // into 32 bits (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value is known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
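//
// For example, an illustrative sketch of the shape used by the instructs
// later in this file (names and operands assumed here for illustration):
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);                           // primary opcode only
//     ins_encode( OpcP, RegReg( dst, src ) ); // enc_classes defined below
//     ins_pipe( ialu_reg_reg );
//   %}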
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : rax,: dividend                         min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: rax,: quotient  (= rax, idiv reg)       min_int
1670     //         rdx: remainder (= rax, irem reg)       0
1671     //
    //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         rdx,edx
1677     //  83 F9 FF             cmp         rcx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // normal:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1909                      runtime_call_Relocation::spec(),
1910                      RELOC_IMM32);
1911     } else {
1912       int method_index = resolved_method_index(cbuf);
1913       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1914                                                   : static_call_Relocation::spec(method_index);
1915       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916                      rspec, RELOC_DISP32);
1917       // Emit stubs for static call.
1918       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1919       if (stub == NULL) {
1920         ciEnv::current()->record_failure("CodeCache is full");
1921         return;
1922       }
1923     }
1924   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1934 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates
1985     emit_opcode(cbuf, $primary + $dst$$reg);
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load immediate does not have a zero or sign extended version
1991     // for 8-bit immediates
1992     int dst_enc = $dst$$reg;
1993     int src_con = $src$$constant & 0x0FFFFFFFFL;
1994     if (src_con == 0) {
1995       // xor dst, dst
1996       emit_opcode(cbuf, 0x33);
1997       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1998     } else {
1999       emit_opcode(cbuf, $primary + dst_enc);
2000       emit_d32(cbuf, src_con);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load immediate does not have a zero or sign extended version
2006     // for 8-bit immediates
2007     int dst_enc = $dst$$reg + 2;
2008     int src_con = ((julong)($src$$constant)) >> 32;
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Output Float immediate bits
2062     jfloat jf = $src$$constant;
2063     int    jf_as_bits = jint_cast( jf );
2064     emit_d32(cbuf, jf_as_bits);
2065   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Output Float immediate bits
2069     jfloat jf = $src$$constant;
2070     int    jf_as_bits = jint_cast( jf );
2071     emit_d32(cbuf, jf_as_bits);
2072   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2084     // Output immediate memory reference
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap rbx, and rcx before and after the
2095   //       cmpxchg8 instruction because the instruction uses
2096   //       rcx as the high order word of the new value to store but
2097   //       our register encoding uses rbx,.
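  //       (CMPXCHG8B compares EDX:EAX with the 8-byte memory operand and, on
  //       a match, stores ECX:EBX into it -- hence the XCHG of EBX and ECX
  //       around the instruction below.)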
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  rbx,ecx
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     emit_opcode(cbuf,0xF0);
2105     // CMPXCHG8 [Eptr]
2106     emit_opcode(cbuf,0x0F);
2107     emit_opcode(cbuf,0xC7);
2108     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109     // XCHG  rbx,ecx
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit mode
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2148     int res_encoding = $res$$reg;
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     int reg_encoding = $ereg$$reg;
2168     int base  = $mem$$base;
2169     int index = $mem$$index;
2170     int scale = $mem$$scale;
2171     int displace = $mem$$disp;
2172     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2173     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2174   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2178     int base  = $mem$$base;
2179     int index = $mem$$index;
2180     int scale = $mem$$scale;
2181     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2182     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2184   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187     int r1, r2;
2188     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2189     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2190     emit_opcode(cbuf,0x0F);
2191     emit_opcode(cbuf,$tertiary);
2192     emit_rm(cbuf, 0x3, r1, r2);
2193     emit_d8(cbuf,$cnt$$constant);
2194     emit_d8(cbuf,$primary);
2195     emit_rm(cbuf, 0x3, $secondary, r1);
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2200     emit_opcode( cbuf, 0x8B ); // Move
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_d8(cbuf,$primary);
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     int r1, r2;
2214     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2215     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2216 
2217     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2218     emit_rm(cbuf, 0x3, r1, r2);
2219     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2220       emit_opcode(cbuf,$primary);
2221       emit_rm(cbuf, 0x3, $secondary, r1);
2222       emit_d8(cbuf,$cnt$$constant-32);
2223     }
2224     emit_opcode(cbuf,0x33);  // XOR r2,r2
2225     emit_rm(cbuf, 0x3, r2, r2);
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     emit_opcode(cbuf,$opcode$$constant);
2232     int reg_encoding = $rm_reg$$reg;
2233     int base     = $mem$$base;
2234     int index    = $mem$$index;
2235     int scale    = $mem$$scale;
2236     int displace = $mem$$disp + $disp_for_half$$constant;
2237     relocInfo::relocType disp_reloc = relocInfo::none;
2238     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2239   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     int rm_byte_opcode = $rm_opcode$$constant;
2248     int base     = $mem$$base;
2249     int index    = $mem$$index;
2250     int scale    = $mem$$scale;
2251     int displace = $mem$$disp;
2252     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2253     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2254   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     int rm_byte_opcode = $rm_opcode$$constant;
2258     int base     = $mem$$base;
2259     int index    = $mem$$index;
2260     int scale    = $mem$$scale;
2261     int displace = $mem$$disp;
2262     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2263     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2264   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jmp dst < src around move
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // jmp dst > src around move
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
    // jp  ::loop  (0F 8A rel32 with disp -12, back to the fprem above)
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
2709 // less_result     =  1;
2710 // greater_result  = -1;
2711 // equal_result    = 0;
2712 // nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
2800 
  // Same as convert_long_double, but without the trailing 'add esp, 8'
  // to pop the two pushed words.
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    $dst.hi,$tmp
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo   (long compare; set flags for low bits)
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi   (compute flags for the long compare)
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and then restore the
  // standard rounding mode.  Out-of-range and NaN inputs leave the integer
  // indefinite value (0x80000000) behind, which we detect and fix up with a
  // runtime call.
2983   enc_class DPR2I_encoding( regDPR src ) %{
2984     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
    // Load the converted long; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // ModRM: /7 (CMP), r/m = EDX
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top)
3090       // FSUB   ST,$src1
3091       emit_opcode(cbuf, 0xD8);
3092       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094       // FDIV
3095       emit_opcode(cbuf, 0xD8);
3096       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
    // FMUL   ST,src2  /* D8 C8+i */
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top)
3113     // FADD   ST,$src  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x05;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);
3143     int rm_byte_opcode = 0x07;
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3150   %}
3151 
3152   // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition
  // code in the process.
  // We currently use TESTL [spp],EDI
3156   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3160     emit_opcode(cbuf,0x85);
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3163   %}
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
  // These two registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted java to compiled java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for
  // the return address and one for EBP (must save EBP).
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks, the verification slot, and the alignment word.
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
    // No difference between incoming/outgoing, so just pass false.
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating
3281   // arguments either in registers or in stack slots.  Passed an array
3282   // of ideal registers called "sig" and a "length" count.  Stack-slot
3283   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3284   // arguments for a CALLEE.  Incoming stack arguments are
3285   // automatically biased by the preserve_stack_slots field above.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307   // Location of return values
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
// Long Immediate minus one
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127.
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
// Long Immediate: value fits in a signed 32-bit int
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
// Double Immediate zero
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero
3596 operand immD0() %{
3597   // Do additional (and counter-intuitive) test against NaN to work around VC++
3598   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3599   // compare equal to -0.0.
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
// Constants for sign-extending shift counts (16 for short, 24 for byte)
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));
3773   match(RegI);
3774   match(eCXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));
3785   match(RegI);
3786   match(eBXRegI);
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));
3797   match(RegI);
3798   match(eAXRegI);
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808 // //
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
// On Windows 95, EBP is not safe to use for implicit null tests.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));
3860   match(RegP);
3861   match(eBXRegP);
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));
3873   match(RegP);
3874   match(eCXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));
3885   match(RegP);
3886   match(eBXRegP);
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));
3937   match(RegL);
3938   match(eADXRegL);
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));
3989   match(RegFlags);
3990   predicate(false);
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Float register operands
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operands
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operands
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // XMM Double register operands
4100 operand regD() %{
4101   predicate( UseSSE>=2 );
4102   constraint(ALLOC_IN_RC(double_reg_legacy));
4103   match(RegD);
4104   format %{ %}
4105   interface(REG_INTER);
4106 %}
4107 
4108 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4109 // runtime code generation via reg_class_dynamic.
4110 operand vecS() %{
4111   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4112   match(VecS);
4113 
4114   format %{ %}
4115   interface(REG_INTER);
4116 %}
4117 
4118 operand vecD() %{
4119   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4120   match(VecD);
4121 
4122   format %{ %}
4123   interface(REG_INTER);
4124 %}
4125 
4126 operand vecX() %{
4127   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4128   match(VecX);
4129 
4130   format %{ %}
4131   interface(REG_INTER);
4132 %}
4133 
4134 operand vecY() %{
4135   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4136   match(VecY);
4137 
4138   format %{ %}
4139   interface(REG_INTER);
4140 %}
4141 
4142 //----------Memory Operands----------------------------------------------------
4143 // Direct Memory Operand
4144 operand direct(immP addr) %{
4145   match(addr);
4146 
4147   format %{ "[$addr]" %}
4148   interface(MEMORY_INTER) %{
4149     base(0xFFFFFFFF);
4150     index(0x4);
4151     scale(0x0);
4152     disp($addr);
4153   %}
4154 %}
4155 
4156 // Indirect Memory Operand
4157 operand indirect(eRegP reg) %{
4158   constraint(ALLOC_IN_RC(int_reg));
4159   match(reg);
4160 
4161   format %{ "[$reg]" %}
4162   interface(MEMORY_INTER) %{
4163     base($reg);
4164     index(0x4);
4165     scale(0x0);
4166     disp(0x0);
4167   %}
4168 %}
4169 
4170 // Indirect Memory Plus Short Offset Operand
4171 operand indOffset8(eRegP reg, immI8 off) %{
4172   match(AddP reg off);
4173 
4174   format %{ "[$reg + $off]" %}
4175   interface(MEMORY_INTER) %{
4176     base($reg);
4177     index(0x4);
4178     scale(0x0);
4179     disp($off);
4180   %}
4181 %}
4182 
4183 // Indirect Memory Plus Long Offset Operand
4184 operand indOffset32(eRegP reg, immI off) %{
4185   match(AddP reg off);
4186 
4187   format %{ "[$reg + $off]" %}
4188   interface(MEMORY_INTER) %{
4189     base($reg);
4190     index(0x4);
4191     scale(0x0);
4192     disp($off);
4193   %}
4194 %}
4195 
4196 // Indirect Memory Plus Long Offset Operand
4197 operand indOffset32X(rRegI reg, immP off) %{
4198   match(AddP off reg);
4199 
4200   format %{ "[$reg + $off]" %}
4201   interface(MEMORY_INTER) %{
4202     base($reg);
4203     index(0x4);
4204     scale(0x0);
4205     disp($off);
4206   %}
4207 %}
4208 
4209 // Indirect Memory Plus Index Register Plus Offset Operand
4210 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4211   match(AddP (AddP reg ireg) off);
4212 
4213   op_cost(10);
4214   format %{"[$reg + $off + $ireg]" %}
4215   interface(MEMORY_INTER) %{
4216     base($reg);
4217     index($ireg);
4218     scale(0x0);
4219     disp($off);
4220   %}
4221 %}
4222 
4223 // Indirect Memory Plus Index Register Plus Offset Operand
4224 operand indIndex(eRegP reg, rRegI ireg) %{
4225   match(AddP reg ireg);
4226 
4227   op_cost(10);
4228   format %{"[$reg + $ireg]" %}
4229   interface(MEMORY_INTER) %{
4230     base($reg);
4231     index($ireg);
4232     scale(0x0);
4233     disp(0x0);
4234   %}
4235 %}
4236 
4237 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4239 // // -------------------------------------------------------------------------
4240 // // Scaled Memory Operands
4241 // // Indirect Memory Times Scale Plus Offset Operand
4242 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4243 //   match(AddP off (LShiftI ireg scale));
4244 //
4245 //   op_cost(10);
4246 //   format %{"[$off + $ireg << $scale]" %}
4247 //   interface(MEMORY_INTER) %{
4248 //     base(0x4);
4249 //     index($ireg);
4250 //     scale($scale);
4251 //     disp($off);
4252 //   %}
4253 // %}
4254 
4255 // Indirect Memory Times Scale Plus Index Register
4256 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4257   match(AddP reg (LShiftI ireg scale));
4258 
4259   op_cost(10);
4260   format %{"[$reg + $ireg << $scale]" %}
4261   interface(MEMORY_INTER) %{
4262     base($reg);
4263     index($ireg);
4264     scale($scale);
4265     disp(0x0);
4266   %}
4267 %}
4268 
4269 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4270 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4271   match(AddP (AddP reg (LShiftI ireg scale)) off);
4272 
4273   op_cost(10);
4274   format %{"[$reg + $off + $ireg << $scale]" %}
4275   interface(MEMORY_INTER) %{
4276     base($reg);
4277     index($ireg);
4278     scale($scale);
4279     disp($off);
4280   %}
4281 %}
4282 
4283 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4285 // the first word of the long.  If the load-long destination overlaps with
4286 // registers used in the addressing expression, the 2nd half will be loaded
4287 // from a clobbered address.  Fix this by requiring that load-long use
4288 // address registers that do not overlap with the load-long target.
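//
// For example (a sketch of the hazard): if the long is loaded into EDX:EAX
// through [EAX+off], then "MOV EAX,[EAX+off]" clobbers the base register and
// the second load "MOV EDX,[EAX+off+4]" would read from the wrong address.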
4289 
4290 // load-long support
4291 operand load_long_RegP() %{
4292   constraint(ALLOC_IN_RC(esi_reg));
4293   match(RegP);
4294   match(eSIRegP);
4295   op_cost(100);
4296   format %{  %}
4297   interface(REG_INTER);
4298 %}
4299 
4300 // Indirect Memory Operand Long
4301 operand load_long_indirect(load_long_RegP reg) %{
4302   constraint(ALLOC_IN_RC(esi_reg));
4303   match(reg);
4304 
4305   format %{ "[$reg]" %}
4306   interface(MEMORY_INTER) %{
4307     base($reg);
4308     index(0x4);
4309     scale(0x0);
4310     disp(0x0);
4311   %}
4312 %}
4313 
4314 // Indirect Memory Plus Long Offset Operand
4315 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4316   match(AddP reg off);
4317 
4318   format %{ "[$reg + $off]" %}
4319   interface(MEMORY_INTER) %{
4320     base($reg);
4321     index(0x4);
4322     scale(0x0);
4323     disp($off);
4324   %}
4325 %}
4326 
4327 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4328 
4329 
4330 //----------Special Memory Operands--------------------------------------------
4331 // Stack Slot Operand - This operand is used for loading and storing temporary
4332 //                      values on the stack where a match requires a value to
4333 //                      flow through memory.
4334 operand stackSlotP(sRegP reg) %{
4335   constraint(ALLOC_IN_RC(stack_slots));
4336   // No match rule because this operand is only generated in matching
4337   format %{ "[$reg]" %}
4338   interface(MEMORY_INTER) %{
4339     base(0x4);   // ESP
4340     index(0x4);  // No Index
4341     scale(0x0);  // No Scale
4342     disp($reg);  // Stack Offset
4343   %}
4344 %}
4345 
4346 operand stackSlotI(sRegI reg) %{
4347   constraint(ALLOC_IN_RC(stack_slots));
4348   // No match rule because this operand is only generated in matching
4349   format %{ "[$reg]" %}
4350   interface(MEMORY_INTER) %{
4351     base(0x4);   // ESP
4352     index(0x4);  // No Index
4353     scale(0x0);  // No Scale
4354     disp($reg);  // Stack Offset
4355   %}
4356 %}
4357 
4358 operand stackSlotF(sRegF reg) %{
4359   constraint(ALLOC_IN_RC(stack_slots));
4360   // No match rule because this operand is only generated in matching
4361   format %{ "[$reg]" %}
4362   interface(MEMORY_INTER) %{
4363     base(0x4);   // ESP
4364     index(0x4);  // No Index
4365     scale(0x0);  // No Scale
4366     disp($reg);  // Stack Offset
4367   %}
4368 %}
4369 
4370 operand stackSlotD(sRegD reg) %{
4371   constraint(ALLOC_IN_RC(stack_slots));
4372   // No match rule because this operand is only generated in matching
4373   format %{ "[$reg]" %}
4374   interface(MEMORY_INTER) %{
4375     base(0x4);   // ESP
4376     index(0x4);  // No Index
4377     scale(0x0);  // No Scale
4378     disp($reg);  // Stack Offset
4379   %}
4380 %}
4381 
4382 operand stackSlotL(sRegL reg) %{
4383   constraint(ALLOC_IN_RC(stack_slots));
4384   // No match rule because this operand is only generated in matching
4385   format %{ "[$reg]" %}
4386   interface(MEMORY_INTER) %{
4387     base(0x4);   // ESP
4388     index(0x4);  // No Index
4389     scale(0x0);  // No Scale
4390     disp($reg);  // Stack Offset
4391   %}
4392 %}
4393 
4394 //----------Memory Operands - Win95 Implicit Null Variants----------------
4395 // Indirect Memory Operand
4396 operand indirect_win95_safe(eRegP_no_EBP reg)
4397 %{
4398   constraint(ALLOC_IN_RC(int_reg));
4399   match(reg);
4400 
4401   op_cost(100);
4402   format %{ "[$reg]" %}
4403   interface(MEMORY_INTER) %{
4404     base($reg);
4405     index(0x4);
4406     scale(0x0);
4407     disp(0x0);
4408   %}
4409 %}
4410 
4411 // Indirect Memory Plus Short Offset Operand
4412 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4413 %{
4414   match(AddP reg off);
4415 
4416   op_cost(100);
4417   format %{ "[$reg + $off]" %}
4418   interface(MEMORY_INTER) %{
4419     base($reg);
4420     index(0x4);
4421     scale(0x0);
4422     disp($off);
4423   %}
4424 %}
4425 
4426 // Indirect Memory Plus Long Offset Operand
4427 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4428 %{
4429   match(AddP reg off);
4430 
4431   op_cost(100);
4432   format %{ "[$reg + $off]" %}
4433   interface(MEMORY_INTER) %{
4434     base($reg);
4435     index(0x4);
4436     scale(0x0);
4437     disp($off);
4438   %}
4439 %}
4440 
4441 // Indirect Memory Plus Index Register Plus Offset Operand
4442 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4443 %{
4444   match(AddP (AddP reg ireg) off);
4445 
4446   op_cost(100);
4447   format %{"[$reg + $off + $ireg]" %}
4448   interface(MEMORY_INTER) %{
4449     base($reg);
4450     index($ireg);
4451     scale(0x0);
4452     disp($off);
4453   %}
4454 %}
4455 
4456 // Indirect Memory Times Scale Plus Index Register
4457 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4458 %{
4459   match(AddP reg (LShiftI ireg scale));
4460 
4461   op_cost(100);
4462   format %{"[$reg + $ireg << $scale]" %}
4463   interface(MEMORY_INTER) %{
4464     base($reg);
4465     index($ireg);
4466     scale($scale);
4467     disp(0x0);
4468   %}
4469 %}
4470 
4471 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4472 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4473 %{
4474   match(AddP (AddP reg (LShiftI ireg scale)) off);
4475 
4476   op_cost(100);
4477   format %{"[$reg + $off + $ireg << $scale]" %}
4478   interface(MEMORY_INTER) %{
4479     base($reg);
4480     index($ireg);
4481     scale($scale);
4482     disp($off);
4483   %}
4484 %}
4485 
4486 //----------Conditional Branch Operands----------------------------------------
4487 // Comparison Op  - This is the operation of the comparison, and is limited to
4488 //                  the following set of codes:
4489 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4490 //
4491 // Other attributes of the comparison, such as unsignedness, are specified
4492 // by the comparison instruction that sets a condition code flags register.
4493 // That result is represented by a flags operand whose subtype is appropriate
4494 // to the unsignedness (etc.) of the comparison.
4495 //
4496 // Later, the instruction which matches both the Comparison Op (a Bool) and
4497 // the flags (produced by the Cmp) specifies the coding of the comparison op
4498 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4499 
// Comparison Code
4501 operand cmpOp() %{
4502   match(Bool);
4503 
4504   format %{ "" %}
4505   interface(COND_INTER) %{
4506     equal(0x4, "e");
4507     not_equal(0x5, "ne");
4508     less(0xC, "l");
4509     greater_equal(0xD, "ge");
4510     less_equal(0xE, "le");
4511     greater(0xF, "g");
4512     overflow(0x0, "o");
4513     no_overflow(0x1, "no");
4514   %}
4515 %}
4516 
4517 // Comparison Code, unsigned compare.  Used by FP also, with
4518 // C2 (unordered) turned into GT or LT already.  The other bits
4519 // C0 and C3 are turned into Carry & Zero flags.
4520 operand cmpOpU() %{
4521   match(Bool);
4522 
4523   format %{ "" %}
4524   interface(COND_INTER) %{
4525     equal(0x4, "e");
4526     not_equal(0x5, "ne");
4527     less(0x2, "b");
4528     greater_equal(0x3, "nb");
4529     less_equal(0x6, "be");
4530     greater(0x7, "nbe");
4531     overflow(0x0, "o");
4532     no_overflow(0x1, "no");
4533   %}
4534 %}
4535 
4536 // Floating comparisons that don't require any fixup for the unordered case
4537 operand cmpOpUCF() %{
4538   match(Bool);
4539   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4540             n->as_Bool()->_test._test == BoolTest::ge ||
4541             n->as_Bool()->_test._test == BoolTest::le ||
4542             n->as_Bool()->_test._test == BoolTest::gt);
4543   format %{ "" %}
4544   interface(COND_INTER) %{
4545     equal(0x4, "e");
4546     not_equal(0x5, "ne");
4547     less(0x2, "b");
4548     greater_equal(0x3, "nb");
4549     less_equal(0x6, "be");
4550     greater(0x7, "nbe");
4551     overflow(0x0, "o");
4552     no_overflow(0x1, "no");
4553   %}
4554 %}
4555 
4556 
4557 // Floating comparisons that can be fixed up with extra conditional jumps
4558 operand cmpOpUCF2() %{
4559   match(Bool);
4560   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4561             n->as_Bool()->_test._test == BoolTest::eq);
4562   format %{ "" %}
4563   interface(COND_INTER) %{
4564     equal(0x4, "e");
4565     not_equal(0x5, "ne");
4566     less(0x2, "b");
4567     greater_equal(0x3, "nb");
4568     less_equal(0x6, "be");
4569     greater(0x7, "nbe");
4570     overflow(0x0, "o");
4571     no_overflow(0x1, "no");
4572   %}
4573 %}
4574 
4575 // Comparison Code for FP conditional move
4576 operand cmpOp_fcmov() %{
4577   match(Bool);
4578 
4579   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4580             n->as_Bool()->_test._test != BoolTest::no_overflow);
4581   format %{ "" %}
4582   interface(COND_INTER) %{
4583     equal        (0x0C8);
4584     not_equal    (0x1C8);
4585     less         (0x0C0);
4586     greater_equal(0x1C0);
4587     less_equal   (0x0D0);
4588     greater      (0x1D0);
4589     overflow(0x0, "o"); // not really supported by the instruction
4590     no_overflow(0x1, "no"); // not really supported by the instruction
4591   %}
4592 %}
4593 
4594 // Comparison Code used in long compares
4595 operand cmpOp_commute() %{
4596   match(Bool);
4597 
4598   format %{ "" %}
4599   interface(COND_INTER) %{
4600     equal(0x4, "e");
4601     not_equal(0x5, "ne");
4602     less(0xF, "g");
4603     greater_equal(0xE, "le");
4604     less_equal(0xD, "ge");
4605     greater(0xC, "l");
4606     overflow(0x0, "o");
4607     no_overflow(0x1, "no");
4608   %}
4609 %}
4610 
4611 // Comparison Code used in unsigned long compares
4612 operand cmpOpU_commute() %{
4613   match(Bool);
4614 
4615   format %{ "" %}
4616   interface(COND_INTER) %{
4617     equal(0x4, "e");
4618     not_equal(0x5, "ne");
4619     less(0x7, "nbe");
4620     greater_equal(0x6, "be");
4621     less_equal(0x3, "nb");
4622     greater(0x2, "b");
4623     overflow(0x0, "o");
4624     no_overflow(0x1, "no");
4625   %}
4626 %}
4627 
4628 //----------OPERAND CLASSES----------------------------------------------------
4629 // Operand Classes are groups of operands that are used to simplify
4630 // instruction definitions by not requiring the AD writer to specify separate
4631 // instructions for every form of operand when the instruction accepts
4632 // multiple operand types with the same basic encoding and format.  The classic
4633 // case of this is memory operands.
4634 
4635 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4636                indIndex, indIndexScale, indIndexScaleOffset);
4637 
4638 // Long memory operations are encoded in 2 instructions and a +4 offset.
4639 // This means some kind of offset is always required and you cannot use
4640 // an oop as the offset (as is done when working on static globals).
4641 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4642                     indIndex, indIndexScale, indIndexScaleOffset);
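
// For illustration only (a hedged sketch, not part of the build): a rule that
// names an operand class, such as loadI further below, matches every
// addressing form in the class with a single definition --
//
//   instruct loadI_sketch(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));    // covers [reg], [reg+disp8], [reg+disp32],
//                                    // [reg+idx+disp], [reg+idx<<scale+disp], ...
//     format %{ "MOV    $dst,$mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}
//
// -- whereas without the opclass a separate instruct would be needed per form.
// The "+4" idiom mentioned above shows up in the loadL rule below, which
// builds a second Address at $mem$$disp + 4 for the high word.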
4643 
4644 
4645 //----------PIPELINE-----------------------------------------------------------
4646 // Rules which define the behavior of the target architecture's pipeline.
4647 pipeline %{
4648 
4649 //----------ATTRIBUTES---------------------------------------------------------
4650 attributes %{
4651   variable_size_instructions;        // Variable-size instructions
4652   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4653   instruction_unit_size = 1;         // An instruction is 1 byte long
4654   instruction_fetch_unit_size = 16;  // The processor fetches one line
4655   instruction_fetch_units = 1;       // of 16 bytes
4656 
4657   // List of nop instructions
4658   nops( MachNop );
4659 %}
4660 
4661 //----------RESOURCES----------------------------------------------------------
4662 // Resources are the functional units available to the machine
4663 
4664 // Generic P2/P3 pipeline
4665 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4666 // 3 instructions decoded per cycle.
4667 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4668 // 2 ALU ops, only ALU0 handles mul/div instructions.
4669 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4670            MS0, MS1, MEM = MS0 | MS1,
4671            BR, FPU,
4672            ALU0, ALU1, ALU = ALU0 | ALU1 );
4673 
4674 //----------PIPELINE DESCRIPTION-----------------------------------------------
4675 // Pipeline Description specifies the stages in the machine's pipeline
4676 
4677 // Generic P2/P3 pipeline
4678 pipe_desc(S0, S1, S2, S3, S4, S5);
4679 
4680 //----------PIPELINE CLASSES---------------------------------------------------
4681 // Pipeline Classes describe the stages in which input and output are
4682 // referenced by the hardware pipeline.
4683 
4684 // Naming convention: ialu or fpu
4685 // Then: _reg
4686 // Then: _reg if there is a 2nd register
4687 // Then: _long if it's a pair of instructions implementing a long
4688 // Then: _fat if it requires the big decoder
4689 //   Or: _mem if it requires the big decoder and a memory unit.
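//
// Worked example of the convention: ialu_reg_mem below is an integer ALU op
// with a register destination and a memory source, so it claims the big
// decoder (D0) plus a memory unit; ialu_reg_reg_long_fat is a two-instruction
// long op, both halves of which need the big decoder.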
4690 
4691 // Integer ALU reg operation
4692 pipe_class ialu_reg(rRegI dst) %{
4693     single_instruction;
4694     dst    : S4(write);
4695     dst    : S3(read);
4696     DECODE : S0;        // any decoder
4697     ALU    : S3;        // any alu
4698 %}
4699 
4700 // Long ALU reg operation
4701 pipe_class ialu_reg_long(eRegL dst) %{
4702     instruction_count(2);
4703     dst    : S4(write);
4704     dst    : S3(read);
4705     DECODE : S0(2);     // any 2 decoders
4706     ALU    : S3(2);     // both alus
4707 %}
4708 
4709 // Integer ALU reg operation using big decoder
4710 pipe_class ialu_reg_fat(rRegI dst) %{
4711     single_instruction;
4712     dst    : S4(write);
4713     dst    : S3(read);
4714     D0     : S0;        // big decoder only
4715     ALU    : S3;        // any alu
4716 %}
4717 
4718 // Long ALU reg operation using big decoder
4719 pipe_class ialu_reg_long_fat(eRegL dst) %{
4720     instruction_count(2);
4721     dst    : S4(write);
4722     dst    : S3(read);
4723     D0     : S0(2);     // big decoder only; twice
4724     ALU    : S3(2);     // any 2 alus
4725 %}
4726 
4727 // Integer ALU reg-reg operation
4728 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4729     single_instruction;
4730     dst    : S4(write);
4731     src    : S3(read);
4732     DECODE : S0;        // any decoder
4733     ALU    : S3;        // any alu
4734 %}
4735 
4736 // Long ALU reg-reg operation
4737 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4738     instruction_count(2);
4739     dst    : S4(write);
4740     src    : S3(read);
4741     DECODE : S0(2);     // any 2 decoders
4742     ALU    : S3(2);     // both alus
4743 %}
4744 
4745 // Integer ALU reg-reg operation
4746 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4747     single_instruction;
4748     dst    : S4(write);
4749     src    : S3(read);
4750     D0     : S0;        // big decoder only
4751     ALU    : S3;        // any alu
4752 %}
4753 
4754 // Long ALU reg-reg operation
4755 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4756     instruction_count(2);
4757     dst    : S4(write);
4758     src    : S3(read);
4759     D0     : S0(2);     // big decoder only; twice
4760     ALU    : S3(2);     // both alus
4761 %}
4762 
4763 // Integer ALU reg-mem operation
4764 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4765     single_instruction;
4766     dst    : S5(write);
4767     mem    : S3(read);
4768     D0     : S0;        // big decoder only
4769     ALU    : S4;        // any alu
4770     MEM    : S3;        // any mem
4771 %}
4772 
4773 // Long ALU reg-mem operation
4774 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4775     instruction_count(2);
4776     dst    : S5(write);
4777     mem    : S3(read);
4778     D0     : S0(2);     // big decoder only; twice
4779     ALU    : S4(2);     // any 2 alus
4780     MEM    : S3(2);     // both mems
4781 %}
4782 
4783 // Integer mem operation (prefetch)
4784 pipe_class ialu_mem(memory mem)
4785 %{
4786     single_instruction;
4787     mem    : S3(read);
4788     D0     : S0;        // big decoder only
4789     MEM    : S3;        // any mem
4790 %}
4791 
4792 // Integer Store to Memory
4793 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4794     single_instruction;
4795     mem    : S3(read);
4796     src    : S5(read);
4797     D0     : S0;        // big decoder only
4798     ALU    : S4;        // any alu
4799     MEM    : S3;
4800 %}
4801 
4802 // Long Store to Memory
4803 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4804     instruction_count(2);
4805     mem    : S3(read);
4806     src    : S5(read);
4807     D0     : S0(2);     // big decoder only; twice
4808     ALU    : S4(2);     // any 2 alus
4809     MEM    : S3(2);     // Both mems
4810 %}
4811 
4812 // Integer Store to Memory
4813 pipe_class ialu_mem_imm(memory mem) %{
4814     single_instruction;
4815     mem    : S3(read);
4816     D0     : S0;        // big decoder only
4817     ALU    : S4;        // any alu
4818     MEM    : S3;
4819 %}
4820 
4821 // Integer ALU0 reg-reg operation
4822 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4823     single_instruction;
4824     dst    : S4(write);
4825     src    : S3(read);
4826     D0     : S0;        // Big decoder only
4827     ALU0   : S3;        // only alu0
4828 %}
4829 
4830 // Integer ALU0 reg-mem operation
4831 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4832     single_instruction;
4833     dst    : S5(write);
4834     mem    : S3(read);
4835     D0     : S0;        // big decoder only
4836     ALU0   : S4;        // ALU0 only
4837     MEM    : S3;        // any mem
4838 %}
4839 
4840 // Integer ALU reg-reg operation
4841 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4842     single_instruction;
4843     cr     : S4(write);
4844     src1   : S3(read);
4845     src2   : S3(read);
4846     DECODE : S0;        // any decoder
4847     ALU    : S3;        // any alu
4848 %}
4849 
4850 // Integer ALU reg-imm operation
4851 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4852     single_instruction;
4853     cr     : S4(write);
4854     src1   : S3(read);
4855     DECODE : S0;        // any decoder
4856     ALU    : S3;        // any alu
4857 %}
4858 
4859 // Integer ALU reg-mem operation
4860 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4861     single_instruction;
4862     cr     : S4(write);
4863     src1   : S3(read);
4864     src2   : S3(read);
4865     D0     : S0;        // big decoder only
4866     ALU    : S4;        // any alu
4867     MEM    : S3;
4868 %}
4869 
4870 // Conditional move reg-reg
4871 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4872     instruction_count(4);
4873     y      : S4(read);
4874     q      : S3(read);
4875     p      : S3(read);
4876     DECODE : S0(4);     // any decoder
4877 %}
4878 
4879 // Conditional move reg-reg
4880 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4881     single_instruction;
4882     dst    : S4(write);
4883     src    : S3(read);
4884     cr     : S3(read);
4885     DECODE : S0;        // any decoder
4886 %}
4887 
4888 // Conditional move reg-mem
4889 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4890     single_instruction;
4891     dst    : S4(write);
4892     src    : S3(read);
4893     cr     : S3(read);
4894     DECODE : S0;        // any decoder
4895     MEM    : S3;
4896 %}
4897 
4898 // Conditional move reg-reg long
4899 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4900     single_instruction;
4901     dst    : S4(write);
4902     src    : S3(read);
4903     cr     : S3(read);
4904     DECODE : S0(2);     // any 2 decoders
4905 %}
4906 
4907 // Conditional move double reg-reg
4908 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4909     single_instruction;
4910     dst    : S4(write);
4911     src    : S3(read);
4912     cr     : S3(read);
4913     DECODE : S0;        // any decoder
4914 %}
4915 
4916 // Float reg-reg operation
4917 pipe_class fpu_reg(regDPR dst) %{
4918     instruction_count(2);
4919     dst    : S3(read);
4920     DECODE : S0(2);     // any 2 decoders
4921     FPU    : S3;
4922 %}
4923 
4924 // Float reg-reg operation
4925 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4926     instruction_count(2);
4927     dst    : S4(write);
4928     src    : S3(read);
4929     DECODE : S0(2);     // any 2 decoders
4930     FPU    : S3;
4931 %}
4932 
4933 // Float reg-reg operation
4934 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4935     instruction_count(3);
4936     dst    : S4(write);
4937     src1   : S3(read);
4938     src2   : S3(read);
4939     DECODE : S0(3);     // any 3 decoders
4940     FPU    : S3(2);
4941 %}
4942 
4943 // Float reg-reg operation
4944 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4945     instruction_count(4);
4946     dst    : S4(write);
4947     src1   : S3(read);
4948     src2   : S3(read);
4949     src3   : S3(read);
4950     DECODE : S0(4);     // any 4 decoders
4951     FPU    : S3(2);
4952 %}
4953 
4954 // Float reg-reg operation
4955 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4956     instruction_count(4);
4957     dst    : S4(write);
4958     src1   : S3(read);
4959     src2   : S3(read);
4960     src3   : S3(read);
4961     DECODE : S1(3);     // any 3 decoders
4962     D0     : S0;        // Big decoder only
4963     FPU    : S3(2);
4964     MEM    : S3;
4965 %}
4966 
4967 // Float reg-mem operation
4968 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4969     instruction_count(2);
4970     dst    : S5(write);
4971     mem    : S3(read);
4972     D0     : S0;        // big decoder only
4973     DECODE : S1;        // any decoder for FPU POP
4974     FPU    : S4;
4975     MEM    : S3;        // any mem
4976 %}
4977 
4978 // Float reg-mem operation
4979 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4980     instruction_count(3);
4981     dst    : S5(write);
4982     src1   : S3(read);
4983     mem    : S3(read);
4984     D0     : S0;        // big decoder only
4985     DECODE : S1(2);     // any decoder for FPU POP
4986     FPU    : S4;
4987     MEM    : S3;        // any mem
4988 %}
4989 
4990 // Float mem-reg operation
4991 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4992     instruction_count(2);
4993     src    : S5(read);
4994     mem    : S3(read);
4995     DECODE : S0;        // any decoder for FPU PUSH
4996     D0     : S1;        // big decoder only
4997     FPU    : S4;
4998     MEM    : S3;        // any mem
4999 %}
5000 
5001 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5002     instruction_count(3);
5003     src1   : S3(read);
5004     src2   : S3(read);
5005     mem    : S3(read);
5006     DECODE : S0(2);     // any decoder for FPU PUSH
5007     D0     : S1;        // big decoder only
5008     FPU    : S4;
5009     MEM    : S3;        // any mem
5010 %}
5011 
5012 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5013     instruction_count(3);
5014     src1   : S3(read);
5015     src2   : S3(read);
5016     mem    : S4(read);
5017     DECODE : S0;        // any decoder for FPU PUSH
5018     D0     : S0(2);     // big decoder only
5019     FPU    : S4;
5020     MEM    : S3(2);     // any mem
5021 %}
5022 
5023 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5024     instruction_count(2);
5025     src1   : S3(read);
5026     dst    : S4(read);
5027     D0     : S0(2);     // big decoder only
5028     MEM    : S3(2);     // any mem
5029 %}
5030 
5031 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5032     instruction_count(3);
5033     src1   : S3(read);
5034     src2   : S3(read);
5035     dst    : S4(read);
5036     D0     : S0(3);     // big decoder only
5037     FPU    : S4;
5038     MEM    : S3(3);     // any mem
5039 %}
5040 
5041 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5042     instruction_count(3);
5043     src1   : S4(read);
5044     mem    : S4(read);
5045     DECODE : S0;        // any decoder for FPU PUSH
5046     D0     : S0(2);     // big decoder only
5047     FPU    : S4;
5048     MEM    : S3(2);     // any mem
5049 %}
5050 
5051 // Float load constant
5052 pipe_class fpu_reg_con(regDPR dst) %{
5053     instruction_count(2);
5054     dst    : S5(write);
5055     D0     : S0;        // big decoder only for the load
5056     DECODE : S1;        // any decoder for FPU POP
5057     FPU    : S4;
5058     MEM    : S3;        // any mem
5059 %}
5060 
5061 // Float load constant
5062 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5063     instruction_count(3);
5064     dst    : S5(write);
5065     src    : S3(read);
5066     D0     : S0;        // big decoder only for the load
5067     DECODE : S1(2);     // any decoder for FPU POP
5068     FPU    : S4;
5069     MEM    : S3;        // any mem
5070 %}
5071 
5072 // Unconditional branch
5073 pipe_class pipe_jmp( label labl ) %{
5074     single_instruction;
5075     BR   : S3;
5076 %}
5077 
5078 // Conditional branch
5079 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5080     single_instruction;
5081     cr    : S1(read);
5082     BR    : S3;
5083 %}
5084 
5085 // Allocation idiom
5086 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5087     instruction_count(1); force_serialization;
5088     fixed_latency(6);
5089     heap_ptr : S3(read);
5090     DECODE   : S0(3);
5091     D0       : S2;
5092     MEM      : S3;
5093     ALU      : S3(2);
5094     dst      : S5(write);
5095     BR       : S5;
5096 %}
5097 
5098 // Generic big/slow expanded idiom
5099 pipe_class pipe_slow(  ) %{
5100     instruction_count(10); multiple_bundles; force_serialization;
5101     fixed_latency(100);
5102     D0  : S0(2);
5103     MEM : S3(2);
5104 %}
5105 
5106 // The real do-nothing guy
5107 pipe_class empty( ) %{
5108     instruction_count(0);
5109 %}
5110 
5111 // Define the class for the Nop node
5112 define %{
5113    MachNop = empty;
5114 %}
5115 
5116 %}
5117 
5118 //----------INSTRUCTIONS-------------------------------------------------------
5119 //
5120 // match      -- States which machine-independent subtree may be replaced
5121 //               by this instruction.
5122 // ins_cost   -- The estimated cost of this instruction is used by instruction
5123 //               selection to identify a minimum cost tree of machine
5124 //               instructions that matches a tree of machine-independent
5125 //               instructions.
5126 // format     -- A string providing the disassembly for this instruction.
5127 //               The value of an instruction's operand may be inserted
5128 //               by referring to it with a '$' prefix.
5129 // opcode     -- Three instruction opcodes may be provided.  These are referred
5130 //               to within an encode class as $primary, $secondary, and $tertiary
5131 //               respectively.  The primary opcode is commonly used to
5132 //               indicate the type of machine instruction, while secondary
5133 //               and tertiary are often used for prefix options or addressing
5134 //               modes.
5135 // ins_encode -- A list of encode classes with parameters. The encode class
5136 //               name must have been defined in an 'enc_class' specification
5137 //               in the encode section of the architecture description.
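//
// For illustration only, a hedged sketch of how the pieces above fit together
// (the rule name is hypothetical; the real definitions start below):
//
//   instruct addI_eReg_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));       // ideal subtree this rule replaces
//     effect(KILL cr);                     // ADD clobbers the condition flags
//     ins_cost(125);                       // guides instruction selection
//     format %{ "ADD    $dst,$src" %}      // disassembly string
//     opcode(0x03);                        // $primary: ADD r32, r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // encode classes from the encode section
//     ins_pipe( ialu_reg_reg );            // pipeline class defined above
//   %}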
5138 
5139 //----------BSWAP-Instruction--------------------------------------------------
5140 instruct bytes_reverse_int(rRegI dst) %{
5141   match(Set dst (ReverseBytesI dst));
5142 
5143   format %{ "BSWAP  $dst" %}
5144   opcode(0x0F, 0xC8);
5145   ins_encode( OpcP, OpcSReg(dst) );
5146   ins_pipe( ialu_reg );
5147 %}
5148 
5149 instruct bytes_reverse_long(eRegL dst) %{
5150   match(Set dst (ReverseBytesL dst));
5151 
5152   format %{ "BSWAP  $dst.lo\n\t"
5153             "BSWAP  $dst.hi\n\t"
5154             "XCHG   $dst.lo $dst.hi" %}
5155 
5156   ins_cost(125);
5157   ins_encode( bswap_long_bytes(dst) );
5158   ins_pipe( ialu_reg_reg);
5159 %}
5160 
5161 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5162   match(Set dst (ReverseBytesUS dst));
5163   effect(KILL cr);
5164 
5165   format %{ "BSWAP  $dst\n\t"
5166             "SHR    $dst,16\n\t" %}
5167   ins_encode %{
5168     __ bswapl($dst$$Register);
5169     __ shrl($dst$$Register, 16);
5170   %}
5171   ins_pipe( ialu_reg );
5172 %}
5173 
5174 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5175   match(Set dst (ReverseBytesS dst));
5176   effect(KILL cr);
5177 
5178   format %{ "BSWAP  $dst\n\t"
5179             "SAR    $dst,16\n\t" %}
5180   ins_encode %{
5181     __ bswapl($dst$$Register);
5182     __ sarl($dst$$Register, 16);
5183   %}
5184   ins_pipe( ialu_reg );
5185 %}
5186 
5187 
5188 //---------- Zeros Count Instructions ------------------------------------------
5189 
5190 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5191   predicate(UseCountLeadingZerosInstruction);
5192   match(Set dst (CountLeadingZerosI src));
5193   effect(KILL cr);
5194 
5195   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5196   ins_encode %{
5197     __ lzcntl($dst$$Register, $src$$Register);
5198   %}
5199   ins_pipe(ialu_reg);
5200 %}
5201 
5202 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5203   predicate(!UseCountLeadingZerosInstruction);
5204   match(Set dst (CountLeadingZerosI src));
5205   effect(KILL cr);
5206 
5207   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5208             "JNZ    skip\n\t"
5209             "MOV    $dst, -1\n"
5210       "skip:\n\t"
5211             "NEG    $dst\n\t"
5212             "ADD    $dst, 31" %}
5213   ins_encode %{
5214     Register Rdst = $dst$$Register;
5215     Register Rsrc = $src$$Register;
5216     Label skip;
5217     __ bsrl(Rdst, Rsrc);
5218     __ jccb(Assembler::notZero, skip);
5219     __ movl(Rdst, -1);
5220     __ bind(skip);
5221     __ negl(Rdst);
5222     __ addl(Rdst, BitsPerInt - 1);
5223   %}
5224   ins_pipe(ialu_reg);
5225 %}
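
// Note on the arithmetic above (not emitted code): BSR returns the index of
// the highest set bit, so for nonzero x the sequence computes
// clz(x) = -bsr(x) + 31 = 31 - bsr(x).  When x == 0, BSR leaves the -1 from
// the MOV in place, and -(-1) + 31 = 32, the defined result for a zero input.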
5226 
5227 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5228   predicate(UseCountLeadingZerosInstruction);
5229   match(Set dst (CountLeadingZerosL src));
5230   effect(TEMP dst, KILL cr);
5231 
5232   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5233             "JNC    done\n\t"
5234             "LZCNT  $dst, $src.lo\n\t"
5235             "ADD    $dst, 32\n"
5236       "done:" %}
5237   ins_encode %{
5238     Register Rdst = $dst$$Register;
5239     Register Rsrc = $src$$Register;
5240     Label done;
5241     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5242     __ jccb(Assembler::carryClear, done);
5243     __ lzcntl(Rdst, Rsrc);
5244     __ addl(Rdst, BitsPerInt);
5245     __ bind(done);
5246   %}
5247   ins_pipe(ialu_reg);
5248 %}
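
// Note (not emitted code): LZCNT sets CF exactly when its source is zero, so
// the JNC above jumps straight to "done" whenever $src.hi is nonzero; only a
// zero high word falls through to count the low word and add 32.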
5249 
5250 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5251   predicate(!UseCountLeadingZerosInstruction);
5252   match(Set dst (CountLeadingZerosL src));
5253   effect(TEMP dst, KILL cr);
5254 
5255   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5256             "JZ     msw_is_zero\n\t"
5257             "ADD    $dst, 32\n\t"
5258             "JMP    not_zero\n"
5259       "msw_is_zero:\n\t"
5260             "BSR    $dst, $src.lo\n\t"
5261             "JNZ    not_zero\n\t"
5262             "MOV    $dst, -1\n"
5263       "not_zero:\n\t"
5264             "NEG    $dst\n\t"
5265             "ADD    $dst, 63\n" %}
5266   ins_encode %{
5267     Register Rdst = $dst$$Register;
5268     Register Rsrc = $src$$Register;
5269     Label msw_is_zero;
5270     Label not_zero;
5271     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5272     __ jccb(Assembler::zero, msw_is_zero);
5273     __ addl(Rdst, BitsPerInt);
5274     __ jmpb(not_zero);
5275     __ bind(msw_is_zero);
5276     __ bsrl(Rdst, Rsrc);
5277     __ jccb(Assembler::notZero, not_zero);
5278     __ movl(Rdst, -1);
5279     __ bind(not_zero);
5280     __ negl(Rdst);
5281     __ addl(Rdst, BitsPerLong - 1);
5282   %}
5283   ins_pipe(ialu_reg);
5284 %}
5285 
5286 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5287   predicate(UseCountTrailingZerosInstruction);
5288   match(Set dst (CountTrailingZerosI src));
5289   effect(KILL cr);
5290 
5291   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5292   ins_encode %{
5293     __ tzcntl($dst$$Register, $src$$Register);
5294   %}
5295   ins_pipe(ialu_reg);
5296 %}
5297 
5298 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5299   predicate(!UseCountTrailingZerosInstruction);
5300   match(Set dst (CountTrailingZerosI src));
5301   effect(KILL cr);
5302 
5303   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5304             "JNZ    done\n\t"
5305             "MOV    $dst, 32\n"
5306       "done:" %}
5307   ins_encode %{
5308     Register Rdst = $dst$$Register;
5309     Label done;
5310     __ bsfl(Rdst, $src$$Register);
5311     __ jccb(Assembler::notZero, done);
5312     __ movl(Rdst, BitsPerInt);
5313     __ bind(done);
5314   %}
5315   ins_pipe(ialu_reg);
5316 %}
5317 
5318 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5319   predicate(UseCountTrailingZerosInstruction);
5320   match(Set dst (CountTrailingZerosL src));
5321   effect(TEMP dst, KILL cr);
5322 
5323   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5324             "JNC    done\n\t"
5325             "TZCNT  $dst, $src.hi\n\t"
5326             "ADD    $dst, 32\n"
5327       "done:" %}
5328   ins_encode %{
5329     Register Rdst = $dst$$Register;
5330     Register Rsrc = $src$$Register;
5331     Label done;
5332     __ tzcntl(Rdst, Rsrc);
5333     __ jccb(Assembler::carryClear, done);
5334     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5335     __ addl(Rdst, BitsPerInt);
5336     __ bind(done);
5337   %}
5338   ins_pipe(ialu_reg);
5339 %}
5340 
5341 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5342   predicate(!UseCountTrailingZerosInstruction);
5343   match(Set dst (CountTrailingZerosL src));
5344   effect(TEMP dst, KILL cr);
5345 
5346   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5347             "JNZ    done\n\t"
5348             "BSF    $dst, $src.hi\n\t"
5349             "JNZ    msw_not_zero\n\t"
5350             "MOV    $dst, 32\n"
5351       "msw_not_zero:\n\t"
5352             "ADD    $dst, 32\n"
5353       "done:" %}
5354   ins_encode %{
5355     Register Rdst = $dst$$Register;
5356     Register Rsrc = $src$$Register;
5357     Label msw_not_zero;
5358     Label done;
5359     __ bsfl(Rdst, Rsrc);
5360     __ jccb(Assembler::notZero, done);
5361     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5362     __ jccb(Assembler::notZero, msw_not_zero);
5363     __ movl(Rdst, BitsPerInt);
5364     __ bind(msw_not_zero);
5365     __ addl(Rdst, BitsPerInt);
5366     __ bind(done);
5367   %}
5368   ins_pipe(ialu_reg);
5369 %}
5370 
5371 
5372 //---------- Population Count Instructions -------------------------------------
5373 
5374 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5375   predicate(UsePopCountInstruction);
5376   match(Set dst (PopCountI src));
5377   effect(KILL cr);
5378 
5379   format %{ "POPCNT $dst, $src" %}
5380   ins_encode %{
5381     __ popcntl($dst$$Register, $src$$Register);
5382   %}
5383   ins_pipe(ialu_reg);
5384 %}
5385 
5386 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5387   predicate(UsePopCountInstruction);
5388   match(Set dst (PopCountI (LoadI mem)));
5389   effect(KILL cr);
5390 
5391   format %{ "POPCNT $dst, $mem" %}
5392   ins_encode %{
5393     __ popcntl($dst$$Register, $mem$$Address);
5394   %}
5395   ins_pipe(ialu_reg);
5396 %}
5397 
5398 // Note: Long.bitCount(long) returns an int.
5399 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5400   predicate(UsePopCountInstruction);
5401   match(Set dst (PopCountL src));
5402   effect(KILL cr, TEMP tmp, TEMP dst);
5403 
5404   format %{ "POPCNT $dst, $src.lo\n\t"
5405             "POPCNT $tmp, $src.hi\n\t"
5406             "ADD    $dst, $tmp" %}
5407   ins_encode %{
5408     __ popcntl($dst$$Register, $src$$Register);
5409     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5410     __ addl($dst$$Register, $tmp$$Register);
5411   %}
5412   ins_pipe(ialu_reg);
5413 %}
5414 
5415 // Note: Long.bitCount(long) returns an int.
5416 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5417   predicate(UsePopCountInstruction);
5418   match(Set dst (PopCountL (LoadL mem)));
5419   effect(KILL cr, TEMP tmp, TEMP dst);
5420 
5421   format %{ "POPCNT $dst, $mem\n\t"
5422             "POPCNT $tmp, $mem+4\n\t"
5423             "ADD    $dst, $tmp" %}
5424   ins_encode %{
5425     //__ popcntl($dst$$Register, $mem$$Address$$first);
5426     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5427     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5428     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5429     __ addl($dst$$Register, $tmp$$Register);
5430   %}
5431   ins_pipe(ialu_reg);
5432 %}
5433 
5434 
5435 //----------Load/Store/Move Instructions---------------------------------------
5436 //----------Load Instructions--------------------------------------------------
5437 // Load Byte (8bit signed)
5438 instruct loadB(xRegI dst, memory mem) %{
5439   match(Set dst (LoadB mem));
5440 
5441   ins_cost(125);
5442   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5443 
5444   ins_encode %{
5445     __ movsbl($dst$$Register, $mem$$Address);
5446   %}
5447 
5448   ins_pipe(ialu_reg_mem);
5449 %}
5450 
5451 // Load Byte (8bit signed) into Long Register
5452 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5453   match(Set dst (ConvI2L (LoadB mem)));
5454   effect(KILL cr);
5455 
5456   ins_cost(375);
5457   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5458             "MOV    $dst.hi,$dst.lo\n\t"
5459             "SAR    $dst.hi,7" %}
5460 
5461   ins_encode %{
5462     __ movsbl($dst$$Register, $mem$$Address);
5463     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5464     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
5465   %}
5466 
5467   ins_pipe(ialu_reg_mem);
5468 %}
5469 
5470 // Load Unsigned Byte (8bit UNsigned)
5471 instruct loadUB(xRegI dst, memory mem) %{
5472   match(Set dst (LoadUB mem));
5473 
5474   ins_cost(125);
5475   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5476 
5477   ins_encode %{
5478     __ movzbl($dst$$Register, $mem$$Address);
5479   %}
5480 
5481   ins_pipe(ialu_reg_mem);
5482 %}
5483 
5484 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5485 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5486   match(Set dst (ConvI2L (LoadUB mem)));
5487   effect(KILL cr);
5488 
5489   ins_cost(250);
5490   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5491             "XOR    $dst.hi,$dst.hi" %}
5492 
5493   ins_encode %{
5494     Register Rdst = $dst$$Register;
5495     __ movzbl(Rdst, $mem$$Address);
5496     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5497   %}
5498 
5499   ins_pipe(ialu_reg_mem);
5500 %}
5501 
5502 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5503 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5504   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5505   effect(KILL cr);
5506 
5507   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5508             "XOR    $dst.hi,$dst.hi\n\t"
5509             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5510   ins_encode %{
5511     Register Rdst = $dst$$Register;
5512     __ movzbl(Rdst, $mem$$Address);
5513     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5514     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5515   %}
5516   ins_pipe(ialu_reg_mem);
5517 %}
5518 
5519 // Load Short (16bit signed)
5520 instruct loadS(rRegI dst, memory mem) %{
5521   match(Set dst (LoadS mem));
5522 
5523   ins_cost(125);
5524   format %{ "MOVSX  $dst,$mem\t# short" %}
5525 
5526   ins_encode %{
5527     __ movswl($dst$$Register, $mem$$Address);
5528   %}
5529 
5530   ins_pipe(ialu_reg_mem);
5531 %}
5532 
5533 // Load Short (16 bit signed) to Byte (8 bit signed)
5534 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5535   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5536 
5537   ins_cost(125);
5538   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5539   ins_encode %{
5540     __ movsbl($dst$$Register, $mem$$Address);
5541   %}
5542   ins_pipe(ialu_reg_mem);
5543 %}
5544 
5545 // Load Short (16bit signed) into Long Register
5546 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5547   match(Set dst (ConvI2L (LoadS mem)));
5548   effect(KILL cr);
5549 
5550   ins_cost(375);
5551   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5552             "MOV    $dst.hi,$dst.lo\n\t"
5553             "SAR    $dst.hi,15" %}
5554 
5555   ins_encode %{
5556     __ movswl($dst$$Register, $mem$$Address);
5557     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5558     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
5559   %}
5560 
5561   ins_pipe(ialu_reg_mem);
5562 %}
5563 
5564 // Load Unsigned Short/Char (16bit unsigned)
5565 instruct loadUS(rRegI dst, memory mem) %{
5566   match(Set dst (LoadUS mem));
5567 
5568   ins_cost(125);
5569   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5570 
5571   ins_encode %{
5572     __ movzwl($dst$$Register, $mem$$Address);
5573   %}
5574 
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5579 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5580   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5581 
5582   ins_cost(125);
5583   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5584   ins_encode %{
5585     __ movsbl($dst$$Register, $mem$$Address);
5586   %}
5587   ins_pipe(ialu_reg_mem);
5588 %}
5589 
5590 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5591 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5592   match(Set dst (ConvI2L (LoadUS mem)));
5593   effect(KILL cr);
5594 
5595   ins_cost(250);
5596   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5597             "XOR    $dst.hi,$dst.hi" %}
5598 
5599   ins_encode %{
5600     __ movzwl($dst$$Register, $mem$$Address);
5601     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5602   %}
5603 
5604   ins_pipe(ialu_reg_mem);
5605 %}
5606 
5607 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5608 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5609   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5610   effect(KILL cr);
5611 
5612   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5613             "XOR    $dst.hi,$dst.hi" %}
5614   ins_encode %{
5615     Register Rdst = $dst$$Register;
5616     __ movzbl(Rdst, $mem$$Address);
5617     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5618   %}
5619   ins_pipe(ialu_reg_mem);
5620 %}
5621 
5622 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5623 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5624   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5625   effect(KILL cr);
5626 
5627   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5628             "XOR    $dst.hi,$dst.hi\n\t"
5629             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5630   ins_encode %{
5631     Register Rdst = $dst$$Register;
5632     __ movzwl(Rdst, $mem$$Address);
5633     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5634     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5635   %}
5636   ins_pipe(ialu_reg_mem);
5637 %}
5638 
5639 // Load Integer
5640 instruct loadI(rRegI dst, memory mem) %{
5641   match(Set dst (LoadI mem));
5642 
5643   ins_cost(125);
5644   format %{ "MOV    $dst,$mem\t# int" %}
5645 
5646   ins_encode %{
5647     __ movl($dst$$Register, $mem$$Address);
5648   %}
5649 
5650   ins_pipe(ialu_reg_mem);
5651 %}
5652 
5653 // Load Integer (32 bit signed) to Byte (8 bit signed)
5654 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5655   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5656 
5657   ins_cost(125);
5658   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5659   ins_encode %{
5660     __ movsbl($dst$$Register, $mem$$Address);
5661   %}
5662   ins_pipe(ialu_reg_mem);
5663 %}
5664 
5665 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5666 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5667   match(Set dst (AndI (LoadI mem) mask));
5668 
5669   ins_cost(125);
5670   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5671   ins_encode %{
5672     __ movzbl($dst$$Register, $mem$$Address);
5673   %}
5674   ins_pipe(ialu_reg_mem);
5675 %}
5676 
5677 // Load Integer (32 bit signed) to Short (16 bit signed)
5678 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5679   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5680 
5681   ins_cost(125);
5682   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5683   ins_encode %{
5684     __ movswl($dst$$Register, $mem$$Address);
5685   %}
5686   ins_pipe(ialu_reg_mem);
5687 %}
5688 
5689 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5690 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5691   match(Set dst (AndI (LoadI mem) mask));
5692 
5693   ins_cost(125);
5694   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5695   ins_encode %{
5696     __ movzwl($dst$$Register, $mem$$Address);
5697   %}
5698   ins_pipe(ialu_reg_mem);
5699 %}
5700 
5701 // Load Integer into Long Register
5702 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5703   match(Set dst (ConvI2L (LoadI mem)));
5704   effect(KILL cr);
5705 
5706   ins_cost(375);
5707   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5708             "MOV    $dst.hi,$dst.lo\n\t"
5709             "SAR    $dst.hi,31" %}
5710 
5711   ins_encode %{
5712     __ movl($dst$$Register, $mem$$Address);
5713     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5714     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5715   %}
5716 
5717   ins_pipe(ialu_reg_mem);
5718 %}
5719 
5720 // Load Integer with mask 0xFF into Long Register
5721 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5722   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5723   effect(KILL cr);
5724 
5725   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5726             "XOR    $dst.hi,$dst.hi" %}
5727   ins_encode %{
5728     Register Rdst = $dst$$Register;
5729     __ movzbl(Rdst, $mem$$Address);
5730     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5731   %}
5732   ins_pipe(ialu_reg_mem);
5733 %}
5734 
5735 // Load Integer with mask 0xFFFF into Long Register
5736 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5737   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5738   effect(KILL cr);
5739 
5740   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5741             "XOR    $dst.hi,$dst.hi" %}
5742   ins_encode %{
5743     Register Rdst = $dst$$Register;
5744     __ movzwl(Rdst, $mem$$Address);
5745     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5746   %}
5747   ins_pipe(ialu_reg_mem);
5748 %}
5749 
5750 // Load Integer with 31-bit mask into Long Register
5751 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5752   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5753   effect(KILL cr);
5754 
5755   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5756             "XOR    $dst.hi,$dst.hi\n\t"
5757             "AND    $dst.lo,$mask" %}
5758   ins_encode %{
5759     Register Rdst = $dst$$Register;
5760     __ movl(Rdst, $mem$$Address);
5761     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5762     __ andl(Rdst, $mask$$constant);
5763   %}
5764   ins_pipe(ialu_reg_mem);
5765 %}
5766 
5767 // Load Unsigned Integer into Long Register
5768 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5769   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5770   effect(KILL cr);
5771 
5772   ins_cost(250);
5773   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5774             "XOR    $dst.hi,$dst.hi" %}
5775 
5776   ins_encode %{
5777     __ movl($dst$$Register, $mem$$Address);
5778     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5779   %}
5780 
5781   ins_pipe(ialu_reg_mem);
5782 %}
5783 
5784 // Load Long.  Cannot clobber address while loading, so restrict address
5785 // register to ESI
5786 instruct loadL(eRegL dst, load_long_memory mem) %{
5787   predicate(!((LoadLNode*)n)->require_atomic_access());
5788   match(Set dst (LoadL mem));
5789 
5790   ins_cost(250);
5791   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5792             "MOV    $dst.hi,$mem+4" %}
5793 
5794   ins_encode %{
5795     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5796     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5797     __ movl($dst$$Register, Amemlo);
5798     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5799   %}
5800 
5801   ins_pipe(ialu_reg_long_mem);
5802 %}
5803 
5804 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5805 // then store it down to the stack and reload on the int
5806 // side.
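// (It is the single 64-bit x87 load/store that provides the atomicity; a pair
// of 32-bit integer moves could be torn by a concurrent write.)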
5807 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5808   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5809   match(Set dst (LoadL mem));
5810 
5811   ins_cost(200);
5812   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5813             "FISTp  $dst" %}
5814   ins_encode(enc_loadL_volatile(mem,dst));
5815   ins_pipe( fpu_reg_mem );
5816 %}
5817 
5818 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5819   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5820   match(Set dst (LoadL mem));
5821   effect(TEMP tmp);
5822   ins_cost(180);
5823   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5824             "MOVSD  $dst,$tmp" %}
5825   ins_encode %{
5826     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5827     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5828   %}
5829   ins_pipe( pipe_slow );
5830 %}
5831 
5832 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5833   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5834   match(Set dst (LoadL mem));
5835   effect(TEMP tmp);
5836   ins_cost(160);
5837   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5838             "MOVD   $dst.lo,$tmp\n\t"
5839             "PSRLQ  $tmp,32\n\t"
5840             "MOVD   $dst.hi,$tmp" %}
5841   ins_encode %{
5842     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5843     __ movdl($dst$$Register, $tmp$$XMMRegister);
5844     __ psrlq($tmp$$XMMRegister, 32);
5845     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5846   %}
5847   ins_pipe( pipe_slow );
5848 %}
5849 
5850 // Load Range
5851 instruct loadRange(rRegI dst, memory mem) %{
5852   match(Set dst (LoadRange mem));
5853 
5854   ins_cost(125);
5855   format %{ "MOV    $dst,$mem" %}
5856   opcode(0x8B);
5857   ins_encode( OpcP, RegMem(dst,mem));
5858   ins_pipe( ialu_reg_mem );
5859 %}
5860 
5861 
5862 // Load Pointer
5863 instruct loadP(eRegP dst, memory mem) %{
5864   match(Set dst (LoadP mem));
5865 
5866   ins_cost(125);
5867   format %{ "MOV    $dst,$mem" %}
5868   opcode(0x8B);
5869   ins_encode( OpcP, RegMem(dst,mem));
5870   ins_pipe( ialu_reg_mem );
5871 %}
5872 
5873 // Load Klass Pointer
5874 instruct loadKlass(eRegP dst, memory mem) %{
5875   match(Set dst (LoadKlass mem));
5876 
5877   ins_cost(125);
5878   format %{ "MOV    $dst,$mem" %}
5879   opcode(0x8B);
5880   ins_encode( OpcP, RegMem(dst,mem));
5881   ins_pipe( ialu_reg_mem );
5882 %}
5883 
5884 // Load Double
5885 instruct loadDPR(regDPR dst, memory mem) %{
5886   predicate(UseSSE<=1);
5887   match(Set dst (LoadD mem));
5888 
5889   ins_cost(150);
5890   format %{ "FLD_D  ST,$mem\n\t"
5891             "FSTP   $dst" %}
5892   opcode(0xDD);               /* DD /0 */
5893   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5894               Pop_Reg_DPR(dst) );
5895   ins_pipe( fpu_reg_mem );
5896 %}
5897 
5898 // Load Double to XMM
5899 instruct loadD(regD dst, memory mem) %{
5900   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5901   match(Set dst (LoadD mem));
5902   ins_cost(145);
5903   format %{ "MOVSD  $dst,$mem" %}
5904   ins_encode %{
5905     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5906   %}
5907   ins_pipe( pipe_slow );
5908 %}
5909 
5910 instruct loadD_partial(regD dst, memory mem) %{
5911   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5912   match(Set dst (LoadD mem));
5913   ins_cost(145);
5914   format %{ "MOVLPD $dst,$mem" %}
5915   ins_encode %{
5916     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5917   %}
5918   ins_pipe( pipe_slow );
5919 %}
5920 
5921 // Load to XMM register (single-precision floating point)
5922 // MOVSS instruction
5923 instruct loadF(regF dst, memory mem) %{
5924   predicate(UseSSE>=1);
5925   match(Set dst (LoadF mem));
5926   ins_cost(145);
5927   format %{ "MOVSS  $dst,$mem" %}
5928   ins_encode %{
5929     __ movflt ($dst$$XMMRegister, $mem$$Address);
5930   %}
5931   ins_pipe( pipe_slow );
5932 %}
5933 
5934 // Load Float
5935 instruct loadFPR(regFPR dst, memory mem) %{
5936   predicate(UseSSE==0);
5937   match(Set dst (LoadF mem));
5938 
5939   ins_cost(150);
5940   format %{ "FLD_S  ST,$mem\n\t"
5941             "FSTP   $dst" %}
5942   opcode(0xD9);               /* D9 /0 */
5943   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5944               Pop_Reg_FPR(dst) );
5945   ins_pipe( fpu_reg_mem );
5946 %}
5947 
5948 // Load Effective Address
5949 instruct leaP8(eRegP dst, indOffset8 mem) %{
5950   match(Set dst mem);
5951 
5952   ins_cost(110);
5953   format %{ "LEA    $dst,$mem" %}
5954   opcode(0x8D);
5955   ins_encode( OpcP, RegMem(dst,mem));
5956   ins_pipe( ialu_reg_reg_fat );
5957 %}
5958 
5959 instruct leaP32(eRegP dst, indOffset32 mem) %{
5960   match(Set dst mem);
5961 
5962   ins_cost(110);
5963   format %{ "LEA    $dst,$mem" %}
5964   opcode(0x8D);
5965   ins_encode( OpcP, RegMem(dst,mem));
5966   ins_pipe( ialu_reg_reg_fat );
5967 %}
5968 
5969 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5970   match(Set dst mem);
5971 
5972   ins_cost(110);
5973   format %{ "LEA    $dst,$mem" %}
5974   opcode(0x8D);
5975   ins_encode( OpcP, RegMem(dst,mem));
5976   ins_pipe( ialu_reg_reg_fat );
5977 %}
5978 
5979 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5980   match(Set dst mem);
5981 
5982   ins_cost(110);
5983   format %{ "LEA    $dst,$mem" %}
5984   opcode(0x8D);
5985   ins_encode( OpcP, RegMem(dst,mem));
5986   ins_pipe( ialu_reg_reg_fat );
5987 %}
5988 
5989 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5990   match(Set dst mem);
5991 
5992   ins_cost(110);
5993   format %{ "LEA    $dst,$mem" %}
5994   opcode(0x8D);
5995   ins_encode( OpcP, RegMem(dst,mem));
5996   ins_pipe( ialu_reg_reg_fat );
5997 %}
5998 
5999 // Load Constant
6000 instruct loadConI(rRegI dst, immI src) %{
6001   match(Set dst src);
6002 
6003   format %{ "MOV    $dst,$src" %}
6004   ins_encode( LdImmI(dst, src) );
6005   ins_pipe( ialu_reg_fat );
6006 %}
6007 
6008 // Load Constant zero
6009 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6010   match(Set dst src);
6011   effect(KILL cr);
6012 
6013   ins_cost(50);
6014   format %{ "XOR    $dst,$dst" %}
6015   opcode(0x33);  /* + rd */
6016   ins_encode( OpcP, RegReg( dst, dst ) );
6017   ins_pipe( ialu_reg );
6018 %}
6019 
6020 instruct loadConP(eRegP dst, immP src) %{
6021   match(Set dst src);
6022 
6023   format %{ "MOV    $dst,$src" %}
6024   opcode(0xB8);  /* + rd */
6025   ins_encode( LdImmP(dst, src) );
6026   ins_pipe( ialu_reg_fat );
6027 %}
6028 
6029 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6030   match(Set dst src);
6031   effect(KILL cr);
6032   ins_cost(200);
6033   format %{ "MOV    $dst.lo,$src.lo\n\t"
6034             "MOV    $dst.hi,$src.hi" %}
6035   opcode(0xB8);
6036   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6037   ins_pipe( ialu_reg_long_fat );
6038 %}
6039 
6040 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6041   match(Set dst src);
6042   effect(KILL cr);
6043   ins_cost(150);
6044   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6045             "XOR    $dst.hi,$dst.hi" %}
6046   opcode(0x33,0x33);
6047   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6048   ins_pipe( ialu_reg_long );
6049 %}
6050 
6051 // The instruction usage is guarded by predicate in operand immFPR().
6052 instruct loadConFPR(regFPR dst, immFPR con) %{
6053   match(Set dst con);
6054   ins_cost(125);
6055   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6056             "FSTP   $dst" %}
6057   ins_encode %{
6058     __ fld_s($constantaddress($con));
6059     __ fstp_d($dst$$reg);
6060   %}
6061   ins_pipe(fpu_reg_con);
6062 %}
6063 
6064 // The instruction usage is guarded by predicate in operand immFPR0().
6065 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6066   match(Set dst con);
6067   ins_cost(125);
6068   format %{ "FLDZ   ST\n\t"
6069             "FSTP   $dst" %}
6070   ins_encode %{
6071     __ fldz();
6072     __ fstp_d($dst$$reg);
6073   %}
6074   ins_pipe(fpu_reg_con);
6075 %}
6076 
6077 // The instruction usage is guarded by predicate in operand immFPR1().
6078 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6079   match(Set dst con);
6080   ins_cost(125);
6081   format %{ "FLD1   ST\n\t"
6082             "FSTP   $dst" %}
6083   ins_encode %{
6084     __ fld1();
6085     __ fstp_d($dst$$reg);
6086   %}
6087   ins_pipe(fpu_reg_con);
6088 %}
6089 
6090 // The instruction usage is guarded by predicate in operand immF().
6091 instruct loadConF(regF dst, immF con) %{
6092   match(Set dst con);
6093   ins_cost(125);
6094   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6095   ins_encode %{
6096     __ movflt($dst$$XMMRegister, $constantaddress($con));
6097   %}
6098   ins_pipe(pipe_slow);
6099 %}
6100 
6101 // The instruction usage is guarded by predicate in operand immF0().
6102 instruct loadConF0(regF dst, immF0 src) %{
6103   match(Set dst src);
6104   ins_cost(100);
6105   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6106   ins_encode %{
6107     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6108   %}
6109   ins_pipe(pipe_slow);
6110 %}
6111 
6112 // The instruction usage is guarded by predicate in operand immDPR().
6113 instruct loadConDPR(regDPR dst, immDPR con) %{
6114   match(Set dst con);
6115   ins_cost(125);
6116 
6117   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6118             "FSTP   $dst" %}
6119   ins_encode %{
6120     __ fld_d($constantaddress($con));
6121     __ fstp_d($dst$$reg);
6122   %}
6123   ins_pipe(fpu_reg_con);
6124 %}
6125 
6126 // The instruction usage is guarded by predicate in operand immDPR0().
6127 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6128   match(Set dst con);
6129   ins_cost(125);
6130 
6131   format %{ "FLDZ   ST\n\t"
6132             "FSTP   $dst" %}
6133   ins_encode %{
6134     __ fldz();
6135     __ fstp_d($dst$$reg);
6136   %}
6137   ins_pipe(fpu_reg_con);
6138 %}
6139 
6140 // The instruction usage is guarded by predicate in operand immDPR1().
6141 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6142   match(Set dst con);
6143   ins_cost(125);
6144 
6145   format %{ "FLD1   ST\n\t"
6146             "FSTP   $dst" %}
6147   ins_encode %{
6148     __ fld1();
6149     __ fstp_d($dst$$reg);
6150   %}
6151   ins_pipe(fpu_reg_con);
6152 %}
6153 
6154 // The instruction usage is guarded by predicate in operand immD().
6155 instruct loadConD(regD dst, immD con) %{
6156   match(Set dst con);
6157   ins_cost(125);
6158   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6159   ins_encode %{
6160     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6161   %}
6162   ins_pipe(pipe_slow);
6163 %}
6164 
6165 // The instruction usage is guarded by predicate in operand immD0().
6166 instruct loadConD0(regD dst, immD0 src) %{
6167   match(Set dst src);
6168   ins_cost(100);
6169   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6170   ins_encode %{
6171     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6172   %}
6173   ins_pipe( pipe_slow );
6174 %}
6175 
6176 // Load Stack Slot
6177 instruct loadSSI(rRegI dst, stackSlotI src) %{
6178   match(Set dst src);
6179   ins_cost(125);
6180 
6181   format %{ "MOV    $dst,$src" %}
6182   opcode(0x8B);
6183   ins_encode( OpcP, RegMem(dst,src));
6184   ins_pipe( ialu_reg_mem );
6185 %}
6186 
6187 instruct loadSSL(eRegL dst, stackSlotL src) %{
6188   match(Set dst src);
6189 
6190   ins_cost(200);
6191   format %{ "MOV    $dst,$src.lo\n\t"
6192             "MOV    $dst+4,$src.hi" %}
6193   opcode(0x8B, 0x8B);
6194   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6195   ins_pipe( ialu_mem_long_reg );
6196 %}
6197 
6198 // Load Stack Slot
6199 instruct loadSSP(eRegP dst, stackSlotP src) %{
6200   match(Set dst src);
6201   ins_cost(125);
6202 
6203   format %{ "MOV    $dst,$src" %}
6204   opcode(0x8B);
6205   ins_encode( OpcP, RegMem(dst,src));
6206   ins_pipe( ialu_reg_mem );
6207 %}
6208 
6209 // Load Stack Slot
6210 instruct loadSSF(regFPR dst, stackSlotF src) %{
6211   match(Set dst src);
6212   ins_cost(125);
6213 
6214   format %{ "FLD_S  $src\n\t"
6215             "FSTP   $dst" %}
6216   opcode(0xD9);               /* D9 /0, FLD m32real */
6217   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6218               Pop_Reg_FPR(dst) );
6219   ins_pipe( fpu_reg_mem );
6220 %}
6221 
6222 // Load Stack Slot
6223 instruct loadSSD(regDPR dst, stackSlotD src) %{
6224   match(Set dst src);
6225   ins_cost(125);
6226 
6227   format %{ "FLD_D  $src\n\t"
6228             "FSTP   $dst" %}
6229   opcode(0xDD);               /* DD /0, FLD m64real */
6230   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6231               Pop_Reg_DPR(dst) );
6232   ins_pipe( fpu_reg_mem );
6233 %}
6234 
6235 // Prefetch instructions for allocation.
6236 // Must be safe to execute with invalid address (cannot fault).
6237 
6238 instruct prefetchAlloc0( memory mem ) %{
6239   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6240   match(PrefetchAllocation mem);
6241   ins_cost(0);
6242   size(0);
6243   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6244   ins_encode();
6245   ins_pipe(empty);
6246 %}
6247 
6248 instruct prefetchAlloc( memory mem ) %{
6249   predicate(AllocatePrefetchInstr==3);
6250   match( PrefetchAllocation mem );
6251   ins_cost(100);
6252 
6253   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6254   ins_encode %{
6255     __ prefetchw($mem$$Address);
6256   %}
6257   ins_pipe(ialu_mem);
6258 %}
6259 
6260 instruct prefetchAllocNTA( memory mem ) %{
6261   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6262   match(PrefetchAllocation mem);
6263   ins_cost(100);
6264 
6265   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6266   ins_encode %{
6267     __ prefetchnta($mem$$Address);
6268   %}
6269   ins_pipe(ialu_mem);
6270 %}
6271 
6272 instruct prefetchAllocT0( memory mem ) %{
6273   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6274   match(PrefetchAllocation mem);
6275   ins_cost(100);
6276 
6277   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6278   ins_encode %{
6279     __ prefetcht0($mem$$Address);
6280   %}
6281   ins_pipe(ialu_mem);
6282 %}
6283 
6284 instruct prefetchAllocT2( memory mem ) %{
6285   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6286   match(PrefetchAllocation mem);
6287   ins_cost(100);
6288 
6289   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6290   ins_encode %{
6291     __ prefetcht2($mem$$Address);
6292   %}
6293   ins_pipe(ialu_mem);
6294 %}
6295 
6296 //----------Store Instructions-------------------------------------------------
6297 
6298 // Store Byte
6299 instruct storeB(memory mem, xRegI src) %{
6300   match(Set mem (StoreB mem src));
6301 
6302   ins_cost(125);
6303   format %{ "MOV8   $mem,$src" %}
6304   opcode(0x88);
6305   ins_encode( OpcP, RegMem( src, mem ) );
6306   ins_pipe( ialu_mem_reg );
6307 %}
6308 
6309 // Store Char/Short
6310 instruct storeC(memory mem, rRegI src) %{
6311   match(Set mem (StoreC mem src));
6312 
6313   ins_cost(125);
6314   format %{ "MOV16  $mem,$src" %}
6315   opcode(0x89, 0x66);
6316   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6317   ins_pipe( ialu_mem_reg );
6318 %}
6319 
6320 // Store Integer
6321 instruct storeI(memory mem, rRegI src) %{
6322   match(Set mem (StoreI mem src));
6323 
6324   ins_cost(125);
6325   format %{ "MOV    $mem,$src" %}
6326   opcode(0x89);
6327   ins_encode( OpcP, RegMem( src, mem ) );
6328   ins_pipe( ialu_mem_reg );
6329 %}
6330 
6331 // Store Long
6332 instruct storeL(long_memory mem, eRegL src) %{
6333   predicate(!((StoreLNode*)n)->require_atomic_access());
6334   match(Set mem (StoreL mem src));
6335 
6336   ins_cost(200);
6337   format %{ "MOV    $mem,$src.lo\n\t"
6338             "MOV    $mem+4,$src.hi" %}
6339   opcode(0x89, 0x89);
6340   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6341   ins_pipe( ialu_mem_long_reg );
6342 %}
6343 
6344 // Store Long to Integer
6345 instruct storeL2I(memory mem, eRegL src) %{
6346   match(Set mem (StoreI mem (ConvL2I src)));
6347 
6348   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6349   ins_encode %{
6350     __ movl($mem$$Address, $src$$Register);
6351   %}
6352   ins_pipe(ialu_mem_reg);
6353 %}
6354 
6355 // Volatile Store Long.  Must be atomic, so move it into
6356 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6357 // target address before the store (for null-ptr checks)
6358 // so the memory operand is used twice in the encoding.
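// Illustrative sketch (class and field names are hypothetical, not from the
// VM sources): JLS 17.7 requires writes of volatile long fields to be atomic
// even on 32-bit platforms, e.g.
//
//   class Ticker { volatile long ticks; void set(long t) { ticks = t; } }
//
// A pair of independent 32-bit MOVs could let a concurrent reader observe a
// torn value (new low half with the old high half), so the store is issued
// as a single 64-bit FILD/FISTP here, or as a single MOVSD in the SSE2
// variants that follow.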
6359 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6360   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6361   match(Set mem (StoreL mem src));
6362   effect( KILL cr );
6363   ins_cost(400);
6364   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6365             "FILD   $src\n\t"
6366             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6367   opcode(0x3B);
6368   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6369   ins_pipe( fpu_reg_mem );
6370 %}
6371 
6372 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6373   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6374   match(Set mem (StoreL mem src));
6375   effect( TEMP tmp, KILL cr );
6376   ins_cost(380);
6377   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6378             "MOVSD  $tmp,$src\n\t"
6379             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6380   ins_encode %{
6381     __ cmpl(rax, $mem$$Address);
6382     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6383     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6384   %}
6385   ins_pipe( pipe_slow );
6386 %}
6387 
6388 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6389   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6390   match(Set mem (StoreL mem src));
6391   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6392   ins_cost(360);
6393   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6394             "MOVD   $tmp,$src.lo\n\t"
6395             "MOVD   $tmp2,$src.hi\n\t"
6396             "PUNPCKLDQ $tmp,$tmp2\n\t"
6397             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6398   ins_encode %{
6399     __ cmpl(rax, $mem$$Address);
6400     __ movdl($tmp$$XMMRegister, $src$$Register);
6401     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6402     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6403     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6404   %}
6405   ins_pipe( pipe_slow );
6406 %}
6407 
6408 // Store Pointer; for storing unknown oops and raw pointers
6409 instruct storeP(memory mem, anyRegP src) %{
6410   match(Set mem (StoreP mem src));
6411 
6412   ins_cost(125);
6413   format %{ "MOV    $mem,$src" %}
6414   opcode(0x89);
6415   ins_encode( OpcP, RegMem( src, mem ) );
6416   ins_pipe( ialu_mem_reg );
6417 %}
6418 
6419 // Store Integer Immediate
6420 instruct storeImmI(memory mem, immI src) %{
6421   match(Set mem (StoreI mem src));
6422 
6423   ins_cost(150);
6424   format %{ "MOV    $mem,$src" %}
6425   opcode(0xC7);               /* C7 /0 */
6426   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6427   ins_pipe( ialu_mem_imm );
6428 %}
6429 
6430 // Store Short/Char Immediate
6431 instruct storeImmI16(memory mem, immI16 src) %{
6432   predicate(UseStoreImmI16);
6433   match(Set mem (StoreC mem src));
6434 
6435   ins_cost(150);
6436   format %{ "MOV16  $mem,$src" %}
6437   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6438   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6439   ins_pipe( ialu_mem_imm );
6440 %}
6441 
6442 // Store Pointer Immediate; null pointers or constant oops that do not
6443 // need card-mark barriers.
6444 instruct storeImmP(memory mem, immP src) %{
6445   match(Set mem (StoreP mem src));
6446 
6447   ins_cost(150);
6448   format %{ "MOV    $mem,$src" %}
6449   opcode(0xC7);               /* C7 /0 */
6450   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6451   ins_pipe( ialu_mem_imm );
6452 %}
6453 
6454 // Store Byte Immediate
6455 instruct storeImmB(memory mem, immI8 src) %{
6456   match(Set mem (StoreB mem src));
6457 
6458   ins_cost(150);
6459   format %{ "MOV8   $mem,$src" %}
6460   opcode(0xC6);               /* C6 /0 */
6461   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6462   ins_pipe( ialu_mem_imm );
6463 %}
6464 
6465 // Store CMS card-mark Immediate
6466 instruct storeImmCM(memory mem, immI8 src) %{
6467   match(Set mem (StoreCM mem src));
6468 
6469   ins_cost(150);
6470   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6471   opcode(0xC6);               /* C6 /0 */
6472   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6473   ins_pipe( ialu_mem_imm );
6474 %}
6475 
6476 // Store Double
6477 instruct storeDPR( memory mem, regDPR1 src) %{
6478   predicate(UseSSE<=1);
6479   match(Set mem (StoreD mem src));
6480 
6481   ins_cost(100);
6482   format %{ "FST_D  $mem,$src" %}
6483   opcode(0xDD);       /* DD /2 */
6484   ins_encode( enc_FPR_store(mem,src) );
6485   ins_pipe( fpu_mem_reg );
6486 %}
6487 
6488 // Store double does rounding on x86
6489 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6490   predicate(UseSSE<=1);
6491   match(Set mem (StoreD mem (RoundDouble src)));
6492 
6493   ins_cost(100);
6494   format %{ "FST_D  $mem,$src\t# round" %}
6495   opcode(0xDD);       /* DD /2 */
6496   ins_encode( enc_FPR_store(mem,src) );
6497   ins_pipe( fpu_mem_reg );
6498 %}
6499 
6500 // Store XMM register to memory (double-precision floating-point value)
6501 // MOVSD instruction
6502 instruct storeD(memory mem, regD src) %{
6503   predicate(UseSSE>=2);
6504   match(Set mem (StoreD mem src));
6505   ins_cost(95);
6506   format %{ "MOVSD  $mem,$src" %}
6507   ins_encode %{
6508     __ movdbl($mem$$Address, $src$$XMMRegister);
6509   %}
6510   ins_pipe( pipe_slow );
6511 %}
6512 
6513 // Store XMM register to memory (single-precision floating-point value)
6514 // MOVSS instruction
6515 instruct storeF(memory mem, regF src) %{
6516   predicate(UseSSE>=1);
6517   match(Set mem (StoreF mem src));
6518   ins_cost(95);
6519   format %{ "MOVSS  $mem,$src" %}
6520   ins_encode %{
6521     __ movflt($mem$$Address, $src$$XMMRegister);
6522   %}
6523   ins_pipe( pipe_slow );
6524 %}
6525 
6526 // Store Float
6527 instruct storeFPR( memory mem, regFPR1 src) %{
6528   predicate(UseSSE==0);
6529   match(Set mem (StoreF mem src));
6530 
6531   ins_cost(100);
6532   format %{ "FST_S  $mem,$src" %}
6533   opcode(0xD9);       /* D9 /2 */
6534   ins_encode( enc_FPR_store(mem,src) );
6535   ins_pipe( fpu_mem_reg );
6536 %}
6537 
6538 // Store Float does rounding on x86
6539 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6540   predicate(UseSSE==0);
6541   match(Set mem (StoreF mem (RoundFloat src)));
6542 
6543   ins_cost(100);
6544   format %{ "FST_S  $mem,$src\t# round" %}
6545   opcode(0xD9);       /* D9 /2 */
6546   ins_encode( enc_FPR_store(mem,src) );
6547   ins_pipe( fpu_mem_reg );
6548 %}
6549 
6550 // Store Float does rounding on x86
6551 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6552   predicate(UseSSE<=1);
6553   match(Set mem (StoreF mem (ConvD2F src)));
6554 
6555   ins_cost(100);
6556   format %{ "FST_S  $mem,$src\t# D-round" %}
6557   opcode(0xD9);       /* D9 /2 */
6558   ins_encode( enc_FPR_store(mem,src) );
6559   ins_pipe( fpu_mem_reg );
6560 %}
6561 
6562 // Store immediate Float value (it is faster than storing from an FPU register)
6563 // The instruction usage is guarded by predicate in operand immFPR().
6564 instruct storeFPR_imm( memory mem, immFPR src) %{
6565   match(Set mem (StoreF mem src));
6566 
6567   ins_cost(50);
6568   format %{ "MOV    $mem,$src\t# store float" %}
6569   opcode(0xC7);               /* C7 /0 */
6570   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6571   ins_pipe( ialu_mem_imm );
6572 %}
6573 
6574 // Store immediate Float value (it is faster than storing from an XMM register)
6575 // The instruction usage is guarded by predicate in operand immF().
6576 instruct storeF_imm( memory mem, immF src) %{
6577   match(Set mem (StoreF mem src));
6578 
6579   ins_cost(50);
6580   format %{ "MOV    $mem,$src\t# store float" %}
6581   opcode(0xC7);               /* C7 /0 */
6582   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6583   ins_pipe( ialu_mem_imm );
6584 %}
6585 
6586 // Store Integer to stack slot
6587 instruct storeSSI(stackSlotI dst, rRegI src) %{
6588   match(Set dst src);
6589 
6590   ins_cost(100);
6591   format %{ "MOV    $dst,$src" %}
6592   opcode(0x89);
6593   ins_encode( OpcPRegSS( dst, src ) );
6594   ins_pipe( ialu_mem_reg );
6595 %}
6596 
6597 // Store Pointer to stack slot
6598 instruct storeSSP(stackSlotP dst, eRegP src) %{
6599   match(Set dst src);
6600 
6601   ins_cost(100);
6602   format %{ "MOV    $dst,$src" %}
6603   opcode(0x89);
6604   ins_encode( OpcPRegSS( dst, src ) );
6605   ins_pipe( ialu_mem_reg );
6606 %}
6607 
6608 // Store Long to stack slot
6609 instruct storeSSL(stackSlotL dst, eRegL src) %{
6610   match(Set dst src);
6611 
6612   ins_cost(200);
6613   format %{ "MOV    $dst,$src.lo\n\t"
6614             "MOV    $dst+4,$src.hi" %}
6615   opcode(0x89, 0x89);
6616   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6617   ins_pipe( ialu_mem_long_reg );
6618 %}
6619 
6620 //----------MemBar Instructions-----------------------------------------------
6621 // Memory barrier flavors
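//
// IA-32 ordering background (a brief note, not an exhaustive model): the
// hardware keeps loads ordered with loads, stores ordered with stores, and
// does not reorder stores with older loads; only StoreLoad reordering can
// occur.  That is why the acquire/release/storestore barriers below are empty
// encodings, while membar_volatile must emit a real serializing operation
// (the LOCK ADDL [ESP], 0 idiom used as a StoreLoad fence).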
6622 
6623 instruct membar_acquire() %{
6624   match(MemBarAcquire);
6625   match(LoadFence);
6626   ins_cost(400);
6627 
6628   size(0);
6629   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6630   ins_encode();
6631   ins_pipe(empty);
6632 %}
6633 
6634 instruct membar_acquire_lock() %{
6635   match(MemBarAcquireLock);
6636   ins_cost(0);
6637 
6638   size(0);
6639   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6640   ins_encode( );
6641   ins_pipe(empty);
6642 %}
6643 
6644 instruct membar_release() %{
6645   match(MemBarRelease);
6646   match(StoreFence);
6647   ins_cost(400);
6648 
6649   size(0);
6650   format %{ "MEMBAR-release ! (empty encoding)" %}
6651   ins_encode( );
6652   ins_pipe(empty);
6653 %}
6654 
6655 instruct membar_release_lock() %{
6656   match(MemBarReleaseLock);
6657   ins_cost(0);
6658 
6659   size(0);
6660   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6661   ins_encode( );
6662   ins_pipe(empty);
6663 %}
6664 
6665 instruct membar_volatile(eFlagsReg cr) %{
6666   match(MemBarVolatile);
6667   effect(KILL cr);
6668   ins_cost(400);
6669 
6670   format %{
6671     $$template
6672     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6673   %}
6674   ins_encode %{
6675     __ membar(Assembler::StoreLoad);
6676   %}
6677   ins_pipe(pipe_slow);
6678 %}
6679 
6680 instruct unnecessary_membar_volatile() %{
6681   match(MemBarVolatile);
6682   predicate(Matcher::post_store_load_barrier(n));
6683   ins_cost(0);
6684 
6685   size(0);
6686   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6687   ins_encode( );
6688   ins_pipe(empty);
6689 %}
6690 
6691 instruct membar_storestore() %{
6692   match(MemBarStoreStore);
6693   ins_cost(0);
6694 
6695   size(0);
6696   format %{ "MEMBAR-storestore (empty encoding)" %}
6697   ins_encode( );
6698   ins_pipe(empty);
6699 %}
6700 
6701 //----------Move Instructions--------------------------------------------------
6702 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6703   match(Set dst (CastX2P src));
6704   format %{ "# X2P  $dst, $src" %}
6705   ins_encode( /*empty encoding*/ );
6706   ins_cost(0);
6707   ins_pipe(empty);
6708 %}
6709 
6710 instruct castP2X(rRegI dst, eRegP src ) %{
6711   match(Set dst (CastP2X src));
6712   ins_cost(50);
6713   format %{ "MOV    $dst, $src\t# CastP2X" %}
6714   ins_encode( enc_Copy( dst, src) );
6715   ins_pipe( ialu_reg_reg );
6716 %}
6717 
6718 //----------Conditional Move---------------------------------------------------
6719 // Conditional move
6720 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6721   predicate(!VM_Version::supports_cmov() );
6722   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6723   ins_cost(200);
6724   format %{ "J$cop,us skip\t# signed cmove\n\t"
6725             "MOV    $dst,$src\n"
6726       "skip:" %}
6727   ins_encode %{
6728     Label Lskip;
6729     // Invert sense of branch from sense of CMOV
6730     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6731     __ movl($dst$$Register, $src$$Register);
6732     __ bind(Lskip);
6733   %}
6734   ins_pipe( pipe_cmov_reg );
6735 %}
6736 
6737 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6738   predicate(!VM_Version::supports_cmov() );
6739   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6740   ins_cost(200);
6741   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6742             "MOV    $dst,$src\n"
6743       "skip:" %}
6744   ins_encode %{
6745     Label Lskip;
6746     // Invert sense of branch from sense of CMOV
6747     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6748     __ movl($dst$$Register, $src$$Register);
6749     __ bind(Lskip);
6750   %}
6751   ins_pipe( pipe_cmov_reg );
6752 %}
6753 
6754 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6755   predicate(VM_Version::supports_cmov() );
6756   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6757   ins_cost(200);
6758   format %{ "CMOV$cop $dst,$src" %}
6759   opcode(0x0F,0x40);
6760   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6761   ins_pipe( pipe_cmov_reg );
6762 %}
6763 
6764 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6765   predicate(VM_Version::supports_cmov() );
6766   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6767   ins_cost(200);
6768   format %{ "CMOV$cop $dst,$src" %}
6769   opcode(0x0F,0x40);
6770   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6771   ins_pipe( pipe_cmov_reg );
6772 %}
6773 
6774 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6775   predicate(VM_Version::supports_cmov() );
6776   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6777   ins_cost(200);
6778   expand %{
6779     cmovI_regU(cop, cr, dst, src);
6780   %}
6781 %}
6782 
6783 // Conditional move
6784 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6785   predicate(VM_Version::supports_cmov() );
6786   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6787   ins_cost(250);
6788   format %{ "CMOV$cop $dst,$src" %}
6789   opcode(0x0F,0x40);
6790   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6791   ins_pipe( pipe_cmov_mem );
6792 %}
6793 
6794 // Conditional move
6795 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6796   predicate(VM_Version::supports_cmov() );
6797   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6798   ins_cost(250);
6799   format %{ "CMOV$cop $dst,$src" %}
6800   opcode(0x0F,0x40);
6801   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6802   ins_pipe( pipe_cmov_mem );
6803 %}
6804 
6805 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6806   predicate(VM_Version::supports_cmov() );
6807   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6808   ins_cost(250);
6809   expand %{
6810     cmovI_memU(cop, cr, dst, src);
6811   %}
6812 %}
6813 
6814 // Conditional move
6815 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6816   predicate(VM_Version::supports_cmov() );
6817   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6818   ins_cost(200);
6819   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6820   opcode(0x0F,0x40);
6821   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6822   ins_pipe( pipe_cmov_reg );
6823 %}
6824 
6825 // Conditional move (non-P6 version)
6826 // Note:  a CMoveP is generated for  stubs and native wrappers
6827 //        regardless of whether we are on a P6, so we
6828 //        emulate a cmov here
6829 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6830   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6831   ins_cost(300);
6832   format %{ "Jn$cop   skip\n\t"
6833           "MOV    $dst,$src\t# pointer\n"
6834       "skip:" %}
6835   opcode(0x8b);
6836   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6837   ins_pipe( pipe_cmov_reg );
6838 %}
6839 
6840 // Conditional move
6841 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6842   predicate(VM_Version::supports_cmov() );
6843   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6844   ins_cost(200);
6845   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6846   opcode(0x0F,0x40);
6847   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6848   ins_pipe( pipe_cmov_reg );
6849 %}
6850 
6851 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6852   predicate(VM_Version::supports_cmov() );
6853   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6854   ins_cost(200);
6855   expand %{
6856     cmovP_regU(cop, cr, dst, src);
6857   %}
6858 %}
6859 
6860 // DISABLED: Requires the ADLC to emit a bottom_type call that
6861 // correctly meets the two pointer arguments; one is an incoming
6862 // register but the other is a memory operand.  ALSO appears to
6863 // be buggy with implicit null checks.
6864 //
6865 //// Conditional move
6866 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6867 //  predicate(VM_Version::supports_cmov() );
6868 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6869 //  ins_cost(250);
6870 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6871 //  opcode(0x0F,0x40);
6872 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6873 //  ins_pipe( pipe_cmov_mem );
6874 //%}
6875 //
6876 //// Conditional move
6877 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6878 //  predicate(VM_Version::supports_cmov() );
6879 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6880 //  ins_cost(250);
6881 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6882 //  opcode(0x0F,0x40);
6883 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6884 //  ins_pipe( pipe_cmov_mem );
6885 //%}
6886 
6887 // Conditional move
6888 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6889   predicate(UseSSE<=1);
6890   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6891   ins_cost(200);
6892   format %{ "FCMOV$cop $dst,$src\t# double" %}
6893   opcode(0xDA);
6894   ins_encode( enc_cmov_dpr(cop,src) );
6895   ins_pipe( pipe_cmovDPR_reg );
6896 %}
6897 
6898 // Conditional move
6899 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6900   predicate(UseSSE==0);
6901   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6902   ins_cost(200);
6903   format %{ "FCMOV$cop $dst,$src\t# float" %}
6904   opcode(0xDA);
6905   ins_encode( enc_cmov_dpr(cop,src) );
6906   ins_pipe( pipe_cmovDPR_reg );
6907 %}
6908 
6909 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
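// FCMOVcc only tests CF, ZF and PF (the B/E/BE/U conditions and their
// negations), so it can act on the flags of an unsigned-style compare but
// not on the SF/OF-based signed conditions; the signed cases below are
// emulated with a short branch around a register-to-register FPU move.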
6910 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6911   predicate(UseSSE<=1);
6912   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6913   ins_cost(200);
6914   format %{ "Jn$cop   skip\n\t"
6915             "MOV    $dst,$src\t# double\n"
6916       "skip:" %}
6917   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6918   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6919   ins_pipe( pipe_cmovDPR_reg );
6920 %}
6921 
6922 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6923 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6924   predicate(UseSSE==0);
6925   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6926   ins_cost(200);
6927   format %{ "Jn$cop    skip\n\t"
6928             "MOV    $dst,$src\t# float\n"
6929       "skip:" %}
6930   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6931   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6932   ins_pipe( pipe_cmovDPR_reg );
6933 %}
6934 
6935 // No conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6936 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6937   predicate (UseSSE>=1);
6938   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6939   ins_cost(200);
6940   format %{ "Jn$cop   skip\n\t"
6941             "MOVSS  $dst,$src\t# float\n"
6942       "skip:" %}
6943   ins_encode %{
6944     Label skip;
6945     // Invert sense of branch from sense of CMOV
6946     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6947     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6948     __ bind(skip);
6949   %}
6950   ins_pipe( pipe_slow );
6951 %}
6952 
6953 // No conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6954 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6955   predicate (UseSSE>=2);
6956   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6957   ins_cost(200);
6958   format %{ "Jn$cop   skip\n\t"
6959             "MOVSD  $dst,$src\t# double\n"
6960       "skip:" %}
6961   ins_encode %{
6962     Label skip;
6963     // Invert sense of branch from sense of CMOV
6964     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6965     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6966     __ bind(skip);
6967   %}
6968   ins_pipe( pipe_slow );
6969 %}
6970 
6971 // unsigned version
6972 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6973   predicate (UseSSE>=1);
6974   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6975   ins_cost(200);
6976   format %{ "Jn$cop   skip\n\t"
6977             "MOVSS  $dst,$src\t# float\n"
6978       "skip:" %}
6979   ins_encode %{
6980     Label skip;
6981     // Invert sense of branch from sense of CMOV
6982     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6983     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6984     __ bind(skip);
6985   %}
6986   ins_pipe( pipe_slow );
6987 %}
6988 
6989 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6990   predicate (UseSSE>=1);
6991   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   expand %{
6994     fcmovF_regU(cop, cr, dst, src);
6995   %}
6996 %}
6997 
6998 // unsigned version
6999 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7000   predicate (UseSSE>=2);
7001   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7002   ins_cost(200);
7003   format %{ "Jn$cop   skip\n\t"
7004             "MOVSD  $dst,$src\t# double\n"
7005       "skip:" %}
7006   ins_encode %{
7007     Label skip;
7008     // Invert sense of branch from sense of CMOV
7009     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7010     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7011     __ bind(skip);
7012   %}
7013   ins_pipe( pipe_slow );
7014 %}
7015 
7016 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7017   predicate (UseSSE>=2);
7018   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7019   ins_cost(200);
7020   expand %{
7021     fcmovD_regU(cop, cr, dst, src);
7022   %}
7023 %}
7024 
7025 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7026   predicate(VM_Version::supports_cmov() );
7027   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7028   ins_cost(200);
7029   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7030             "CMOV$cop $dst.hi,$src.hi" %}
7031   opcode(0x0F,0x40);
7032   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7033   ins_pipe( pipe_cmov_reg_long );
7034 %}
7035 
7036 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7037   predicate(VM_Version::supports_cmov() );
7038   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7039   ins_cost(200);
7040   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7041             "CMOV$cop $dst.hi,$src.hi" %}
7042   opcode(0x0F,0x40);
7043   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7044   ins_pipe( pipe_cmov_reg_long );
7045 %}
7046 
7047 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7048   predicate(VM_Version::supports_cmov() );
7049   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7050   ins_cost(200);
7051   expand %{
7052     cmovL_regU(cop, cr, dst, src);
7053   %}
7054 %}
7055 
7056 //----------Arithmetic Instructions--------------------------------------------
7057 //----------Addition Instructions----------------------------------------------
7058 
7059 // Integer Addition Instructions
7060 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7061   match(Set dst (AddI dst src));
7062   effect(KILL cr);
7063 
7064   size(2);
7065   format %{ "ADD    $dst,$src" %}
7066   opcode(0x03);
7067   ins_encode( OpcP, RegReg( dst, src) );
7068   ins_pipe( ialu_reg_reg );
7069 %}
7070 
7071 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7072   match(Set dst (AddI dst src));
7073   effect(KILL cr);
7074 
7075   format %{ "ADD    $dst,$src" %}
7076   opcode(0x81, 0x00); /* /0 id */
7077   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7078   ins_pipe( ialu_reg );
7079 %}
7080 
7081 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7082   predicate(UseIncDec);
7083   match(Set dst (AddI dst src));
7084   effect(KILL cr);
7085 
7086   size(1);
7087   format %{ "INC    $dst" %}
7088   opcode(0x40); /* 0x40 + rd, INC r32 */
7089   ins_encode( Opc_plus( primary, dst ) );
7090   ins_pipe( ialu_reg );
7091 %}
7092 
7093 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7094   match(Set dst (AddI src0 src1));
7095   ins_cost(110);
7096 
7097   format %{ "LEA    $dst,[$src0 + $src1]" %}
7098   opcode(0x8D); /* 0x8D /r */
7099   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7100   ins_pipe( ialu_reg_reg );
7101 %}
7102 
7103 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7104   match(Set dst (AddP src0 src1));
7105   ins_cost(110);
7106 
7107   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7108   opcode(0x8D); /* 0x8D /r */
7109   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7110   ins_pipe( ialu_reg_reg );
7111 %}
7112 
7113 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7114   predicate(UseIncDec);
7115   match(Set dst (AddI dst src));
7116   effect(KILL cr);
7117 
7118   size(1);
7119   format %{ "DEC    $dst" %}
7120   opcode(0x48); /* 0x48 + rd, DEC r32 */
7121   ins_encode( Opc_plus( primary, dst ) );
7122   ins_pipe( ialu_reg );
7123 %}
7124 
7125 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7126   match(Set dst (AddP dst src));
7127   effect(KILL cr);
7128 
7129   size(2);
7130   format %{ "ADD    $dst,$src" %}
7131   opcode(0x03);
7132   ins_encode( OpcP, RegReg( dst, src) );
7133   ins_pipe( ialu_reg_reg );
7134 %}
7135 
7136 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7137   match(Set dst (AddP dst src));
7138   effect(KILL cr);
7139 
7140   format %{ "ADD    $dst,$src" %}
7141   opcode(0x81,0x00); /* Opcode 81 /0 id */
7142   // ins_encode( RegImm( dst, src) );
7143   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7144   ins_pipe( ialu_reg );
7145 %}
7146 
7147 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7148   match(Set dst (AddI dst (LoadI src)));
7149   effect(KILL cr);
7150 
7151   ins_cost(125);
7152   format %{ "ADD    $dst,$src" %}
7153   opcode(0x03);
7154   ins_encode( OpcP, RegMem( dst, src) );
7155   ins_pipe( ialu_reg_mem );
7156 %}
7157 
7158 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7159   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7160   effect(KILL cr);
7161 
7162   ins_cost(150);
7163   format %{ "ADD    $dst,$src" %}
7164   opcode(0x01);  /* Opcode 01 /r */
7165   ins_encode( OpcP, RegMem( src, dst ) );
7166   ins_pipe( ialu_mem_reg );
7167 %}
7168 
7169 // Add Memory with Immediate
7170 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7171   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7172   effect(KILL cr);
7173 
7174   ins_cost(125);
7175   format %{ "ADD    $dst,$src" %}
7176   opcode(0x81);               /* Opcode 81 /0 id */
7177   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7178   ins_pipe( ialu_mem_imm );
7179 %}
7180 
7181 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7182   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7183   effect(KILL cr);
7184 
7185   ins_cost(125);
7186   format %{ "INC    $dst" %}
7187   opcode(0xFF);               /* Opcode FF /0 */
7188   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7189   ins_pipe( ialu_mem_imm );
7190 %}
7191 
7192 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7193   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7194   effect(KILL cr);
7195 
7196   ins_cost(125);
7197   format %{ "DEC    $dst" %}
7198   opcode(0xFF);               /* Opcode FF /1 */
7199   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7200   ins_pipe( ialu_mem_imm );
7201 %}
7202 
7203 
7204 instruct checkCastPP( eRegP dst ) %{
7205   match(Set dst (CheckCastPP dst));
7206 
7207   size(0);
7208   format %{ "#checkcastPP of $dst" %}
7209   ins_encode( /*empty encoding*/ );
7210   ins_pipe( empty );
7211 %}
7212 
7213 instruct castPP( eRegP dst ) %{
7214   match(Set dst (CastPP dst));
7215   format %{ "#castPP of $dst" %}
7216   ins_encode( /*empty encoding*/ );
7217   ins_pipe( empty );
7218 %}
7219 
7220 instruct castII( rRegI dst ) %{
7221   match(Set dst (CastII dst));
7222   format %{ "#castII of $dst" %}
7223   ins_encode( /*empty encoding*/ );
7224   ins_cost(0);
7225   ins_pipe( empty );
7226 %}
7227 
7228 
7229 // Load-locked - same as a regular pointer load when used with compare-and-swap
7230 instruct loadPLocked(eRegP dst, memory mem) %{
7231   match(Set dst (LoadPLocked mem));
7232 
7233   ins_cost(125);
7234   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7235   opcode(0x8B);
7236   ins_encode( OpcP, RegMem(dst,mem));
7237   ins_pipe( ialu_reg_mem );
7238 %}
7239 
7240 // Conditional-store of the updated heap-top.
7241 // Used during allocation of the shared heap.
7242 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
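// A simplified bump-pointer sketch of how this is used (hedged; the names
// below are illustrative pseudo-code, not the actual runtime code):
//
//   obj     = *heap_top;            // LoadPLocked, ends up in EAX
//   new_top = obj + object_size;
//   CMPXCHG [heap_top], new_top     // succeeds (ZF=1) iff EAX still == *heap_top
//   // on success EAX holds obj, the address of the newly allocated object
//   // on failure the allocation path retries or falls into the slow path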
7243 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7244   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7245   // EAX is killed if there is contention, but then it's also unused.
7246   // In the common case of no contention, EAX holds the new oop address.
7247   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7248   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7249   ins_pipe( pipe_cmpxchg );
7250 %}
7251 
7252 // Conditional-store of an int value.
7253 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7254 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7255   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7256   effect(KILL oldval);
7257   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7258   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7259   ins_pipe( pipe_cmpxchg );
7260 %}
7261 
7262 // Conditional-store of a long value.
7263 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
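// CMPXCHG8B [m64] semantics, paraphrasing the Intel SDM:
//   if (EDX:EAX == [m64]) { [m64] = ECX:EBX; ZF = 1; }
//   else                  { EDX:EAX = [m64]; ZF = 0; }
// The LOCK prefix makes the compare and the conditional store one atomic step.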
7264 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7265   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7266   effect(KILL oldval);
7267   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7268             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7269             "XCHG   EBX,ECX"
7270   %}
7271   ins_encode %{
7272     // Note: we need to swap rbx and rcx before and after the
7273     //       cmpxchg8 instruction because the instruction uses
7274     //       rcx as the high-order word of the new value to store but
7275     //       our register encoding uses rbx.
7276     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7277     __ lock();
7278     __ cmpxchg8($mem$$Address);
7279     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7280   %}
7281   ins_pipe( pipe_cmpxchg );
7282 %}
7283 
7284 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7285 
7286 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7287   predicate(VM_Version::supports_cx8());
7288   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7289   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7290   effect(KILL cr, KILL oldval);
7291   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7292             "MOV    $res,0\n\t"
7293             "JNE,s  fail\n\t"
7294             "MOV    $res,1\n"
7295           "fail:" %}
7296   ins_encode( enc_cmpxchg8(mem_ptr),
7297               enc_flags_ne_to_boolean(res) );
7298   ins_pipe( pipe_cmpxchg );
7299 %}
7300 
7301 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7302   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7303   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7304   effect(KILL cr, KILL oldval);
7305   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7306             "MOV    $res,0\n\t"
7307             "JNE,s  fail\n\t"
7308             "MOV    $res,1\n"
7309           "fail:" %}
7310   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7311   ins_pipe( pipe_cmpxchg );
7312 %}
7313 
7314 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7315   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7316   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7317   effect(KILL cr, KILL oldval);
7318   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7319             "MOV    $res,0\n\t"
7320             "JNE,s  fail\n\t"
7321             "MOV    $res,1\n"
7322           "fail:" %}
7323   ins_encode( enc_cmpxchgb(mem_ptr),
7324               enc_flags_ne_to_boolean(res) );
7325   ins_pipe( pipe_cmpxchg );
7326 %}
7327 
7328 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7329   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7330   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7331   effect(KILL cr, KILL oldval);
7332   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7333             "MOV    $res,0\n\t"
7334             "JNE,s  fail\n\t"
7335             "MOV    $res,1\n"
7336           "fail:" %}
7337   ins_encode( enc_cmpxchgw(mem_ptr),
7338               enc_flags_ne_to_boolean(res) );
7339   ins_pipe( pipe_cmpxchg );
7340 %}
7341 
7342 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7343   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7344   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7345   effect(KILL cr, KILL oldval);
7346   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7347             "MOV    $res,0\n\t"
7348             "JNE,s  fail\n\t"
7349             "MOV    $res,1\n"
7350           "fail:" %}
7351   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7352   ins_pipe( pipe_cmpxchg );
7353 %}
7354 
7355 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7356   predicate(VM_Version::supports_cx8());
7357   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7358   effect(KILL cr);
7359   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7360   ins_encode( enc_cmpxchg8(mem_ptr) );
7361   ins_pipe( pipe_cmpxchg );
7362 %}
7363 
7364 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7365   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7366   effect(KILL cr);
7367   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7368   ins_encode( enc_cmpxchg(mem_ptr) );
7369   ins_pipe( pipe_cmpxchg );
7370 %}
7371 
7372 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7373   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7374   effect(KILL cr);
7375   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7376   ins_encode( enc_cmpxchgb(mem_ptr) );
7377   ins_pipe( pipe_cmpxchg );
7378 %}
7379 
7380 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7381   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7382   effect(KILL cr);
7383   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7384   ins_encode( enc_cmpxchgw(mem_ptr) );
7385   ins_pipe( pipe_cmpxchg );
7386 %}
7387 
7388 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7389   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7390   effect(KILL cr);
7391   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7392   ins_encode( enc_cmpxchg(mem_ptr) );
7393   ins_pipe( pipe_cmpxchg );
7394 %}
7395 
7396 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7397   predicate(n->as_LoadStore()->result_not_used());
7398   match(Set dummy (GetAndAddB mem add));
7399   effect(KILL cr);
7400   format %{ "ADDB  [$mem],$add" %}
7401   ins_encode %{
7402     __ lock();
7403     __ addb($mem$$Address, $add$$constant);
7404   %}
7405   ins_pipe( pipe_cmpxchg );
7406 %}
7407 
7408 // Important to match to xRegI: only 8-bit regs.
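// (Only EAX, EBX, ECX and EDX have byte sub-registers on IA-32, so byte-wide
// read-modify-write forms such as XADDB must be restricted to xRegI rather
// than the full rRegI class.)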
7409 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7410   match(Set newval (GetAndAddB mem newval));
7411   effect(KILL cr);
7412   format %{ "XADDB  [$mem],$newval" %}
7413   ins_encode %{
7414     __ lock();
7415     __ xaddb($mem$$Address, $newval$$Register);
7416   %}
7417   ins_pipe( pipe_cmpxchg );
7418 %}
7419 
7420 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7421   predicate(n->as_LoadStore()->result_not_used());
7422   match(Set dummy (GetAndAddS mem add));
7423   effect(KILL cr);
7424   format %{ "ADDS  [$mem],$add" %}
7425   ins_encode %{
7426     __ lock();
7427     __ addw($mem$$Address, $add$$constant);
7428   %}
7429   ins_pipe( pipe_cmpxchg );
7430 %}
7431 
7432 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7433   match(Set newval (GetAndAddS mem newval));
7434   effect(KILL cr);
7435   format %{ "XADDS  [$mem],$newval" %}
7436   ins_encode %{
7437     __ lock();
7438     __ xaddw($mem$$Address, $newval$$Register);
7439   %}
7440   ins_pipe( pipe_cmpxchg );
7441 %}
7442 
7443 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7444   predicate(n->as_LoadStore()->result_not_used());
7445   match(Set dummy (GetAndAddI mem add));
7446   effect(KILL cr);
7447   format %{ "ADDL  [$mem],$add" %}
7448   ins_encode %{
7449     __ lock();
7450     __ addl($mem$$Address, $add$$constant);
7451   %}
7452   ins_pipe( pipe_cmpxchg );
7453 %}
7454 
7455 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7456   match(Set newval (GetAndAddI mem newval));
7457   effect(KILL cr);
7458   format %{ "XADDL  [$mem],$newval" %}
7459   ins_encode %{
7460     __ lock();
7461     __ xaddl($mem$$Address, $newval$$Register);
7462   %}
7463   ins_pipe( pipe_cmpxchg );
7464 %}
7465 
7466 // Important to match to xRegI: only 8-bit regs.
7467 instruct xchgB( memory mem, xRegI newval) %{
7468   match(Set newval (GetAndSetB mem newval));
7469   format %{ "XCHGB  $newval,[$mem]" %}
7470   ins_encode %{
7471     __ xchgb($newval$$Register, $mem$$Address);
7472   %}
7473   ins_pipe( pipe_cmpxchg );
7474 %}
7475 
7476 instruct xchgS( memory mem, rRegI newval) %{
7477   match(Set newval (GetAndSetS mem newval));
7478   format %{ "XCHGW  $newval,[$mem]" %}
7479   ins_encode %{
7480     __ xchgw($newval$$Register, $mem$$Address);
7481   %}
7482   ins_pipe( pipe_cmpxchg );
7483 %}
7484 
7485 instruct xchgI( memory mem, rRegI newval) %{
7486   match(Set newval (GetAndSetI mem newval));
7487   format %{ "XCHGL  $newval,[$mem]" %}
7488   ins_encode %{
7489     __ xchgl($newval$$Register, $mem$$Address);
7490   %}
7491   ins_pipe( pipe_cmpxchg );
7492 %}
7493 
7494 instruct xchgP( memory mem, pRegP newval) %{
7495   match(Set newval (GetAndSetP mem newval));
7496   format %{ "XCHGL  $newval,[$mem]" %}
7497   ins_encode %{
7498     __ xchgl($newval$$Register, $mem$$Address);
7499   %}
7500   ins_pipe( pipe_cmpxchg );
7501 %}
7502 
7503 //----------Subtraction Instructions-------------------------------------------
7504 
7505 // Integer Subtraction Instructions
7506 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7507   match(Set dst (SubI dst src));
7508   effect(KILL cr);
7509 
7510   size(2);
7511   format %{ "SUB    $dst,$src" %}
7512   opcode(0x2B);
7513   ins_encode( OpcP, RegReg( dst, src) );
7514   ins_pipe( ialu_reg_reg );
7515 %}
7516 
7517 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7518   match(Set dst (SubI dst src));
7519   effect(KILL cr);
7520 
7521   format %{ "SUB    $dst,$src" %}
7522   opcode(0x81,0x05);  /* Opcode 81 /5 */
7523   // ins_encode( RegImm( dst, src) );
7524   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7525   ins_pipe( ialu_reg );
7526 %}
7527 
7528 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7529   match(Set dst (SubI dst (LoadI src)));
7530   effect(KILL cr);
7531 
7532   ins_cost(125);
7533   format %{ "SUB    $dst,$src" %}
7534   opcode(0x2B);
7535   ins_encode( OpcP, RegMem( dst, src) );
7536   ins_pipe( ialu_reg_mem );
7537 %}
7538 
7539 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7540   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7541   effect(KILL cr);
7542 
7543   ins_cost(150);
7544   format %{ "SUB    $dst,$src" %}
7545   opcode(0x29);  /* Opcode 29 /r */
7546   ins_encode( OpcP, RegMem( src, dst ) );
7547   ins_pipe( ialu_mem_reg );
7548 %}
7549 
7550 // Subtract from a pointer
7551 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7552   match(Set dst (AddP dst (SubI zero src)));
7553   effect(KILL cr);
7554 
7555   size(2);
7556   format %{ "SUB    $dst,$src" %}
7557   opcode(0x2B);
7558   ins_encode( OpcP, RegReg( dst, src) );
7559   ins_pipe( ialu_reg_reg );
7560 %}
7561 
7562 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7563   match(Set dst (SubI zero dst));
7564   effect(KILL cr);
7565 
7566   size(2);
7567   format %{ "NEG    $dst" %}
7568   opcode(0xF7,0x03);  // Opcode F7 /3
7569   ins_encode( OpcP, RegOpc( dst ) );
7570   ins_pipe( ialu_reg );
7571 %}
7572 
7573 //----------Multiplication/Division Instructions-------------------------------
7574 // Integer Multiplication Instructions
7575 // Multiply Register
7576 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7577   match(Set dst (MulI dst src));
7578   effect(KILL cr);
7579 
7580   size(3);
7581   ins_cost(300);
7582   format %{ "IMUL   $dst,$src" %}
7583   opcode(0xAF, 0x0F);
7584   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7585   ins_pipe( ialu_reg_reg_alu0 );
7586 %}
7587 
7588 // Multiply 32-bit Immediate
7589 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7590   match(Set dst (MulI src imm));
7591   effect(KILL cr);
7592 
7593   ins_cost(300);
7594   format %{ "IMUL   $dst,$src,$imm" %}
7595   opcode(0x69);  /* 69 /r id */
7596   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7597   ins_pipe( ialu_reg_reg_alu0 );
7598 %}
7599 
7600 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7601   match(Set dst src);
7602   effect(KILL cr);
7603 
7604   // Note that this is artificially increased to make it more expensive than loadConL
7605   ins_cost(250);
7606   format %{ "MOV    EAX,$src\t// low word only" %}
7607   opcode(0xB8);
7608   ins_encode( LdImmL_Lo(dst, src) );
7609   ins_pipe( ialu_reg_fat );
7610 %}
7611 
7612 // Multiply by 32-bit Immediate, taking the shifted high order results
7613 //  (special case for shift by 32)
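// A hedged aside: this (int)(((long)x * con) >> cnt) shape with cnt in
// [32, 63] is what integer division by a constant typically strength-reduces
// to (multiply by a "magic" reciprocal and keep the shifted high half); e.g.
// x / 10 can become (int)(((long)x * 0x66666667L) >> 34) plus a sign
// correction.  The constants in this note are illustrative only.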
7614 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7615   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7616   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7617              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7618              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7619   effect(USE src1, KILL cr);
7620 
7621   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7622   ins_cost(0*100 + 1*400 - 150);
7623   format %{ "IMUL   EDX:EAX,$src1" %}
7624   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7625   ins_pipe( pipe_slow );
7626 %}
7627 
7628 // Multiply by 32-bit Immediate, taking the shifted high order results
7629 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7630   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7631   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7632              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7633              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7634   effect(USE src1, KILL cr);
7635 
7636   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7637   ins_cost(1*100 + 1*400 - 150);
7638   format %{ "IMUL   EDX:EAX,$src1\n\t"
7639             "SAR    EDX,$cnt-32" %}
7640   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7641   ins_pipe( pipe_slow );
7642 %}
7643 
7644 // Multiply Memory 32-bit Immediate
7645 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7646   match(Set dst (MulI (LoadI src) imm));
7647   effect(KILL cr);
7648 
7649   ins_cost(300);
7650   format %{ "IMUL   $dst,$src,$imm" %}
7651   opcode(0x69);  /* 69 /r id */
7652   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7653   ins_pipe( ialu_reg_mem_alu0 );
7654 %}
7655 
7656 // Multiply Memory
7657 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7658   match(Set dst (MulI dst (LoadI src)));
7659   effect(KILL cr);
7660 
7661   ins_cost(350);
7662   format %{ "IMUL   $dst,$src" %}
7663   opcode(0xAF, 0x0F);
7664   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7665   ins_pipe( ialu_reg_mem_alu0 );
7666 %}
7667 
7668 // Multiply Register Int to Long
7669 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7670   // Basic Idea: long = (long)int * (long)int
7671   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7672   effect(DEF dst, USE src, USE src1, KILL flags);
7673 
7674   ins_cost(300);
7675   format %{ "IMUL   $dst,$src1" %}
7676 
7677   ins_encode( long_int_multiply( dst, src1 ) );
7678   ins_pipe( ialu_reg_reg_alu0 );
7679 %}
7680 
7681 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7682   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7683   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7684   effect(KILL flags);
7685 
7686   ins_cost(300);
7687   format %{ "MUL    $dst,$src1" %}
7688 
7689   ins_encode( long_uint_multiply(dst, src1) );
7690   ins_pipe( ialu_reg_reg_alu0 );
7691 %}
7692 
7693 // Multiply Register Long
7694 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7695   match(Set dst (MulL dst src));
7696   effect(KILL cr, TEMP tmp);
7697   ins_cost(4*100+3*400);
7698 // Basic idea: lo(result) = lo(x_lo * y_lo)
7699 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
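// Derivation: writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
//   x*y = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo
// and the 2^64 term vanishes modulo 2^64, which gives the lo/hi formulas above.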
7700   format %{ "MOV    $tmp,$src.lo\n\t"
7701             "IMUL   $tmp,EDX\n\t"
7702             "MOV    EDX,$src.hi\n\t"
7703             "IMUL   EDX,EAX\n\t"
7704             "ADD    $tmp,EDX\n\t"
7705             "MUL    EDX:EAX,$src.lo\n\t"
7706             "ADD    EDX,$tmp" %}
7707   ins_encode( long_multiply( dst, src, tmp ) );
7708   ins_pipe( pipe_slow );
7709 %}
7710 
7711 // Multiply Register Long where the left operand's high 32 bits are zero
7712 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7713   predicate(is_operand_hi32_zero(n->in(1)));
7714   match(Set dst (MulL dst src));
7715   effect(KILL cr, TEMP tmp);
7716   ins_cost(2*100+2*400);
7717 // Basic idea: lo(result) = lo(x_lo * y_lo)
7718 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7719   format %{ "MOV    $tmp,$src.hi\n\t"
7720             "IMUL   $tmp,EAX\n\t"
7721             "MUL    EDX:EAX,$src.lo\n\t"
7722             "ADD    EDX,$tmp" %}
7723   ins_encode %{
7724     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7725     __ imull($tmp$$Register, rax);
7726     __ mull($src$$Register);
7727     __ addl(rdx, $tmp$$Register);
7728   %}
7729   ins_pipe( pipe_slow );
7730 %}
7731 
7732 // Multiply Register Long where the right operand's high 32 bits are zero
7733 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7734   predicate(is_operand_hi32_zero(n->in(2)));
7735   match(Set dst (MulL dst src));
7736   effect(KILL cr, TEMP tmp);
7737   ins_cost(2*100+2*400);
7738 // Basic idea: lo(result) = lo(x_lo * y_lo)
7739 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7740   format %{ "MOV    $tmp,$src.lo\n\t"
7741             "IMUL   $tmp,EDX\n\t"
7742             "MUL    EDX:EAX,$src.lo\n\t"
7743             "ADD    EDX,$tmp" %}
7744   ins_encode %{
7745     __ movl($tmp$$Register, $src$$Register);
7746     __ imull($tmp$$Register, rdx);
7747     __ mull($src$$Register);
7748     __ addl(rdx, $tmp$$Register);
7749   %}
7750   ins_pipe( pipe_slow );
7751 %}
7752 
7753 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7754 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7755   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7756   match(Set dst (MulL dst src));
7757   effect(KILL cr);
7758   ins_cost(1*400);
7759 // Basic idea: lo(result) = lo(x_lo * y_lo)
7760 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7761   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7762   ins_encode %{
7763     __ mull($src$$Register);
7764   %}
7765   ins_pipe( pipe_slow );
7766 %}
7767 
7768 // Multiply Register Long by small constant
7769 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7770   match(Set dst (MulL dst src));
7771   effect(KILL cr, TEMP tmp);
7772   ins_cost(2*100+2*400);
7773   size(12);
7774 // Basic idea: lo(result) = lo(src * EAX)
7775 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7776   format %{ "IMUL   $tmp,EDX,$src\n\t"
7777             "MOV    EDX,$src\n\t"
7778             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7779             "ADD    EDX,$tmp" %}
7780   ins_encode( long_multiply_con( dst, src, tmp ) );
7781   ins_pipe( pipe_slow );
7782 %}
7783 
7784 // Integer DIV with Register
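// The guard around IDIV below exists because min_jint / -1 overflows: the
// quotient 2^31 is not representable and the hardware raises #DE, whereas
// Java requires Integer.MIN_VALUE / -1 == Integer.MIN_VALUE with remainder 0,
// so that case bypasses the IDIV and just clears EDX.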
7785 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7786   match(Set rax (DivI rax div));
7787   effect(KILL rdx, KILL cr);
7788   size(26);
7789   ins_cost(30*100+10*100);
7790   format %{ "CMP    EAX,0x80000000\n\t"
7791             "JNE,s  normal\n\t"
7792             "XOR    EDX,EDX\n\t"
7793             "CMP    ECX,-1\n\t"
7794             "JE,s   done\n"
7795     "normal: CDQ\n\t"
7796             "IDIV   $div\n\t"
7797     "done:"        %}
7798   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7799   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7800   ins_pipe( ialu_reg_reg_alu0 );
7801 %}
7802 
7803 // Divide Register Long
7804 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7805   match(Set dst (DivL src1 src2));
7806   effect( KILL cr, KILL cx, KILL bx );
7807   ins_cost(10000);
7808   format %{ "PUSH   $src1.hi\n\t"
7809             "PUSH   $src1.lo\n\t"
7810             "PUSH   $src2.hi\n\t"
7811             "PUSH   $src2.lo\n\t"
7812             "CALL   SharedRuntime::ldiv\n\t"
7813             "ADD    ESP,16" %}
7814   ins_encode( long_div(src1,src2) );
7815   ins_pipe( pipe_slow );
7816 %}
7817 
7818 // Integer DIVMOD with Register, both quotient and mod results
7819 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7820   match(DivModI rax div);
7821   effect(KILL cr);
7822   size(26);
7823   ins_cost(30*100+10*100);
7824   format %{ "CMP    EAX,0x80000000\n\t"
7825             "JNE,s  normal\n\t"
7826             "XOR    EDX,EDX\n\t"
7827             "CMP    ECX,-1\n\t"
7828             "JE,s   done\n"
7829     "normal: CDQ\n\t"
7830             "IDIV   $div\n\t"
7831     "done:"        %}
7832   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7833   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7834   ins_pipe( pipe_slow );
7835 %}
7836 
7837 // Integer MOD with Register
7838 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7839   match(Set rdx (ModI rax div));
7840   effect(KILL rax, KILL cr);
7841 
7842   size(26);
7843   ins_cost(300);
7844   format %{ "CDQ\n\t"
7845             "IDIV   $div" %}
7846   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7847   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7848   ins_pipe( ialu_reg_reg_alu0 );
7849 %}
7850 
7851 // Remainder Register Long
7852 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7853   match(Set dst (ModL src1 src2));
7854   effect( KILL cr, KILL cx, KILL bx );
7855   ins_cost(10000);
7856   format %{ "PUSH   $src1.hi\n\t"
7857             "PUSH   $src1.lo\n\t"
7858             "PUSH   $src2.hi\n\t"
7859             "PUSH   $src2.lo\n\t"
7860             "CALL   SharedRuntime::lrem\n\t"
7861             "ADD    ESP,16" %}
7862   ins_encode( long_mod(src1,src2) );
7863   ins_pipe( pipe_slow );
7864 %}
7865 
7866 // Divide Register Long (no special case since divisor != -1)
7867 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7868   match(Set dst (DivL dst imm));
7869   effect( TEMP tmp, TEMP tmp2, KILL cr );
7870   ins_cost(1000);
7871   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7872             "XOR    $tmp2,$tmp2\n\t"
7873             "CMP    $tmp,EDX\n\t"
7874             "JA,s   fast\n\t"
7875             "MOV    $tmp2,EAX\n\t"
7876             "MOV    EAX,EDX\n\t"
7877             "MOV    EDX,0\n\t"
7878             "JLE,s  pos\n\t"
7879             "LNEG   EAX : $tmp2\n\t"
7880             "DIV    $tmp # unsigned division\n\t"
7881             "XCHG   EAX,$tmp2\n\t"
7882             "DIV    $tmp\n\t"
7883             "LNEG   $tmp2 : EAX\n\t"
7884             "JMP,s  done\n"
7885     "pos:\n\t"
7886             "DIV    $tmp\n\t"
7887             "XCHG   EAX,$tmp2\n"
7888     "fast:\n\t"
7889             "DIV    $tmp\n"
7890     "done:\n\t"
7891             "MOV    EDX,$tmp2\n\t"
7892             "NEG    EDX:EAX # if $imm < 0" %}
7893   ins_encode %{
7894     int con = (int)$imm$$constant;
7895     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7896     int pcon = (con > 0) ? con : -con;
7897     Label Lfast, Lpos, Ldone;
7898 
7899     __ movl($tmp$$Register, pcon);
7900     __ xorl($tmp2$$Register,$tmp2$$Register);
7901     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7902     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7903 
7904     __ movl($tmp2$$Register, $dst$$Register); // save
7905     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7906     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7907     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7908 
7909     // Negative dividend.
7910     // convert value to positive to use unsigned division
7911     __ lneg($dst$$Register, $tmp2$$Register);
7912     __ divl($tmp$$Register);
7913     __ xchgl($dst$$Register, $tmp2$$Register);
7914     __ divl($tmp$$Register);
7915     // revert result back to negative
7916     __ lneg($tmp2$$Register, $dst$$Register);
7917     __ jmpb(Ldone);
7918 
7919     __ bind(Lpos);
7920     __ divl($tmp$$Register); // Use unsigned division
7921     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 holds the upper 32 bits of the result
7923 
7924     __ bind(Lfast);
7925     // fast path: src is positive
7926     __ divl($tmp$$Register); // Use unsigned division
7927 
7928     __ bind(Ldone);
7929     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7930     if (con < 0) {
7931       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7932     }
7933   %}
7934   ins_pipe( pipe_slow );
7935 %}
7936 
// Remainder Register Long (remainder fits into 32 bits)
7938 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7939   match(Set dst (ModL dst imm));
7940   effect( TEMP tmp, TEMP tmp2, KILL cr );
7941   ins_cost(1000);
7942   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7943             "CMP    $tmp,EDX\n\t"
7944             "JA,s   fast\n\t"
7945             "MOV    $tmp2,EAX\n\t"
7946             "MOV    EAX,EDX\n\t"
7947             "MOV    EDX,0\n\t"
7948             "JLE,s  pos\n\t"
7949             "LNEG   EAX : $tmp2\n\t"
7950             "DIV    $tmp # unsigned division\n\t"
7951             "MOV    EAX,$tmp2\n\t"
7952             "DIV    $tmp\n\t"
7953             "NEG    EDX\n\t"
7954             "JMP,s  done\n"
7955     "pos:\n\t"
7956             "DIV    $tmp\n\t"
7957             "MOV    EAX,$tmp2\n"
7958     "fast:\n\t"
7959             "DIV    $tmp\n"
7960     "done:\n\t"
7961             "MOV    EAX,EDX\n\t"
7962             "SAR    EDX,31\n\t" %}
7963   ins_encode %{
7964     int con = (int)$imm$$constant;
7965     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7966     int pcon = (con > 0) ? con : -con;
7967     Label  Lfast, Lpos, Ldone;
7968 
7969     __ movl($tmp$$Register, pcon);
7970     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7971     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7972 
7973     __ movl($tmp2$$Register, $dst$$Register); // save
7974     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7975     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7976     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7977 
7978     // Negative dividend.
7979     // convert value to positive to use unsigned division
7980     __ lneg($dst$$Register, $tmp2$$Register);
7981     __ divl($tmp$$Register);
7982     __ movl($dst$$Register, $tmp2$$Register);
7983     __ divl($tmp$$Register);
7984     // revert remainder back to negative
7985     __ negl(HIGH_FROM_LOW($dst$$Register));
7986     __ jmpb(Ldone);
7987 
7988     __ bind(Lpos);
7989     __ divl($tmp$$Register);
7990     __ movl($dst$$Register, $tmp2$$Register);
7991 
7992     __ bind(Lfast);
7993     // fast path: src is positive
7994     __ divl($tmp$$Register);
7995 
7996     __ bind(Ldone);
7997     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7998     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7999 
8000   %}
8001   ins_pipe( pipe_slow );
8002 %}
8003 
8004 // Integer Shift Instructions
8005 // Shift Left by one
8006 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8007   match(Set dst (LShiftI dst shift));
8008   effect(KILL cr);
8009 
8010   size(2);
8011   format %{ "SHL    $dst,$shift" %}
8012   opcode(0xD1, 0x4);  /* D1 /4 */
8013   ins_encode( OpcP, RegOpc( dst ) );
8014   ins_pipe( ialu_reg );
8015 %}
8016 
8017 // Shift Left by 8-bit immediate
8018 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8019   match(Set dst (LShiftI dst shift));
8020   effect(KILL cr);
8021 
8022   size(3);
8023   format %{ "SHL    $dst,$shift" %}
8024   opcode(0xC1, 0x4);  /* C1 /4 ib */
8025   ins_encode( RegOpcImm( dst, shift) );
8026   ins_pipe( ialu_reg );
8027 %}
8028 
8029 // Shift Left by variable
8030 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8031   match(Set dst (LShiftI dst shift));
8032   effect(KILL cr);
8033 
8034   size(2);
8035   format %{ "SHL    $dst,$shift" %}
8036   opcode(0xD3, 0x4);  /* D3 /4 */
8037   ins_encode( OpcP, RegOpc( dst ) );
8038   ins_pipe( ialu_reg_reg );
8039 %}
8040 
8041 // Arithmetic shift right by one
8042 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8043   match(Set dst (RShiftI dst shift));
8044   effect(KILL cr);
8045 
8046   size(2);
8047   format %{ "SAR    $dst,$shift" %}
8048   opcode(0xD1, 0x7);  /* D1 /7 */
8049   ins_encode( OpcP, RegOpc( dst ) );
8050   ins_pipe( ialu_reg );
8051 %}
8052 
8053 // Arithmetic shift right by one
8054 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8055   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8056   effect(KILL cr);
8057   format %{ "SAR    $dst,$shift" %}
8058   opcode(0xD1, 0x7);  /* D1 /7 */
8059   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8060   ins_pipe( ialu_mem_imm );
8061 %}
8062 
8063 // Arithmetic Shift Right by 8-bit immediate
8064 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8065   match(Set dst (RShiftI dst shift));
8066   effect(KILL cr);
8067 
8068   size(3);
8069   format %{ "SAR    $dst,$shift" %}
8070   opcode(0xC1, 0x7);  /* C1 /7 ib */
8071   ins_encode( RegOpcImm( dst, shift ) );
8072   ins_pipe( ialu_mem_imm );
8073 %}
8074 
8075 // Arithmetic Shift Right by 8-bit immediate
8076 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8077   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8078   effect(KILL cr);
8079 
8080   format %{ "SAR    $dst,$shift" %}
8081   opcode(0xC1, 0x7);  /* C1 /7 ib */
8082   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8083   ins_pipe( ialu_mem_imm );
8084 %}
8085 
8086 // Arithmetic Shift Right by variable
8087 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8088   match(Set dst (RShiftI dst shift));
8089   effect(KILL cr);
8090 
8091   size(2);
8092   format %{ "SAR    $dst,$shift" %}
8093   opcode(0xD3, 0x7);  /* D3 /7 */
8094   ins_encode( OpcP, RegOpc( dst ) );
8095   ins_pipe( ialu_reg_reg );
8096 %}
8097 
8098 // Logical shift right by one
8099 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8100   match(Set dst (URShiftI dst shift));
8101   effect(KILL cr);
8102 
8103   size(2);
8104   format %{ "SHR    $dst,$shift" %}
8105   opcode(0xD1, 0x5);  /* D1 /5 */
8106   ins_encode( OpcP, RegOpc( dst ) );
8107   ins_pipe( ialu_reg );
8108 %}
8109 
8110 // Logical Shift Right by 8-bit immediate
8111 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8112   match(Set dst (URShiftI dst shift));
8113   effect(KILL cr);
8114 
8115   size(3);
8116   format %{ "SHR    $dst,$shift" %}
8117   opcode(0xC1, 0x5);  /* C1 /5 ib */
8118   ins_encode( RegOpcImm( dst, shift) );
8119   ins_pipe( ialu_reg );
8120 %}
8121 
8122 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode (sign-extend byte to int).
8125 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8126   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8127 
8128   size(3);
8129   format %{ "MOVSX  $dst,$src :8" %}
8130   ins_encode %{
8131     __ movsbl($dst$$Register, $src$$Register);
8132   %}
8133   ins_pipe(ialu_reg_reg);
8134 %}
8135 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode (sign-extend short to int).
8138 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8139   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8140 
8141   size(3);
8142   format %{ "MOVSX  $dst,$src :16" %}
8143   ins_encode %{
8144     __ movswl($dst$$Register, $src$$Register);
8145   %}
8146   ins_pipe(ialu_reg_reg);
8147 %}
8148 
8149 
8150 // Logical Shift Right by variable
8151 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8152   match(Set dst (URShiftI dst shift));
8153   effect(KILL cr);
8154 
8155   size(2);
8156   format %{ "SHR    $dst,$shift" %}
8157   opcode(0xD3, 0x5);  /* D3 /5 */
8158   ins_encode( OpcP, RegOpc( dst ) );
8159   ins_pipe( ialu_reg_reg );
8160 %}
8161 
8162 
8163 //----------Logical Instructions-----------------------------------------------
8164 //----------Integer Logical Instructions---------------------------------------
8165 // And Instructions
8166 // And Register with Register
8167 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8168   match(Set dst (AndI dst src));
8169   effect(KILL cr);
8170 
8171   size(2);
8172   format %{ "AND    $dst,$src" %}
8173   opcode(0x23);
8174   ins_encode( OpcP, RegReg( dst, src) );
8175   ins_pipe( ialu_reg_reg );
8176 %}
8177 
8178 // And Register with Immediate
8179 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8180   match(Set dst (AndI dst src));
8181   effect(KILL cr);
8182 
8183   format %{ "AND    $dst,$src" %}
8184   opcode(0x81,0x04);  /* Opcode 81 /4 */
8185   // ins_encode( RegImm( dst, src) );
8186   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8187   ins_pipe( ialu_reg );
8188 %}
8189 
8190 // And Register with Memory
8191 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8192   match(Set dst (AndI dst (LoadI src)));
8193   effect(KILL cr);
8194 
8195   ins_cost(125);
8196   format %{ "AND    $dst,$src" %}
8197   opcode(0x23);
8198   ins_encode( OpcP, RegMem( dst, src) );
8199   ins_pipe( ialu_reg_mem );
8200 %}
8201 
8202 // And Memory with Register
8203 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8204   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8205   effect(KILL cr);
8206 
8207   ins_cost(150);
8208   format %{ "AND    $dst,$src" %}
8209   opcode(0x21);  /* Opcode 21 /r */
8210   ins_encode( OpcP, RegMem( src, dst ) );
8211   ins_pipe( ialu_mem_reg );
8212 %}
8213 
8214 // And Memory with Immediate
8215 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8216   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8217   effect(KILL cr);
8218 
8219   ins_cost(125);
8220   format %{ "AND    $dst,$src" %}
8221   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8222   // ins_encode( MemImm( dst, src) );
8223   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8224   ins_pipe( ialu_mem_imm );
8225 %}
8226 
8227 // BMI1 instructions
8228 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8229   match(Set dst (AndI (XorI src1 minus_1) src2));
8230   predicate(UseBMI1Instructions);
8231   effect(KILL cr);
8232 
8233   format %{ "ANDNL  $dst, $src1, $src2" %}
8234 
8235   ins_encode %{
8236     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8237   %}
8238   ins_pipe(ialu_reg);
8239 %}
8240 
8241 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8242   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8243   predicate(UseBMI1Instructions);
8244   effect(KILL cr);
8245 
8246   ins_cost(125);
8247   format %{ "ANDNL  $dst, $src1, $src2" %}
8248 
8249   ins_encode %{
8250     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8251   %}
8252   ins_pipe(ialu_reg_mem);
8253 %}
8254 
8255 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8256   match(Set dst (AndI (SubI imm_zero src) src));
8257   predicate(UseBMI1Instructions);
8258   effect(KILL cr);
8259 
8260   format %{ "BLSIL  $dst, $src" %}
8261 
8262   ins_encode %{
8263     __ blsil($dst$$Register, $src$$Register);
8264   %}
8265   ins_pipe(ialu_reg);
8266 %}
8267 
8268 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8269   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8270   predicate(UseBMI1Instructions);
8271   effect(KILL cr);
8272 
8273   ins_cost(125);
8274   format %{ "BLSIL  $dst, $src" %}
8275 
8276   ins_encode %{
8277     __ blsil($dst$$Register, $src$$Address);
8278   %}
8279   ins_pipe(ialu_reg_mem);
8280 %}
8281 
8282 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8283 %{
8284   match(Set dst (XorI (AddI src minus_1) src));
8285   predicate(UseBMI1Instructions);
8286   effect(KILL cr);
8287 
8288   format %{ "BLSMSKL $dst, $src" %}
8289 
8290   ins_encode %{
8291     __ blsmskl($dst$$Register, $src$$Register);
8292   %}
8293 
8294   ins_pipe(ialu_reg);
8295 %}
8296 
8297 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8298 %{
8299   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8300   predicate(UseBMI1Instructions);
8301   effect(KILL cr);
8302 
8303   ins_cost(125);
8304   format %{ "BLSMSKL $dst, $src" %}
8305 
8306   ins_encode %{
8307     __ blsmskl($dst$$Register, $src$$Address);
8308   %}
8309 
8310   ins_pipe(ialu_reg_mem);
8311 %}
8312 
8313 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8314 %{
8315   match(Set dst (AndI (AddI src minus_1) src) );
8316   predicate(UseBMI1Instructions);
8317   effect(KILL cr);
8318 
8319   format %{ "BLSRL  $dst, $src" %}
8320 
8321   ins_encode %{
8322     __ blsrl($dst$$Register, $src$$Register);
8323   %}
8324 
8325   ins_pipe(ialu_reg);
8326 %}
8327 
8328 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8329 %{
8330   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8331   predicate(UseBMI1Instructions);
8332   effect(KILL cr);
8333 
8334   ins_cost(125);
8335   format %{ "BLSRL  $dst, $src" %}
8336 
8337   ins_encode %{
8338     __ blsrl($dst$$Register, $src$$Address);
8339   %}
8340 
8341   ins_pipe(ialu_reg_mem);
8342 %}
8343 
8344 // Or Instructions
8345 // Or Register with Register
8346 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8347   match(Set dst (OrI dst src));
8348   effect(KILL cr);
8349 
8350   size(2);
8351   format %{ "OR     $dst,$src" %}
8352   opcode(0x0B);
8353   ins_encode( OpcP, RegReg( dst, src) );
8354   ins_pipe( ialu_reg_reg );
8355 %}
8356 
8357 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8358   match(Set dst (OrI dst (CastP2X src)));
8359   effect(KILL cr);
8360 
8361   size(2);
8362   format %{ "OR     $dst,$src" %}
8363   opcode(0x0B);
8364   ins_encode( OpcP, RegReg( dst, src) );
8365   ins_pipe( ialu_reg_reg );
8366 %}
8367 
8368 
8369 // Or Register with Immediate
8370 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8371   match(Set dst (OrI dst src));
8372   effect(KILL cr);
8373 
8374   format %{ "OR     $dst,$src" %}
8375   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8376   // ins_encode( RegImm( dst, src) );
8377   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8378   ins_pipe( ialu_reg );
8379 %}
8380 
8381 // Or Register with Memory
8382 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8383   match(Set dst (OrI dst (LoadI src)));
8384   effect(KILL cr);
8385 
8386   ins_cost(125);
8387   format %{ "OR     $dst,$src" %}
8388   opcode(0x0B);
8389   ins_encode( OpcP, RegMem( dst, src) );
8390   ins_pipe( ialu_reg_mem );
8391 %}
8392 
8393 // Or Memory with Register
8394 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8395   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8396   effect(KILL cr);
8397 
8398   ins_cost(150);
8399   format %{ "OR     $dst,$src" %}
8400   opcode(0x09);  /* Opcode 09 /r */
8401   ins_encode( OpcP, RegMem( src, dst ) );
8402   ins_pipe( ialu_mem_reg );
8403 %}
8404 
8405 // Or Memory with Immediate
8406 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8407   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8408   effect(KILL cr);
8409 
8410   ins_cost(125);
8411   format %{ "OR     $dst,$src" %}
8412   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8413   // ins_encode( MemImm( dst, src) );
8414   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8415   ins_pipe( ialu_mem_imm );
8416 %}
8417 
8418 // ROL/ROR
8419 // ROL expand
8420 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8421   effect(USE_DEF dst, USE shift, KILL cr);
8422 
8423   format %{ "ROL    $dst, $shift" %}
8424   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8425   ins_encode( OpcP, RegOpc( dst ));
8426   ins_pipe( ialu_reg );
8427 %}
8428 
8429 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8430   effect(USE_DEF dst, USE shift, KILL cr);
8431 
8432   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8434   ins_encode( RegOpcImm(dst, shift) );
8435   ins_pipe(ialu_reg);
8436 %}
8437 
8438 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8439   effect(USE_DEF dst, USE shift, KILL cr);
8440 
8441   format %{ "ROL    $dst, $shift" %}
8442   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8443   ins_encode(OpcP, RegOpc(dst));
8444   ins_pipe( ialu_reg_reg );
8445 %}
8446 // end of ROL expand
8447 
8448 // ROL 32bit by one once
8449 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8450   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8451 
8452   expand %{
8453     rolI_eReg_imm1(dst, lshift, cr);
8454   %}
8455 %}
8456 
8457 // ROL 32bit var by imm8 once
8458 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8459   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8460   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8461 
8462   expand %{
8463     rolI_eReg_imm8(dst, lshift, cr);
8464   %}
8465 %}
8466 
8467 // ROL 32bit var by var once
8468 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8469   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8470 
8471   expand %{
8472     rolI_eReg_CL(dst, shift, cr);
8473   %}
8474 %}
8475 
8476 // ROL 32bit var by var once
8477 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8478   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8479 
8480   expand %{
8481     rolI_eReg_CL(dst, shift, cr);
8482   %}
8483 %}
8484 
8485 // ROR expand
8486 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8487   effect(USE_DEF dst, USE shift, KILL cr);
8488 
8489   format %{ "ROR    $dst, $shift" %}
8490   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8491   ins_encode( OpcP, RegOpc( dst ) );
8492   ins_pipe( ialu_reg );
8493 %}
8494 
8495 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8496   effect (USE_DEF dst, USE shift, KILL cr);
8497 
8498   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8500   ins_encode( RegOpcImm(dst, shift) );
8501   ins_pipe( ialu_reg );
8502 %}
8503 
8504 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8505   effect(USE_DEF dst, USE shift, KILL cr);
8506 
8507   format %{ "ROR    $dst, $shift" %}
8508   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8509   ins_encode(OpcP, RegOpc(dst));
8510   ins_pipe( ialu_reg_reg );
8511 %}
8512 // end of ROR expand
8513 
8514 // ROR right once
8515 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8516   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8517 
8518   expand %{
8519     rorI_eReg_imm1(dst, rshift, cr);
8520   %}
8521 %}
8522 
8523 // ROR 32bit by immI8 once
8524 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8525   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8526   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8527 
8528   expand %{
8529     rorI_eReg_imm8(dst, rshift, cr);
8530   %}
8531 %}
8532 
8533 // ROR 32bit var by var once
8534 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8535   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8536 
8537   expand %{
8538     rorI_eReg_CL(dst, shift, cr);
8539   %}
8540 %}
8541 
8542 // ROR 32bit var by var once
8543 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8544   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8545 
8546   expand %{
8547     rorI_eReg_CL(dst, shift, cr);
8548   %}
8549 %}
8550 
8551 // Xor Instructions
8552 // Xor Register with Register
8553 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8554   match(Set dst (XorI dst src));
8555   effect(KILL cr);
8556 
8557   size(2);
8558   format %{ "XOR    $dst,$src" %}
8559   opcode(0x33);
8560   ins_encode( OpcP, RegReg( dst, src) );
8561   ins_pipe( ialu_reg_reg );
8562 %}
8563 
8564 // Xor Register with Immediate -1
8565 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8566   match(Set dst (XorI dst imm));
8567 
8568   size(2);
8569   format %{ "NOT    $dst" %}
8570   ins_encode %{
8571      __ notl($dst$$Register);
8572   %}
8573   ins_pipe( ialu_reg );
8574 %}
8575 
8576 // Xor Register with Immediate
8577 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8578   match(Set dst (XorI dst src));
8579   effect(KILL cr);
8580 
8581   format %{ "XOR    $dst,$src" %}
8582   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8583   // ins_encode( RegImm( dst, src) );
8584   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8585   ins_pipe( ialu_reg );
8586 %}
8587 
8588 // Xor Register with Memory
8589 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8590   match(Set dst (XorI dst (LoadI src)));
8591   effect(KILL cr);
8592 
8593   ins_cost(125);
8594   format %{ "XOR    $dst,$src" %}
8595   opcode(0x33);
8596   ins_encode( OpcP, RegMem(dst, src) );
8597   ins_pipe( ialu_reg_mem );
8598 %}
8599 
8600 // Xor Memory with Register
8601 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8602   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8603   effect(KILL cr);
8604 
8605   ins_cost(150);
8606   format %{ "XOR    $dst,$src" %}
8607   opcode(0x31);  /* Opcode 31 /r */
8608   ins_encode( OpcP, RegMem( src, dst ) );
8609   ins_pipe( ialu_mem_reg );
8610 %}
8611 
8612 // Xor Memory with Immediate
8613 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8614   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8615   effect(KILL cr);
8616 
8617   ins_cost(125);
8618   format %{ "XOR    $dst,$src" %}
8619   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8620   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8621   ins_pipe( ialu_mem_imm );
8622 %}
8623 
8624 //----------Convert Int to Boolean---------------------------------------------
8625 
8626 instruct movI_nocopy(rRegI dst, rRegI src) %{
8627   effect( DEF dst, USE src );
8628   format %{ "MOV    $dst,$src" %}
8629   ins_encode( enc_Copy( dst, src) );
8630   ins_pipe( ialu_reg_reg );
8631 %}
8632 
8633 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8634   effect( USE_DEF dst, USE src, KILL cr );
8635 
8636   size(4);
8637   format %{ "NEG    $dst\n\t"
8638             "ADC    $dst,$src" %}
8639   ins_encode( neg_reg(dst),
8640               OpcRegReg(0x13,dst,src) );
8641   ins_pipe( ialu_reg_reg_long );
8642 %}
8643 
8644 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8645   match(Set dst (Conv2B src));
8646 
8647   expand %{
8648     movI_nocopy(dst,src);
8649     ci2b(dst,src,cr);
8650   %}
8651 %}
8652 
8653 instruct movP_nocopy(rRegI dst, eRegP src) %{
8654   effect( DEF dst, USE src );
8655   format %{ "MOV    $dst,$src" %}
8656   ins_encode( enc_Copy( dst, src) );
8657   ins_pipe( ialu_reg_reg );
8658 %}
8659 
8660 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8661   effect( USE_DEF dst, USE src, KILL cr );
8662   format %{ "NEG    $dst\n\t"
8663             "ADC    $dst,$src" %}
8664   ins_encode( neg_reg(dst),
8665               OpcRegReg(0x13,dst,src) );
8666   ins_pipe( ialu_reg_reg_long );
8667 %}
8668 
8669 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8670   match(Set dst (Conv2B src));
8671 
8672   expand %{
8673     movP_nocopy(dst,src);
8674     cp2b(dst,src,cr);
8675   %}
8676 %}
8677 
8678 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8679   match(Set dst (CmpLTMask p q));
8680   effect(KILL cr);
8681   ins_cost(400);
8682 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8684   format %{ "XOR    $dst,$dst\n\t"
8685             "CMP    $p,$q\n\t"
8686             "SETlt  $dst\n\t"
8687             "NEG    $dst" %}
8688   ins_encode %{
8689     Register Rp = $p$$Register;
8690     Register Rq = $q$$Register;
8691     Register Rd = $dst$$Register;
8693     __ xorl(Rd, Rd);
8694     __ cmpl(Rp, Rq);
8695     __ setb(Assembler::less, Rd);
8696     __ negl(Rd);
8697   %}
8698 
8699   ins_pipe(pipe_slow);
8700 %}
8701 
8702 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8703   match(Set dst (CmpLTMask dst zero));
8704   effect(DEF dst, KILL cr);
8705   ins_cost(100);
8706 
8707   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8708   ins_encode %{
8709   __ sarl($dst$$Register, 31);
8710   %}
8711   ins_pipe(ialu_reg);
8712 %}
8713 
8714 /* better to save a register than avoid a branch */
8715 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8716   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8717   effect(KILL cr);
8718   ins_cost(400);
8719   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8720             "JGE    done\n\t"
8721             "ADD    $p,$y\n"
8722             "done:  " %}
8723   ins_encode %{
8724     Register Rp = $p$$Register;
8725     Register Rq = $q$$Register;
8726     Register Ry = $y$$Register;
8727     Label done;
8728     __ subl(Rp, Rq);
8729     __ jccb(Assembler::greaterEqual, done);
8730     __ addl(Rp, Ry);
8731     __ bind(done);
8732   %}
8733 
8734   ins_pipe(pipe_cmplt);
8735 %}
8736 
8737 /* better to save a register than avoid a branch */
8738 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8739   match(Set y (AndI (CmpLTMask p q) y));
8740   effect(KILL cr);
8741 
8742   ins_cost(300);
8743 
8744   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8745             "JLT      done\n\t"
8746             "XORL     $y, $y\n"
8747             "done:  " %}
8748   ins_encode %{
8749     Register Rp = $p$$Register;
8750     Register Rq = $q$$Register;
8751     Register Ry = $y$$Register;
8752     Label done;
8753     __ cmpl(Rp, Rq);
8754     __ jccb(Assembler::less, done);
8755     __ xorl(Ry, Ry);
8756     __ bind(done);
8757   %}
8758 
8759   ins_pipe(pipe_cmplt);
8760 %}
8761 
8762 /* If I enable this, I encourage spilling in the inner loop of compress.
8763 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8764   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8765 */
8766 //----------Overflow Math Instructions-----------------------------------------
8767 
8768 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8769 %{
8770   match(Set cr (OverflowAddI op1 op2));
8771   effect(DEF cr, USE_KILL op1, USE op2);
8772 
8773   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8774 
8775   ins_encode %{
8776     __ addl($op1$$Register, $op2$$Register);
8777   %}
8778   ins_pipe(ialu_reg_reg);
8779 %}
8780 
8781 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8782 %{
8783   match(Set cr (OverflowAddI op1 op2));
8784   effect(DEF cr, USE_KILL op1, USE op2);
8785 
8786   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8787 
8788   ins_encode %{
8789     __ addl($op1$$Register, $op2$$constant);
8790   %}
8791   ins_pipe(ialu_reg_reg);
8792 %}
8793 
8794 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8795 %{
8796   match(Set cr (OverflowSubI op1 op2));
8797 
8798   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8799   ins_encode %{
8800     __ cmpl($op1$$Register, $op2$$Register);
8801   %}
8802   ins_pipe(ialu_reg_reg);
8803 %}
8804 
8805 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8806 %{
8807   match(Set cr (OverflowSubI op1 op2));
8808 
8809   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8810   ins_encode %{
8811     __ cmpl($op1$$Register, $op2$$constant);
8812   %}
8813   ins_pipe(ialu_reg_reg);
8814 %}
8815 
8816 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8817 %{
8818   match(Set cr (OverflowSubI zero op2));
8819   effect(DEF cr, USE_KILL op2);
8820 
8821   format %{ "NEG    $op2\t# overflow check int" %}
8822   ins_encode %{
8823     __ negl($op2$$Register);
8824   %}
8825   ins_pipe(ialu_reg_reg);
8826 %}
8827 
8828 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8829 %{
8830   match(Set cr (OverflowMulI op1 op2));
8831   effect(DEF cr, USE_KILL op1, USE op2);
8832 
8833   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8834   ins_encode %{
8835     __ imull($op1$$Register, $op2$$Register);
8836   %}
8837   ins_pipe(ialu_reg_reg_alu0);
8838 %}
8839 
8840 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8841 %{
8842   match(Set cr (OverflowMulI op1 op2));
8843   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8844 
8845   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8846   ins_encode %{
8847     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8848   %}
8849   ins_pipe(ialu_reg_reg_alu0);
8850 %}
8851 
8852 //----------Long Instructions------------------------------------------------
8853 // Add Long Register with Register
8854 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8855   match(Set dst (AddL dst src));
8856   effect(KILL cr);
8857   ins_cost(200);
8858   format %{ "ADD    $dst.lo,$src.lo\n\t"
8859             "ADC    $dst.hi,$src.hi" %}
8860   opcode(0x03, 0x13);
8861   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8862   ins_pipe( ialu_reg_reg_long );
8863 %}
8864 
8865 // Add Long Register with Immediate
8866 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8867   match(Set dst (AddL dst src));
8868   effect(KILL cr);
8869   format %{ "ADD    $dst.lo,$src.lo\n\t"
8870             "ADC    $dst.hi,$src.hi" %}
8871   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8872   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8873   ins_pipe( ialu_reg_long );
8874 %}
8875 
8876 // Add Long Register with Memory
8877 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8878   match(Set dst (AddL dst (LoadL mem)));
8879   effect(KILL cr);
8880   ins_cost(125);
8881   format %{ "ADD    $dst.lo,$mem\n\t"
8882             "ADC    $dst.hi,$mem+4" %}
8883   opcode(0x03, 0x13);
8884   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8885   ins_pipe( ialu_reg_long_mem );
8886 %}
8887 
8888 // Subtract Long Register with Register.
8889 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8890   match(Set dst (SubL dst src));
8891   effect(KILL cr);
8892   ins_cost(200);
8893   format %{ "SUB    $dst.lo,$src.lo\n\t"
8894             "SBB    $dst.hi,$src.hi" %}
8895   opcode(0x2B, 0x1B);
8896   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8897   ins_pipe( ialu_reg_reg_long );
8898 %}
8899 
8900 // Subtract Long Register with Immediate
8901 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8902   match(Set dst (SubL dst src));
8903   effect(KILL cr);
8904   format %{ "SUB    $dst.lo,$src.lo\n\t"
8905             "SBB    $dst.hi,$src.hi" %}
8906   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8907   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8908   ins_pipe( ialu_reg_long );
8909 %}
8910 
8911 // Subtract Long Register with Memory
8912 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8913   match(Set dst (SubL dst (LoadL mem)));
8914   effect(KILL cr);
8915   ins_cost(125);
8916   format %{ "SUB    $dst.lo,$mem\n\t"
8917             "SBB    $dst.hi,$mem+4" %}
8918   opcode(0x2B, 0x1B);
8919   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8920   ins_pipe( ialu_reg_long_mem );
8921 %}
8922 
8923 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8924   match(Set dst (SubL zero dst));
8925   effect(KILL cr);
8926   ins_cost(300);
8927   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8928   ins_encode( neg_long(dst) );
8929   ins_pipe( ialu_reg_reg_long );
8930 %}
8931 
8932 // And Long Register with Register
8933 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8934   match(Set dst (AndL dst src));
8935   effect(KILL cr);
8936   format %{ "AND    $dst.lo,$src.lo\n\t"
8937             "AND    $dst.hi,$src.hi" %}
8938   opcode(0x23,0x23);
8939   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8940   ins_pipe( ialu_reg_reg_long );
8941 %}
8942 
8943 // And Long Register with Immediate
8944 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8945   match(Set dst (AndL dst src));
8946   effect(KILL cr);
8947   format %{ "AND    $dst.lo,$src.lo\n\t"
8948             "AND    $dst.hi,$src.hi" %}
8949   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8950   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8951   ins_pipe( ialu_reg_long );
8952 %}
8953 
8954 // And Long Register with Memory
8955 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8956   match(Set dst (AndL dst (LoadL mem)));
8957   effect(KILL cr);
8958   ins_cost(125);
8959   format %{ "AND    $dst.lo,$mem\n\t"
8960             "AND    $dst.hi,$mem+4" %}
8961   opcode(0x23, 0x23);
8962   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8963   ins_pipe( ialu_reg_long_mem );
8964 %}
8965 
8966 // BMI1 instructions
8967 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8968   match(Set dst (AndL (XorL src1 minus_1) src2));
8969   predicate(UseBMI1Instructions);
8970   effect(KILL cr, TEMP dst);
8971 
8972   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8973             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8974          %}
8975 
8976   ins_encode %{
8977     Register Rdst = $dst$$Register;
8978     Register Rsrc1 = $src1$$Register;
8979     Register Rsrc2 = $src2$$Register;
8980     __ andnl(Rdst, Rsrc1, Rsrc2);
8981     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8982   %}
8983   ins_pipe(ialu_reg_reg_long);
8984 %}
8985 
8986 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8987   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8988   predicate(UseBMI1Instructions);
8989   effect(KILL cr, TEMP dst);
8990 
8991   ins_cost(125);
8992   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8993             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8994          %}
8995 
8996   ins_encode %{
8997     Register Rdst = $dst$$Register;
8998     Register Rsrc1 = $src1$$Register;
8999     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9000 
9001     __ andnl(Rdst, Rsrc1, $src2$$Address);
9002     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9003   %}
9004   ins_pipe(ialu_reg_mem);
9005 %}
9006 
9007 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9008   match(Set dst (AndL (SubL imm_zero src) src));
9009   predicate(UseBMI1Instructions);
9010   effect(KILL cr, TEMP dst);
9011 
9012   format %{ "MOVL   $dst.hi, 0\n\t"
9013             "BLSIL  $dst.lo, $src.lo\n\t"
9014             "JNZ    done\n\t"
9015             "BLSIL  $dst.hi, $src.hi\n"
9016             "done:"
9017          %}
9018 
9019   ins_encode %{
9020     Label done;
9021     Register Rdst = $dst$$Register;
9022     Register Rsrc = $src$$Register;
9023     __ movl(HIGH_FROM_LOW(Rdst), 0);
9024     __ blsil(Rdst, Rsrc);
9025     __ jccb(Assembler::notZero, done);
9026     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9027     __ bind(done);
9028   %}
9029   ins_pipe(ialu_reg);
9030 %}
9031 
9032 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9033   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9034   predicate(UseBMI1Instructions);
9035   effect(KILL cr, TEMP dst);
9036 
9037   ins_cost(125);
9038   format %{ "MOVL   $dst.hi, 0\n\t"
9039             "BLSIL  $dst.lo, $src\n\t"
9040             "JNZ    done\n\t"
9041             "BLSIL  $dst.hi, $src+4\n"
9042             "done:"
9043          %}
9044 
9045   ins_encode %{
9046     Label done;
9047     Register Rdst = $dst$$Register;
9048     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9049 
9050     __ movl(HIGH_FROM_LOW(Rdst), 0);
9051     __ blsil(Rdst, $src$$Address);
9052     __ jccb(Assembler::notZero, done);
9053     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9054     __ bind(done);
9055   %}
9056   ins_pipe(ialu_reg_mem);
9057 %}
9058 
9059 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9060 %{
9061   match(Set dst (XorL (AddL src minus_1) src));
9062   predicate(UseBMI1Instructions);
9063   effect(KILL cr, TEMP dst);
9064 
9065   format %{ "MOVL    $dst.hi, 0\n\t"
9066             "BLSMSKL $dst.lo, $src.lo\n\t"
9067             "JNC     done\n\t"
9068             "BLSMSKL $dst.hi, $src.hi\n"
9069             "done:"
9070          %}
9071 
9072   ins_encode %{
9073     Label done;
9074     Register Rdst = $dst$$Register;
9075     Register Rsrc = $src$$Register;
9076     __ movl(HIGH_FROM_LOW(Rdst), 0);
9077     __ blsmskl(Rdst, Rsrc);
9078     __ jccb(Assembler::carryClear, done);
9079     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9080     __ bind(done);
9081   %}
9082 
9083   ins_pipe(ialu_reg);
9084 %}
9085 
9086 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9087 %{
9088   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9089   predicate(UseBMI1Instructions);
9090   effect(KILL cr, TEMP dst);
9091 
9092   ins_cost(125);
9093   format %{ "MOVL    $dst.hi, 0\n\t"
9094             "BLSMSKL $dst.lo, $src\n\t"
9095             "JNC     done\n\t"
9096             "BLSMSKL $dst.hi, $src+4\n"
9097             "done:"
9098          %}
9099 
9100   ins_encode %{
9101     Label done;
9102     Register Rdst = $dst$$Register;
9103     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9104 
9105     __ movl(HIGH_FROM_LOW(Rdst), 0);
9106     __ blsmskl(Rdst, $src$$Address);
9107     __ jccb(Assembler::carryClear, done);
9108     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9109     __ bind(done);
9110   %}
9111 
9112   ins_pipe(ialu_reg_mem);
9113 %}
9114 
9115 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9116 %{
9117   match(Set dst (AndL (AddL src minus_1) src) );
9118   predicate(UseBMI1Instructions);
9119   effect(KILL cr, TEMP dst);
9120 
9121   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9122             "BLSRL  $dst.lo, $src.lo\n\t"
9123             "JNC    done\n\t"
9124             "BLSRL  $dst.hi, $src.hi\n"
9125             "done:"
9126   %}
9127 
9128   ins_encode %{
9129     Label done;
9130     Register Rdst = $dst$$Register;
9131     Register Rsrc = $src$$Register;
9132     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9133     __ blsrl(Rdst, Rsrc);
9134     __ jccb(Assembler::carryClear, done);
9135     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9136     __ bind(done);
9137   %}
9138 
9139   ins_pipe(ialu_reg);
9140 %}
9141 
9142 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9143 %{
9144   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9145   predicate(UseBMI1Instructions);
9146   effect(KILL cr, TEMP dst);
9147 
9148   ins_cost(125);
9149   format %{ "MOVL   $dst.hi, $src+4\n\t"
9150             "BLSRL  $dst.lo, $src\n\t"
9151             "JNC    done\n\t"
9152             "BLSRL  $dst.hi, $src+4\n"
9153             "done:"
9154   %}
9155 
9156   ins_encode %{
9157     Label done;
9158     Register Rdst = $dst$$Register;
9159     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9160     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9161     __ blsrl(Rdst, $src$$Address);
9162     __ jccb(Assembler::carryClear, done);
9163     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9164     __ bind(done);
9165   %}
9166 
9167   ins_pipe(ialu_reg_mem);
9168 %}
9169 
9170 // Or Long Register with Register
9171 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9172   match(Set dst (OrL dst src));
9173   effect(KILL cr);
9174   format %{ "OR     $dst.lo,$src.lo\n\t"
9175             "OR     $dst.hi,$src.hi" %}
9176   opcode(0x0B,0x0B);
9177   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9178   ins_pipe( ialu_reg_reg_long );
9179 %}
9180 
9181 // Or Long Register with Immediate
9182 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9183   match(Set dst (OrL dst src));
9184   effect(KILL cr);
9185   format %{ "OR     $dst.lo,$src.lo\n\t"
9186             "OR     $dst.hi,$src.hi" %}
9187   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9188   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9189   ins_pipe( ialu_reg_long );
9190 %}
9191 
9192 // Or Long Register with Memory
9193 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9194   match(Set dst (OrL dst (LoadL mem)));
9195   effect(KILL cr);
9196   ins_cost(125);
9197   format %{ "OR     $dst.lo,$mem\n\t"
9198             "OR     $dst.hi,$mem+4" %}
9199   opcode(0x0B,0x0B);
9200   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9201   ins_pipe( ialu_reg_long_mem );
9202 %}
9203 
9204 // Xor Long Register with Register
9205 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9206   match(Set dst (XorL dst src));
9207   effect(KILL cr);
9208   format %{ "XOR    $dst.lo,$src.lo\n\t"
9209             "XOR    $dst.hi,$src.hi" %}
9210   opcode(0x33,0x33);
9211   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9212   ins_pipe( ialu_reg_reg_long );
9213 %}
9214 
9215 // Xor Long Register with Immediate -1
9216 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9217   match(Set dst (XorL dst imm));
9218   format %{ "NOT    $dst.lo\n\t"
9219             "NOT    $dst.hi" %}
9220   ins_encode %{
9221      __ notl($dst$$Register);
9222      __ notl(HIGH_FROM_LOW($dst$$Register));
9223   %}
9224   ins_pipe( ialu_reg_long );
9225 %}
9226 
9227 // Xor Long Register with Immediate
9228 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9229   match(Set dst (XorL dst src));
9230   effect(KILL cr);
9231   format %{ "XOR    $dst.lo,$src.lo\n\t"
9232             "XOR    $dst.hi,$src.hi" %}
9233   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9234   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9235   ins_pipe( ialu_reg_long );
9236 %}
9237 
9238 // Xor Long Register with Memory
9239 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9240   match(Set dst (XorL dst (LoadL mem)));
9241   effect(KILL cr);
9242   ins_cost(125);
9243   format %{ "XOR    $dst.lo,$mem\n\t"
9244             "XOR    $dst.hi,$mem+4" %}
9245   opcode(0x33,0x33);
9246   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9247   ins_pipe( ialu_reg_long_mem );
9248 %}
9249 
9250 // Shift Left Long by 1
9251 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9252   predicate(UseNewLongLShift);
9253   match(Set dst (LShiftL dst cnt));
9254   effect(KILL cr);
9255   ins_cost(100);
9256   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9257             "ADC    $dst.hi,$dst.hi" %}
9258   ins_encode %{
9259     __ addl($dst$$Register,$dst$$Register);
9260     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9261   %}
9262   ins_pipe( ialu_reg_long );
9263 %}
9264 
9265 // Shift Left Long by 2
9266 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9267   predicate(UseNewLongLShift);
9268   match(Set dst (LShiftL dst cnt));
9269   effect(KILL cr);
9270   ins_cost(100);
9271   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9272             "ADC    $dst.hi,$dst.hi\n\t"
9273             "ADD    $dst.lo,$dst.lo\n\t"
9274             "ADC    $dst.hi,$dst.hi" %}
9275   ins_encode %{
9276     __ addl($dst$$Register,$dst$$Register);
9277     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9278     __ addl($dst$$Register,$dst$$Register);
9279     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9280   %}
9281   ins_pipe( ialu_reg_long );
9282 %}
9283 
9284 // Shift Left Long by 3
9285 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9286   predicate(UseNewLongLShift);
9287   match(Set dst (LShiftL dst cnt));
9288   effect(KILL cr);
9289   ins_cost(100);
9290   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9291             "ADC    $dst.hi,$dst.hi\n\t"
9292             "ADD    $dst.lo,$dst.lo\n\t"
9293             "ADC    $dst.hi,$dst.hi\n\t"
9294             "ADD    $dst.lo,$dst.lo\n\t"
9295             "ADC    $dst.hi,$dst.hi" %}
9296   ins_encode %{
9297     __ addl($dst$$Register,$dst$$Register);
9298     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9299     __ addl($dst$$Register,$dst$$Register);
9300     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9301     __ addl($dst$$Register,$dst$$Register);
9302     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9303   %}
9304   ins_pipe( ialu_reg_long );
9305 %}
9306 
9307 // Shift Left Long by 1-31
9308 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9309   match(Set dst (LShiftL dst cnt));
9310   effect(KILL cr);
9311   ins_cost(200);
9312   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9313             "SHL    $dst.lo,$cnt" %}
9314   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9315   ins_encode( move_long_small_shift(dst,cnt) );
9316   ins_pipe( ialu_reg_long );
9317 %}
9318 
9319 // Shift Left Long by 32-63
9320 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9321   match(Set dst (LShiftL dst cnt));
9322   effect(KILL cr);
9323   ins_cost(300);
9324   format %{ "MOV    $dst.hi,$dst.lo\n"
9325           "\tSHL    $dst.hi,$cnt-32\n"
9326           "\tXOR    $dst.lo,$dst.lo" %}
9327   opcode(0xC1, 0x4);  /* C1 /4 ib */
9328   ins_encode( move_long_big_shift_clr(dst,cnt) );
9329   ins_pipe( ialu_reg_long );
9330 %}
9331 
9332 // Shift Left Long by variable
9333 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9334   match(Set dst (LShiftL dst shift));
9335   effect(KILL cr);
9336   ins_cost(500+200);
9337   size(17);
9338   format %{ "TEST   $shift,32\n\t"
9339             "JEQ,s  small\n\t"
9340             "MOV    $dst.hi,$dst.lo\n\t"
9341             "XOR    $dst.lo,$dst.lo\n"
9342     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9343             "SHL    $dst.lo,$shift" %}
9344   ins_encode( shift_left_long( dst, shift ) );
9345   ins_pipe( pipe_slow );
9346 %}
9347 
9348 // Shift Right Long by 1-31
9349 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9350   match(Set dst (URShiftL dst cnt));
9351   effect(KILL cr);
9352   ins_cost(200);
9353   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9354             "SHR    $dst.hi,$cnt" %}
9355   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9356   ins_encode( move_long_small_shift(dst,cnt) );
9357   ins_pipe( ialu_reg_long );
9358 %}
9359 
9360 // Shift Right Long by 32-63
9361 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9362   match(Set dst (URShiftL dst cnt));
9363   effect(KILL cr);
9364   ins_cost(300);
9365   format %{ "MOV    $dst.lo,$dst.hi\n"
9366           "\tSHR    $dst.lo,$cnt-32\n"
9367           "\tXOR    $dst.hi,$dst.hi" %}
9368   opcode(0xC1, 0x5);  /* C1 /5 ib */
9369   ins_encode( move_long_big_shift_clr(dst,cnt) );
9370   ins_pipe( ialu_reg_long );
9371 %}
9372 
9373 // Shift Right Long by variable
9374 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9375   match(Set dst (URShiftL dst shift));
9376   effect(KILL cr);
9377   ins_cost(600);
9378   size(17);
9379   format %{ "TEST   $shift,32\n\t"
9380             "JEQ,s  small\n\t"
9381             "MOV    $dst.lo,$dst.hi\n\t"
9382             "XOR    $dst.hi,$dst.hi\n"
9383     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9384             "SHR    $dst.hi,$shift" %}
9385   ins_encode( shift_right_long( dst, shift ) );
9386   ins_pipe( pipe_slow );
9387 %}
9388 
9389 // Shift Right Long by 1-31
9390 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9391   match(Set dst (RShiftL dst cnt));
9392   effect(KILL cr);
9393   ins_cost(200);
9394   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9395             "SAR    $dst.hi,$cnt" %}
9396   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9397   ins_encode( move_long_small_shift(dst,cnt) );
9398   ins_pipe( ialu_reg_long );
9399 %}
9400 
9401 // Shift Right Long by 32-63
9402 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9403   match(Set dst (RShiftL dst cnt));
9404   effect(KILL cr);
9405   ins_cost(300);
9406   format %{ "MOV    $dst.lo,$dst.hi\n"
9407           "\tSAR    $dst.lo,$cnt-32\n"
9408           "\tSAR    $dst.hi,31" %}
9409   opcode(0xC1, 0x7);  /* C1 /7 ib */
9410   ins_encode( move_long_big_shift_sign(dst,cnt) );
9411   ins_pipe( ialu_reg_long );
9412 %}
9413 
9414 // Shift Right arithmetic Long by variable
9415 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9416   match(Set dst (RShiftL dst shift));
9417   effect(KILL cr);
9418   ins_cost(600);
9419   size(18);
9420   format %{ "TEST   $shift,32\n\t"
9421             "JEQ,s  small\n\t"
9422             "MOV    $dst.lo,$dst.hi\n\t"
9423             "SAR    $dst.hi,31\n"
9424     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9425             "SAR    $dst.hi,$shift" %}
9426   ins_encode( shift_right_arith_long( dst, shift ) );
9427   ins_pipe( pipe_slow );
9428 %}
9429 
9430 
9431 //----------Double Instructions------------------------------------------------
9432 // Double Math
9433 
9434 // Compare & branch
9435 
// P6 version of double compare, sets condition codes in EFLAGS
9437 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9438   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9439   match(Set cr (CmpD src1 src2));
9440   effect(KILL rax);
9441   ins_cost(150);
9442   format %{ "FLD    $src1\n\t"
9443             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9444             "JNP    exit\n\t"
9445             "MOV    ah,1       // saw a NaN, set CF\n\t"
9446             "SAHF\n"
9447      "exit:\tNOP               // avoid branch to branch" %}
9448   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9449   ins_encode( Push_Reg_DPR(src1),
9450               OpcP, RegOpc(src2),
9451               cmpF_P6_fixup );
9452   ins_pipe( pipe_slow );
9453 %}
9454 
9455 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9456   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9457   match(Set cr (CmpD src1 src2));
9458   ins_cost(150);
9459   format %{ "FLD    $src1\n\t"
9460             "FUCOMIP ST,$src2  // P6 instruction" %}
9461   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9462   ins_encode( Push_Reg_DPR(src1),
9463               OpcP, RegOpc(src2));
9464   ins_pipe( pipe_slow );
9465 %}
9466 
9467 // Compare & branch
9468 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9469   predicate(UseSSE<=1);
9470   match(Set cr (CmpD src1 src2));
9471   effect(KILL rax);
9472   ins_cost(200);
9473   format %{ "FLD    $src1\n\t"
9474             "FCOMp  $src2\n\t"
9475             "FNSTSW AX\n\t"
9476             "TEST   AX,0x400\n\t"
9477             "JZ,s   flags\n\t"
9478             "MOV    AH,1\t# unordered treat as LT\n"
9479     "flags:\tSAHF" %}
9480   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9481   ins_encode( Push_Reg_DPR(src1),
9482               OpcP, RegOpc(src2),
9483               fpu_flags);
9484   ins_pipe( pipe_slow );
9485 %}
9486 
9487 // Compare vs zero into -1,0,1
9488 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9489   predicate(UseSSE<=1);
9490   match(Set dst (CmpD3 src1 zero));
9491   effect(KILL cr, KILL rax);
9492   ins_cost(280);
9493   format %{ "FTSTD  $dst,$src1" %}
9494   opcode(0xE4, 0xD9);
9495   ins_encode( Push_Reg_DPR(src1),
9496               OpcS, OpcP, PopFPU,
9497               CmpF_Result(dst));
9498   ins_pipe( pipe_slow );
9499 %}
9500 
9501 // Compare into -1,0,1
9502 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9503   predicate(UseSSE<=1);
9504   match(Set dst (CmpD3 src1 src2));
9505   effect(KILL cr, KILL rax);
9506   ins_cost(300);
9507   format %{ "FCMPD  $dst,$src1,$src2" %}
9508   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9509   ins_encode( Push_Reg_DPR(src1),
9510               OpcP, RegOpc(src2),
9511               CmpF_Result(dst));
9512   ins_pipe( pipe_slow );
9513 %}
9514 
9515 // double compare and set condition codes in EFLAGS by XMM regs
9516 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9517   predicate(UseSSE>=2);
9518   match(Set cr (CmpD src1 src2));
9519   ins_cost(145);
9520   format %{ "UCOMISD $src1,$src2\n\t"
9521             "JNP,s   exit\n\t"
9522             "PUSHF\t# saw NaN, set CF\n\t"
9523             "AND     [rsp], #0xffffff2b\n\t"
9524             "POPF\n"
9525     "exit:" %}
9526   ins_encode %{
9527     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9528     emit_cmpfp_fixup(_masm);
9529   %}
9530   ins_pipe( pipe_slow );
9531 %}
9532 
9533 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9534   predicate(UseSSE>=2);
9535   match(Set cr (CmpD src1 src2));
9536   ins_cost(100);
9537   format %{ "UCOMISD $src1,$src2" %}
9538   ins_encode %{
9539     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9540   %}
9541   ins_pipe( pipe_slow );
9542 %}
9543 
9544 // double compare and set condition codes in EFLAGS by XMM regs
9545 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9546   predicate(UseSSE>=2);
9547   match(Set cr (CmpD src1 (LoadD src2)));
9548   ins_cost(145);
9549   format %{ "UCOMISD $src1,$src2\n\t"
9550             "JNP,s   exit\n\t"
9551             "PUSHF\t# saw NaN, set CF\n\t"
9552             "AND     [rsp], #0xffffff2b\n\t"
9553             "POPF\n"
9554     "exit:" %}
9555   ins_encode %{
9556     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9557     emit_cmpfp_fixup(_masm);
9558   %}
9559   ins_pipe( pipe_slow );
9560 %}
9561 
9562 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9563   predicate(UseSSE>=2);
9564   match(Set cr (CmpD src1 (LoadD src2)));
9565   ins_cost(100);
9566   format %{ "UCOMISD $src1,$src2" %}
9567   ins_encode %{
9568     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9569   %}
9570   ins_pipe( pipe_slow );
9571 %}
9572 
9573 // Compare into -1,0,1 in XMM
9574 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9575   predicate(UseSSE>=2);
9576   match(Set dst (CmpD3 src1 src2));
9577   effect(KILL cr);
9578   ins_cost(255);
9579   format %{ "UCOMISD $src1, $src2\n\t"
9580             "MOV     $dst, #-1\n\t"
9581             "JP,s    done\n\t"
9582             "JB,s    done\n\t"
9583             "SETNE   $dst\n\t"
9584             "MOVZB   $dst, $dst\n"
9585     "done:" %}
9586   ins_encode %{
9587     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9588     emit_cmpfp3(_masm, $dst$$Register);
9589   %}
9590   ins_pipe( pipe_slow );
9591 %}
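// For reference, a minimal C sketch of the -1/0/1 mapping produced by the
// UCOMISD/SETNE sequence above (unordered and "less than" both yield -1);
// illustrative only, not the emitted code:
//
//   int cmp3(double a, double b) {
//     if (a != a || b != b) return -1;   // unordered (NaN) -> -1
//     if (a < b)            return -1;
//     if (a > b)            return  1;
//     return 0;                          // equal
//   }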
9592 
9593 // Compare into -1,0,1 in XMM and memory
9594 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9595   predicate(UseSSE>=2);
9596   match(Set dst (CmpD3 src1 (LoadD src2)));
9597   effect(KILL cr);
9598   ins_cost(275);
9599   format %{ "UCOMISD $src1, $src2\n\t"
9600             "MOV     $dst, #-1\n\t"
9601             "JP,s    done\n\t"
9602             "JB,s    done\n\t"
9603             "SETNE   $dst\n\t"
9604             "MOVZB   $dst, $dst\n"
9605     "done:" %}
9606   ins_encode %{
9607     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9608     emit_cmpfp3(_masm, $dst$$Register);
9609   %}
9610   ins_pipe( pipe_slow );
9611 %}
9612 
9613 
9614 instruct subDPR_reg(regDPR dst, regDPR src) %{
9615   predicate (UseSSE <=1);
9616   match(Set dst (SubD dst src));
9617 
9618   format %{ "FLD    $src\n\t"
9619             "DSUBp  $dst,ST" %}
9620   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9621   ins_cost(150);
9622   ins_encode( Push_Reg_DPR(src),
9623               OpcP, RegOpc(dst) );
9624   ins_pipe( fpu_reg_reg );
9625 %}
9626 
9627 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9628   predicate (UseSSE <=1);
9629   match(Set dst (RoundDouble (SubD src1 src2)));
9630   ins_cost(250);
9631 
9632   format %{ "FLD    $src2\n\t"
9633             "DSUB   ST,$src1\n\t"
9634             "FSTP_D $dst\t# D-round" %}
9635   opcode(0xD8, 0x5);
9636   ins_encode( Push_Reg_DPR(src2),
9637               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9638   ins_pipe( fpu_mem_reg_reg );
9639 %}
9640 
9641 
9642 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9643   predicate (UseSSE <=1);
9644   match(Set dst (SubD dst (LoadD src)));
9645   ins_cost(150);
9646 
9647   format %{ "FLD    $src\n\t"
9648             "DSUBp  $dst,ST" %}
9649   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9650   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9651               OpcP, RegOpc(dst) );
9652   ins_pipe( fpu_reg_mem );
9653 %}
9654 
9655 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9656   predicate (UseSSE<=1);
9657   match(Set dst (AbsD src));
9658   ins_cost(100);
9659   format %{ "FABS" %}
9660   opcode(0xE1, 0xD9);
9661   ins_encode( OpcS, OpcP );
9662   ins_pipe( fpu_reg_reg );
9663 %}
9664 
9665 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9666   predicate(UseSSE<=1);
9667   match(Set dst (NegD src));
9668   ins_cost(100);
9669   format %{ "FCHS" %}
9670   opcode(0xE0, 0xD9);
9671   ins_encode( OpcS, OpcP );
9672   ins_pipe( fpu_reg_reg );
9673 %}
9674 
9675 instruct addDPR_reg(regDPR dst, regDPR src) %{
9676   predicate(UseSSE<=1);
9677   match(Set dst (AddD dst src));
9678   format %{ "FLD    $src\n\t"
9679             "DADD   $dst,ST" %}
9680   size(4);
9681   ins_cost(150);
9682   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9683   ins_encode( Push_Reg_DPR(src),
9684               OpcP, RegOpc(dst) );
9685   ins_pipe( fpu_reg_reg );
9686 %}
9687 
9688 
9689 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9690   predicate(UseSSE<=1);
9691   match(Set dst (RoundDouble (AddD src1 src2)));
9692   ins_cost(250);
9693 
9694   format %{ "FLD    $src2\n\t"
9695             "DADD   ST,$src1\n\t"
9696             "FSTP_D $dst\t# D-round" %}
9697   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9698   ins_encode( Push_Reg_DPR(src2),
9699               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9700   ins_pipe( fpu_mem_reg_reg );
9701 %}
9702 
9703 
9704 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9705   predicate(UseSSE<=1);
9706   match(Set dst (AddD dst (LoadD src)));
9707   ins_cost(150);
9708 
9709   format %{ "FLD    $src\n\t"
9710             "DADDp  $dst,ST" %}
9711   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9712   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9713               OpcP, RegOpc(dst) );
9714   ins_pipe( fpu_reg_mem );
9715 %}
9716 
9717 // add-to-memory
9718 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9719   predicate(UseSSE<=1);
9720   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9721   ins_cost(150);
9722 
9723   format %{ "FLD_D  $dst\n\t"
9724             "DADD   ST,$src\n\t"
9725             "FST_D  $dst" %}
9726   opcode(0xDD, 0x0);
9727   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9728               Opcode(0xD8), RegOpc(src),
9729               set_instruction_start,
9730               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9731   ins_pipe( fpu_reg_mem );
9732 %}
9733 
9734 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9735   predicate(UseSSE<=1);
9736   match(Set dst (AddD dst con));
9737   ins_cost(125);
9738   format %{ "FLD1\n\t"
9739             "DADDp  $dst,ST" %}
9740   ins_encode %{
9741     __ fld1();
9742     __ faddp($dst$$reg);
9743   %}
9744   ins_pipe(fpu_reg);
9745 %}
9746 
9747 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9748   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9749   match(Set dst (AddD dst con));
9750   ins_cost(200);
9751   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9752             "DADDp  $dst,ST" %}
9753   ins_encode %{
9754     __ fld_d($constantaddress($con));
9755     __ faddp($dst$$reg);
9756   %}
9757   ins_pipe(fpu_reg_mem);
9758 %}
9759 
9760 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9761   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9762   match(Set dst (RoundDouble (AddD src con)));
9763   ins_cost(200);
9764   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9765             "DADD   ST,$src\n\t"
9766             "FSTP_D $dst\t# D-round" %}
9767   ins_encode %{
9768     __ fld_d($constantaddress($con));
9769     __ fadd($src$$reg);
9770     __ fstp_d(Address(rsp, $dst$$disp));
9771   %}
9772   ins_pipe(fpu_mem_reg_con);
9773 %}
9774 
9775 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9776   predicate(UseSSE<=1);
9777   match(Set dst (MulD dst src));
9778   format %{ "FLD    $src\n\t"
9779             "DMULp  $dst,ST" %}
9780   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9781   ins_cost(150);
9782   ins_encode( Push_Reg_DPR(src),
9783               OpcP, RegOpc(dst) );
9784   ins_pipe( fpu_reg_reg );
9785 %}
9786 
9787 // Strict FP instruction biases argument before multiply then
9788 // biases result to avoid double rounding of subnormals.
9789 //
9790 // scale arg1 by multiplying arg1 by 2^(-15360)
9791 // load arg2
9792 // multiply scaled arg1 by arg2
9793 // rescale product by 2^(15360)
9794 //
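// As a hedged illustration of the biasing idea (not the emitted code), the same
// computation written in C, assuming long double carries the x87 extended
// range and ldexpl from <math.h>:
//
//   long double strict_mul(double arg1, double arg2) {
//     long double t = ldexpl((long double)arg1, -15360);  // scale arg1 down
//     t *= arg2;                                          // multiply by arg2
//     return ldexpl(t, 15360);                            // rescale the product
//   }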
9795 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9796   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9797   match(Set dst (MulD dst src));
9798   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9799 
9800   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9801             "DMULp  $dst,ST\n\t"
9802             "FLD    $src\n\t"
9803             "DMULp  $dst,ST\n\t"
9804             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9805             "DMULp  $dst,ST\n\t" %}
9806   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9807   ins_encode( strictfp_bias1(dst),
9808               Push_Reg_DPR(src),
9809               OpcP, RegOpc(dst),
9810               strictfp_bias2(dst) );
9811   ins_pipe( fpu_reg_reg );
9812 %}
9813 
9814 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9815   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9816   match(Set dst (MulD dst con));
9817   ins_cost(200);
9818   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9819             "DMULp  $dst,ST" %}
9820   ins_encode %{
9821     __ fld_d($constantaddress($con));
9822     __ fmulp($dst$$reg);
9823   %}
9824   ins_pipe(fpu_reg_mem);
9825 %}
9826 
9827 
9828 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9829   predicate( UseSSE<=1 );
9830   match(Set dst (MulD dst (LoadD src)));
9831   ins_cost(200);
9832   format %{ "FLD_D  $src\n\t"
9833             "DMULp  $dst,ST" %}
9834   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9835   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9836               OpcP, RegOpc(dst) );
9837   ins_pipe( fpu_reg_mem );
9838 %}
9839 
9840 //
9841 // Cisc-alternate to reg-reg multiply
9842 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9843   predicate( UseSSE<=1 );
9844   match(Set dst (MulD src (LoadD mem)));
9845   ins_cost(250);
9846   format %{ "FLD_D  $mem\n\t"
9847             "DMUL   ST,$src\n\t"
9848             "FSTP_D $dst" %}
9849   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9850   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9851               OpcReg_FPR(src),
9852               Pop_Reg_DPR(dst) );
9853   ins_pipe( fpu_reg_reg_mem );
9854 %}
9855 
9856 
9857 // MACRO3 -- addDPR a mulDPR
9858 // This instruction is a '2-address' instruction in that the result goes
9859 // back to src2.  This eliminates a move from the macro; possibly the
9860 // register allocator will have to add it back (and maybe not).
9861 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9862   predicate( UseSSE<=1 );
9863   match(Set src2 (AddD (MulD src0 src1) src2));
9864   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9865             "DMUL   ST,$src1\n\t"
9866             "DADDp  $src2,ST" %}
9867   ins_cost(250);
9868   opcode(0xDD); /* LoadD DD /0 */
9869   ins_encode( Push_Reg_FPR(src0),
9870               FMul_ST_reg(src1),
9871               FAddP_reg_ST(src2) );
9872   ins_pipe( fpu_reg_reg_reg );
9873 %}
9874 
9875 
9876 // MACRO3 -- subDPR a mulDPR
9877 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9878   predicate( UseSSE<=1 );
9879   match(Set src2 (SubD (MulD src0 src1) src2));
9880   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9881             "DMUL   ST,$src1\n\t"
9882             "DSUBRp $src2,ST" %}
9883   ins_cost(250);
9884   ins_encode( Push_Reg_FPR(src0),
9885               FMul_ST_reg(src1),
9886               Opcode(0xDE), Opc_plus(0xE0,src2));
9887   ins_pipe( fpu_reg_reg_reg );
9888 %}
9889 
9890 
9891 instruct divDPR_reg(regDPR dst, regDPR src) %{
9892   predicate( UseSSE<=1 );
9893   match(Set dst (DivD dst src));
9894 
9895   format %{ "FLD    $src\n\t"
9896             "FDIVp  $dst,ST" %}
9897   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9898   ins_cost(150);
9899   ins_encode( Push_Reg_DPR(src),
9900               OpcP, RegOpc(dst) );
9901   ins_pipe( fpu_reg_reg );
9902 %}
9903 
9904 // Strict FP instruction biases argument before division then
9905 // biases result, to avoid double rounding of subnormals.
9906 //
9907 // scale dividend by multiplying dividend by 2^(-15360)
9908 // load divisor
9909 // divide scaled dividend by divisor
9910 // rescale quotient by 2^(15360)
9911 //
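// The divide case, sketched the same way as the strict multiply above (assumes
// x87 extended range for long double; illustrative only):
//
//   long double strict_div(double dividend, double divisor) {
//     long double t = ldexpl((long double)dividend, -15360); // bias dividend
//     t /= divisor;                                          // divide
//     return ldexpl(t, 15360);                               // rescale quotient
//   }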
9912 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9913   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9914   match(Set dst (DivD dst src));
9916   ins_cost(1);   // Select this instruction for all strict FP double divides
9917 
9918   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9919             "DMULp  $dst,ST\n\t"
9920             "FLD    $src\n\t"
9921             "FDIVp  $dst,ST\n\t"
9922             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9923             "DMULp  $dst,ST\n\t" %}
9924   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9925   ins_encode( strictfp_bias1(dst),
9926               Push_Reg_DPR(src),
9927               OpcP, RegOpc(dst),
9928               strictfp_bias2(dst) );
9929   ins_pipe( fpu_reg_reg );
9930 %}
9931 
9932 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9933   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9934   match(Set dst (RoundDouble (DivD src1 src2)));
9935 
9936   format %{ "FLD    $src1\n\t"
9937             "FDIV   ST,$src2\n\t"
9938             "FSTP_D $dst\t# D-round" %}
9939   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9940   ins_encode( Push_Reg_DPR(src1),
9941               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9942   ins_pipe( fpu_mem_reg_reg );
9943 %}
9944 
9945 
9946 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9947   predicate(UseSSE<=1);
9948   match(Set dst (ModD dst src));
9949   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9950 
9951   format %{ "DMOD   $dst,$src" %}
9952   ins_cost(250);
9953   ins_encode(Push_Reg_Mod_DPR(dst, src),
9954               emitModDPR(),
9955               Push_Result_Mod_DPR(src),
9956               Pop_Reg_DPR(dst));
9957   ins_pipe( pipe_slow );
9958 %}
9959 
9960 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9961   predicate(UseSSE>=2);
9962   match(Set dst (ModD src0 src1));
9963   effect(KILL rax, KILL cr);
9964 
9965   format %{ "SUB    ESP,8\t # DMOD\n"
9966           "\tMOVSD  [ESP+0],$src1\n"
9967           "\tFLD_D  [ESP+0]\n"
9968           "\tMOVSD  [ESP+0],$src0\n"
9969           "\tFLD_D  [ESP+0]\n"
9970      "loop:\tFPREM\n"
9971           "\tFWAIT\n"
9972           "\tFNSTSW AX\n"
9973           "\tSAHF\n"
9974           "\tJP     loop\n"
9975           "\tFSTP_D [ESP+0]\n"
9976           "\tMOVSD  $dst,[ESP+0]\n"
9977           "\tADD    ESP,8\n"
9978           "\tFSTP   ST0\t # Restore FPU Stack"
9979     %}
9980   ins_cost(250);
9981   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9982   ins_pipe( pipe_slow );
9983 %}
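// For reference, the FPREM loop above computes the truncating-division
// remainder (sign follows the dividend), i.e. the same value C's fmod() from
// <math.h> returns -- a sketch of the semantics, not the emitted code:
//
//   double java_drem(double x, double y) { return fmod(x, y); }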
9984 
9985 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9986   predicate (UseSSE<=1);
9987   match(Set dst (AtanD dst src));
9988   format %{ "DATA   $dst,$src" %}
9989   opcode(0xD9, 0xF3);
9990   ins_encode( Push_Reg_DPR(src),
9991               OpcP, OpcS, RegOpc(dst) );
9992   ins_pipe( pipe_slow );
9993 %}
9994 
9995 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9996   predicate (UseSSE>=2);
9997   match(Set dst (AtanD dst src));
9998   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9999   format %{ "DATA   $dst,$src" %}
10000   opcode(0xD9, 0xF3);
10001   ins_encode( Push_SrcD(src),
10002               OpcP, OpcS, Push_ResultD(dst) );
10003   ins_pipe( pipe_slow );
10004 %}
10005 
10006 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10007   predicate (UseSSE<=1);
10008   match(Set dst (SqrtD src));
10009   format %{ "DSQRT  $dst,$src" %}
10010   opcode(0xFA, 0xD9);
10011   ins_encode( Push_Reg_DPR(src),
10012               OpcS, OpcP, Pop_Reg_DPR(dst) );
10013   ins_pipe( pipe_slow );
10014 %}
10015 
10016 //----------Float Instructions-------------------------------------------------
10017 // Float Math
10018 
10019 // Code for float compare:
10020 //     fcompp();
10021 //     fwait(); fnstsw_ax();
10022 //     sahf();
10023 //     movl(dst, unordered_result);
10024 //     jcc(Assembler::parity, exit);
10025 //     movl(dst, less_result);
10026 //     jcc(Assembler::below, exit);
10027 //     movl(dst, equal_result);
10028 //     jcc(Assembler::equal, exit);
10029 //     movl(dst, greater_result);
10030 //   exit:
10031 
10032 // P6 version of float compare, sets condition codes in EFLAGS
10033 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10034   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10035   match(Set cr (CmpF src1 src2));
10036   effect(KILL rax);
10037   ins_cost(150);
10038   format %{ "FLD    $src1\n\t"
10039             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10040             "JNP    exit\n\t"
10041             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10042             "SAHF\n"
10043      "exit:\tNOP               // avoid branch to branch" %}
10044   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10045   ins_encode( Push_Reg_DPR(src1),
10046               OpcP, RegOpc(src2),
10047               cmpF_P6_fixup );
10048   ins_pipe( pipe_slow );
10049 %}
10050 
10051 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10052   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10053   match(Set cr (CmpF src1 src2));
10054   ins_cost(100);
10055   format %{ "FLD    $src1\n\t"
10056             "FUCOMIP ST,$src2  // P6 instruction" %}
10057   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10058   ins_encode( Push_Reg_DPR(src1),
10059               OpcP, RegOpc(src2));
10060   ins_pipe( pipe_slow );
10061 %}
10062 
10063 
10064 // Compare & branch
10065 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10066   predicate(UseSSE == 0);
10067   match(Set cr (CmpF src1 src2));
10068   effect(KILL rax);
10069   ins_cost(200);
10070   format %{ "FLD    $src1\n\t"
10071             "FCOMp  $src2\n\t"
10072             "FNSTSW AX\n\t"
10073             "TEST   AX,0x400\n\t"
10074             "JZ,s   flags\n\t"
10075             "MOV    AH,1\t# unordered treat as LT\n"
10076     "flags:\tSAHF" %}
10077   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10078   ins_encode( Push_Reg_DPR(src1),
10079               OpcP, RegOpc(src2),
10080               fpu_flags);
10081   ins_pipe( pipe_slow );
10082 %}
10083 
10084 // Compare vs zero into -1,0,1
10085 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10086   predicate(UseSSE == 0);
10087   match(Set dst (CmpF3 src1 zero));
10088   effect(KILL cr, KILL rax);
10089   ins_cost(280);
10090   format %{ "FTSTF  $dst,$src1" %}
10091   opcode(0xE4, 0xD9);
10092   ins_encode( Push_Reg_DPR(src1),
10093               OpcS, OpcP, PopFPU,
10094               CmpF_Result(dst));
10095   ins_pipe( pipe_slow );
10096 %}
10097 
10098 // Compare into -1,0,1
10099 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10100   predicate(UseSSE == 0);
10101   match(Set dst (CmpF3 src1 src2));
10102   effect(KILL cr, KILL rax);
10103   ins_cost(300);
10104   format %{ "FCMPF  $dst,$src1,$src2" %}
10105   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10106   ins_encode( Push_Reg_DPR(src1),
10107               OpcP, RegOpc(src2),
10108               CmpF_Result(dst));
10109   ins_pipe( pipe_slow );
10110 %}
10111 
10112 // float compare and set condition codes in EFLAGS by XMM regs
10113 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10114   predicate(UseSSE>=1);
10115   match(Set cr (CmpF src1 src2));
10116   ins_cost(145);
10117   format %{ "UCOMISS $src1,$src2\n\t"
10118             "JNP,s   exit\n\t"
10119             "PUSHF\t# saw NaN, set CF\n\t"
10120             "AND     [rsp], #0xffffff2b\n\t"
10121             "POPF\n"
10122     "exit:" %}
10123   ins_encode %{
10124     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10125     emit_cmpfp_fixup(_masm);
10126   %}
10127   ins_pipe( pipe_slow );
10128 %}
10129 
10130 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10131   predicate(UseSSE>=1);
10132   match(Set cr (CmpF src1 src2));
10133   ins_cost(100);
10134   format %{ "UCOMISS $src1,$src2" %}
10135   ins_encode %{
10136     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10137   %}
10138   ins_pipe( pipe_slow );
10139 %}
10140 
10141 // float compare and set condition codes in EFLAGS by XMM regs
10142 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10143   predicate(UseSSE>=1);
10144   match(Set cr (CmpF src1 (LoadF src2)));
10145   ins_cost(165);
10146   format %{ "UCOMISS $src1,$src2\n\t"
10147             "JNP,s   exit\n\t"
10148             "PUSHF\t# saw NaN, set CF\n\t"
10149             "AND     [rsp], #0xffffff2b\n\t"
10150             "POPF\n"
10151     "exit:" %}
10152   ins_encode %{
10153     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10154     emit_cmpfp_fixup(_masm);
10155   %}
10156   ins_pipe( pipe_slow );
10157 %}
10158 
10159 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10160   predicate(UseSSE>=1);
10161   match(Set cr (CmpF src1 (LoadF src2)));
10162   ins_cost(100);
10163   format %{ "UCOMISS $src1,$src2" %}
10164   ins_encode %{
10165     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10166   %}
10167   ins_pipe( pipe_slow );
10168 %}
10169 
10170 // Compare into -1,0,1 in XMM
10171 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10172   predicate(UseSSE>=1);
10173   match(Set dst (CmpF3 src1 src2));
10174   effect(KILL cr);
10175   ins_cost(255);
10176   format %{ "UCOMISS $src1, $src2\n\t"
10177             "MOV     $dst, #-1\n\t"
10178             "JP,s    done\n\t"
10179             "JB,s    done\n\t"
10180             "SETNE   $dst\n\t"
10181             "MOVZB   $dst, $dst\n"
10182     "done:" %}
10183   ins_encode %{
10184     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10185     emit_cmpfp3(_masm, $dst$$Register);
10186   %}
10187   ins_pipe( pipe_slow );
10188 %}
10189 
10190 // Compare into -1,0,1 in XMM and memory
10191 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10192   predicate(UseSSE>=1);
10193   match(Set dst (CmpF3 src1 (LoadF src2)));
10194   effect(KILL cr);
10195   ins_cost(275);
10196   format %{ "UCOMISS $src1, $src2\n\t"
10197             "MOV     $dst, #-1\n\t"
10198             "JP,s    done\n\t"
10199             "JB,s    done\n\t"
10200             "SETNE   $dst\n\t"
10201             "MOVZB   $dst, $dst\n"
10202     "done:" %}
10203   ins_encode %{
10204     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10205     emit_cmpfp3(_masm, $dst$$Register);
10206   %}
10207   ins_pipe( pipe_slow );
10208 %}
10209 
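// A hedged note on the "Spill to obtain 24-bit precision" pattern used by the
// FPR instructions below: the x87 keeps results in a wider-than-float format,
// so the FSTP_S into a 32-bit stack slot is the step that rounds them to a
// 24-bit significand.  As a loose C analogy (not the emitted code), the cast
// models the narrowing store:
//
//   float sub24(float a, float b) {
//     double t = (double)a - (double)b;  // computed wider than float
//     return (float)t;                   // FSTP_S: round to float precision
//   }
//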
10210 // Spill to obtain 24-bit precision
10211 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10212   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10213   match(Set dst (SubF src1 src2));
10214 
10215   format %{ "FSUB   $dst,$src1 - $src2" %}
10216   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10217   ins_encode( Push_Reg_FPR(src1),
10218               OpcReg_FPR(src2),
10219               Pop_Mem_FPR(dst) );
10220   ins_pipe( fpu_mem_reg_reg );
10221 %}
10222 //
10223 // This instruction does not round to 24-bits
10224 instruct subFPR_reg(regFPR dst, regFPR src) %{
10225   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10226   match(Set dst (SubF dst src));
10227 
10228   format %{ "FSUB   $dst,$src" %}
10229   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10230   ins_encode( Push_Reg_FPR(src),
10231               OpcP, RegOpc(dst) );
10232   ins_pipe( fpu_reg_reg );
10233 %}
10234 
10235 // Spill to obtain 24-bit precision
10236 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10237   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10238   match(Set dst (AddF src1 src2));
10239 
10240   format %{ "FADD   $dst,$src1,$src2" %}
10241   opcode(0xD8, 0x0); /* D8 C0+i */
10242   ins_encode( Push_Reg_FPR(src2),
10243               OpcReg_FPR(src1),
10244               Pop_Mem_FPR(dst) );
10245   ins_pipe( fpu_mem_reg_reg );
10246 %}
10247 //
10248 // This instruction does not round to 24-bits
10249 instruct addFPR_reg(regFPR dst, regFPR src) %{
10250   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10251   match(Set dst (AddF dst src));
10252 
10253   format %{ "FLD    $src\n\t"
10254             "FADDp  $dst,ST" %}
10255   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10256   ins_encode( Push_Reg_FPR(src),
10257               OpcP, RegOpc(dst) );
10258   ins_pipe( fpu_reg_reg );
10259 %}
10260 
10261 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10262   predicate(UseSSE==0);
10263   match(Set dst (AbsF src));
10264   ins_cost(100);
10265   format %{ "FABS" %}
10266   opcode(0xE1, 0xD9);
10267   ins_encode( OpcS, OpcP );
10268   ins_pipe( fpu_reg_reg );
10269 %}
10270 
10271 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10272   predicate(UseSSE==0);
10273   match(Set dst (NegF src));
10274   ins_cost(100);
10275   format %{ "FCHS" %}
10276   opcode(0xE0, 0xD9);
10277   ins_encode( OpcS, OpcP );
10278   ins_pipe( fpu_reg_reg );
10279 %}
10280 
10281 // Cisc-alternate to addFPR_reg
10282 // Spill to obtain 24-bit precision
10283 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10284   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10285   match(Set dst (AddF src1 (LoadF src2)));
10286 
10287   format %{ "FLD    $src2\n\t"
10288             "FADD   ST,$src1\n\t"
10289             "FSTP_S $dst" %}
10290   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10291   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10292               OpcReg_FPR(src1),
10293               Pop_Mem_FPR(dst) );
10294   ins_pipe( fpu_mem_reg_mem );
10295 %}
10296 //
10297 // Cisc-alternate to addFPR_reg
10298 // This instruction does not round to 24-bits
10299 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10300   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10301   match(Set dst (AddF dst (LoadF src)));
10302 
10303   format %{ "FADD   $dst,$src" %}
10304   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10305   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10306               OpcP, RegOpc(dst) );
10307   ins_pipe( fpu_reg_mem );
10308 %}
10309 
10310 // // Following two instructions for _222_mpegaudio
10311 // Spill to obtain 24-bit precision
10312 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10313   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10314   match(Set dst (AddF src1 src2));
10315 
10316   format %{ "FADD   $dst,$src1,$src2" %}
10317   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10319               OpcReg_FPR(src2),
10320               Pop_Mem_FPR(dst) );
10321   ins_pipe( fpu_mem_reg_mem );
10322 %}
10323 
10324 // Cisc-spill variant
10325 // Spill to obtain 24-bit precision
10326 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10327   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10328   match(Set dst (AddF src1 (LoadF src2)));
10329 
10330   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10331   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10332   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10333               set_instruction_start,
10334               OpcP, RMopc_Mem(secondary,src1),
10335               Pop_Mem_FPR(dst) );
10336   ins_pipe( fpu_mem_mem_mem );
10337 %}
10338 
10339 // Spill to obtain 24-bit precision
10340 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10341   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10342   match(Set dst (AddF src1 src2));
10343 
10344   format %{ "FADD   $dst,$src1,$src2" %}
10345   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10346   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10347               set_instruction_start,
10348               OpcP, RMopc_Mem(secondary,src1),
10349               Pop_Mem_FPR(dst) );
10350   ins_pipe( fpu_mem_mem_mem );
10351 %}
10352 
10353 
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src con));
10358   format %{ "FLD    $src\n\t"
10359             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10360             "FSTP_S $dst"  %}
10361   ins_encode %{
10362     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10363     __ fadd_s($constantaddress($con));
10364     __ fstp_s(Address(rsp, $dst$$disp));
10365   %}
10366   ins_pipe(fpu_mem_reg_con);
10367 %}
10368 //
10369 // This instruction does not round to 24-bits
10370 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10371   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10372   match(Set dst (AddF src con));
10373   format %{ "FLD    $src\n\t"
10374             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10375             "FSTP   $dst"  %}
10376   ins_encode %{
10377     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10378     __ fadd_s($constantaddress($con));
10379     __ fstp_d($dst$$reg);
10380   %}
10381   ins_pipe(fpu_reg_reg_con);
10382 %}
10383 
10384 // Spill to obtain 24-bit precision
10385 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10386   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10387   match(Set dst (MulF src1 src2));
10388 
10389   format %{ "FLD    $src1\n\t"
10390             "FMUL   $src2\n\t"
10391             "FSTP_S $dst"  %}
10392   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10393   ins_encode( Push_Reg_FPR(src1),
10394               OpcReg_FPR(src2),
10395               Pop_Mem_FPR(dst) );
10396   ins_pipe( fpu_mem_reg_reg );
10397 %}
10398 //
10399 // This instruction does not round to 24-bits
10400 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10401   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10402   match(Set dst (MulF src1 src2));
10403 
10404   format %{ "FLD    $src1\n\t"
10405             "FMUL   $src2\n\t"
10406             "FSTP_S $dst"  %}
10407   opcode(0xD8, 0x1); /* D8 C8+i */
10408   ins_encode( Push_Reg_FPR(src2),
10409               OpcReg_FPR(src1),
10410               Pop_Reg_FPR(dst) );
10411   ins_pipe( fpu_reg_reg_reg );
10412 %}
10413 
10414 
10415 // Spill to obtain 24-bit precision
10416 // Cisc-alternate to reg-reg multiply
10417 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10418   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10419   match(Set dst (MulF src1 (LoadF src2)));
10420 
10421   format %{ "FLD_S  $src2\n\t"
10422             "FMUL   $src1\n\t"
10423             "FSTP_S $dst"  %}
10424   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10425   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10426               OpcReg_FPR(src1),
10427               Pop_Mem_FPR(dst) );
10428   ins_pipe( fpu_mem_reg_mem );
10429 %}
10430 //
10431 // This instruction does not round to 24-bits
10432 // Cisc-alternate to reg-reg multiply
10433 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10434   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10435   match(Set dst (MulF src1 (LoadF src2)));
10436 
10437   format %{ "FMUL   $dst,$src1,$src2" %}
10438   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10439   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10440               OpcReg_FPR(src1),
10441               Pop_Reg_FPR(dst) );
10442   ins_pipe( fpu_reg_reg_mem );
10443 %}
10444 
10445 // Spill to obtain 24-bit precision
10446 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10447   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10448   match(Set dst (MulF src1 src2));
10449 
10450   format %{ "FMUL   $dst,$src1,$src2" %}
10451   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10452   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10453               set_instruction_start,
10454               OpcP, RMopc_Mem(secondary,src1),
10455               Pop_Mem_FPR(dst) );
10456   ins_pipe( fpu_mem_mem_mem );
10457 %}
10458 
10459 // Spill to obtain 24-bit precision
10460 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10461   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10462   match(Set dst (MulF src con));
10463 
10464   format %{ "FLD    $src\n\t"
10465             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10466             "FSTP_S $dst"  %}
10467   ins_encode %{
10468     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10469     __ fmul_s($constantaddress($con));
10470     __ fstp_s(Address(rsp, $dst$$disp));
10471   %}
10472   ins_pipe(fpu_mem_reg_con);
10473 %}
10474 //
10475 // This instruction does not round to 24-bits
10476 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10477   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10478   match(Set dst (MulF src con));
10479 
10480   format %{ "FLD    $src\n\t"
10481             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10482             "FSTP   $dst"  %}
10483   ins_encode %{
10484     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10485     __ fmul_s($constantaddress($con));
10486     __ fstp_d($dst$$reg);
10487   %}
10488   ins_pipe(fpu_reg_reg_con);
10489 %}
10490 
10491 
10492 //
10493 // MACRO1 -- subsume unshared load into mulFPR
10494 // This instruction does not round to 24-bits
10495 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10496   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10497   match(Set dst (MulF (LoadF mem1) src));
10498 
10499   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10500             "FMUL   ST,$src\n\t"
10501             "FSTP   $dst" %}
10502   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10503   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10504               OpcReg_FPR(src),
10505               Pop_Reg_FPR(dst) );
10506   ins_pipe( fpu_reg_reg_mem );
10507 %}
10508 //
10509 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10510 // This instruction does not round to 24-bits
10511 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10512   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10513   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10514   ins_cost(95);
10515 
10516   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10517             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10518             "FADD   ST,$src2\n\t"
10519             "FSTP   $dst" %}
10520   opcode(0xD9); /* LoadF D9 /0 */
10521   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10522               FMul_ST_reg(src1),
10523               FAdd_ST_reg(src2),
10524               Pop_Reg_FPR(dst) );
10525   ins_pipe( fpu_reg_mem_reg_reg );
10526 %}
10527 
10528 // MACRO3 -- addFPR a mulFPR
10529 // This instruction does not round to 24-bits.  It is a '2-address'
10530 // instruction in that the result goes back to src2.  This eliminates
10531 // a move from the macro; possibly the register allocator will have
10532 // to add it back (and maybe not).
10533 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10534   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10535   match(Set src2 (AddF (MulF src0 src1) src2));
10536 
10537   format %{ "FLD    $src0     ===MACRO3===\n\t"
10538             "FMUL   ST,$src1\n\t"
10539             "FADDP  $src2,ST" %}
10540   opcode(0xD9); /* LoadF D9 /0 */
10541   ins_encode( Push_Reg_FPR(src0),
10542               FMul_ST_reg(src1),
10543               FAddP_reg_ST(src2) );
10544   ins_pipe( fpu_reg_reg_reg );
10545 %}
10546 
10547 // MACRO4 -- divFPR subFPR
10548 // This instruction does not round to 24-bits
10549 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10550   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10551   match(Set dst (DivF (SubF src2 src1) src3));
10552 
10553   format %{ "FLD    $src2   ===MACRO4===\n\t"
10554             "FSUB   ST,$src1\n\t"
10555             "FDIV   ST,$src3\n\t"
10556             "FSTP  $dst" %}
10557   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10558   ins_encode( Push_Reg_FPR(src2),
10559               subFPR_divFPR_encode(src1,src3),
10560               Pop_Reg_FPR(dst) );
10561   ins_pipe( fpu_reg_reg_reg_reg );
10562 %}
10563 
10564 // Spill to obtain 24-bit precision
10565 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10566   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10567   match(Set dst (DivF src1 src2));
10568 
10569   format %{ "FDIV   $dst,$src1,$src2" %}
10570   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10571   ins_encode( Push_Reg_FPR(src1),
10572               OpcReg_FPR(src2),
10573               Pop_Mem_FPR(dst) );
10574   ins_pipe( fpu_mem_reg_reg );
10575 %}
10576 //
10577 // This instruction does not round to 24-bits
10578 instruct divFPR_reg(regFPR dst, regFPR src) %{
10579   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10580   match(Set dst (DivF dst src));
10581 
10582   format %{ "FDIV   $dst,$src" %}
10583   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10584   ins_encode( Push_Reg_FPR(src),
10585               OpcP, RegOpc(dst) );
10586   ins_pipe( fpu_reg_reg );
10587 %}
10588 
10589 
10590 // Spill to obtain 24-bit precision
10591 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10592   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10593   match(Set dst (ModF src1 src2));
10594   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10595 
10596   format %{ "FMOD   $dst,$src1,$src2" %}
10597   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10598               emitModDPR(),
10599               Push_Result_Mod_DPR(src2),
10600               Pop_Mem_FPR(dst));
10601   ins_pipe( pipe_slow );
10602 %}
10603 //
10604 // This instruction does not round to 24-bits
10605 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10606   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10607   match(Set dst (ModF dst src));
10608   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10609 
10610   format %{ "FMOD   $dst,$src" %}
10611   ins_encode(Push_Reg_Mod_DPR(dst, src),
10612               emitModDPR(),
10613               Push_Result_Mod_DPR(src),
10614               Pop_Reg_FPR(dst));
10615   ins_pipe( pipe_slow );
10616 %}
10617 
10618 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10619   predicate(UseSSE>=1);
10620   match(Set dst (ModF src0 src1));
10621   effect(KILL rax, KILL cr);
10622   format %{ "SUB    ESP,4\t # FMOD\n"
10623           "\tMOVSS  [ESP+0],$src1\n"
10624           "\tFLD_S  [ESP+0]\n"
10625           "\tMOVSS  [ESP+0],$src0\n"
10626           "\tFLD_S  [ESP+0]\n"
10627      "loop:\tFPREM\n"
10628           "\tFWAIT\n"
10629           "\tFNSTSW AX\n"
10630           "\tSAHF\n"
10631           "\tJP     loop\n"
10632           "\tFSTP_S [ESP+0]\n"
10633           "\tMOVSS  $dst,[ESP+0]\n"
10634           "\tADD    ESP,4\n"
10635           "\tFSTP   ST0\t # Restore FPU Stack"
10636     %}
10637   ins_cost(250);
10638   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10639   ins_pipe( pipe_slow );
10640 %}
10641 
10642 
10643 //----------Arithmetic Conversion Instructions---------------------------------
10644 // The conversion operations are all alpha-sorted.  Please keep it that way!
10645 
10646 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10647   predicate(UseSSE==0);
10648   match(Set dst (RoundFloat src));
10649   ins_cost(125);
10650   format %{ "FST_S  $dst,$src\t# F-round" %}
10651   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10652   ins_pipe( fpu_mem_reg );
10653 %}
10654 
10655 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10656   predicate(UseSSE<=1);
10657   match(Set dst (RoundDouble src));
10658   ins_cost(125);
10659   format %{ "FST_D  $dst,$src\t# D-round" %}
10660   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10661   ins_pipe( fpu_mem_reg );
10662 %}
10663 
10664 // Force rounding to 24-bit precision and 8-bit exponent
10665 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10666   predicate(UseSSE==0);
10667   match(Set dst (ConvD2F src));
10668   format %{ "FST_S  $dst,$src\t# F-round" %}
10669   expand %{
10670     roundFloat_mem_reg(dst,src);
10671   %}
10672 %}
10673 
10674 // Force rounding to 24-bit precision and 8-bit exponent
10675 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10676   predicate(UseSSE==1);
10677   match(Set dst (ConvD2F src));
10678   effect( KILL cr );
10679   format %{ "SUB    ESP,4\n\t"
10680             "FST_S  [ESP],$src\t# F-round\n\t"
10681             "MOVSS  $dst,[ESP]\n\t"
10682             "ADD ESP,4" %}
10683   ins_encode %{
10684     __ subptr(rsp, 4);
10685     if ($src$$reg != FPR1L_enc) {
10686       __ fld_s($src$$reg-1);
10687       __ fstp_s(Address(rsp, 0));
10688     } else {
10689       __ fst_s(Address(rsp, 0));
10690     }
10691     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10692     __ addptr(rsp, 4);
10693   %}
10694   ins_pipe( pipe_slow );
10695 %}
10696 
10697 // Force rounding double precision to single precision
10698 instruct convD2F_reg(regF dst, regD src) %{
10699   predicate(UseSSE>=2);
10700   match(Set dst (ConvD2F src));
10701   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10702   ins_encode %{
10703     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10704   %}
10705   ins_pipe( pipe_slow );
10706 %}
10707 
10708 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10709   predicate(UseSSE==0);
10710   match(Set dst (ConvF2D src));
10711   format %{ "FST_S  $dst,$src\t# D-round" %}
10712   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10713   ins_pipe( fpu_reg_reg );
10714 %}
10715 
10716 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10717   predicate(UseSSE==1);
10718   match(Set dst (ConvF2D src));
10719   format %{ "FST_D  $dst,$src\t# D-round" %}
10720   expand %{
10721     roundDouble_mem_reg(dst,src);
10722   %}
10723 %}
10724 
10725 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10726   predicate(UseSSE==1);
10727   match(Set dst (ConvF2D src));
10728   effect( KILL cr );
10729   format %{ "SUB    ESP,4\n\t"
10730             "MOVSS  [ESP] $src\n\t"
10731             "FLD_S  [ESP]\n\t"
10732             "ADD    ESP,4\n\t"
10733             "FSTP   $dst\t# D-round" %}
10734   ins_encode %{
10735     __ subptr(rsp, 4);
10736     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10737     __ fld_s(Address(rsp, 0));
10738     __ addptr(rsp, 4);
10739     __ fstp_d($dst$$reg);
10740   %}
10741   ins_pipe( pipe_slow );
10742 %}
10743 
10744 instruct convF2D_reg(regD dst, regF src) %{
10745   predicate(UseSSE>=2);
10746   match(Set dst (ConvF2D src));
10747   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10748   ins_encode %{
10749     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10750   %}
10751   ins_pipe( pipe_slow );
10752 %}
10753 
10754 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10755 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10756   predicate(UseSSE<=1);
10757   match(Set dst (ConvD2I src));
10758   effect( KILL tmp, KILL cr );
10759   format %{ "FLD    $src\t# Convert double to int \n\t"
10760             "FLDCW  trunc mode\n\t"
10761             "SUB    ESP,4\n\t"
10762             "FISTp  [ESP + #0]\n\t"
10763             "FLDCW  std/24-bit mode\n\t"
10764             "POP    EAX\n\t"
10765             "CMP    EAX,0x80000000\n\t"
10766             "JNE,s  fast\n\t"
10767             "FLD_D  $src\n\t"
10768             "CALL   d2i_wrapper\n"
10769       "fast:" %}
10770   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10771   ins_pipe( pipe_slow );
10772 %}
10773 
10774 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10775 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10776   predicate(UseSSE>=2);
10777   match(Set dst (ConvD2I src));
10778   effect( KILL tmp, KILL cr );
10779   format %{ "CVTTSD2SI $dst, $src\n\t"
10780             "CMP    $dst,0x80000000\n\t"
10781             "JNE,s  fast\n\t"
10782             "SUB    ESP, 8\n\t"
10783             "MOVSD  [ESP], $src\n\t"
10784             "FLD_D  [ESP]\n\t"
10785             "ADD    ESP, 8\n\t"
10786             "CALL   d2i_wrapper\n"
10787       "fast:" %}
10788   ins_encode %{
10789     Label fast;
10790     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10791     __ cmpl($dst$$Register, 0x80000000);
10792     __ jccb(Assembler::notEqual, fast);
10793     __ subptr(rsp, 8);
10794     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10795     __ fld_d(Address(rsp, 0));
10796     __ addptr(rsp, 8);
10797     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10798     __ bind(fast);
10799   %}
10800   ins_pipe( pipe_slow );
10801 %}
10802 
10803 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10804   predicate(UseSSE<=1);
10805   match(Set dst (ConvD2L src));
10806   effect( KILL cr );
10807   format %{ "FLD    $src\t# Convert double to long\n\t"
10808             "FLDCW  trunc mode\n\t"
10809             "SUB    ESP,8\n\t"
10810             "FISTp  [ESP + #0]\n\t"
10811             "FLDCW  std/24-bit mode\n\t"
10812             "POP    EAX\n\t"
10813             "POP    EDX\n\t"
10814             "CMP    EDX,0x80000000\n\t"
10815             "JNE,s  fast\n\t"
10816             "TEST   EAX,EAX\n\t"
10817             "JNE,s  fast\n\t"
10818             "FLD    $src\n\t"
10819             "CALL   d2l_wrapper\n"
10820       "fast:" %}
10821   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10822   ins_pipe( pipe_slow );
10823 %}
10824 
10825 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10826 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10827   predicate (UseSSE>=2);
10828   match(Set dst (ConvD2L src));
10829   effect( KILL cr );
10830   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10831             "MOVSD  [ESP],$src\n\t"
10832             "FLD_D  [ESP]\n\t"
10833             "FLDCW  trunc mode\n\t"
10834             "FISTp  [ESP + #0]\n\t"
10835             "FLDCW  std/24-bit mode\n\t"
10836             "POP    EAX\n\t"
10837             "POP    EDX\n\t"
10838             "CMP    EDX,0x80000000\n\t"
10839             "JNE,s  fast\n\t"
10840             "TEST   EAX,EAX\n\t"
10841             "JNE,s  fast\n\t"
10842             "SUB    ESP,8\n\t"
10843             "MOVSD  [ESP],$src\n\t"
10844             "FLD_D  [ESP]\n\t"
10845             "ADD    ESP,8\n\t"
10846             "CALL   d2l_wrapper\n"
10847       "fast:" %}
10848   ins_encode %{
10849     Label fast;
10850     __ subptr(rsp, 8);
10851     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10852     __ fld_d(Address(rsp, 0));
10853     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10854     __ fistp_d(Address(rsp, 0));
10855     // Restore the rounding mode, mask the exception
10856     if (Compile::current()->in_24_bit_fp_mode()) {
10857       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10858     } else {
10859       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10860     }
10861     // Load the converted long, adjust CPU stack
10862     __ pop(rax);
10863     __ pop(rdx);
10864     __ cmpl(rdx, 0x80000000);
10865     __ jccb(Assembler::notEqual, fast);
10866     __ testl(rax, rax);
10867     __ jccb(Assembler::notEqual, fast);
10868     __ subptr(rsp, 8);
10869     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10870     __ fld_d(Address(rsp, 0));
10871     __ addptr(rsp, 8);
10872     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10873     __ bind(fast);
10874   %}
10875   ins_pipe( pipe_slow );
10876 %}
10877 
10878 // Convert a double to an int.  Java semantics require we do complex
10879 // manglations in the corner cases.  So we set the rounding mode to
10880 // 'zero', store the darned double down as an int, and reset the
10881 // rounding mode to 'nearest'.  The hardware stores a flag value down
10882 // if we would overflow or converted a NAN; we check for this and
10883 // go the slow path if needed.
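// A minimal C sketch of the Java corner-case handling the slow path (the
// d2i_wrapper stub) is responsible for once the fast conversion has produced
// the 0x80000000 flag value; illustrative only, not the stub's actual code
// (needs <stdint.h>):
//
//   int32_t java_d2i(double d) {
//     if (d != d)              return 0;          // NaN -> 0
//     if (d >=  2147483648.0)  return INT32_MAX;  // too big   -> Integer.MAX_VALUE
//     if (d <= -2147483649.0)  return INT32_MIN;  // too small -> Integer.MIN_VALUE
//     return (int32_t)d;                          // otherwise truncate toward zero
//   }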
10884 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10885   predicate(UseSSE==0);
10886   match(Set dst (ConvF2I src));
10887   effect( KILL tmp, KILL cr );
10888   format %{ "FLD    $src\t# Convert float to int \n\t"
10889             "FLDCW  trunc mode\n\t"
10890             "SUB    ESP,4\n\t"
10891             "FISTp  [ESP + #0]\n\t"
10892             "FLDCW  std/24-bit mode\n\t"
10893             "POP    EAX\n\t"
10894             "CMP    EAX,0x80000000\n\t"
10895             "JNE,s  fast\n\t"
10896             "FLD    $src\n\t"
10897             "CALL   d2i_wrapper\n"
10898       "fast:" %}
10899   // DPR2I_encoding works for FPR2I
10900   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10901   ins_pipe( pipe_slow );
10902 %}
10903 
10904 // Convert a float in xmm to an int reg.
10905 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10906   predicate(UseSSE>=1);
10907   match(Set dst (ConvF2I src));
10908   effect( KILL tmp, KILL cr );
10909   format %{ "CVTTSS2SI $dst, $src\n\t"
10910             "CMP    $dst,0x80000000\n\t"
10911             "JNE,s  fast\n\t"
10912             "SUB    ESP, 4\n\t"
10913             "MOVSS  [ESP], $src\n\t"
10914             "FLD    [ESP]\n\t"
10915             "ADD    ESP, 4\n\t"
10916             "CALL   d2i_wrapper\n"
10917       "fast:" %}
10918   ins_encode %{
10919     Label fast;
10920     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10921     __ cmpl($dst$$Register, 0x80000000);
10922     __ jccb(Assembler::notEqual, fast);
10923     __ subptr(rsp, 4);
10924     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10925     __ fld_s(Address(rsp, 0));
10926     __ addptr(rsp, 4);
10927     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10928     __ bind(fast);
10929   %}
10930   ins_pipe( pipe_slow );
10931 %}
10932 
10933 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10934   predicate(UseSSE==0);
10935   match(Set dst (ConvF2L src));
10936   effect( KILL cr );
10937   format %{ "FLD    $src\t# Convert float to long\n\t"
10938             "FLDCW  trunc mode\n\t"
10939             "SUB    ESP,8\n\t"
10940             "FISTp  [ESP + #0]\n\t"
10941             "FLDCW  std/24-bit mode\n\t"
10942             "POP    EAX\n\t"
10943             "POP    EDX\n\t"
10944             "CMP    EDX,0x80000000\n\t"
10945             "JNE,s  fast\n\t"
10946             "TEST   EAX,EAX\n\t"
10947             "JNE,s  fast\n\t"
10948             "FLD    $src\n\t"
10949             "CALL   d2l_wrapper\n"
10950       "fast:" %}
10951   // DPR2L_encoding works for FPR2L
10952   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10953   ins_pipe( pipe_slow );
10954 %}
10955 
10956 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10957 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10958   predicate (UseSSE>=1);
10959   match(Set dst (ConvF2L src));
10960   effect( KILL cr );
10961   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10962             "MOVSS  [ESP],$src\n\t"
10963             "FLD_S  [ESP]\n\t"
10964             "FLDCW  trunc mode\n\t"
10965             "FISTp  [ESP + #0]\n\t"
10966             "FLDCW  std/24-bit mode\n\t"
10967             "POP    EAX\n\t"
10968             "POP    EDX\n\t"
10969             "CMP    EDX,0x80000000\n\t"
10970             "JNE,s  fast\n\t"
10971             "TEST   EAX,EAX\n\t"
10972             "JNE,s  fast\n\t"
10973             "SUB    ESP,4\t# Convert float to long\n\t"
10974             "MOVSS  [ESP],$src\n\t"
10975             "FLD_S  [ESP]\n\t"
10976             "ADD    ESP,4\n\t"
10977             "CALL   d2l_wrapper\n"
10978       "fast:" %}
10979   ins_encode %{
10980     Label fast;
10981     __ subptr(rsp, 8);
10982     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10983     __ fld_s(Address(rsp, 0));
10984     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10985     __ fistp_d(Address(rsp, 0));
10986     // Restore the rounding mode, mask the exception
10987     if (Compile::current()->in_24_bit_fp_mode()) {
10988       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10989     } else {
10990       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10991     }
10992     // Load the converted long, adjust CPU stack
10993     __ pop(rax);
10994     __ pop(rdx);
10995     __ cmpl(rdx, 0x80000000);
10996     __ jccb(Assembler::notEqual, fast);
10997     __ testl(rax, rax);
10998     __ jccb(Assembler::notEqual, fast);
10999     __ subptr(rsp, 4);
11000     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11001     __ fld_s(Address(rsp, 0));
11002     __ addptr(rsp, 4);
11003     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11004     __ bind(fast);
11005   %}
11006   ins_pipe( pipe_slow );
11007 %}
11008 
11009 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11010   predicate( UseSSE<=1 );
11011   match(Set dst (ConvI2D src));
11012   format %{ "FILD   $src\n\t"
11013             "FSTP   $dst" %}
11014   opcode(0xDB, 0x0);  /* DB /0 */
11015   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11016   ins_pipe( fpu_reg_mem );
11017 %}
11018 
11019 instruct convI2D_reg(regD dst, rRegI src) %{
11020   predicate( UseSSE>=2 && !UseXmmI2D );
11021   match(Set dst (ConvI2D src));
11022   format %{ "CVTSI2SD $dst,$src" %}
11023   ins_encode %{
11024     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11025   %}
11026   ins_pipe( pipe_slow );
11027 %}
11028 
11029 instruct convI2D_mem(regD dst, memory mem) %{
11030   predicate( UseSSE>=2 );
11031   match(Set dst (ConvI2D (LoadI mem)));
11032   format %{ "CVTSI2SD $dst,$mem" %}
11033   ins_encode %{
11034     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11035   %}
11036   ins_pipe( pipe_slow );
11037 %}
11038 
11039 instruct convXI2D_reg(regD dst, rRegI src)
11040 %{
11041   predicate( UseSSE>=2 && UseXmmI2D );
11042   match(Set dst (ConvI2D src));
11043 
11044   format %{ "MOVD  $dst,$src\n\t"
11045             "CVTDQ2PD $dst,$dst\t# i2d" %}
11046   ins_encode %{
11047     __ movdl($dst$$XMMRegister, $src$$Register);
11048     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11049   %}
11050   ins_pipe(pipe_slow); // XXX
11051 %}
11052 
11053 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11054   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11055   match(Set dst (ConvI2D (LoadI mem)));
11056   format %{ "FILD   $mem\n\t"
11057             "FSTP   $dst" %}
11058   opcode(0xDB);      /* DB /0 */
11059   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11060               Pop_Reg_DPR(dst));
11061   ins_pipe( fpu_reg_mem );
11062 %}
11063 
11064 // Convert a byte to a float; no rounding step needed.
11065 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11066   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11067   match(Set dst (ConvI2F src));
11068   format %{ "FILD   $src\n\t"
11069             "FSTP   $dst" %}
11070 
11071   opcode(0xDB, 0x0);  /* DB /0 */
11072   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11073   ins_pipe( fpu_reg_mem );
11074 %}
11075 
11076 // In 24-bit mode, force exponent rounding by storing back out
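// (The FPU's 24-bit precision control rounds only the significand; the
// exponent keeps its extended range, so the value must be spilled through a
// 32-bit slot to get true float overflow/underflow behaviour.)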
11077 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11078   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11079   match(Set dst (ConvI2F src));
11080   ins_cost(200);
11081   format %{ "FILD   $src\n\t"
11082             "FSTP_S $dst" %}
11083   opcode(0xDB, 0x0);  /* DB /0 */
11084   ins_encode( Push_Mem_I(src),
11085               Pop_Mem_FPR(dst));
11086   ins_pipe( fpu_mem_mem );
11087 %}
11088 
11089 // In 24-bit mode, force exponent rounding by storing back out
11090 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11091   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11092   match(Set dst (ConvI2F (LoadI mem)));
11093   ins_cost(200);
11094   format %{ "FILD   $mem\n\t"
11095             "FSTP_S $dst" %}
11096   opcode(0xDB);  /* DB /0 */
11097   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11098               Pop_Mem_FPR(dst));
11099   ins_pipe( fpu_mem_mem );
11100 %}
11101 
11102 // This instruction does not round to 24-bits
11103 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11104   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11105   match(Set dst (ConvI2F src));
11106   format %{ "FILD   $src\n\t"
11107             "FSTP   $dst" %}
11108   opcode(0xDB, 0x0);  /* DB /0 */
11109   ins_encode( Push_Mem_I(src),
11110               Pop_Reg_FPR(dst));
11111   ins_pipe( fpu_reg_mem );
11112 %}
11113 
11114 // This instruction does not round to 24-bits
11115 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11116   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11117   match(Set dst (ConvI2F (LoadI mem)));
11118   format %{ "FILD   $mem\n\t"
11119             "FSTP   $dst" %}
11120   opcode(0xDB);      /* DB /0 */
11121   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11122               Pop_Reg_FPR(dst));
11123   ins_pipe( fpu_reg_mem );
11124 %}
11125 
11126 // Convert an int to a float in xmm; no rounding step needed.
11127 instruct convI2F_reg(regF dst, rRegI src) %{
11128   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11129   match(Set dst (ConvI2F src));
11130   format %{ "CVTSI2SS $dst, $src" %}
11131   ins_encode %{
11132     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11133   %}
11134   ins_pipe( pipe_slow );
11135 %}
11136 
instruct convXI2F_reg(regF dst, rRegI src)
11138 %{
11139   predicate( UseSSE>=2 && UseXmmI2F );
11140   match(Set dst (ConvI2F src));
11141 
11142   format %{ "MOVD  $dst,$src\n\t"
11143             "CVTDQ2PS $dst,$dst\t# i2f" %}
11144   ins_encode %{
11145     __ movdl($dst$$XMMRegister, $src$$Register);
11146     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11147   %}
11148   ins_pipe(pipe_slow); // XXX
11149 %}
11150 
11151 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11152   match(Set dst (ConvI2L src));
11153   effect(KILL cr);
11154   ins_cost(375);
11155   format %{ "MOV    $dst.lo,$src\n\t"
11156             "MOV    $dst.hi,$src\n\t"
11157             "SAR    $dst.hi,31" %}
11158   ins_encode(convert_int_long(dst,src));
11159   ins_pipe( ialu_reg_reg_long );
11160 %}
11161 
11162 // Zero-extend convert int to long
11163 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11164   match(Set dst (AndL (ConvI2L src) mask) );
11165   effect( KILL flags );
11166   ins_cost(250);
11167   format %{ "MOV    $dst.lo,$src\n\t"
11168             "XOR    $dst.hi,$dst.hi" %}
11169   opcode(0x33); // XOR
11170   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11171   ins_pipe( ialu_reg_reg_long );
11172 %}
11173 
11174 // Zero-extend long
11175 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11176   match(Set dst (AndL src mask) );
11177   effect( KILL flags );
11178   ins_cost(250);
11179   format %{ "MOV    $dst.lo,$src.lo\n\t"
11180             "XOR    $dst.hi,$dst.hi\n\t" %}
11181   opcode(0x33); // XOR
11182   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11183   ins_pipe( ialu_reg_reg_long );
11184 %}
11185 
11186 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11187   predicate (UseSSE<=1);
11188   match(Set dst (ConvL2D src));
11189   effect( KILL cr );
11190   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11191             "PUSH   $src.lo\n\t"
11192             "FILD   ST,[ESP + #0]\n\t"
11193             "ADD    ESP,8\n\t"
11194             "FSTP_D $dst\t# D-round" %}
11195   opcode(0xDF, 0x5);  /* DF /5 */
11196   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11197   ins_pipe( pipe_slow );
11198 %}
11199 
11200 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11201   predicate (UseSSE>=2);
11202   match(Set dst (ConvL2D src));
11203   effect( KILL cr );
11204   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11205             "PUSH   $src.lo\n\t"
11206             "FILD_D [ESP]\n\t"
11207             "FSTP_D [ESP]\n\t"
11208             "MOVSD  $dst,[ESP]\n\t"
11209             "ADD    ESP,8" %}
11210   opcode(0xDF, 0x5);  /* DF /5 */
11211   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11212   ins_pipe( pipe_slow );
11213 %}
11214 
11215 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11216   predicate (UseSSE>=1);
11217   match(Set dst (ConvL2F src));
11218   effect( KILL cr );
11219   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11220             "PUSH   $src.lo\n\t"
11221             "FILD_D [ESP]\n\t"
11222             "FSTP_S [ESP]\n\t"
11223             "MOVSS  $dst,[ESP]\n\t"
11224             "ADD    ESP,8" %}
11225   opcode(0xDF, 0x5);  /* DF /5 */
11226   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11227   ins_pipe( pipe_slow );
11228 %}
11229 
11230 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11231   match(Set dst (ConvL2F src));
11232   effect( KILL cr );
11233   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11234             "PUSH   $src.lo\n\t"
11235             "FILD   ST,[ESP + #0]\n\t"
11236             "ADD    ESP,8\n\t"
11237             "FSTP_S $dst\t# F-round" %}
11238   opcode(0xDF, 0x5);  /* DF /5 */
11239   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11240   ins_pipe( pipe_slow );
11241 %}
11242 
11243 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11244   match(Set dst (ConvL2I src));
11245   effect( DEF dst, USE src );
11246   format %{ "MOV    $dst,$src.lo" %}
11247   ins_encode(enc_CopyL_Lo(dst,src));
11248   ins_pipe( ialu_reg_reg );
11249 %}
11250 
11251 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11252   match(Set dst (MoveF2I src));
11253   effect( DEF dst, USE src );
11254   ins_cost(100);
11255   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11256   ins_encode %{
11257     __ movl($dst$$Register, Address(rsp, $src$$disp));
11258   %}
11259   ins_pipe( ialu_reg_mem );
11260 %}
11261 
11262 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11263   predicate(UseSSE==0);
11264   match(Set dst (MoveF2I src));
11265   effect( DEF dst, USE src );
11266 
11267   ins_cost(125);
11268   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11269   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11270   ins_pipe( fpu_mem_reg );
11271 %}
11272 
11273 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11274   predicate(UseSSE>=1);
11275   match(Set dst (MoveF2I src));
11276   effect( DEF dst, USE src );
11277 
11278   ins_cost(95);
11279   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11280   ins_encode %{
11281     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11282   %}
11283   ins_pipe( pipe_slow );
11284 %}
11285 
11286 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11287   predicate(UseSSE>=2);
11288   match(Set dst (MoveF2I src));
11289   effect( DEF dst, USE src );
11290   ins_cost(85);
11291   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11292   ins_encode %{
11293     __ movdl($dst$$Register, $src$$XMMRegister);
11294   %}
11295   ins_pipe( pipe_slow );
11296 %}
11297 
11298 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11299   match(Set dst (MoveI2F src));
11300   effect( DEF dst, USE src );
11301 
11302   ins_cost(100);
11303   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11304   ins_encode %{
11305     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11306   %}
11307   ins_pipe( ialu_mem_reg );
11308 %}
11309 
11310 
11311 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11312   predicate(UseSSE==0);
11313   match(Set dst (MoveI2F src));
11314   effect(DEF dst, USE src);
11315 
11316   ins_cost(125);
11317   format %{ "FLD_S  $src\n\t"
11318             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11319   opcode(0xD9);               /* D9 /0, FLD m32real */
11320   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11321               Pop_Reg_FPR(dst) );
11322   ins_pipe( fpu_reg_mem );
11323 %}
11324 
11325 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11326   predicate(UseSSE>=1);
11327   match(Set dst (MoveI2F src));
11328   effect( DEF dst, USE src );
11329 
11330   ins_cost(95);
11331   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11332   ins_encode %{
11333     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11334   %}
11335   ins_pipe( pipe_slow );
11336 %}
11337 
11338 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11339   predicate(UseSSE>=2);
11340   match(Set dst (MoveI2F src));
11341   effect( DEF dst, USE src );
11342 
11343   ins_cost(85);
11344   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11345   ins_encode %{
11346     __ movdl($dst$$XMMRegister, $src$$Register);
11347   %}
11348   ins_pipe( pipe_slow );
11349 %}
11350 
11351 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11352   match(Set dst (MoveD2L src));
11353   effect(DEF dst, USE src);
11354 
11355   ins_cost(250);
11356   format %{ "MOV    $dst.lo,$src\n\t"
11357             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11358   opcode(0x8B, 0x8B);
11359   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11360   ins_pipe( ialu_mem_long_reg );
11361 %}
11362 
11363 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11364   predicate(UseSSE<=1);
11365   match(Set dst (MoveD2L src));
11366   effect(DEF dst, USE src);
11367 
11368   ins_cost(125);
11369   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11370   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11371   ins_pipe( fpu_mem_reg );
11372 %}
11373 
11374 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11375   predicate(UseSSE>=2);
11376   match(Set dst (MoveD2L src));
11377   effect(DEF dst, USE src);
11378   ins_cost(95);
11379   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11380   ins_encode %{
11381     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11382   %}
11383   ins_pipe( pipe_slow );
11384 %}
11385 
11386 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11387   predicate(UseSSE>=2);
11388   match(Set dst (MoveD2L src));
11389   effect(DEF dst, USE src, TEMP tmp);
11390   ins_cost(85);
11391   format %{ "MOVD   $dst.lo,$src\n\t"
11392             "PSHUFLW $tmp,$src,0x4E\n\t"
11393             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11394   ins_encode %{
11395     __ movdl($dst$$Register, $src$$XMMRegister);
11396     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11397     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11398   %}
11399   ins_pipe( pipe_slow );
11400 %}
11401 
11402 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11403   match(Set dst (MoveL2D src));
11404   effect(DEF dst, USE src);
11405 
11406   ins_cost(200);
11407   format %{ "MOV    $dst,$src.lo\n\t"
11408             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11409   opcode(0x89, 0x89);
11410   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11411   ins_pipe( ialu_mem_long_reg );
11412 %}
11413 
11414 
11415 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11416   predicate(UseSSE<=1);
11417   match(Set dst (MoveL2D src));
11418   effect(DEF dst, USE src);
11419   ins_cost(125);
11420 
11421   format %{ "FLD_D  $src\n\t"
11422             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11423   opcode(0xDD);               /* DD /0, FLD m64real */
11424   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11425               Pop_Reg_DPR(dst) );
11426   ins_pipe( fpu_reg_mem );
11427 %}
11428 
11429 
11430 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11431   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11432   match(Set dst (MoveL2D src));
11433   effect(DEF dst, USE src);
11434 
11435   ins_cost(95);
11436   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11437   ins_encode %{
11438     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11439   %}
11440   ins_pipe( pipe_slow );
11441 %}
11442 
11443 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11444   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11445   match(Set dst (MoveL2D src));
11446   effect(DEF dst, USE src);
11447 
11448   ins_cost(95);
11449   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11450   ins_encode %{
11451     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11452   %}
11453   ins_pipe( pipe_slow );
11454 %}
11455 
11456 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11457   predicate(UseSSE>=2);
11458   match(Set dst (MoveL2D src));
11459   effect(TEMP dst, USE src, TEMP tmp);
11460   ins_cost(85);
11461   format %{ "MOVD   $dst,$src.lo\n\t"
11462             "MOVD   $tmp,$src.hi\n\t"
11463             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11464   ins_encode %{
11465     __ movdl($dst$$XMMRegister, $src$$Register);
11466     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11467     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 
11473 // =======================================================================
11474 // fast clearing of an array
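// The count is in 8-byte units: the format strings below shift it left by 3
// to get a byte count for REP STOSB, or by 1 to get a 4-byte word count for
// REP STOS.  Both cnt and base are clobbered (USE_KILL).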
11475 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11476   predicate(!((ClearArrayNode*)n)->is_large());
11477   match(Set dummy (ClearArray cnt base));
11478   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11479 
11480   format %{ $$template
11481     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11482     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11483     $$emit$$"JG     LARGE\n\t"
11484     $$emit$$"SHL    ECX, 1\n\t"
11485     $$emit$$"DEC    ECX\n\t"
11486     $$emit$$"JS     DONE\t# Zero length\n\t"
11487     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11488     $$emit$$"DEC    ECX\n\t"
11489     $$emit$$"JGE    LOOP\n\t"
11490     $$emit$$"JMP    DONE\n\t"
11491     $$emit$$"# LARGE:\n\t"
11492     if (UseFastStosb) {
11493        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11494        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11495     } else if (UseXMMForObjInit) {
11496        $$emit$$"MOV     RDI,RAX\n\t"
11497        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11498        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11499        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11500        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11501        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11502        $$emit$$"ADD     0x40,RAX\n\t"
11503        $$emit$$"# L_zero_64_bytes:\n\t"
11504        $$emit$$"SUB     0x8,RCX\n\t"
11505        $$emit$$"JGE     L_loop\n\t"
11506        $$emit$$"ADD     0x4,RCX\n\t"
11507        $$emit$$"JL      L_tail\n\t"
11508        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11509        $$emit$$"ADD     0x20,RAX\n\t"
11510        $$emit$$"SUB     0x4,RCX\n\t"
11511        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11512        $$emit$$"ADD     0x4,RCX\n\t"
11513        $$emit$$"JLE     L_end\n\t"
11514        $$emit$$"DEC     RCX\n\t"
11515        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11516        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11517        $$emit$$"ADD     0x8,RAX\n\t"
11518        $$emit$$"DEC     RCX\n\t"
11519        $$emit$$"JGE     L_sloop\n\t"
11520        $$emit$$"# L_end:\n\t"
11521     } else {
11522        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11523        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11524     }
11525     $$emit$$"# DONE"
11526   %}
11527   ins_encode %{
11528     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11529                  $tmp$$XMMRegister, false);
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11535   predicate(((ClearArrayNode*)n)->is_large());
11536   match(Set dummy (ClearArray cnt base));
11537   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11538   format %{ $$template
11539     if (UseFastStosb) {
11540        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11541        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11542        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11543     } else if (UseXMMForObjInit) {
11544        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11545        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11546        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11547        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11548        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11549        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11550        $$emit$$"ADD     0x40,RAX\n\t"
11551        $$emit$$"# L_zero_64_bytes:\n\t"
11552        $$emit$$"SUB     0x8,RCX\n\t"
11553        $$emit$$"JGE     L_loop\n\t"
11554        $$emit$$"ADD     0x4,RCX\n\t"
11555        $$emit$$"JL      L_tail\n\t"
11556        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11557        $$emit$$"ADD     0x20,RAX\n\t"
11558        $$emit$$"SUB     0x4,RCX\n\t"
11559        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11560        $$emit$$"ADD     0x4,RCX\n\t"
11561        $$emit$$"JLE     L_end\n\t"
11562        $$emit$$"DEC     RCX\n\t"
11563        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11564        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11565        $$emit$$"ADD     0x8,RAX\n\t"
11566        $$emit$$"DEC     RCX\n\t"
11567        $$emit$$"JGE     L_sloop\n\t"
11568        $$emit$$"# L_end:\n\t"
11569     } else {
11570        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11571        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11572        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11573     }
11574     $$emit$$"# DONE"
11575   %}
11576   ins_encode %{
11577     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11578                  $tmp$$XMMRegister, true);
11579   %}
11580   ins_pipe( pipe_slow );
11581 %}
11582 
11583 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11584                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11585   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11586   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11587   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11588 
11589   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11590   ins_encode %{
11591     __ string_compare($str1$$Register, $str2$$Register,
11592                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11593                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11594   %}
11595   ins_pipe( pipe_slow );
11596 %}
11597 
11598 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11599                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11600   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11601   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11602   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11603 
11604   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11605   ins_encode %{
11606     __ string_compare($str1$$Register, $str2$$Register,
11607                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11608                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11609   %}
11610   ins_pipe( pipe_slow );
11611 %}
11612 
11613 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11614                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11615   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11616   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11617   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11618 
11619   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11620   ins_encode %{
11621     __ string_compare($str1$$Register, $str2$$Register,
11622                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11623                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11624   %}
11625   ins_pipe( pipe_slow );
11626 %}
11627 
11628 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11629                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11630   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11631   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11632   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11633 
11634   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11635   ins_encode %{
11636     __ string_compare($str2$$Register, $str1$$Register,
11637                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11638                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11639   %}
11640   ins_pipe( pipe_slow );
11641 %}
11642 
11643 // fast string equals
11644 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11645                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11646   match(Set result (StrEquals (Binary str1 str2) cnt));
11647   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11648 
11649   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11650   ins_encode %{
11651     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11652                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11653                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11654   %}
11655 
11656   ins_pipe( pipe_slow );
11657 %}
11658 
11659 // fast search of substring with known size.
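// The 16-element (Latin-1) and 8-element (UTF-16) thresholds below both
// correspond to one 16-byte XMM register full of pattern data; constants at
// least that long use the tighter string_indexofC8 loop, shorter ones fall
// back to the general string_indexof code.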
11660 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11661                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11662   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11663   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11664   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11665 
11666   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11667   ins_encode %{
11668     int icnt2 = (int)$int_cnt2$$constant;
11669     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
11672       __ string_indexofC8($str1$$Register, $str2$$Register,
11673                           $cnt1$$Register, $cnt2$$Register,
11674                           icnt2, $result$$Register,
11675                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11676     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11678       __ string_indexof($str1$$Register, $str2$$Register,
11679                         $cnt1$$Register, $cnt2$$Register,
11680                         icnt2, $result$$Register,
11681                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11682     }
11683   %}
11684   ins_pipe( pipe_slow );
11685 %}
11686 
11687 // fast search of substring with known size.
11688 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11689                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11690   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11691   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11692   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11693 
11694   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11695   ins_encode %{
11696     int icnt2 = (int)$int_cnt2$$constant;
11697     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11700       __ string_indexofC8($str1$$Register, $str2$$Register,
11701                           $cnt1$$Register, $cnt2$$Register,
11702                           icnt2, $result$$Register,
11703                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11704     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11706       __ string_indexof($str1$$Register, $str2$$Register,
11707                         $cnt1$$Register, $cnt2$$Register,
11708                         icnt2, $result$$Register,
11709                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11710     }
11711   %}
11712   ins_pipe( pipe_slow );
11713 %}
11714 
11715 // fast search of substring with known size.
11716 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11717                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11718   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11719   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11720   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11721 
11722   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11723   ins_encode %{
11724     int icnt2 = (int)$int_cnt2$$constant;
11725     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11728       __ string_indexofC8($str1$$Register, $str2$$Register,
11729                           $cnt1$$Register, $cnt2$$Register,
11730                           icnt2, $result$$Register,
11731                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11732     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11734       __ string_indexof($str1$$Register, $str2$$Register,
11735                         $cnt1$$Register, $cnt2$$Register,
11736                         icnt2, $result$$Register,
11737                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11738     }
11739   %}
11740   ins_pipe( pipe_slow );
11741 %}
11742 
11743 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11744                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11745   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11746   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11747   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11748 
11749   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11750   ins_encode %{
11751     __ string_indexof($str1$$Register, $str2$$Register,
11752                       $cnt1$$Register, $cnt2$$Register,
11753                       (-1), $result$$Register,
11754                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11755   %}
11756   ins_pipe( pipe_slow );
11757 %}
11758 
11759 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11760                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11761   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11762   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11763   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11764 
11765   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11766   ins_encode %{
11767     __ string_indexof($str1$$Register, $str2$$Register,
11768                       $cnt1$$Register, $cnt2$$Register,
11769                       (-1), $result$$Register,
11770                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11771   %}
11772   ins_pipe( pipe_slow );
11773 %}
11774 
11775 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11776                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11777   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11778   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11779   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11780 
11781   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11782   ins_encode %{
11783     __ string_indexof($str1$$Register, $str2$$Register,
11784                       $cnt1$$Register, $cnt2$$Register,
11785                       (-1), $result$$Register,
11786                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11787   %}
11788   ins_pipe( pipe_slow );
11789 %}
11790 
11791 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11792                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11793   predicate(UseSSE42Intrinsics);
11794   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11795   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11796   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11797   ins_encode %{
11798     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11799                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11800   %}
11801   ins_pipe( pipe_slow );
11802 %}
11803 
11804 // fast array equals
11805 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11806                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11807 %{
11808   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11809   match(Set result (AryEq ary1 ary2));
11810   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11811   //ins_cost(300);
11812 
11813   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11814   ins_encode %{
11815     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11816                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11817                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11818   %}
11819   ins_pipe( pipe_slow );
11820 %}
11821 
11822 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11823                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11824 %{
11825   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11826   match(Set result (AryEq ary1 ary2));
11827   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11828   //ins_cost(300);
11829 
11830   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11831   ins_encode %{
11832     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11833                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11834                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11835   %}
11836   ins_pipe( pipe_slow );
11837 %}
11838 
11839 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11840                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11841 %{
11842   match(Set result (HasNegatives ary1 len));
11843   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11844 
11845   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11846   ins_encode %{
11847     __ has_negatives($ary1$$Register, $len$$Register,
11848                      $result$$Register, $tmp3$$Register,
11849                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11850   %}
11851   ins_pipe( pipe_slow );
11852 %}
11853 
11854 // fast char[] to byte[] compression
11855 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11856                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11857   match(Set result (StrCompressedCopy src (Binary dst len)));
11858   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11859 
11860   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11861   ins_encode %{
11862     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11863                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11864                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11865   %}
11866   ins_pipe( pipe_slow );
11867 %}
11868 
11869 // fast byte[] to char[] inflation
11870 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11871                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11872   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11873   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11874 
11875   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11876   ins_encode %{
11877     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11878                           $tmp1$$XMMRegister, $tmp2$$Register);
11879   %}
11880   ins_pipe( pipe_slow );
11881 %}
11882 
11883 // encode char[] to byte[] in ISO_8859_1
11884 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11885                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11886                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11887   match(Set result (EncodeISOArray src (Binary dst len)));
11888   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11889 
11890   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11891   ins_encode %{
11892     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11893                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11894                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11895   %}
11896   ins_pipe( pipe_slow );
11897 %}
11898 
11899 
11900 //----------Control Flow Instructions------------------------------------------
11901 // Signed compare Instructions
11902 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11903   match(Set cr (CmpI op1 op2));
11904   effect( DEF cr, USE op1, USE op2 );
11905   format %{ "CMP    $op1,$op2" %}
11906   opcode(0x3B);  /* Opcode 3B /r */
11907   ins_encode( OpcP, RegReg( op1, op2) );
11908   ins_pipe( ialu_cr_reg_reg );
11909 %}
11910 
11911 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11912   match(Set cr (CmpI op1 op2));
11913   effect( DEF cr, USE op1 );
11914   format %{ "CMP    $op1,$op2" %}
11915   opcode(0x81,0x07);  /* Opcode 81 /7 */
11916   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11917   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11918   ins_pipe( ialu_cr_reg_imm );
11919 %}
11920 
11921 // Cisc-spilled version of cmpI_eReg
11922 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11923   match(Set cr (CmpI op1 (LoadI op2)));
11924 
11925   format %{ "CMP    $op1,$op2" %}
11926   ins_cost(500);
11927   opcode(0x3B);  /* Opcode 3B /r */
11928   ins_encode( OpcP, RegMem( op1, op2) );
11929   ins_pipe( ialu_cr_reg_mem );
11930 %}
11931 
11932 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11933   match(Set cr (CmpI src zero));
11934   effect( DEF cr, USE src );
11935 
11936   format %{ "TEST   $src,$src" %}
11937   opcode(0x85);
11938   ins_encode( OpcP, RegReg( src, src ) );
11939   ins_pipe( ialu_cr_reg_imm );
11940 %}
11941 
11942 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11943   match(Set cr (CmpI (AndI src con) zero));
11944 
11945   format %{ "TEST   $src,$con" %}
11946   opcode(0xF7,0x00);
11947   ins_encode( OpcP, RegOpc(src), Con32(con) );
11948   ins_pipe( ialu_cr_reg_imm );
11949 %}
11950 
11951 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11952   match(Set cr (CmpI (AndI src mem) zero));
11953 
11954   format %{ "TEST   $src,$mem" %}
11955   opcode(0x85);
11956   ins_encode( OpcP, RegMem( src, mem ) );
11957   ins_pipe( ialu_cr_reg_mem );
11958 %}
11959 
11960 // Unsigned compare Instructions; really, same as signed except they
11961 // produce an eFlagsRegU instead of eFlagsReg.
11962 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11963   match(Set cr (CmpU op1 op2));
11964 
11965   format %{ "CMPu   $op1,$op2" %}
11966   opcode(0x3B);  /* Opcode 3B /r */
11967   ins_encode( OpcP, RegReg( op1, op2) );
11968   ins_pipe( ialu_cr_reg_reg );
11969 %}
11970 
11971 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11972   match(Set cr (CmpU op1 op2));
11973 
11974   format %{ "CMPu   $op1,$op2" %}
11975   opcode(0x81,0x07);  /* Opcode 81 /7 */
11976   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11977   ins_pipe( ialu_cr_reg_imm );
11978 %}
11979 
11980 // // Cisc-spilled version of cmpU_eReg
11981 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11982   match(Set cr (CmpU op1 (LoadI op2)));
11983 
11984   format %{ "CMPu   $op1,$op2" %}
11985   ins_cost(500);
11986   opcode(0x3B);  /* Opcode 3B /r */
11987   ins_encode( OpcP, RegMem( op1, op2) );
11988   ins_pipe( ialu_cr_reg_mem );
11989 %}
11990 
11991 // // Cisc-spilled version of cmpU_eReg
11992 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11993 //  match(Set cr (CmpU (LoadI op1) op2));
11994 //
11995 //  format %{ "CMPu   $op1,$op2" %}
11996 //  ins_cost(500);
11997 //  opcode(0x39);  /* Opcode 39 /r */
11998 //  ins_encode( OpcP, RegMem( op1, op2) );
11999 //%}
12000 
12001 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12002   match(Set cr (CmpU src zero));
12003 
12004   format %{ "TESTu  $src,$src" %}
12005   opcode(0x85);
12006   ins_encode( OpcP, RegReg( src, src ) );
12007   ins_pipe( ialu_cr_reg_imm );
12008 %}
12009 
12010 // Unsigned pointer compare Instructions
12011 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12012   match(Set cr (CmpP op1 op2));
12013 
12014   format %{ "CMPu   $op1,$op2" %}
12015   opcode(0x3B);  /* Opcode 3B /r */
12016   ins_encode( OpcP, RegReg( op1, op2) );
12017   ins_pipe( ialu_cr_reg_reg );
12018 %}
12019 
12020 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12021   match(Set cr (CmpP op1 op2));
12022 
12023   format %{ "CMPu   $op1,$op2" %}
12024   opcode(0x81,0x07);  /* Opcode 81 /7 */
12025   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12026   ins_pipe( ialu_cr_reg_imm );
12027 %}
12028 
12029 // // Cisc-spilled version of cmpP_eReg
12030 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12031   match(Set cr (CmpP op1 (LoadP op2)));
12032 
12033   format %{ "CMPu   $op1,$op2" %}
12034   ins_cost(500);
12035   opcode(0x3B);  /* Opcode 3B /r */
12036   ins_encode( OpcP, RegMem( op1, op2) );
12037   ins_pipe( ialu_cr_reg_mem );
12038 %}
12039 
12040 // // Cisc-spilled version of cmpP_eReg
12041 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12042 //  match(Set cr (CmpP (LoadP op1) op2));
12043 //
12044 //  format %{ "CMPu   $op1,$op2" %}
12045 //  ins_cost(500);
12046 //  opcode(0x39);  /* Opcode 39 /r */
12047 //  ins_encode( OpcP, RegMem( op1, op2) );
12048 //%}
12049 
12050 // Compare raw pointer (used in out-of-heap check).
12051 // Only works because non-oop pointers must be raw pointers
12052 // and raw pointers have no anti-dependencies.
12053 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12054   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12055   match(Set cr (CmpP op1 (LoadP op2)));
12056 
12057   format %{ "CMPu   $op1,$op2" %}
12058   opcode(0x3B);  /* Opcode 3B /r */
12059   ins_encode( OpcP, RegMem( op1, op2) );
12060   ins_pipe( ialu_cr_reg_mem );
12061 %}
12062 
12063 //
12064 // This will generate a signed flags result. This should be ok
12065 // since any compare to a zero should be eq/neq.
12066 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12067   match(Set cr (CmpP src zero));
12068 
12069   format %{ "TEST   $src,$src" %}
12070   opcode(0x85);
12071   ins_encode( OpcP, RegReg( src, src ) );
12072   ins_pipe( ialu_cr_reg_imm );
12073 %}
12074 
12075 // Cisc-spilled version of testP_reg
12076 // This will generate a signed flags result. This should be ok
12077 // since any compare to a zero should be eq/neq.
12078 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12079   match(Set cr (CmpP (LoadP op) zero));
12080 
12081   format %{ "TEST   $op,0xFFFFFFFF" %}
12082   ins_cost(500);
12083   opcode(0xF7);               /* Opcode F7 /0 */
12084   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12085   ins_pipe( ialu_cr_reg_imm );
12086 %}
12087 
12088 // Yanked all unsigned pointer compare operations.
12089 // Pointer compares are done with CmpP which is already unsigned.
12090 
12091 //----------Max and Min--------------------------------------------------------
12092 // Min Instructions
12093 ////
12094 //   *** Min and Max using the conditional move are slower than the
12095 //   *** branch version on a Pentium III.
12096 // // Conditional move for min
12097 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12098 //  effect( USE_DEF op2, USE op1, USE cr );
12099 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12100 //  opcode(0x4C,0x0F);
12101 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12102 //  ins_pipe( pipe_cmov_reg );
12103 //%}
12104 //
12105 //// Min Register with Register (P6 version)
12106 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12107 //  predicate(VM_Version::supports_cmov() );
12108 //  match(Set op2 (MinI op1 op2));
12109 //  ins_cost(200);
12110 //  expand %{
12111 //    eFlagsReg cr;
12112 //    compI_eReg(cr,op1,op2);
12113 //    cmovI_reg_lt(op2,op1,cr);
12114 //  %}
12115 //%}
12116 
12117 // Min Register with Register (generic version)
12118 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12119   match(Set dst (MinI dst src));
12120   effect(KILL flags);
12121   ins_cost(300);
12122 
12123   format %{ "MIN    $dst,$src" %}
12124   opcode(0xCC);
12125   ins_encode( min_enc(dst,src) );
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 // Max Register with Register
12130 //   *** Min and Max using the conditional move are slower than the
12131 //   *** branch version on a Pentium III.
12132 // // Conditional move for max
12133 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12134 //  effect( USE_DEF op2, USE op1, USE cr );
12135 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12136 //  opcode(0x4F,0x0F);
12137 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12138 //  ins_pipe( pipe_cmov_reg );
12139 //%}
12140 //
12141 // // Max Register with Register (P6 version)
12142 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12143 //  predicate(VM_Version::supports_cmov() );
12144 //  match(Set op2 (MaxI op1 op2));
12145 //  ins_cost(200);
12146 //  expand %{
12147 //    eFlagsReg cr;
12148 //    compI_eReg(cr,op1,op2);
12149 //    cmovI_reg_gt(op2,op1,cr);
12150 //  %}
12151 //%}
12152 
12153 // Max Register with Register (generic version)
12154 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12155   match(Set dst (MaxI dst src));
12156   effect(KILL flags);
12157   ins_cost(300);
12158 
12159   format %{ "MAX    $dst,$src" %}
12160   opcode(0xCC);
12161   ins_encode( max_enc(dst,src) );
12162   ins_pipe( pipe_slow );
12163 %}
12164 
12165 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
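// For example, with init = 0, limit = 10, stride = 3 the loop visits
// 0, 3, 6, 9, so the exact final value is 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 12.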
12169 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12170   match(Set limit (LoopLimit (Binary init limit) stride));
12171   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12172   ins_cost(300);
12173 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12175   ins_encode %{
12176     int strd = (int)$stride$$constant;
12177     assert(strd != 1 && strd != -1, "sanity");
12178     int m1 = (strd > 0) ? 1 : -1;
12179     // Convert limit to long (EAX:EDX)
12180     __ cdql();
12181     // Convert init to long (init:tmp)
12182     __ movl($tmp$$Register, $init$$Register);
12183     __ sarl($tmp$$Register, 31);
12184     // $limit - $init
12185     __ subl($limit$$Register, $init$$Register);
12186     __ sbbl($limit_hi$$Register, $tmp$$Register);
12187     // + ($stride - 1)
12188     if (strd > 0) {
12189       __ addl($limit$$Register, (strd - 1));
12190       __ adcl($limit_hi$$Register, 0);
12191       __ movl($tmp$$Register, strd);
12192     } else {
12193       __ addl($limit$$Register, (strd + 1));
12194       __ adcl($limit_hi$$Register, -1);
12195       __ lneg($limit_hi$$Register, $limit$$Register);
12196       __ movl($tmp$$Register, -strd);
12197     }
    // signed division: (EAX:EDX) / pos_stride
12199     __ idivl($tmp$$Register);
12200     if (strd < 0) {
12201       // restore sign
12202       __ negl($tmp$$Register);
12203     }
12204     // (EAX) * stride
12205     __ mull($tmp$$Register);
12206     // + init (ignore upper bits)
12207     __ addl($limit$$Register, $init$$Register);
12208   %}
12209   ins_pipe( pipe_slow );
12210 %}
12211 
12212 // ============================================================================
12213 // Branch Instructions
12214 // Jump Table
12215 instruct jumpXtnd(rRegI switch_val) %{
12216   match(Jump switch_val);
12217   ins_cost(350);
12218   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12219   ins_encode %{
12220     // Jump to Address(table_base + switch_reg)
12221     Address index(noreg, $switch_val$$Register, Address::times_1);
12222     __ jump(ArrayAddress($constantaddress, index));
12223   %}
12224   ins_pipe(pipe_jmp);
12225 %}
12226 
12227 // Jump Direct - Label defines a relative address from JMP+1
12228 instruct jmpDir(label labl) %{
12229   match(Goto);
12230   effect(USE labl);
12231 
12232   ins_cost(300);
12233   format %{ "JMP    $labl" %}
12234   size(5);
12235   ins_encode %{
12236     Label* L = $labl$$label;
12237     __ jmp(*L, false); // Always long jump
12238   %}
12239   ins_pipe( pipe_jmp );
12240 %}
12241 
12242 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12243 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12244   match(If cop cr);
12245   effect(USE labl);
12246 
12247   ins_cost(300);
12248   format %{ "J$cop    $labl" %}
12249   size(6);
12250   ins_encode %{
12251     Label* L = $labl$$label;
12252     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12253   %}
12254   ins_pipe( pipe_jcc );
12255 %}
12256 
12257 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12258 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12259   predicate(!n->has_vector_mask_set());
12260   match(CountedLoopEnd cop cr);
12261   effect(USE labl);
12262 
12263   ins_cost(300);
12264   format %{ "J$cop    $labl\t# Loop end" %}
12265   size(6);
12266   ins_encode %{
12267     Label* L = $labl$$label;
12268     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12269   %}
12270   ins_pipe( pipe_jcc );
12271 %}
12272 
12273 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12274 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12275   predicate(!n->has_vector_mask_set());
12276   match(CountedLoopEnd cop cmp);
12277   effect(USE labl);
12278 
12279   ins_cost(300);
12280   format %{ "J$cop,u  $labl\t# Loop end" %}
12281   size(6);
12282   ins_encode %{
12283     Label* L = $labl$$label;
12284     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12285   %}
12286   ins_pipe( pipe_jcc );
12287 %}
12288 
12289 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12290   predicate(!n->has_vector_mask_set());
12291   match(CountedLoopEnd cop cmp);
12292   effect(USE labl);
12293 
12294   ins_cost(200);
12295   format %{ "J$cop,u  $labl\t# Loop end" %}
12296   size(6);
12297   ins_encode %{
12298     Label* L = $labl$$label;
12299     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12300   %}
12301   ins_pipe( pipe_jcc );
12302 %}
12303 
12304 // mask version
12305 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12306 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12307   predicate(n->has_vector_mask_set());
12308   match(CountedLoopEnd cop cr);
12309   effect(USE labl);
12310 
12311   ins_cost(400);
12312   format %{ "J$cop    $labl\t# Loop end\n\t"
12313             "restorevectmask \t# vector mask restore for loops" %}
12314   size(10);
12315   ins_encode %{
12316     Label* L = $labl$$label;
12317     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12318     __ restorevectmask();
12319   %}
12320   ins_pipe( pipe_jcc );
12321 %}
12322 
12323 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12324 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12325   predicate(n->has_vector_mask_set());
12326   match(CountedLoopEnd cop cmp);
12327   effect(USE labl);
12328 
12329   ins_cost(400);
12330   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12331             "restorevectmask \t# vector mask restore for loops" %}
12332   size(10);
12333   ins_encode %{
12334     Label* L = $labl$$label;
12335     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12336     __ restorevectmask();
12337   %}
12338   ins_pipe( pipe_jcc );
12339 %}
12340 
12341 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12342   predicate(n->has_vector_mask_set());
12343   match(CountedLoopEnd cop cmp);
12344   effect(USE labl);
12345 
12346   ins_cost(300);
12347   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12348             "restorevectmask \t# vector mask restore for loops" %}
12349   size(10);
12350   ins_encode %{
12351     Label* L = $labl$$label;
12352     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12353     __ restorevectmask();
12354   %}
12355   ins_pipe( pipe_jcc );
12356 %}
12357 
12358 // Jump Direct Conditional - using unsigned comparison
12359 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12360   match(If cop cmp);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "J$cop,u  $labl" %}
12365   size(6);
12366   ins_encode %{
12367     Label* L = $labl$$label;
12368     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12369   %}
12370   ins_pipe(pipe_jcc);
12371 %}
12372 
12373 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12374   match(If cop cmp);
12375   effect(USE labl);
12376 
12377   ins_cost(200);
12378   format %{ "J$cop,u  $labl" %}
12379   size(6);
12380   ins_encode %{
12381     Label* L = $labl$$label;
12382     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12383   %}
12384   ins_pipe(pipe_jcc);
12385 %}
12386 
12387 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12388   match(If cop cmp);
12389   effect(USE labl);
12390 
12391   ins_cost(200);
12392   format %{ $$template
12393     if ($cop$$cmpcode == Assembler::notEqual) {
12394       $$emit$$"JP,u   $labl\n\t"
12395       $$emit$$"J$cop,u   $labl"
12396     } else {
12397       $$emit$$"JP,u   done\n\t"
12398       $$emit$$"J$cop,u   $labl\n\t"
12399       $$emit$$"done:"
12400     }
12401   %}
12402   ins_encode %{
12403     Label* l = $labl$$label;
12404     if ($cop$$cmpcode == Assembler::notEqual) {
12405       __ jcc(Assembler::parity, *l, false);
12406       __ jcc(Assembler::notEqual, *l, false);
12407     } else if ($cop$$cmpcode == Assembler::equal) {
12408       Label done;
12409       __ jccb(Assembler::parity, done);
12410       __ jcc(Assembler::equal, *l, false);
12411       __ bind(done);
12412     } else {
12413        ShouldNotReachHere();
12414     }
12415   %}
12416   ins_pipe(pipe_jcc);
12417 %}
12418 
12419 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
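// Roughly, the scan below is (a sketch, not the exact runtime code):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers[i] == super) {
//       sub->secondary_super_cache = super;   // hit: cache it, return zero
//       return 0;
//     }
//   }
//   return 1;                                 // miss: non-zero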
12424 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12425   match(Set result (PartialSubtypeCheck sub super));
12426   effect( KILL rcx, KILL cr );
12427 
12428   ins_cost(1100);  // slightly larger than the next version
12429   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12430             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12431             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12432             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12433             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12434             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12435             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12436      "miss:\t" %}
12437 
12438   opcode(0x1); // Force a XOR of EDI
12439   ins_encode( enc_PartialSubtypeCheck() );
12440   ins_pipe( pipe_slow );
12441 %}
12442 
12443 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12444   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12445   effect( KILL rcx, KILL result );
12446 
12447   ins_cost(1000);
12448   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12449             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12450             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12451             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12452             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12453             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12454      "miss:\t" %}
12455 
12456   opcode(0x0);  // No need to XOR EDI
12457   ins_encode( enc_PartialSubtypeCheck() );
12458   ins_pipe( pipe_slow );
12459 %}
12460 
12461 // ============================================================================
12462 // Branch Instructions -- short offset versions
12463 //
12464 // These instructions are used to replace jumps of a long offset (the default
12465 // match) with jumps of a shorter offset.  These instructions are all tagged
12466 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12467 // match rules in general matching.  Instead, the ADLC generates a conversion
12468 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether a
// branch is close enough to use the short form via the is_short_branch_offset()
// predicate in the machine-specific code section of the file.
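//
// Roughly (illustrative only, not the exact HotSpot signature), the distance
// check amounts to asking whether the target fits in the signed 8-bit
// displacement of a short Jcc/JMP, measured from the end of the instruction:
//
//   bool fits_short_branch(intptr_t branch_end_pc, intptr_t target_pc) {
//     intptr_t disp = target_pc - branch_end_pc;  // rel8 is relative to the next instruction
//     return disp >= -128 && disp <= 127;         // signed 8-bit range
//   }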
12472 
12473 // Jump Direct - Label defines a relative address from JMP+1
12474 instruct jmpDir_short(label labl) %{
12475   match(Goto);
12476   effect(USE labl);
12477 
12478   ins_cost(300);
12479   format %{ "JMP,s  $labl" %}
12480   size(2);
12481   ins_encode %{
12482     Label* L = $labl$$label;
12483     __ jmpb(*L);
12484   %}
12485   ins_pipe( pipe_jmp );
12486   ins_short_branch(1);
12487 %}
12488 
12489 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12490 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12491   match(If cop cr);
12492   effect(USE labl);
12493 
12494   ins_cost(300);
12495   format %{ "J$cop,s  $labl" %}
12496   size(2);
12497   ins_encode %{
12498     Label* L = $labl$$label;
12499     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12500   %}
12501   ins_pipe( pipe_jcc );
12502   ins_short_branch(1);
12503 %}
12504 
12505 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12506 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12507   match(CountedLoopEnd cop cr);
12508   effect(USE labl);
12509 
12510   ins_cost(300);
12511   format %{ "J$cop,s  $labl\t# Loop end" %}
12512   size(2);
12513   ins_encode %{
12514     Label* L = $labl$$label;
12515     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12516   %}
12517   ins_pipe( pipe_jcc );
12518   ins_short_branch(1);
12519 %}
12520 
12521 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12522 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12523   match(CountedLoopEnd cop cmp);
12524   effect(USE labl);
12525 
12526   ins_cost(300);
12527   format %{ "J$cop,us $labl\t# Loop end" %}
12528   size(2);
12529   ins_encode %{
12530     Label* L = $labl$$label;
12531     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12532   %}
12533   ins_pipe( pipe_jcc );
12534   ins_short_branch(1);
12535 %}
12536 
12537 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12538   match(CountedLoopEnd cop cmp);
12539   effect(USE labl);
12540 
12541   ins_cost(300);
12542   format %{ "J$cop,us $labl\t# Loop end" %}
12543   size(2);
12544   ins_encode %{
12545     Label* L = $labl$$label;
12546     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12547   %}
12548   ins_pipe( pipe_jcc );
12549   ins_short_branch(1);
12550 %}
12551 
12552 // Jump Direct Conditional - using unsigned comparison
12553 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12554   match(If cop cmp);
12555   effect(USE labl);
12556 
12557   ins_cost(300);
12558   format %{ "J$cop,us $labl" %}
12559   size(2);
12560   ins_encode %{
12561     Label* L = $labl$$label;
12562     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12563   %}
12564   ins_pipe( pipe_jcc );
12565   ins_short_branch(1);
12566 %}
12567 
12568 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12569   match(If cop cmp);
12570   effect(USE labl);
12571 
12572   ins_cost(300);
12573   format %{ "J$cop,us $labl" %}
12574   size(2);
12575   ins_encode %{
12576     Label* L = $labl$$label;
12577     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12578   %}
12579   ins_pipe( pipe_jcc );
12580   ins_short_branch(1);
12581 %}
12582 
12583 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12584   match(If cop cmp);
12585   effect(USE labl);
12586 
12587   ins_cost(300);
12588   format %{ $$template
12589     if ($cop$$cmpcode == Assembler::notEqual) {
12590       $$emit$$"JP,u,s   $labl\n\t"
12591       $$emit$$"J$cop,u,s   $labl"
12592     } else {
12593       $$emit$$"JP,u,s   done\n\t"
12594       $$emit$$"J$cop,u,s  $labl\n\t"
12595       $$emit$$"done:"
12596     }
12597   %}
12598   size(4);
12599   ins_encode %{
12600     Label* l = $labl$$label;
12601     if ($cop$$cmpcode == Assembler::notEqual) {
12602       __ jccb(Assembler::parity, *l);
12603       __ jccb(Assembler::notEqual, *l);
12604     } else if ($cop$$cmpcode == Assembler::equal) {
12605       Label done;
12606       __ jccb(Assembler::parity, done);
12607       __ jccb(Assembler::equal, *l);
12608       __ bind(done);
12609     } else {
12610        ShouldNotReachHere();
12611     }
12612   %}
12613   ins_pipe(pipe_jcc);
12614   ins_short_branch(1);
12615 %}
12616 
12617 // ============================================================================
12618 // Long Compare
12619 //
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test against zero we can check just the
// sign bit.  The GE test is the negated LT test.  The LE test can be had by
// commuting the operands and applying the GE test; negating that gives the GT
// test.  The EQ test is done by ORing the high and low halves and testing the
// result for zero; the NE test is the negation of that.
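//
// Illustrative C sketch of the zero-compare decompositions above (hi/lo are
// the two 32-bit halves of the 64-bit value; this is not HotSpot code):
//
//   bool lt_zero(int32_t hi, uint32_t lo) { return hi < 0; }                   // sign bit of the high half
//   bool ge_zero(int32_t hi, uint32_t lo) { return !lt_zero(hi, lo); }
//   bool eq_zero(int32_t hi, uint32_t lo) { return ((uint32_t)hi | lo) == 0; } // OR the halves
//   bool ne_zero(int32_t hi, uint32_t lo) { return !eq_zero(hi, lo); }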
12627 
12628 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12629 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12630 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12631 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12632 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the three-way (CmpL3) form of long-compare and both are
// very useful on IA-32, which has so few registers.
12637 
12638 // Manifest a CmpL result in an integer register.  Very painful.
12639 // This is the test to avoid.
12640 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12641   match(Set dst (CmpL3 src1 src2));
12642   effect( KILL flags );
12643   ins_cost(1000);
12644   format %{ "XOR    $dst,$dst\n\t"
12645             "CMP    $src1.hi,$src2.hi\n\t"
12646             "JLT,s  m_one\n\t"
12647             "JGT,s  p_one\n\t"
12648             "CMP    $src1.lo,$src2.lo\n\t"
12649             "JB,s   m_one\n\t"
12650             "JEQ,s  done\n"
12651     "p_one:\tINC    $dst\n\t"
12652             "JMP,s  done\n"
12653     "m_one:\tDEC    $dst\n"
12654      "done:" %}
12655   ins_encode %{
12656     Label p_one, m_one, done;
12657     __ xorptr($dst$$Register, $dst$$Register);
12658     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12659     __ jccb(Assembler::less,    m_one);
12660     __ jccb(Assembler::greater, p_one);
12661     __ cmpl($src1$$Register, $src2$$Register);
12662     __ jccb(Assembler::below,   m_one);
12663     __ jccb(Assembler::equal,   done);
12664     __ bind(p_one);
12665     __ incrementl($dst$$Register);
12666     __ jmpb(done);
12667     __ bind(m_one);
12668     __ decrementl($dst$$Register);
12669     __ bind(done);
12670   %}
12671   ins_pipe( pipe_slow );
12672 %}
12673 
12674 //======
12675 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12676 // compares.  Can be used for LE or GT compares by reversing arguments.
12677 // NOT GOOD FOR EQ/NE tests.
12678 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12679   match( Set flags (CmpL src zero ));
12680   ins_cost(100);
12681   format %{ "TEST   $src.hi,$src.hi" %}
12682   opcode(0x85);
12683   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12684   ins_pipe( ialu_cr_reg_reg );
12685 %}
12686 
12687 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12688 // compares.  Can be used for LE or GT compares by reversing arguments.
12689 // NOT GOOD FOR EQ/NE tests.
12690 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12691   match( Set flags (CmpL src1 src2 ));
12692   effect( TEMP tmp );
12693   ins_cost(300);
12694   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12695             "MOV    $tmp,$src1.hi\n\t"
12696             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12697   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12698   ins_pipe( ialu_cr_reg_reg );
12699 %}
12700 
// Long compares reg < zero/reg OR reg >= zero/reg.
12702 // Just a wrapper for a normal branch, plus the predicate test.
12703 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12704   match(If cmp flags);
12705   effect(USE labl);
12706   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12707   expand %{
12708     jmpCon(cmp,flags,labl);    // JLT or JGE...
12709   %}
12710 %}
12711 
12712 //======
12713 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12714 // compares.  Can be used for LE or GT compares by reversing arguments.
12715 // NOT GOOD FOR EQ/NE tests.
12716 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12717   match(Set flags (CmpUL src zero));
12718   ins_cost(100);
12719   format %{ "TEST   $src.hi,$src.hi" %}
12720   opcode(0x85);
12721   ins_encode(OpcP, RegReg_Hi2(src, src));
12722   ins_pipe(ialu_cr_reg_reg);
12723 %}
12724 
12725 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12726 // compares.  Can be used for LE or GT compares by reversing arguments.
12727 // NOT GOOD FOR EQ/NE tests.
12728 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12729   match(Set flags (CmpUL src1 src2));
12730   effect(TEMP tmp);
12731   ins_cost(300);
12732   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12733             "MOV    $tmp,$src1.hi\n\t"
12734             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12735   ins_encode(long_cmp_flags2(src1, src2, tmp));
12736   ins_pipe(ialu_cr_reg_reg);
12737 %}
12738 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12740 // Just a wrapper for a normal branch, plus the predicate test.
12741 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12742   match(If cmp flags);
12743   effect(USE labl);
12744   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12745   expand %{
12746     jmpCon(cmp, flags, labl);    // JLT or JGE...
12747   %}
12748 %}
12749 
12750 // Compare 2 longs and CMOVE longs.
12751 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12752   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12753   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12754   ins_cost(400);
12755   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12756             "CMOV$cmp $dst.hi,$src.hi" %}
12757   opcode(0x0F,0x40);
12758   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12759   ins_pipe( pipe_cmov_reg_long );
12760 %}
12761 
12762 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12763   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12764   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12765   ins_cost(500);
12766   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12767             "CMOV$cmp $dst.hi,$src.hi" %}
12768   opcode(0x0F,0x40);
12769   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12770   ins_pipe( pipe_cmov_reg_long );
12771 %}
12772 
12773 // Compare 2 longs and CMOVE ints.
12774 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12775   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12776   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12777   ins_cost(200);
12778   format %{ "CMOV$cmp $dst,$src" %}
12779   opcode(0x0F,0x40);
12780   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12781   ins_pipe( pipe_cmov_reg );
12782 %}
12783 
12784 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12785   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12786   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12787   ins_cost(250);
12788   format %{ "CMOV$cmp $dst,$src" %}
12789   opcode(0x0F,0x40);
12790   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12791   ins_pipe( pipe_cmov_mem );
12792 %}
12793 
// Compare 2 longs and CMOVE ptrs.
12795 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12796   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12797   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12798   ins_cost(200);
12799   format %{ "CMOV$cmp $dst,$src" %}
12800   opcode(0x0F,0x40);
12801   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12802   ins_pipe( pipe_cmov_reg );
12803 %}
12804 
12805 // Compare 2 longs and CMOVE doubles
12806 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12808   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12809   ins_cost(200);
12810   expand %{
12811     fcmovDPR_regS(cmp,flags,dst,src);
12812   %}
12813 %}
12814 
12815 // Compare 2 longs and CMOVE doubles
12816 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12818   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12819   ins_cost(200);
12820   expand %{
12821     fcmovD_regS(cmp,flags,dst,src);
12822   %}
12823 %}
12824 
12825 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12827   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12828   ins_cost(200);
12829   expand %{
12830     fcmovFPR_regS(cmp,flags,dst,src);
12831   %}
12832 %}
12833 
12834 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12836   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12837   ins_cost(200);
12838   expand %{
12839     fcmovF_regS(cmp,flags,dst,src);
12840   %}
12841 %}
12842 
12843 //======
12844 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12845 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12846   match( Set flags (CmpL src zero ));
12847   effect(TEMP tmp);
12848   ins_cost(200);
12849   format %{ "MOV    $tmp,$src.lo\n\t"
12850             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12851   ins_encode( long_cmp_flags0( src, tmp ) );
12852   ins_pipe( ialu_reg_reg_long );
12853 %}
12854 
12855 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12856 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12857   match( Set flags (CmpL src1 src2 ));
12858   ins_cost(200+300);
12859   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12860             "JNE,s  skip\n\t"
12861             "CMP    $src1.hi,$src2.hi\n\t"
12862      "skip:\t" %}
12863   ins_encode( long_cmp_flags1( src1, src2 ) );
12864   ins_pipe( ialu_cr_reg_reg );
12865 %}
12866 
12867 // Long compare reg == zero/reg OR reg != zero/reg
12868 // Just a wrapper for a normal branch, plus the predicate test.
12869 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12870   match(If cmp flags);
12871   effect(USE labl);
12872   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12873   expand %{
12874     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12875   %}
12876 %}
12877 
12878 //======
12879 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12880 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12881   match(Set flags (CmpUL src zero));
12882   effect(TEMP tmp);
12883   ins_cost(200);
12884   format %{ "MOV    $tmp,$src.lo\n\t"
12885             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12886   ins_encode(long_cmp_flags0(src, tmp));
12887   ins_pipe(ialu_reg_reg_long);
12888 %}
12889 
12890 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12891 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12892   match(Set flags (CmpUL src1 src2));
12893   ins_cost(200+300);
12894   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12895             "JNE,s  skip\n\t"
12896             "CMP    $src1.hi,$src2.hi\n\t"
12897      "skip:\t" %}
12898   ins_encode(long_cmp_flags1(src1, src2));
12899   ins_pipe(ialu_cr_reg_reg);
12900 %}
12901 
12902 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12903 // Just a wrapper for a normal branch, plus the predicate test.
12904 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12905   match(If cmp flags);
12906   effect(USE labl);
12907   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12908   expand %{
12909     jmpCon(cmp, flags, labl);    // JEQ or JNE...
12910   %}
12911 %}
12912 
12913 // Compare 2 longs and CMOVE longs.
12914 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12915   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12916   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12917   ins_cost(400);
12918   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12919             "CMOV$cmp $dst.hi,$src.hi" %}
12920   opcode(0x0F,0x40);
12921   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12922   ins_pipe( pipe_cmov_reg_long );
12923 %}
12924 
12925 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12926   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12927   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12928   ins_cost(500);
12929   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12930             "CMOV$cmp $dst.hi,$src.hi" %}
12931   opcode(0x0F,0x40);
12932   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12933   ins_pipe( pipe_cmov_reg_long );
12934 %}
12935 
12936 // Compare 2 longs and CMOVE ints.
12937 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12938   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12939   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12940   ins_cost(200);
12941   format %{ "CMOV$cmp $dst,$src" %}
12942   opcode(0x0F,0x40);
12943   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12944   ins_pipe( pipe_cmov_reg );
12945 %}
12946 
12947 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12948   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12949   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12950   ins_cost(250);
12951   format %{ "CMOV$cmp $dst,$src" %}
12952   opcode(0x0F,0x40);
12953   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12954   ins_pipe( pipe_cmov_mem );
12955 %}
12956 
// Compare 2 longs and CMOVE ptrs.
12958 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12959   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12960   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12961   ins_cost(200);
12962   format %{ "CMOV$cmp $dst,$src" %}
12963   opcode(0x0F,0x40);
12964   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12965   ins_pipe( pipe_cmov_reg );
12966 %}
12967 
12968 // Compare 2 longs and CMOVE doubles
12969 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12971   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12972   ins_cost(200);
12973   expand %{
12974     fcmovDPR_regS(cmp,flags,dst,src);
12975   %}
12976 %}
12977 
12978 // Compare 2 longs and CMOVE doubles
12979 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12981   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12982   ins_cost(200);
12983   expand %{
12984     fcmovD_regS(cmp,flags,dst,src);
12985   %}
12986 %}
12987 
12988 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12990   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12991   ins_cost(200);
12992   expand %{
12993     fcmovFPR_regS(cmp,flags,dst,src);
12994   %}
12995 %}
12996 
12997 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12999   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13000   ins_cost(200);
13001   expand %{
13002     fcmovF_regS(cmp,flags,dst,src);
13003   %}
13004 %}
13005 
13006 //======
13007 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13008 // Same as cmpL_reg_flags_LEGT except must negate src
13009 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13010   match( Set flags (CmpL src zero ));
13011   effect( TEMP tmp );
13012   ins_cost(300);
13013   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13014             "CMP    $tmp,$src.lo\n\t"
13015             "SBB    $tmp,$src.hi\n\t" %}
13016   ins_encode( long_cmp_flags3(src, tmp) );
13017   ins_pipe( ialu_reg_reg_long );
13018 %}
13019 
13020 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13021 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13022 // requires a commuted test to get the same result.
13023 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13024   match( Set flags (CmpL src1 src2 ));
13025   effect( TEMP tmp );
13026   ins_cost(300);
13027   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13028             "MOV    $tmp,$src2.hi\n\t"
13029             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13030   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13031   ins_pipe( ialu_cr_reg_reg );
13032 %}
13033 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
13036 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13037   match(If cmp flags);
13038   effect(USE labl);
13039   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13040   ins_cost(300);
13041   expand %{
13042     jmpCon(cmp,flags,labl);    // JGT or JLE...
13043   %}
13044 %}
13045 
13046 //======
13047 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13048 // Same as cmpUL_reg_flags_LEGT except must negate src
13049 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13050   match(Set flags (CmpUL src zero));
13051   effect(TEMP tmp);
13052   ins_cost(300);
13053   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13054             "CMP    $tmp,$src.lo\n\t"
13055             "SBB    $tmp,$src.hi\n\t" %}
13056   ins_encode(long_cmp_flags3(src, tmp));
13057   ins_pipe(ialu_reg_reg_long);
13058 %}
13059 
13060 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13061 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13062 // requires a commuted test to get the same result.
13063 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13064   match(Set flags (CmpUL src1 src2));
13065   effect(TEMP tmp);
13066   ins_cost(300);
13067   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13068             "MOV    $tmp,$src2.hi\n\t"
13069             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13070   ins_encode(long_cmp_flags2( src2, src1, tmp));
13071   ins_pipe(ialu_cr_reg_reg);
13072 %}
13073 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
13076 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13077   match(If cmp flags);
13078   effect(USE labl);
13079   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13080   ins_cost(300);
13081   expand %{
13082     jmpCon(cmp, flags, labl);    // JGT or JLE...
13083   %}
13084 %}
13085 
13086 // Compare 2 longs and CMOVE longs.
13087 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13088   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13089   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13090   ins_cost(400);
13091   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13092             "CMOV$cmp $dst.hi,$src.hi" %}
13093   opcode(0x0F,0x40);
13094   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13095   ins_pipe( pipe_cmov_reg_long );
13096 %}
13097 
13098 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13099   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13100   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13101   ins_cost(500);
13102   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13103             "CMOV$cmp $dst.hi,$src.hi+4" %}
13104   opcode(0x0F,0x40);
13105   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13106   ins_pipe( pipe_cmov_reg_long );
13107 %}
13108 
13109 // Compare 2 longs and CMOVE ints.
13110 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13111   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13112   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13113   ins_cost(200);
13114   format %{ "CMOV$cmp $dst,$src" %}
13115   opcode(0x0F,0x40);
13116   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13117   ins_pipe( pipe_cmov_reg );
13118 %}
13119 
13120 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13121   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13122   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13123   ins_cost(250);
13124   format %{ "CMOV$cmp $dst,$src" %}
13125   opcode(0x0F,0x40);
13126   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13127   ins_pipe( pipe_cmov_mem );
13128 %}
13129 
13130 // Compare 2 longs and CMOVE ptrs.
13131 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13133   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13134   ins_cost(200);
13135   format %{ "CMOV$cmp $dst,$src" %}
13136   opcode(0x0F,0x40);
13137   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13138   ins_pipe( pipe_cmov_reg );
13139 %}
13140 
13141 // Compare 2 longs and CMOVE doubles
13142 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13144   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13145   ins_cost(200);
13146   expand %{
13147     fcmovDPR_regS(cmp,flags,dst,src);
13148   %}
13149 %}
13150 
13151 // Compare 2 longs and CMOVE doubles
13152 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13154   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13155   ins_cost(200);
13156   expand %{
13157     fcmovD_regS(cmp,flags,dst,src);
13158   %}
13159 %}
13160 
13161 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13163   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13164   ins_cost(200);
13165   expand %{
13166     fcmovFPR_regS(cmp,flags,dst,src);
13167   %}
13168 %}
13169 
13170 
13171 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13173   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13174   ins_cost(200);
13175   expand %{
13176     fcmovF_regS(cmp,flags,dst,src);
13177   %}
13178 %}
13179 
13180 
13181 // ============================================================================
13182 // Procedure Call/Return Instructions
13183 // Call Java Static Instruction
13184 // Note: If this code changes, the corresponding ret_addr_offset() and
13185 //       compute_padding() functions will have to be adjusted.
13186 instruct CallStaticJavaDirect(method meth) %{
13187   match(CallStaticJava);
13188   effect(USE meth);
13189 
13190   ins_cost(300);
13191   format %{ "CALL,static " %}
13192   opcode(0xE8); /* E8 cd */
13193   ins_encode( pre_call_resets,
13194               Java_Static_Call( meth ),
13195               call_epilog,
13196               post_call_FPU );
13197   ins_pipe( pipe_slow );
13198   ins_alignment(4);
13199 %}
13200 
13201 // Call Java Dynamic Instruction
13202 // Note: If this code changes, the corresponding ret_addr_offset() and
13203 //       compute_padding() functions will have to be adjusted.
13204 instruct CallDynamicJavaDirect(method meth) %{
13205   match(CallDynamicJava);
13206   effect(USE meth);
13207 
13208   ins_cost(300);
13209   format %{ "MOV    EAX,(oop)-1\n\t"
13210             "CALL,dynamic" %}
13211   opcode(0xE8); /* E8 cd */
13212   ins_encode( pre_call_resets,
13213               Java_Dynamic_Call( meth ),
13214               call_epilog,
13215               post_call_FPU );
13216   ins_pipe( pipe_slow );
13217   ins_alignment(4);
13218 %}
13219 
13220 // Call Runtime Instruction
13221 instruct CallRuntimeDirect(method meth) %{
13222   match(CallRuntime );
13223   effect(USE meth);
13224 
13225   ins_cost(300);
13226   format %{ "CALL,runtime " %}
13227   opcode(0xE8); /* E8 cd */
13228   // Use FFREEs to clear entries in float stack
13229   ins_encode( pre_call_resets,
13230               FFree_Float_Stack_All,
13231               Java_To_Runtime( meth ),
13232               post_call_FPU );
13233   ins_pipe( pipe_slow );
13234 %}
13235 
13236 // Call runtime without safepoint
13237 instruct CallLeafDirect(method meth) %{
13238   match(CallLeaf);
13239   effect(USE meth);
13240 
13241   ins_cost(300);
13242   format %{ "CALL_LEAF,runtime " %}
13243   opcode(0xE8); /* E8 cd */
13244   ins_encode( pre_call_resets,
13245               FFree_Float_Stack_All,
13246               Java_To_Runtime( meth ),
13247               Verify_FPU_For_Leaf, post_call_FPU );
13248   ins_pipe( pipe_slow );
13249 %}
13250 
13251 instruct CallLeafNoFPDirect(method meth) %{
13252   match(CallLeafNoFP);
13253   effect(USE meth);
13254 
13255   ins_cost(300);
13256   format %{ "CALL_LEAF_NOFP,runtime " %}
13257   opcode(0xE8); /* E8 cd */
13258   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13259   ins_pipe( pipe_slow );
13260 %}
13261 
13262 
13263 // Return Instruction
13264 // Remove the return address & jump to it.
13265 instruct Ret() %{
13266   match(Return);
13267   format %{ "RET" %}
13268   opcode(0xC3);
13269   ins_encode(OpcP);
13270   ins_pipe( pipe_jmp );
13271 %}
13272 
13273 // Tail Call; Jump from runtime stub to Java code.
13274 // Also known as an 'interprocedural jump'.
13275 // Target of jump will eventually return to caller.
13276 // TailJump below removes the return address.
13277 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13278   match(TailCall jump_target method_oop );
13279   ins_cost(300);
13280   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13281   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13282   ins_encode( OpcP, RegOpc(jump_target) );
13283   ins_pipe( pipe_jmp );
13284 %}
13285 
13286 
13287 // Tail Jump; remove the return address; jump to target.
13288 // TailCall above leaves the return address around.
13289 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13290   match( TailJump jump_target ex_oop );
13291   ins_cost(300);
13292   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13293             "JMP    $jump_target " %}
13294   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13295   ins_encode( enc_pop_rdx,
13296               OpcP, RegOpc(jump_target) );
13297   ins_pipe( pipe_jmp );
13298 %}
13299 
13300 // Create exception oop: created by stack-crawling runtime code.
13301 // Created exception is now available to this handler, and is setup
13302 // just prior to jumping to this handler.  No code emitted.
13303 instruct CreateException( eAXRegP ex_oop )
13304 %{
13305   match(Set ex_oop (CreateEx));
13306 
13307   size(0);
13308   // use the following format syntax
13309   format %{ "# exception oop is in EAX; no code emitted" %}
13310   ins_encode();
13311   ins_pipe( empty );
13312 %}
13313 
13314 
13315 // Rethrow exception:
13316 // The exception oop will come in the first argument position.
13317 // Then JUMP (not call) to the rethrow stub code.
13318 instruct RethrowException()
13319 %{
13320   match(Rethrow);
13321 
13322   // use the following format syntax
13323   format %{ "JMP    rethrow_stub" %}
13324   ins_encode(enc_rethrow);
13325   ins_pipe( pipe_jmp );
13326 %}
13327 
13328 // inlined locking and unlocking
13329 
13330 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13331   predicate(Compile::current()->use_rtm());
13332   match(Set cr (FastLock object box));
13333   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13334   ins_cost(300);
13335   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13336   ins_encode %{
13337     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13338                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13339                  _counters, _rtm_counters, _stack_rtm_counters,
13340                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13341                  true, ra_->C->profile_rtm());
13342   %}
13343   ins_pipe(pipe_slow);
13344 %}
13345 
13346 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13347   predicate(!Compile::current()->use_rtm());
13348   match(Set cr (FastLock object box));
13349   effect(TEMP tmp, TEMP scr, USE_KILL box);
13350   ins_cost(300);
13351   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13352   ins_encode %{
13353     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13354                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13355   %}
13356   ins_pipe(pipe_slow);
13357 %}
13358 
13359 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13360   match(Set cr (FastUnlock object box));
13361   effect(TEMP tmp, USE_KILL box);
13362   ins_cost(300);
13363   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13364   ins_encode %{
13365     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13366   %}
13367   ins_pipe(pipe_slow);
13368 %}
13369 
13370 
13371 
13372 // ============================================================================
13373 // Safepoint Instruction
13374 instruct safePoint_poll(eFlagsReg cr) %{
13375   predicate(SafepointMechanism::uses_global_page_poll());
13376   match(SafePoint);
13377   effect(KILL cr);
13378 
13379   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13380   // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on set 0 of the data cache.  Because of
  // alignment (just like the situation at hand) the lower cache indices tend
  // to see more traffic.  It would be better to change the polling address
  // to offset 0 of the last cache line in the polling page.
13386 
13387   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13388   ins_cost(125);
  size(6);
13390   ins_encode( Safepoint_Poll() );
13391   ins_pipe( ialu_reg_mem );
13392 %}
13393 
13394 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13395   predicate(SafepointMechanism::uses_thread_local_poll());
13396   match(SafePoint poll);
13397   effect(KILL cr, USE poll);
13398 
13399   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13400   ins_cost(125);
13401   // EBP would need size(3)
13402   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13403   ins_encode %{
13404     __ relocate(relocInfo::poll_type);
13405     address pre_pc = __ pc();
13406     __ testl(rax, Address($poll$$Register, 0));
13407     address post_pc = __ pc();
13408     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13409   %}
13410   ins_pipe(ialu_reg_mem);
13411 %}
13412 
13413 
13414 // ============================================================================
13415 // This name is KNOWN by the ADLC and cannot be changed.
13416 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13417 // for this guy.
13418 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13419   match(Set dst (ThreadLocal));
13420   effect(DEF dst, KILL cr);
13421 
13422   format %{ "MOV    $dst, Thread::current()" %}
13423   ins_encode %{
13424     Register dstReg = as_Register($dst$$reg);
13425     __ get_thread(dstReg);
13426   %}
13427   ins_pipe( ialu_reg_fat );
13428 %}
13429 
13430 
13431 
13432 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13435 //
13436 // peepmatch ( root_instr_name [preceding_instruction]* );
13437 //
13438 // peepconstraint %{
13439 // (instruction_number.operand_name relational_op instruction_number.operand_name
13440 //  [, ...] );
13441 // // instruction numbers are zero-based using left to right order in peepmatch
13442 //
13443 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13444 // // provide an instruction_number.operand_name for each operand that appears
13445 // // in the replacement instruction's match rule
13446 //
13447 // ---------VM FLAGS---------------------------------------------------------
13448 //
13449 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13450 //
13451 // Each peephole rule is given an identifying number starting with zero and
13452 // increasing by one in the order seen by the parser.  An individual peephole
13453 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13454 // on the command-line.
13455 //
13456 // ---------CURRENT LIMITATIONS----------------------------------------------
13457 //
13458 // Only match adjacent instructions in same basic block
13459 // Only equality constraints
13460 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13461 // Only one replacement instruction
13462 //
13463 // ---------EXAMPLE----------------------------------------------------------
13464 //
13465 // // pertinent parts of existing instructions in architecture description
13466 // instruct movI(rRegI dst, rRegI src) %{
13467 //   match(Set dst (CopyI src));
13468 // %}
13469 //
13470 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13471 //   match(Set dst (AddI dst src));
13472 //   effect(KILL cr);
13473 // %}
13474 //
13475 // // Change (inc mov) to lea
13476 // peephole %{
//   // increment preceded by register-register move
13478 //   peepmatch ( incI_eReg movI );
13479 //   // require that the destination register of the increment
13480 //   // match the destination register of the move
13481 //   peepconstraint ( 0.dst == 1.dst );
13482 //   // construct a replacement instruction that sets
13483 //   // the destination to ( move's source register + one )
13484 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13485 // %}
13486 //
// Implementation no longer uses movX instructions since
// the machine-independent system no longer uses CopyX nodes.
13489 //
13490 // peephole %{
13491 //   peepmatch ( incI_eReg movI );
13492 //   peepconstraint ( 0.dst == 1.dst );
13493 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13494 // %}
13495 //
13496 // peephole %{
13497 //   peepmatch ( decI_eReg movI );
13498 //   peepconstraint ( 0.dst == 1.dst );
13499 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13500 // %}
13501 //
13502 // peephole %{
13503 //   peepmatch ( addI_eReg_imm movI );
13504 //   peepconstraint ( 0.dst == 1.dst );
13505 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13506 // %}
13507 //
13508 // peephole %{
13509 //   peepmatch ( addP_eReg_imm movP );
13510 //   peepconstraint ( 0.dst == 1.dst );
13511 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13512 // %}
13513 
13514 // // Change load of spilled value to only a spill
13515 // instruct storeI(memory mem, rRegI src) %{
13516 //   match(Set mem (StoreI mem src));
13517 // %}
13518 //
13519 // instruct loadI(rRegI dst, memory mem) %{
13520 //   match(Set dst (LoadI mem));
13521 // %}
13522 //
13523 peephole %{
13524   peepmatch ( loadI storeI );
13525   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13526   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13527 %}
13528 
13529 //----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.