1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially: it is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode.  During emission the FPU stack is
// pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint.  This same numbering weirdness forces the instruction
// encoding to play games with the register encode to correct for the
// 0/1 issue.  See MachSpillCopyNode::implementation, where it does
// flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
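// For example (illustrative only): the long pairs used by this file are
// EDX:EAX, EBX:ECX, and EDI:EBP (see the alloc_class comment above), so the
// low halves have encodings 0, 1, and 5, and HIGH_FROM_LOW maps them to the
// matching high halves EDX (2), EBX (3), and EDI (7).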
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
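// Illustrative use of the pools above (the intended users are the NegF/NegD
// and AbsF/AbsD operations mentioned in the comment above): a single-precision
// negate can be implemented as XORPS with the 0x80000000 pattern in
// float_signflip_pool, and a single-precision abs as ANDPS with the 0x7FFFFFFF
// pattern in float_signmask_pool; the double variants use the 64-bit masks
// with XORPD/ANDPD.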
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
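// For reference (a sketch of where the constants above come from): the 6 bytes
// are an FLDCW with a 32-bit absolute address (D9 /5 + disp32) and the 3 bytes
// are VZEROUPPER (C5 F8 77).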
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
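// A sketch of where those constants come from (assuming the standard IA-32
// encodings used below): a static call is E8 plus a 32-bit displacement, i.e.
// 5 bytes; a dynamic call is preceded by a 5-byte MOV EAX,imm32 that loads the
// inline-cache holder (the "MOV instruction" skipped in compute_padding below),
// giving 10 bytes from the start of the sequence to the return address.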
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
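// Illustrative example (not emitted here): a register-to-register MOV EAX,ECX
// is opcode 0x8B followed by the ModRM byte
//   emit_rm(cbuf, 0x3, EAX_enc, ECX_enc)  ==  (0x3 << 6) | (0 << 3) | 1  ==  0xC1
// i.e. the byte sequence 8B C1, which is what encode_Copy() below produces.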
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
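// Worked example (illustrative only): FILD DWORD PTR [ESP+8] is DB /0, so
//   store_to_stackslot(cbuf, 0xDB, 0x0, 8)
// emits DB 44 24 08: opcode, ModRM (mod=01, reg=0, r/m=ESP so a SIB follows),
// the ESP-based SIB byte 0x24, and the 8-bit displacement.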
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
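// Two illustrative calls (for exposition only), both with an 8-bit displacement:
//   encode_RegMem(cbuf, EAX_enc, EBX_enc, 0x4, 0, 8, relocInfo::none)
//     -> 43 08          (no SIB: ModRM mod=01, reg=EAX, r/m=EBX, then disp8)
//   encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 8, relocInfo::none)
//     -> 44 24 08       (ESP as base always needs the SIB byte 0x24)
// Prefixed with opcode 8B these are MOV EAX,[EBX+8] and MOV EAX,[ESP+8].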
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
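// In other words, dst ends up with the usual three-way compare result for the
// flags set by the preceding FP compare: -1 if unordered (parity set) or
// below (carry set), 0 if equal, and +1 otherwise.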
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before the MachConstantBaseNode itself.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 726   framesize -= 2*wordSize;
 727 
  size++; // popl rbp
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
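// The size accounting above assumes the [ESP + disp] form emitted here:
// 1 opcode byte + 1 ModRM byte + 1 SIB byte (ESP as a base always requires
// one), plus 0, 1, or 4 displacement bytes depending on the offset.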
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single byte displacement.
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
 851     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
 863     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
 891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
 909     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
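
// The LEA above is 0x8D + ModRM + SIB + disp8 = 4 bytes for small offsets and
// 0x8D + ModRM + SIB + disp32 = 7 bytes otherwise; size() below reports
// exactly these lengths.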
1350 
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1381   /* WARNING these NOPs are critical so that verified entry point is properly
1382      aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
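
// The unverified entry point is a fixed size: CMP EAX,[ECX+4] (3 bytes) +
// JNE rel32 (6 bytes) + 2 or 3 NOPs, which is what size() below reports.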
1390 
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
1402 // This is UltraSparc specific; true just means we have fast l2f conversion.
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1412   // The passed offset is relative to the address of the branch.
1413   // On x86 a branch displacement is calculated relative to the address
1414   // of the next instruction.
1415   offset -= br_size;
1416 
1417   // The short version of jmpConUCF2 contains multiple branches,
1418   // making the reach slightly less.
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
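
// Example: a 2-byte short branch at code offset 0 whose target is at offset
// 100 arrives here with offset == 100; after subtracting br_size the rel8
// displacement is 98, which is within [-128, 127].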
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
1425   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1426   return false;
1427 }
1428 
1429 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1430 const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
1438 // Does the CPU require late expand (see block.cpp for description of late expand)?
1439 const bool Matcher::require_postalloc_expand = false;
1440 
1441 // Do we need to mask the count passed to shift instructions or does
1442 // the cpu only look at the lower 5/6 bits anyway?
1443 const bool Matcher::need_masked_shift_count = false;
1444 
1445 bool Matcher::narrow_oop_use_complex_address() {
1446   ShouldNotCallThis();
1447   return true;
1448 }
1449 
1450 bool Matcher::narrow_klass_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 
1456 // Is it better to copy float constants, or load them directly from memory?
1457 // Intel can load a float constant from a direct address, requiring no
1458 // extra registers.  Most RISCs will have to materialize an address into a
1459 // register first, so they would do better to copy the constant from stack.
1460 const bool Matcher::rematerialize_float_constants = true;
1461 
1462 // If CPU can load and store mis-aligned doubles directly then no fixup is
1463 // needed.  Else we split the double into 2 integer pieces and move it
1464 // piece-by-piece.  Only happens when passing doubles into C code as the
1465 // Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Get the memory operand from the node
1471   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1472   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1473   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1474   uint opcnt     = 1;                 // First operand
1475   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1476   while( idx >= skipped+num_edges ) {
1477     skipped += num_edges;
1478     opcnt++;                          // Bump operand count
1479     assert( opcnt < numopnds, "Accessing non-existent operand" );
1480     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1481   }
1482 
1483   MachOper *memory = node->_opnds[opcnt];
1484   MachOper *new_memory = NULL;
1485   switch (memory->opcode()) {
1486   case DIRECT:
1487   case INDOFFSET32X:
1488     // No transformation necessary.
1489     return;
1490   case INDIRECT:
1491     new_memory = new indirect_win95_safeOper( );
1492     break;
1493   case INDOFFSET8:
1494     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDOFFSET32:
1497     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXOFFSET:
1500     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1501     break;
1502   case INDINDEXSCALE:
1503     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1504     break;
1505   case INDINDEXSCALEOFFSET:
1506     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1507     break;
1508   case LOAD_LONG_INDIRECT:
1509   case LOAD_LONG_INDOFFSET32:
1510     // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1511     return;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[opcnt] = new_memory;
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1522 
1523 // Are floats converted to double when stored to the stack during deoptimization?
1524 // On x86_32 they are stored with conversion only when the FPU is used for floats.
1525 bool Matcher::float_in_double() { return (UseSSE == 0); }
1526 
1527 // Do ints take an entire long register or just half?
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Return whether or not this register is ever used as an argument.  This
1531 // function is used on startup to build the trampoline stubs in generateOptoStub.
1532 // Registers not mentioned will be killed by the VM call in the trampoline, and
1533 // arguments in those registers will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1536   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1537   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1538   return false;
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546   // Use hardware integer DIV instruction when
1547   // it is faster than code which uses multiply.
1548   // Only when the constant divisor fits into 32 bits
1549   // (min_jint is excluded because negating it does not
1550   // yield a correct positive 32 bit value).
1551   return VM_Version::has_fast_idiv() &&
1552          (divisor == (int)divisor && divisor != min_jint);
1553 }
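
// Example: a divisor of 1000000007L passes both checks, 0x100000000L fails
// the 32-bit check, and min_jint is rejected explicitly.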
1554 
1555 // Register for DIVI projection of divmodI
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register for MODI projection of divmodI
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 // Register for MODL projection of divmodL
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();
1574   return RegMask();
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1578   return NO_REG_mask();
1579 }
1580 
1581 // Returns true if the high 32 bits of the value is known to be zero.
1582 bool is_operand_hi32_zero(Node* n) {
1583   int opc = n->Opcode();
1584   if (opc == Op_AndL) {
1585     Node* o2 = n->in(2);
1586     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587       return true;
1588     }
1589   }
1590   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1591     return true;
1592   }
1593   return false;
1594 }
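
// Example: (AndL x (ConL 0xFFFFFFFF)) and a small ConL such as 5 qualify;
// a ConL like 0x100000000 does not, since its high word is non-zero.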
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
1603 // interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (i.e., the encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit primary opcode
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit secondary opcode
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit opcode directly
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{
1643     emit_opcode(cbuf,0x66);
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1651     emit_opcode(cbuf,$opcode$$constant);
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : rax: dividend                          min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: rax: quotient  (= rax idiv reg)        min_int
1670     //         rdx: remainder (= rax irem reg)        0
1671     //
1672     //  Code sequence:
1673     //
1674     //  81 F8 00 00 00 80    cmp         rax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         rdx,edx
1677     //  83 F9 FF             cmp         rcx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        rax,ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // done:
1699   %}
1700 
1701   // Dense encoding for older common ops
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
1707   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // Check for 8-bit immediate, and set sign extend bit in opcode
1710     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1711       emit_opcode(cbuf, $primary | 0x02);
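      // For the group-1 ALU opcodes this typically turns 0x81 (imm32 form)
      // into 0x83, the sign-extended imm8 form; Con8or32 below emits an
      // immediate of the matching width.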
1712     }
1713     else {                          // If 32-bit immediate
1714       emit_opcode(cbuf, $primary);
1715     }
1716   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Emit primary opcode and set sign-extend bit
1720     // Check for 8-bit immediate, and set sign extend bit in opcode
1721     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1722       emit_opcode(cbuf, $primary | 0x02);
    }
1723     else {                          // If 32-bit immediate
1724       emit_opcode(cbuf, $primary);
1725     }
1726     // Emit r/m byte with secondary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1728   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Check for 8-bit immediate, and set sign extend bit in opcode
1732     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1733       $$$emit8$imm$$constant;
1734     }
1735     else {                          // If 32-bit immediate
1736       // Output immediate
1737       $$$emit32$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Emit primary opcode and set sign-extend bit
1743     // Check for 8-bit immediate, and set sign extend bit in opcode
1744     int con = (int)$imm$$constant; // Throw away top bits
1745     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1746     // Emit r/m byte with secondary opcode, after primary opcode.
1747     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1748     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1749     else                               emit_d32(cbuf,con);
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // Emit primary opcode and set sign-extend bit
1754     // Check for 8-bit immediate, and set sign extend bit in opcode
1755     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1756     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1757     // Emit r/m byte with tertiary opcode, after primary opcode.
1758     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1759     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1760     else                               emit_d32(cbuf,con);
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     int destlo = $dst$$reg;
1769     int desthi = HIGH_FROM_LOW(destlo);
1770     // bswap lo
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, destlo);
1773     // bswap hi
1774     emit_opcode(cbuf, 0x0F);
1775     emit_cc(cbuf, 0xC8, desthi);
1776     // xchg lo and hi
1777     emit_opcode(cbuf, 0x87);
1778     emit_rm(cbuf, 0x3, destlo, desthi);
1779   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1792     emit_d8(cbuf, op >> 8 );
1793     emit_d8(cbuf, op & 255);
1794   %}
1795 
1796   // emulate a CMOV with a conditional branch around a MOV
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     int start = masm.offset();
1823     if (UseSSE >= 2) {
1824       if (VerifyFPU) {
1825         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1826       }
1827     } else {
1828       // External c_calling_convention expects the FPU stack to be 'clean'.
1829       // Compiled code leaves it dirty.  Do cleanup now.
1830       masm.empty_FPU_stack();
1831     }
1832     if (sizeof_FFree_Float_Stack_All == -1) {
1833       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1834     } else {
1835       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));
1866         __ fstp_s(Address(rsp, 0));
1867         __ movflt(xmm0, Address(rsp, 0));
1868         __ lea(rsp, Address(rsp,  4));
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));
1871         __ fstp_d(Address(rsp, 0));
1872         __ movdbl(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  8));
1874       }
1875     }
1876   %}
1877 
1878 
1879   enc_class pre_call_resets %{
1880     // If method sets FPU control word restore it here
1881     debug_only(int off0 = cbuf.insts_size());
1882     if (ra_->C->in_24_bit_fp_mode()) {
1883       MacroAssembler _masm(&cbuf);
1884       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1885     }
1886     if (ra_->C->max_vector_size() > 16) {
1887       // Clear upper bits of YMM registers when current compiled code uses
1888       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1889       MacroAssembler _masm(&cbuf);
1890       __ vzeroupper();
1891     }
1892     debug_only(int off1 = cbuf.insts_size());
1893     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1894   %}
1895 
1896   enc_class post_call_FPU %{
1897     // If method sets FPU control word do it here also
1898     if (Compile::current()->in_24_bit_fp_mode()) {
1899       MacroAssembler masm(&cbuf);
1900       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1901     }
1902   %}
1903 
1904   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1905     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1906     // who we intended to call.
1907     cbuf.set_insts_mark();
1908     $$$emit8$primary;
1909 
1910     if (!_method) {
1911       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1912                      runtime_call_Relocation::spec(),
1913                      RELOC_IMM32);
1914     } else {
1915       int method_index = resolved_method_index(cbuf);
1916       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1917                                                   : static_call_Relocation::spec(method_index);
1918       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919                      rspec, RELOC_DISP32);
1920       // Emit stubs for static call.
1921       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1922       if (stub == NULL) {
1923         ciEnv::current()->record_failure("CodeCache is full");
1924         return;
1925       }
1926     }
1927   %}
1928 
1929   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1930     MacroAssembler _masm(&cbuf);
1931     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1932   %}
1933 
1934   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1935     int disp = in_bytes(Method::from_compiled_offset());
1936     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1937 
1938     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1939     cbuf.set_insts_mark();
1940     $$$emit8$primary;
1941     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1942     emit_d8(cbuf, disp);             // Displacement
1943 
1944   %}
1945 
1946 //   Following encoding is no longer used, but may be restored if calling
1947 //   convention changes significantly.
1948 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1949 //
1950 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1951 //     // int ic_reg     = Matcher::inline_cache_reg();
1952 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1953 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1954 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1955 //
1956 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1957 //     // // so we load it immediately before the call
1958 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1959 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1960 //
1961 //     // xor rbp,ebp
1962 //     emit_opcode(cbuf, 0x33);
1963 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1964 //
1965 //     // CALL to interpreter.
1966 //     cbuf.set_insts_mark();
1967 //     $$$emit8$primary;
1968 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1969 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1970 //   %}
1971 
1972   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1973     $$$emit8$primary;
1974     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1975     $$$emit8$shift$$constant;
1976   %}
1977 
1978   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1979     // Load immediate does not have a zero or sign extended version
1980     // for 8-bit immediates
1981     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1982     $$$emit32$src$$constant;
1983   %}
1984 
1985   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1986     // Load immediate does not have a zero or sign extended version
1987     // for 8-bit immediates
1988     emit_opcode(cbuf, $primary + $dst$$reg);
1989     $$$emit32$src$$constant;
1990   %}
1991 
1992   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1993     // Load immediate does not have a zero or sign extended version
1994     // for 8-bit immediates
1995     int dst_enc = $dst$$reg;
1996     int src_con = $src$$constant & 0x0FFFFFFFFL;
1997     if (src_con == 0) {
1998       // xor dst, dst
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2001     } else {
2002       emit_opcode(cbuf, $primary + dst_enc);
2003       emit_d32(cbuf, src_con);
2004     }
2005   %}
2006 
2007   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2008     // Load immediate does not have a zero or sign extended version
2009     // for 8-bit immediates
2010     int dst_enc = $dst$$reg + 2;
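    // + 2 selects the high half of the long register pair (the same +2
    // offset that HIGH_FROM_LOW applies elsewhere in this file).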
2011     int src_con = ((julong)($src$$constant)) >> 32;
2012     if (src_con == 0) {
2013       // xor dst, dst
2014       emit_opcode(cbuf, 0x33);
2015       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2016     } else {
2017       emit_opcode(cbuf, $primary + dst_enc);
2018       emit_d32(cbuf, src_con);
2019     }
2020   %}
2021 
2022 
2023   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2024   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2025     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2026   %}
2027 
2028   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2029     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2030   %}
2031 
2032   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2033     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2034   %}
2035 
2036   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2037     $$$emit8$primary;
2038     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2039   %}
2040 
2041   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2042     $$$emit8$secondary;
2043     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2044   %}
2045 
2046   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2047     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2048   %}
2049 
2050   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2051     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2052   %}
2053 
2054   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2055     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2056   %}
2057 
2058   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2059     // Output immediate
2060     $$$emit32$src$$constant;
2061   %}
2062 
2063   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2064     // Output Float immediate bits
2065     jfloat jf = $src$$constant;
2066     int    jf_as_bits = jint_cast( jf );
2067     emit_d32(cbuf, jf_as_bits);
2068   %}
2069 
2070   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2071     // Output Float immediate bits
2072     jfloat jf = $src$$constant;
2073     int    jf_as_bits = jint_cast( jf );
2074     emit_d32(cbuf, jf_as_bits);
2075   %}
2076 
2077   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2078     // Output immediate
2079     $$$emit16$src$$constant;
2080   %}
2081 
2082   enc_class Con_d32(immI src) %{
2083     emit_d32(cbuf,$src$$constant);
2084   %}
2085 
2086   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2087     // Output immediate memory reference
2088     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2089     emit_d32(cbuf, 0x00);
2090   %}
2091 
2092   enc_class lock_prefix( ) %{
2093     if( os::is_MP() )
2094       emit_opcode(cbuf,0xF0);         // [Lock]
2095   %}
2096 
2097   // Cmp-xchg long value.
2098   // Note: we need to swap rbx and rcx before and after the
2099   //       cmpxchg8 instruction because the instruction uses
2100   //       rcx as the high order word of the new value to store but
2101   //       our register encoding uses rbx.
2102   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2103 
2104     // XCHG  ebx,ecx
2105     emit_opcode(cbuf,0x87);
2106     emit_opcode(cbuf,0xD9);
2107     // [Lock]
2108     if( os::is_MP() )
2109       emit_opcode(cbuf,0xF0);
2110     // CMPXCHG8 [Eptr]
2111     emit_opcode(cbuf,0x0F);
2112     emit_opcode(cbuf,0xC7);
2113     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2114     // XCHG  ebx,ecx
2115     emit_opcode(cbuf,0x87);
2116     emit_opcode(cbuf,0xD9);
2117   %}
2118 
2119   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2120     // [Lock]
2121     if( os::is_MP() )
2122       emit_opcode(cbuf,0xF0);
2123 
2124     // CMPXCHG [Eptr]
2125     emit_opcode(cbuf,0x0F);
2126     emit_opcode(cbuf,0xB1);
2127     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2128   %}
2129 
2130   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2131     int res_encoding = $res$$reg;
2132 
2133     // MOV  res,0
2134     emit_opcode( cbuf, 0xB8 + res_encoding);
2135     emit_d32( cbuf, 0 );
2136     // JNE,s  fail
2137     emit_opcode(cbuf,0x75);
2138     emit_d8(cbuf, 5 );
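    // The 5-byte skip is exactly the MOV res,1 below (1 opcode byte + imm32).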
2139     // MOV  res,1
2140     emit_opcode( cbuf, 0xB8 + res_encoding);
2141     emit_d32( cbuf, 1 );
2142     // fail:
2143   %}
2144 
2145   enc_class set_instruction_start( ) %{
2146     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2147   %}
2148 
2149   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2150     int reg_encoding = $ereg$$reg;
2151     int base  = $mem$$base;
2152     int index = $mem$$index;
2153     int scale = $mem$$scale;
2154     int displace = $mem$$disp;
2155     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2156     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2157   %}
2158 
2159   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2160     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2161     int base  = $mem$$base;
2162     int index = $mem$$index;
2163     int scale = $mem$$scale;
2164     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2165     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2166     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2167   %}
2168 
2169   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2170     int r1, r2;
2171     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2172     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2173     emit_opcode(cbuf,0x0F);
2174     emit_opcode(cbuf,$tertiary);
2175     emit_rm(cbuf, 0x3, r1, r2);
2176     emit_d8(cbuf,$cnt$$constant);
2177     emit_d8(cbuf,$primary);
2178     emit_rm(cbuf, 0x3, $secondary, r1);
2179     emit_d8(cbuf,$cnt$$constant);
2180   %}
2181 
2182   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2183     emit_opcode( cbuf, 0x8B ); // Move
2184     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2185     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2186       emit_d8(cbuf,$primary);
2187       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2188       emit_d8(cbuf,$cnt$$constant-32);
2189     }
2190     emit_d8(cbuf,$primary);
2191     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2192     emit_d8(cbuf,31);
2193   %}
2194 
2195   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2196     int r1, r2;
2197     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2198     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2199 
2200     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2201     emit_rm(cbuf, 0x3, r1, r2);
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_opcode(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, r1);
2205       emit_d8(cbuf,$cnt$$constant-32);
2206     }
2207     emit_opcode(cbuf,0x33);  // XOR r2,r2
2208     emit_rm(cbuf, 0x3, r2, r2);
2209   %}
2210 
2211   // Clone of RegMem but accepts an extra parameter to access each
2212   // half of a double in memory; it never needs relocation info.
2213   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2214     emit_opcode(cbuf,$opcode$$constant);
2215     int reg_encoding = $rm_reg$$reg;
2216     int base     = $mem$$base;
2217     int index    = $mem$$index;
2218     int scale    = $mem$$scale;
2219     int displace = $mem$$disp + $disp_for_half$$constant;
2220     relocInfo::relocType disp_reloc = relocInfo::none;
2221     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2222   %}
2223 
2224   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2225   //
2226   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2227   // and it never needs relocation information.
2228   // Frequently used to move data between FPU's Stack Top and memory.
2229   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2230     int rm_byte_opcode = $rm_opcode$$constant;
2231     int base     = $mem$$base;
2232     int index    = $mem$$index;
2233     int scale    = $mem$$scale;
2234     int displace = $mem$$disp;
2235     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2236     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2237   %}
2238 
2239   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2240     int rm_byte_opcode = $rm_opcode$$constant;
2241     int base     = $mem$$base;
2242     int index    = $mem$$index;
2243     int scale    = $mem$$scale;
2244     int displace = $mem$$disp;
2245     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2246     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2247   %}
2248 
2249   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2250     int reg_encoding = $dst$$reg;
2251     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2252     int index        = 0x04;            // 0x04 indicates no index
2253     int scale        = 0x00;            // 0x00 indicates no scale
2254     int displace     = $src1$$constant; // 0x00 indicates no displacement
2255     relocInfo::relocType disp_reloc = relocInfo::none;
2256     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2257   %}
2258 
2259   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2260     // Compare dst,src
2261     emit_opcode(cbuf,0x3B);
2262     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2263     // jmp dst < src around move
2264     emit_opcode(cbuf,0x7C);
2265     emit_d8(cbuf,2);
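    // The 2-byte skip is exactly the MOV (opcode + ModRM) emitted below.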
2266     // move dst,src
2267     emit_opcode(cbuf,0x8B);
2268     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2269   %}
2270 
2271   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2272     // Compare dst,src
2273     emit_opcode(cbuf,0x3B);
2274     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2275     // jmp dst > src around move
2276     emit_opcode(cbuf,0x7F);
2277     emit_d8(cbuf,2);
2278     // move dst,src
2279     emit_opcode(cbuf,0x8B);
2280     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2281   %}
2282 
2283   enc_class enc_FPR_store(memory mem, regDPR src) %{
2284     // If src is FPR1, we can just FST to store it.
2285     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2286     int reg_encoding = 0x2; // Just store
2287     int base  = $mem$$base;
2288     int index = $mem$$index;
2289     int scale = $mem$$scale;
2290     int displace = $mem$$disp;
2291     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2292     if( $src$$reg != FPR1L_enc ) {
2293       reg_encoding = 0x3;  // Store & pop
2294       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2295       emit_d8( cbuf, 0xC0-1+$src$$reg );
2296     }
2297     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2298     emit_opcode(cbuf,$primary);
2299     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2300   %}
2301 
2302   enc_class neg_reg(rRegI dst) %{
2303     // NEG $dst
2304     emit_opcode(cbuf,0xF7);
2305     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2306   %}
2307 
2308   enc_class setLT_reg(eCXRegI dst) %{
2309     // SETLT $dst
2310     emit_opcode(cbuf,0x0F);
2311     emit_opcode(cbuf,0x9C);
2312     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2313   %}
2314 
2315   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2316     int tmpReg = $tmp$$reg;
2317 
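    // Branchless conditional add: SUB sets CF on borrow, SBB turns that carry
    // into an all-ones or all-zero mask in $tmp, and the AND/ADD pair then
    // add $y only when the subtraction borrowed.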
2318     // SUB $p,$q
2319     emit_opcode(cbuf,0x2B);
2320     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2321     // SBB $tmp,$tmp
2322     emit_opcode(cbuf,0x1B);
2323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2324     // AND $tmp,$y
2325     emit_opcode(cbuf,0x23);
2326     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2327     // ADD $p,$tmp
2328     emit_opcode(cbuf,0x03);
2329     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2330   %}
2331 
2332   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2333     // TEST shift,32
2334     emit_opcode(cbuf,0xF7);
2335     emit_rm(cbuf, 0x3, 0, ECX_enc);
2336     emit_d32(cbuf,0x20);
2337     // JEQ,s small
2338     emit_opcode(cbuf, 0x74);
2339     emit_d8(cbuf, 0x04);
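    // The 4-byte skip covers the MOV (2 bytes) and CLR (2 bytes) below, which
    // handle shift counts of 32 or more by moving a whole word first.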
2340     // MOV    $dst.hi,$dst.lo
2341     emit_opcode( cbuf, 0x8B );
2342     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2343     // CLR    $dst.lo
2344     emit_opcode(cbuf, 0x33);
2345     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2346 // small:
2347     // SHLD   $dst.hi,$dst.lo,$shift
2348     emit_opcode(cbuf,0x0F);
2349     emit_opcode(cbuf,0xA5);
2350     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2351     // SHL    $dst.lo,$shift
2352     emit_opcode(cbuf,0xD3);
2353     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2354   %}
2355 
2356   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2357     // TEST shift,32
2358     emit_opcode(cbuf,0xF7);
2359     emit_rm(cbuf, 0x3, 0, ECX_enc);
2360     emit_d32(cbuf,0x20);
2361     // JEQ,s small
2362     emit_opcode(cbuf, 0x74);
2363     emit_d8(cbuf, 0x04);
2364     // MOV    $dst.lo,$dst.hi
2365     emit_opcode( cbuf, 0x8B );
2366     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2367     // CLR    $dst.hi
2368     emit_opcode(cbuf, 0x33);
2369     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2370 // small:
2371     // SHRD   $dst.lo,$dst.hi,$shift
2372     emit_opcode(cbuf,0x0F);
2373     emit_opcode(cbuf,0xAD);
2374     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2375     // SHR    $dst.hi,$shift
2376     emit_opcode(cbuf,0xD3);
2377     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2378   %}
2379 
2380   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2381     // TEST shift,32
2382     emit_opcode(cbuf,0xF7);
2383     emit_rm(cbuf, 0x3, 0, ECX_enc);
2384     emit_d32(cbuf,0x20);
2385     // JEQ,s small
2386     emit_opcode(cbuf, 0x74);
2387     emit_d8(cbuf, 0x05);
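    // The 5-byte skip covers the MOV (2 bytes) and SAR $dst.hi,31 (3 bytes)
    // below, which handle shift counts of 32 or more.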
2388     // MOV    $dst.lo,$dst.hi
2389     emit_opcode( cbuf, 0x8B );
2390     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2391     // SAR    $dst.hi,31
2392     emit_opcode(cbuf, 0xC1);
2393     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2394     emit_d8(cbuf, 0x1F );
2395 // small:
2396     // SHRD   $dst.lo,$dst.hi,$shift
2397     emit_opcode(cbuf,0x0F);
2398     emit_opcode(cbuf,0xAD);
2399     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2400     // SAR    $dst.hi,$shift
2401     emit_opcode(cbuf,0xD3);
2402     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2403   %}
2404 
2405 
2406   // ----------------- Encodings for floating point unit -----------------
2407   // May leave result in FPU-TOS or FPU reg depending on opcodes
2408   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2409     $$$emit8$primary;
2410     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2411   %}
2412 
2413   // Pop argument in FPR0 with FSTP ST(0)
2414   enc_class PopFPU() %{
2415     emit_opcode( cbuf, 0xDD );
2416     emit_d8( cbuf, 0xD8 );
2417   %}
2418 
2419   // !!!!! equivalent to Pop_Reg_F
2420   enc_class Pop_Reg_DPR( regDPR dst ) %{
2421     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2422     emit_d8( cbuf, 0xD8+$dst$$reg );
2423   %}
2424 
2425   enc_class Push_Reg_DPR( regDPR dst ) %{
2426     emit_opcode( cbuf, 0xD9 );
2427     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2428   %}
2429 
2430   enc_class strictfp_bias1( regDPR dst ) %{
2431     emit_opcode( cbuf, 0xDB );           // FLD m80real
2432     emit_opcode( cbuf, 0x2D );
2433     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2434     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2435     emit_opcode( cbuf, 0xC8+$dst$$reg );
2436   %}
2437 
2438   enc_class strictfp_bias2( regDPR dst ) %{
2439     emit_opcode( cbuf, 0xDB );           // FLD m80real
2440     emit_opcode( cbuf, 0x2D );
2441     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2442     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2443     emit_opcode( cbuf, 0xC8+$dst$$reg );
2444   %}
2445 
2446   // Special case for moving an integer register to a stack slot.
2447   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2448     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2449   %}
2450 
2451   // Special case for moving a register to a stack slot.
2452   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2453     // Opcode already emitted
2454     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2455     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2456     emit_d32(cbuf, $dst$$disp);   // Displacement
2457   %}
2458 
2459   // Push the integer in stackSlot 'src' onto FP-stack
2460   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2461     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2462   %}
2463 
2464   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2465   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2466     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2467   %}
2468 
2469   // Same as Pop_Mem_F except for opcode
2470   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2471   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2472     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2473   %}
2474 
2475   enc_class Pop_Reg_FPR( regFPR dst ) %{
2476     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2477     emit_d8( cbuf, 0xD8+$dst$$reg );
2478   %}
2479 
2480   enc_class Push_Reg_FPR( regFPR dst ) %{
2481     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2482     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2483   %}
2484 
2485   // Push FPU's float to a stack-slot, and pop FPU-stack
2486   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2487     int pop = 0x02;
2488     if ($src$$reg != FPR1L_enc) {
2489       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2490       emit_d8( cbuf, 0xC0-1+$src$$reg );
2491       pop = 0x03;
2492     }
2493     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2494   %}
2495 
2496   // Push FPU's double to a stack-slot, and pop FPU-stack
2497   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2498     int pop = 0x02;
2499     if ($src$$reg != FPR1L_enc) {
2500       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2501       emit_d8( cbuf, 0xC0-1+$src$$reg );
2502       pop = 0x03;
2503     }
2504     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2505   %}
2506 
2507   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2508   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2509     int pop = 0xD0 - 1; // -1 since we skip FLD
2510     if ($src$$reg != FPR1L_enc) {
2511       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2512       emit_d8( cbuf, 0xC0-1+$src$$reg );
2513       pop = 0xD8;
2514     }
2515     emit_opcode( cbuf, 0xDD );
2516     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2517   %}
2518 
2519 
2520   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2521     // load dst in FPR0
2522     emit_opcode( cbuf, 0xD9 );
2523     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2524     if ($src$$reg != FPR1L_enc) {
2525       // fincstp
2526       emit_opcode (cbuf, 0xD9);
2527       emit_opcode (cbuf, 0xF7);
2528       // swap src with FPR1:
2529       // FXCH FPR1 with src
2530       emit_opcode(cbuf, 0xD9);
2531       emit_d8(cbuf, 0xC8-1+$src$$reg );
2532       // fdecstp
2533       emit_opcode (cbuf, 0xD9);
2534       emit_opcode (cbuf, 0xF6);
2535     }
2536   %}
2537 
2538   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2539     MacroAssembler _masm(&cbuf);
2540     __ subptr(rsp, 8);
2541     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2542     __ fld_d(Address(rsp, 0));
2543     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2544     __ fld_d(Address(rsp, 0));
2545   %}
2546 
2547   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2548     MacroAssembler _masm(&cbuf);
2549     __ subptr(rsp, 4);
2550     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2551     __ fld_s(Address(rsp, 0));
2552     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2553     __ fld_s(Address(rsp, 0));
2554   %}
2555 
2556   enc_class Push_ResultD(regD dst) %{
2557     MacroAssembler _masm(&cbuf);
2558     __ fstp_d(Address(rsp, 0));
2559     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2560     __ addptr(rsp, 8);
2561   %}
2562 
2563   enc_class Push_ResultF(regF dst, immI d8) %{
2564     MacroAssembler _masm(&cbuf);
2565     __ fstp_s(Address(rsp, 0));
2566     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2567     __ addptr(rsp, $d8$$constant);
2568   %}
2569 
2570   enc_class Push_SrcD(regD src) %{
2571     MacroAssembler _masm(&cbuf);
2572     __ subptr(rsp, 8);
2573     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2574     __ fld_d(Address(rsp, 0));
2575   %}
2576 
2577   enc_class push_stack_temp_qword() %{
2578     MacroAssembler _masm(&cbuf);
2579     __ subptr(rsp, 8);
2580   %}
2581 
2582   enc_class pop_stack_temp_qword() %{
2583     MacroAssembler _masm(&cbuf);
2584     __ addptr(rsp, 8);
2585   %}
2586 
2587   enc_class push_xmm_to_fpr1(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2590     __ fld_d(Address(rsp, 0));
2591   %}
2592 
2593   enc_class Push_Result_Mod_DPR( regDPR src) %{
2594     if ($src$$reg != FPR1L_enc) {
2595       // fincstp
2596       emit_opcode (cbuf, 0xD9);
2597       emit_opcode (cbuf, 0xF7);
2598       // FXCH FPR1 with src
2599       emit_opcode(cbuf, 0xD9);
2600       emit_d8(cbuf, 0xC8-1+$src$$reg );
2601       // fdecstp
2602       emit_opcode (cbuf, 0xD9);
2603       emit_opcode (cbuf, 0xF6);
2604     }
2605     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2606     // // FSTP   FPR$dst$$reg
2607     // emit_opcode( cbuf, 0xDD );
2608     // emit_d8( cbuf, 0xD8+$dst$$reg );
2609   %}
2610 
2611   enc_class fnstsw_sahf_skip_parity() %{
2612     // fnstsw ax
2613     emit_opcode( cbuf, 0xDF );
2614     emit_opcode( cbuf, 0xE0 );
2615     // sahf
2616     emit_opcode( cbuf, 0x9E );
2617     // jnp  ::skip
2618     emit_opcode( cbuf, 0x7B );
2619     emit_opcode( cbuf, 0x05 );
2620   %}
2621 
2622   enc_class emitModDPR() %{
2623     // fprem must be iterative
2624     // :: loop
2625     // fprem
2626     emit_opcode( cbuf, 0xD9 );
2627     emit_opcode( cbuf, 0xF8 );
2628     // wait
2629     emit_opcode( cbuf, 0x9b );
2630     // fnstsw ax
2631     emit_opcode( cbuf, 0xDF );
2632     emit_opcode( cbuf, 0xE0 );
2633     // sahf
2634     emit_opcode( cbuf, 0x9E );
2635     // jp  ::loop
2636     emit_opcode( cbuf, 0x0F );
2637     emit_opcode( cbuf, 0x8A );
2638     emit_opcode( cbuf, 0xF4 );
2639     emit_opcode( cbuf, 0xFF );
2640     emit_opcode( cbuf, 0xFF );
2641     emit_opcode( cbuf, 0xFF );
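    // The 32-bit displacement is -12 (0xFFFFFFF4), branching back over the
    // 12 bytes from FPREM through this JP.  SAHF maps the FPU C2 (incomplete
    // reduction) flag to PF, so the loop repeats until FPREM is done.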
2642   %}
2643 
2644   enc_class fpu_flags() %{
2645     // fnstsw_ax
2646     emit_opcode( cbuf, 0xDF);
2647     emit_opcode( cbuf, 0xE0);
2648     // test ax,0x0400
2649     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2650     emit_opcode( cbuf, 0xA9 );
2651     emit_d16   ( cbuf, 0x0400 );
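    // 0x0400 is the FPU C2 bit, which is set (along with C0 and C3) when the
    // comparison was unordered.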
2652     // // // This sequence works, but stalls for 12-16 cycles on PPro
2653     // // test rax,0x0400
2654     // emit_opcode( cbuf, 0xA9 );
2655     // emit_d32   ( cbuf, 0x00000400 );
2656     //
2657     // jz exit (no unordered comparison)
2658     emit_opcode( cbuf, 0x74 );
2659     emit_d8    ( cbuf, 0x02 );
2660     // mov ah,1 - treat as LT case (set carry flag)
2661     emit_opcode( cbuf, 0xB4 );
2662     emit_d8    ( cbuf, 0x01 );
2663     // sahf
2664     emit_opcode( cbuf, 0x9E);
2665   %}
2666 
2667   enc_class cmpF_P6_fixup() %{
2668     // Fixup the integer flags in case comparison involved a NaN
2669     //
2670     // JNP exit (no unordered comparison, P-flag is set by NaN)
2671     emit_opcode( cbuf, 0x7B );
2672     emit_d8    ( cbuf, 0x03 );
2673     // MOV AH,1 - treat as LT case (set carry flag)
2674     emit_opcode( cbuf, 0xB4 );
2675     emit_d8    ( cbuf, 0x01 );
2676     // SAHF
2677     emit_opcode( cbuf, 0x9E);
2678     // NOP     // target for branch to avoid branch to branch
2679     emit_opcode( cbuf, 0x90);
2680   %}
2681 
2682 //     fnstsw_ax();
2683 //     sahf();
2684 //     movl(dst, nan_result);
2685 //     jcc(Assembler::parity, exit);
2686 //     movl(dst, less_result);
2687 //     jcc(Assembler::below, exit);
2688 //     movl(dst, equal_result);
2689 //     jcc(Assembler::equal, exit);
2690 //     movl(dst, greater_result);
2691 
2692 // less_result     =  1;
2693 // greater_result  = -1;
2694 // equal_result    = 0;
2695 // nan_result      = -1;
2696 
2697   enc_class CmpF_Result(rRegI dst) %{
2698     // fnstsw_ax();
2699     emit_opcode( cbuf, 0xDF);
2700     emit_opcode( cbuf, 0xE0);
2701     // sahf
2702     emit_opcode( cbuf, 0x9E);
2703     // movl(dst, nan_result);
2704     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2705     emit_d32( cbuf, -1 );
2706     // jcc(Assembler::parity, exit);
2707     emit_opcode( cbuf, 0x7A );
2708     emit_d8    ( cbuf, 0x13 );
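    // 0x13 = 19 bytes: the remaining three 5-byte MOVs plus two 2-byte Jcc,s.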
2709     // movl(dst, less_result);
2710     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2711     emit_d32( cbuf, -1 );
2712     // jcc(Assembler::below, exit);
2713     emit_opcode( cbuf, 0x72 );
2714     emit_d8    ( cbuf, 0x0C );
2715     // movl(dst, equal_result);
2716     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2717     emit_d32( cbuf, 0 );
2718     // jcc(Assembler::equal, exit);
2719     emit_opcode( cbuf, 0x74 );
2720     emit_d8    ( cbuf, 0x05 );
2721     // movl(dst, greater_result);
2722     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2723     emit_d32( cbuf, 1 );
2724   %}
2725 
2726 
2727   // Compare the longs and set flags
2728   // BROKEN!  Do Not use as-is
2729   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2730     // CMP    $src1.hi,$src2.hi
2731     emit_opcode( cbuf, 0x3B );
2732     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2733     // JNE,s  done
2734     emit_opcode(cbuf,0x75);
2735     emit_d8(cbuf, 2 );
2736     // CMP    $src1.lo,$src2.lo
2737     emit_opcode( cbuf, 0x3B );
2738     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2739 // done:
2740   %}
2741 
2742   enc_class convert_int_long( regL dst, rRegI src ) %{
2743     // mov $dst.lo,$src
2744     int dst_encoding = $dst$$reg;
2745     int src_encoding = $src$$reg;
2746     encode_Copy( cbuf, dst_encoding  , src_encoding );
2747     // mov $dst.hi,$src
2748     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2749     // sar $dst.hi,31
2750     emit_opcode( cbuf, 0xC1 );
2751     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2752     emit_d8(cbuf, 0x1F );
2753   %}
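
  // Rough C equivalent of convert_int_long, for illustration only: the
  // arithmetic shift replicates the sign bit into the entire high word.
  //
  //   int32_t src = ...;
  //   int32_t lo  = src;
  //   int32_t hi  = src >> 31;                  // 0 if src >= 0, -1 if src < 0
  //   int64_t dst = ((int64_t)hi << 32) | (uint32_t)lo;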
2754 
2755   enc_class convert_long_double( eRegL src ) %{
2756     // push $src.hi
2757     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2758     // push $src.lo
2759     emit_opcode(cbuf, 0x50+$src$$reg  );
2760     // fild 64-bits at [SP]
2761     emit_opcode(cbuf,0xdf);
2762     emit_d8(cbuf, 0x6C);
2763     emit_d8(cbuf, 0x24);
2764     emit_d8(cbuf, 0x00);
2765     // pop stack
2766     emit_opcode(cbuf, 0x83); // add  SP, #8
2767     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2768     emit_d8(cbuf, 0x8);
2769   %}
2770 
2771   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2772     // IMUL   EDX:EAX,$src1
2773     emit_opcode( cbuf, 0xF7 );
2774     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2775     // SAR    EDX,$cnt-32
2776     int shift_count = ((int)$cnt$$constant) - 32;
2777     if (shift_count > 0) {
2778       emit_opcode(cbuf, 0xC1);
2779       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2780       emit_d8(cbuf, shift_count);
2781     }
2782   %}
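
  // Illustration only (assumes the other multiplicand is already in EAX, as
  // required by eADXRegL_low_only): with 32 <= cnt <= 63 the sequence above
  // computes
  //
  //   int64_t prod = (int64_t)$src1 * (int64_t)multiplier;  // IMUL EDX:EAX
  //   int32_t dst  = (int32_t)(prod >> cnt);                // take EDX (>>32),
  //                                                         // then SAR EDX,cnt-32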
2783 
2784   // Same as convert_long_double, but without the trailing ADD ESP,8 stack cleanup
2785   enc_class convert_long_double2( eRegL src ) %{
2786     // push $src.hi
2787     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2788     // push $src.lo
2789     emit_opcode(cbuf, 0x50+$src$$reg  );
2790     // fild 64-bits at [SP]
2791     emit_opcode(cbuf,0xdf);
2792     emit_d8(cbuf, 0x6C);
2793     emit_d8(cbuf, 0x24);
2794     emit_d8(cbuf, 0x00);
2795   %}
2796 
2797   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2798     // Basic idea: long = (long)int * (long)int
2799     // IMUL EDX:EAX, src
2800     emit_opcode( cbuf, 0xF7 );
2801     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2802   %}
2803 
2804   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2805     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2806     // MUL EDX:EAX, src
2807     emit_opcode( cbuf, 0xF7 );
2808     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2809   %}
2810 
2811   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2812     // Basic idea: lo(result) = lo(x_lo * y_lo)
2813     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2814     // MOV    $tmp,$src.lo
2815     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2816     // IMUL   $tmp,EDX
2817     emit_opcode( cbuf, 0x0F );
2818     emit_opcode( cbuf, 0xAF );
2819     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2820     // MOV    EDX,$src.hi
2821     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2822     // IMUL   EDX,EAX
2823     emit_opcode( cbuf, 0x0F );
2824     emit_opcode( cbuf, 0xAF );
2825     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2826     // ADD    $tmp,EDX
2827     emit_opcode( cbuf, 0x03 );
2828     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2829     // MUL   EDX:EAX,$src.lo
2830     emit_opcode( cbuf, 0xF7 );
2831     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2832     // ADD    EDX,$tmp
2833     emit_opcode( cbuf, 0x03 );
2834     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2835   %}
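
  // For illustration (comment only): the decomposition above is the usual
  // 32x32 partial-product expansion of a 64x64->64 multiply; the x_hi * y_hi
  // term is dropped because it only affects bits >= 64.
  //
  //   uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
  //   uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
  //   uint64_t p    = (uint64_t)x_lo * y_lo;               // MUL  EDX:EAX
  //   uint32_t hi   = (uint32_t)(p >> 32)
  //                 + x_hi * y_lo + x_lo * y_hi;           // two IMULs + ADD
  //   uint64_t r    = ((uint64_t)hi << 32) | (uint32_t)p;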
2836 
2837   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2838     // Basic idea: lo(result) = lo(src * y_lo)
2839     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2840     // IMUL   $tmp,EDX,$src
2841     emit_opcode( cbuf, 0x6B );
2842     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2843     emit_d8( cbuf, (int)$src$$constant );
2844     // MOV    EDX,$src
2845     emit_opcode(cbuf, 0xB8 + EDX_enc);
2846     emit_d32( cbuf, (int)$src$$constant );
2847     // MUL   EDX:EAX,EDX
2848     emit_opcode( cbuf, 0xF7 );
2849     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2850     // ADD    EDX,$tmp
2851     emit_opcode( cbuf, 0x03 );
2852     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2853   %}
2854 
2855   enc_class long_div( eRegL src1, eRegL src2 ) %{
2856     // PUSH src1.hi
2857     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2858     // PUSH src1.lo
2859     emit_opcode(cbuf,               0x50+$src1$$reg  );
2860     // PUSH src2.hi
2861     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2862     // PUSH src2.lo
2863     emit_opcode(cbuf,               0x50+$src2$$reg  );
2864     // CALL directly to the runtime
2865     cbuf.set_insts_mark();
2866     emit_opcode(cbuf,0xE8);       // Call into runtime
2867     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2868     // Restore stack
2869     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2870     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2871     emit_d8(cbuf, 4*4);
2872   %}
2873 
2874   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2875     // PUSH src1.hi
2876     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2877     // PUSH src1.lo
2878     emit_opcode(cbuf,               0x50+$src1$$reg  );
2879     // PUSH src2.hi
2880     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2881     // PUSH src2.lo
2882     emit_opcode(cbuf,               0x50+$src2$$reg  );
2883     // CALL directly to the runtime
2884     cbuf.set_insts_mark();
2885     emit_opcode(cbuf,0xE8);       // Call into runtime
2886     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2887     // Restore stack
2888     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2889     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2890     emit_d8(cbuf, 4*4);
2891   %}
2892 
2893   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2894     // MOV   $tmp,$src.lo
2895     emit_opcode(cbuf, 0x8B);
2896     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2897     // OR    $tmp,$src.hi
2898     emit_opcode(cbuf, 0x0B);
2899     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2900   %}
2901 
2902   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2903     // CMP    $src1.lo,$src2.lo
2904     emit_opcode( cbuf, 0x3B );
2905     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2906     // JNE,s  skip
2907     emit_cc(cbuf, 0x70, 0x5);
2908     emit_d8(cbuf,2);
2909     // CMP    $src1.hi,$src2.hi
2910     emit_opcode( cbuf, 0x3B );
2911     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2912   %}
2913 
2914   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2915     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2916     emit_opcode( cbuf, 0x3B );
2917     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2918     // MOV    $tmp,$src1.hi
2919     emit_opcode( cbuf, 0x8B );
2920     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2921     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2922     emit_opcode( cbuf, 0x1B );
2923     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2924   %}
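
  // Illustration only: the CMP/SBB pair computes src1 - src2 as a full 64-bit
  // subtraction and keeps only the flags, so a following signed branch (JL,
  // JGE) sees the result of a complete long compare:
  //
  //   borrow = (uint32_t)src1_lo < (uint32_t)src2_lo;      // CMP  lo,lo
  //   tmp    = src1_hi - src2_hi - borrow;                 // SBB  hi,hi
  //   // after the SBB, SF^OF == (src1 < src2) as signed 64-bit values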
2925 
2926   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2927     // XOR    $tmp,$tmp
2928     emit_opcode(cbuf,0x33);  // XOR
2929     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2930     // CMP    $tmp,$src.lo
2931     emit_opcode( cbuf, 0x3B );
2932     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2933     // SBB    $tmp,$src.hi
2934     emit_opcode( cbuf, 0x1B );
2935     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2936   %}
2937 
2938   // Sniff, sniff... smells like Gnu Superoptimizer
2939   enc_class neg_long( eRegL dst ) %{
2940     emit_opcode(cbuf,0xF7);    // NEG hi
2941     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2942     emit_opcode(cbuf,0xF7);    // NEG lo
2943     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2944     emit_opcode(cbuf,0x83);    // SBB hi,0
2945     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2946     emit_d8    (cbuf,0 );
2947   %}
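
  // Comment-only sketch of the negation above: NEG on the low word produces
  // the low result and sets the carry flag iff the low word was non-zero, so
  // the NEG/SBB pair on the high word yields the high word of the 64-bit
  // two's complement:
  //
  //   uint32_t new_lo = (uint32_t)(0 - lo);
  //   uint32_t new_hi = (uint32_t)(0 - hi - (lo != 0));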
2948 
2949   enc_class enc_pop_rdx() %{
2950     emit_opcode(cbuf,0x5A);
2951   %}
2952 
2953   enc_class enc_rethrow() %{
2954     cbuf.set_insts_mark();
2955     emit_opcode(cbuf, 0xE9);        // jmp    entry
2956     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2957                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2958   %}
2959 
2960 
2961   // Convert a double to an int.  Java semantics require special handling of
2962   // the corner cases.  So we set the rounding mode to 'zero', store the
2963   // double down as an int, and reset the rounding mode to 'nearest'.  If the
2964   // stored value is the 0x80000000 sentinel (which the hardware produces for
2965   // NaN and out-of-range inputs), a slow-path runtime call computes the correct result.
2966   enc_class DPR2I_encoding( regDPR src ) %{
2967     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2968     // exceptions here, so that a NaN or other corner-case value would
2969     // throw an exception (but normal values get converted at full speed).
2970     // However, I2C adapters and other float-stack manglers leave pending
2971     // invalid-op exceptions hanging.  We would have to clear them before
2972     // enabling them and that is more expensive than just testing for the
2973     // invalid value Intel stores down in the corner cases.
2974     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2975     emit_opcode(cbuf,0x2D);
2976     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2977     // Allocate a word
2978     emit_opcode(cbuf,0x83);            // SUB ESP,4
2979     emit_opcode(cbuf,0xEC);
2980     emit_d8(cbuf,0x04);
2981     // Encoding assumes a double has been pushed into FPR0.
2982     // Store down the double as an int, popping the FPU stack
2983     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2984     emit_opcode(cbuf,0x1C);
2985     emit_d8(cbuf,0x24);
2986     // Restore the rounding mode; mask the exception
2987     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2988     emit_opcode(cbuf,0x2D);
2989     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2990         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2991         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2992 
2993     // Load the converted int; adjust CPU stack
2994     emit_opcode(cbuf,0x58);       // POP EAX
2995     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2996     emit_d32   (cbuf,0x80000000); //         0x80000000
2997     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2998     emit_d8    (cbuf,0x07);       // Size of slow_call
2999     // Push src onto stack slow-path
3000     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3001     emit_d8    (cbuf,0xC0-1+$src$$reg );
3002     // CALL directly to the runtime
3003     cbuf.set_insts_mark();
3004     emit_opcode(cbuf,0xE8);       // Call into runtime
3005     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3006     // Carry on here...
3007   %}
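
  // For reference only: the corner cases that the d2i_wrapper slow path must
  // restore when FISTP has stored the 0x80000000 sentinel are, roughly (the
  // helper name d2i below is purely illustrative):
  //
  //   int d2i(double d) {
  //     if (d != d)                return 0;                // NaN -> 0
  //     if (d >=  2147483648.0)    return 0x7FFFFFFF;       // clamp to max_jint
  //     if (d <= -2147483648.0)    return (int)0x80000000;  // clamp to min_jint
  //     return (int)d;                                      // truncate toward zero
  //   }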
3008 
3009   enc_class DPR2L_encoding( regDPR src ) %{
3010     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3011     emit_opcode(cbuf,0x2D);
3012     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3013     // Allocate a word
3014     emit_opcode(cbuf,0x83);            // SUB ESP,8
3015     emit_opcode(cbuf,0xEC);
3016     emit_d8(cbuf,0x08);
3017     // Encoding assumes a double has been pushed into FPR0.
3018     // Store down the double as a long, popping the FPU stack
3019     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3020     emit_opcode(cbuf,0x3C);
3021     emit_d8(cbuf,0x24);
3022     // Restore the rounding mode; mask the exception
3023     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3024     emit_opcode(cbuf,0x2D);
3025     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3026         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3027         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3028 
3029     // Load the converted long; adjust CPU stack
3030     emit_opcode(cbuf,0x58);       // POP EAX
3031     emit_opcode(cbuf,0x5A);       // POP EDX
3032     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3033     emit_d8    (cbuf,0xFA);       // rdx
3034     emit_d32   (cbuf,0x80000000); //         0x80000000
3035     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3036     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3037     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3038     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3039     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3040     emit_d8    (cbuf,0x07);       // Size of slow_call
3041     // Push src onto stack slow-path
3042     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3043     emit_d8    (cbuf,0xC0-1+$src$$reg );
3044     // CALL directly to the runtime
3045     cbuf.set_insts_mark();
3046     emit_opcode(cbuf,0xE8);       // Call into runtime
3047     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3048     // Carry on here...
3049   %}
3050 
3051   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3052     // Operand was loaded from memory into fp ST (stack top)
3053     // FMUL   ST,$src  /* D8 C8+i */
3054     emit_opcode(cbuf, 0xD8);
3055     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3056   %}
3057 
3058   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3059     // FADD   ST,src2  /* D8 C0+i */
3060     emit_opcode(cbuf, 0xD8);
3061     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3062     // Could use FADDP  src2,ST  /* DE C0+i */ instead
3063   %}
3064 
3065   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3066     // FADDP  src2,ST  /* DE C0+i */
3067     emit_opcode(cbuf, 0xDE);
3068     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3069   %}
3070 
3071   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3072     // Operand has been loaded into fp ST (stack top)
3073     // FSUB   ST,$src1
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3076 
3077     // FDIV   ST,$src2
3078     emit_opcode(cbuf, 0xD8);
3079     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3080   %}
3081 
3082   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3083     // Operand was loaded from memory into fp ST (stack top)
3084     // FADD   ST,$src  /* D8 C0+i */
3085     emit_opcode(cbuf, 0xD8);
3086     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3087 
3088     // FMUL   ST,src2  /* D8 C8+i */
3089     emit_opcode(cbuf, 0xD8);
3090     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3091   %}
3092 
3093 
3094   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3095     // Operand was loaded from memory into fp ST (stack top)
3096     // FADD   ST,$src  /* D8 C0+i */
3097     emit_opcode(cbuf, 0xD8);
3098     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3099 
3100     // FMULP  src2,ST  /* DE C8+i */
3101     emit_opcode(cbuf, 0xDE);
3102     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3103   %}
3104 
3105   // Atomically load the volatile long
3106   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3107     emit_opcode(cbuf,0xDF);
3108     int rm_byte_opcode = 0x05;
3109     int base     = $mem$$base;
3110     int index    = $mem$$index;
3111     int scale    = $mem$$scale;
3112     int displace = $mem$$disp;
3113     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3114     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3115     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3116   %}
3117 
3118   // Volatile Store Long.  Must be atomic, so move it into
3119   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3120   // target address before the store (for null-ptr checks)
3121   // so the memory operand is used twice in the encoding.
3122   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3123     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3124     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3125     emit_opcode(cbuf,0xDF);
3126     int rm_byte_opcode = 0x07;
3127     int base     = $mem$$base;
3128     int index    = $mem$$index;
3129     int scale    = $mem$$scale;
3130     int displace = $mem$$disp;
3131     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3132     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3133   %}
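
  // Informational note: both volatile-long encodings above rely on the x87
  // FILD/FISTP pair moving all 64 bits in one memory access, which is what
  // makes the access atomic as required for volatile longs on a 32-bit CPU.
  // A pair of 32-bit moves would not be atomic -- another thread could observe
  // the new low word paired with the old high word:
  //
  //   // broken (non-atomic) alternative, for contrast only:
  //   //   MOV [mem+0], src.lo
  //   //   MOV [mem+4], src.hi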
3134 
3135   // Safepoint Poll.  This polls the safepoint page, and causes an
3136   // exception if it is not readable.  Unfortunately, it kills the condition code
3137   // in the process.
3138   // We currently use TESTL [spp],EDI
3139   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3140 
3141   enc_class Safepoint_Poll() %{
3142     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3143     emit_opcode(cbuf,0x85);
3144     emit_rm (cbuf, 0x0, 0x7, 0x5);
3145     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3146   %}
3147 %}
3148 
3149 
3150 //----------FRAME--------------------------------------------------------------
3151 // Definition of frame structure and management information.
3152 //
3153 //  S T A C K   L A Y O U T    Allocators stack-slot number
3154 //                             |   (to get allocators register number
3155 //  G  Owned by    |        |  v    add OptoReg::stack0())
3156 //  r   CALLER     |        |
3157 //  o     |        +--------+      pad to even-align allocators stack-slot
3158 //  w     V        |  pad0  |        numbers; owned by CALLER
3159 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3160 //  h     ^        |   in   |  5
3161 //        |        |  args  |  4   Holes in incoming args owned by SELF
3162 //  |     |        |        |  3
3163 //  |     |        +--------+
3164 //  V     |        | old out|      Empty on Intel, window on Sparc
3165 //        |    old |preserve|      Must be even aligned.
3166 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3167 //        |        |   in   |  3   area for Intel ret address
3168 //     Owned by    |preserve|      Empty on Sparc.
3169 //       SELF      +--------+
3170 //        |        |  pad2  |  2   pad to align old SP
3171 //        |        +--------+  1
3172 //        |        | locks  |  0
3173 //        |        +--------+----> OptoReg::stack0(), even aligned
3174 //        |        |  pad1  | 11   pad to align new SP
3175 //        |        +--------+
3176 //        |        |        | 10
3177 //        |        | spills |  9   spills
3178 //        V        |        |  8   (pad0 slot for callee)
3179 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3180 //        ^        |  out   |  7
3181 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3182 //     Owned by    +--------+
3183 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3184 //        |    new |preserve|      Must be even-aligned.
3185 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3186 //        |        |        |
3187 //
3188 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3189 //         known from SELF's arguments and the Java calling convention.
3190 //         Region 6-7 is determined per call site.
3191 // Note 2: If the calling convention leaves holes in the incoming argument
3192 //         area, those holes are owned by SELF.  Holes in the outgoing area
3193 //         are owned by the CALLEE.  Holes should not be necessary in the
3194 //         incoming area, as the Java calling convention is completely under
3195 //         the control of the AD file.  Doubles can be sorted and packed to
3196 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3197 //         varargs C calling conventions.
3198 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3199 //         even aligned with pad0 as needed.
3200 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3201 //         region 6-11 is even aligned; it may be padded out more so that
3202 //         the region from SP to FP meets the minimum stack alignment.
3203 
3204 frame %{
3205   // What direction does the stack grow in (assumed to be the same for C & Java)
3206   stack_direction(TOWARDS_LOW);
3207 
3208   // These three registers define part of the calling convention
3209   // between compiled code and the interpreter.
3210   inline_cache_reg(EAX);                // Inline Cache Register
3211   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3212 
3213   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3214   cisc_spilling_operand_name(indOffset32);
3215 
3216   // Number of stack slots consumed by locking an object
3217   sync_stack_slots(1);
3218 
3219   // Compiled code's Frame Pointer
3220   frame_pointer(ESP);
3221   // Interpreter stores its frame pointer in a register which is
3222   // stored to the stack by I2CAdaptors.
3223   // I2CAdaptors convert from interpreted Java to compiled Java.
3224   interpreter_frame_pointer(EBP);
3225 
3226   // Stack alignment requirement
3227   // Alignment size in bytes (128-bit -> 16 bytes)
3228   stack_alignment(StackAlignmentInBytes);
3229 
3230   // Number of stack slots between incoming argument block and the start of
3231   // a new frame.  The PROLOG must add this many slots to the stack.  The
3232   // EPILOG must remove this many slots.  Intel needs one slot for
3233   // return address and one for rbp (must save rbp)
3234   in_preserve_stack_slots(2+VerifyStackAtCalls);
3235 
3236   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3237   // for calls to C.  Supports the var-args backing area for register parms.
3238   varargs_C_out_slots_killed(0);
3239 
3240   // The after-PROLOG location of the return address.  Location of
3241   // return address specifies a type (REG or STACK) and a number
3242   // representing the register number (i.e. - use a register name) or
3243   // stack slot.
3244   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3245   // Otherwise, it is above the locks, the verification slot and the alignment word.
3246   return_addr(STACK - 1 +
3247               round_to((Compile::current()->in_preserve_stack_slots() +
3248                         Compile::current()->fixed_slots()),
3249                        stack_alignment_in_slots()));
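
  // Worked example of the expression above (assumptions: VerifyStackAtCalls is
  // off, fixed_slots() == 0, StackAlignmentInBytes == 16, i.e. 4 slots):
  // in_preserve_stack_slots() == 2, round_to(2 + 0, 4) == 4, so the return
  // address is reported as stack slot  -1 + 4 == 3.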
3250 
3251   // Body of function which returns an integer array locating
3252   // arguments either in registers or in stack slots.  Passed an array
3253   // of ideal registers called "sig" and a "length" count.  Stack-slot
3254   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3255   // arguments for a CALLEE.  Incoming stack arguments are
3256   // automatically biased by the preserve_stack_slots field above.
3257   calling_convention %{
3258     // No difference between incoming/outgoing, so just pass false
3259     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3260   %}
3261 
3262 
3263   // Body of function which returns an integer array locating
3264   // arguments either in registers or in stack slots.  Passed an array
3265   // of ideal registers called "sig" and a "length" count.  Stack-slot
3266   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3267   // arguments for a CALLEE.  Incoming stack arguments are
3268   // automatically biased by the preserve_stack_slots field above.
3269   c_calling_convention %{
3270     // This is obviously always outgoing
3271     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3272   %}
3273 
3274   // Location of C & interpreter return values
3275   c_return_value %{
3276     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3277     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3278     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3279 
3280     // In SSE2+ mode we want to keep the FPU stack clean, so pretend
3281     // that C functions return float and double results in XMM0.
3282     if( ideal_reg == Op_RegD && UseSSE>=2 )
3283       return OptoRegPair(XMM0b_num,XMM0_num);
3284     if( ideal_reg == Op_RegF && UseSSE>=2 )
3285       return OptoRegPair(OptoReg::Bad,XMM0_num);
3286 
3287     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3288   %}
3289 
3290   // Location of return values
3291   return_value %{
3292     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3293     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3294     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3295     if( ideal_reg == Op_RegD && UseSSE>=2 )
3296       return OptoRegPair(XMM0b_num,XMM0_num);
3297     if( ideal_reg == Op_RegF && UseSSE>=1 )
3298       return OptoRegPair(OptoReg::Bad,XMM0_num);
3299     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3300   %}
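
  // Example (informational): for a method returning a Java long, ideal_reg is
  // Op_RegL and the tables above yield the pair (EDX_num, EAX_num), i.e. the
  // value comes back in EDX:EAX.  An int comes back in EAX alone, since its
  // hi entry is OptoReg::Bad.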
3301 
3302 %}
3303 
3304 //----------ATTRIBUTES---------------------------------------------------------
3305 //----------Operand Attributes-------------------------------------------------
3306 op_attrib op_cost(0);        // Required cost attribute
3307 
3308 //----------Instruction Attributes---------------------------------------------
3309 ins_attrib ins_cost(100);       // Required cost attribute
3310 ins_attrib ins_size(8);         // Required size attribute (in bits)
3311 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3312                                 // non-matching short branch variant of some
3313                                 // long branch?
3314 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3315                                 // specifies the alignment that some part of the instruction (not
3316                                 // necessarily the start) requires.  If > 1, a compute_padding()
3317                                 // function must be provided for the instruction
3318 
3319 //----------OPERANDS-----------------------------------------------------------
3320 // Operand definitions must precede instruction definitions for correct parsing
3321 // in the ADLC because operands constitute user-defined types which are used in
3322 // instruction definitions.
3323 
3324 //----------Simple Operands----------------------------------------------------
3325 // Immediate Operands
3326 // Integer Immediate
3327 operand immI() %{
3328   match(ConI);
3329 
3330   op_cost(10);
3331   format %{ %}
3332   interface(CONST_INTER);
3333 %}
3334 
3335 // Constant for test vs zero
3336 operand immI0() %{
3337   predicate(n->get_int() == 0);
3338   match(ConI);
3339 
3340   op_cost(0);
3341   format %{ %}
3342   interface(CONST_INTER);
3343 %}
3344 
3345 // Constant for increment
3346 operand immI1() %{
3347   predicate(n->get_int() == 1);
3348   match(ConI);
3349 
3350   op_cost(0);
3351   format %{ %}
3352   interface(CONST_INTER);
3353 %}
3354 
3355 // Constant for decrement
3356 operand immI_M1() %{
3357   predicate(n->get_int() == -1);
3358   match(ConI);
3359 
3360   op_cost(0);
3361   format %{ %}
3362   interface(CONST_INTER);
3363 %}
3364 
3365 // Valid scale values for addressing modes
3366 operand immI2() %{
3367   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3368   match(ConI);
3369 
3370   format %{ %}
3371   interface(CONST_INTER);
3372 %}
3373 
3374 operand immI8() %{
3375   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3376   match(ConI);
3377 
3378   op_cost(5);
3379   format %{ %}
3380   interface(CONST_INTER);
3381 %}
3382 
3383 operand immI16() %{
3384   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3385   match(ConI);
3386 
3387   op_cost(10);
3388   format %{ %}
3389   interface(CONST_INTER);
3390 %}
3391 
3392 // Int Immediate non-negative
3393 operand immU31()
3394 %{
3395   predicate(n->get_int() >= 0);
3396   match(ConI);
3397 
3398   op_cost(0);
3399   format %{ %}
3400   interface(CONST_INTER);
3401 %}
3402 
3403 // Constant for long shifts
3404 operand immI_32() %{
3405   predicate( n->get_int() == 32 );
3406   match(ConI);
3407 
3408   op_cost(0);
3409   format %{ %}
3410   interface(CONST_INTER);
3411 %}
3412 
3413 operand immI_1_31() %{
3414   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3415   match(ConI);
3416 
3417   op_cost(0);
3418   format %{ %}
3419   interface(CONST_INTER);
3420 %}
3421 
3422 operand immI_32_63() %{
3423   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3424   match(ConI);
3425   op_cost(0);
3426 
3427   format %{ %}
3428   interface(CONST_INTER);
3429 %}
3430 
3431 operand immI_1() %{
3432   predicate( n->get_int() == 1 );
3433   match(ConI);
3434 
3435   op_cost(0);
3436   format %{ %}
3437   interface(CONST_INTER);
3438 %}
3439 
3440 operand immI_2() %{
3441   predicate( n->get_int() == 2 );
3442   match(ConI);
3443 
3444   op_cost(0);
3445   format %{ %}
3446   interface(CONST_INTER);
3447 %}
3448 
3449 operand immI_3() %{
3450   predicate( n->get_int() == 3 );
3451   match(ConI);
3452 
3453   op_cost(0);
3454   format %{ %}
3455   interface(CONST_INTER);
3456 %}
3457 
3458 // Pointer Immediate
3459 operand immP() %{
3460   match(ConP);
3461 
3462   op_cost(10);
3463   format %{ %}
3464   interface(CONST_INTER);
3465 %}
3466 
3467 // NULL Pointer Immediate
3468 operand immP0() %{
3469   predicate( n->get_ptr() == 0 );
3470   match(ConP);
3471   op_cost(0);
3472 
3473   format %{ %}
3474   interface(CONST_INTER);
3475 %}
3476 
3477 // Long Immediate
3478 operand immL() %{
3479   match(ConL);
3480 
3481   op_cost(20);
3482   format %{ %}
3483   interface(CONST_INTER);
3484 %}
3485 
3486 // Long Immediate zero
3487 operand immL0() %{
3488   predicate( n->get_long() == 0L );
3489   match(ConL);
3490   op_cost(0);
3491 
3492   format %{ %}
3493   interface(CONST_INTER);
3494 %}
3495 
3496 // Long Immediate minus one
3497 operand immL_M1() %{
3498   predicate( n->get_long() == -1L );
3499   match(ConL);
3500   op_cost(0);
3501 
3502   format %{ %}
3503   interface(CONST_INTER);
3504 %}
3505 
3506 // Long immediate from 0 to 127.
3507 // Used for a shorter form of long mul by 10.
3508 operand immL_127() %{
3509   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3510   match(ConL);
3511   op_cost(0);
3512 
3513   format %{ %}
3514   interface(CONST_INTER);
3515 %}
3516 
3517 // Long Immediate: low 32-bit mask
3518 operand immL_32bits() %{
3519   predicate(n->get_long() == 0xFFFFFFFFL);
3520   match(ConL);
3521   op_cost(0);
3522 
3523   format %{ %}
3524   interface(CONST_INTER);
3525 %}
3526 
3527 // Long Immediate: value fits in a signed 32-bit immediate
3528 operand immL32() %{
3529   predicate(n->get_long() == (int)(n->get_long()));
3530   match(ConL);
3531   op_cost(20);
3532 
3533   format %{ %}
3534   interface(CONST_INTER);
3535 %}
3536 
3537 // Double Immediate zero
3538 operand immDPR0() %{
3539   // Do additional (and counter-intuitive) test against NaN to work around VC++
3540   // bug that generates code such that NaNs compare equal to 0.0
3541   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3542   match(ConD);
3543 
3544   op_cost(5);
3545   format %{ %}
3546   interface(CONST_INTER);
3547 %}
3548 
3549 // Double Immediate one
3550 operand immDPR1() %{
3551   predicate( UseSSE<=1 && n->getd() == 1.0 );
3552   match(ConD);
3553 
3554   op_cost(5);
3555   format %{ %}
3556   interface(CONST_INTER);
3557 %}
3558 
3559 // Double Immediate
3560 operand immDPR() %{
3561   predicate(UseSSE<=1);
3562   match(ConD);
3563 
3564   op_cost(5);
3565   format %{ %}
3566   interface(CONST_INTER);
3567 %}
3568 
3569 operand immD() %{
3570   predicate(UseSSE>=2);
3571   match(ConD);
3572 
3573   op_cost(5);
3574   format %{ %}
3575   interface(CONST_INTER);
3576 %}
3577 
3578 // Double Immediate zero
3579 operand immD0() %{
3580   // Do additional (and counter-intuitive) test against NaN to work around VC++
3581   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3582   // compare equal to -0.0.
3583   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3584   match(ConD);
3585 
3586   format %{ %}
3587   interface(CONST_INTER);
3588 %}
3589 
3590 // Float Immediate zero
3591 operand immFPR0() %{
3592   predicate(UseSSE == 0 && n->getf() == 0.0F);
3593   match(ConF);
3594 
3595   op_cost(5);
3596   format %{ %}
3597   interface(CONST_INTER);
3598 %}
3599 
3600 // Float Immediate one
3601 operand immFPR1() %{
3602   predicate(UseSSE == 0 && n->getf() == 1.0F);
3603   match(ConF);
3604 
3605   op_cost(5);
3606   format %{ %}
3607   interface(CONST_INTER);
3608 %}
3609 
3610 // Float Immediate
3611 operand immFPR() %{
3612   predicate( UseSSE == 0 );
3613   match(ConF);
3614 
3615   op_cost(5);
3616   format %{ %}
3617   interface(CONST_INTER);
3618 %}
3619 
3620 // Float Immediate
3621 operand immF() %{
3622   predicate(UseSSE >= 1);
3623   match(ConF);
3624 
3625   op_cost(5);
3626   format %{ %}
3627   interface(CONST_INTER);
3628 %}
3629 
3630 // Float Immediate zero.  Zero and not -0.0
3631 operand immF0() %{
3632   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3633   match(ConF);
3634 
3635   op_cost(5);
3636   format %{ %}
3637   interface(CONST_INTER);
3638 %}
3639 
3640 // Immediates for special shifts (sign extend)
3641 
3642 // Constants for sign-extending shift amounts
3643 operand immI_16() %{
3644   predicate( n->get_int() == 16 );
3645   match(ConI);
3646 
3647   format %{ %}
3648   interface(CONST_INTER);
3649 %}
3650 
3651 operand immI_24() %{
3652   predicate( n->get_int() == 24 );
3653   match(ConI);
3654 
3655   format %{ %}
3656   interface(CONST_INTER);
3657 %}
3658 
3659 // Constant for byte-wide masking
3660 operand immI_255() %{
3661   predicate( n->get_int() == 255 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 // Constant for short-wide masking
3669 operand immI_65535() %{
3670   predicate(n->get_int() == 65535);
3671   match(ConI);
3672 
3673   format %{ %}
3674   interface(CONST_INTER);
3675 %}
3676 
3677 // Register Operands
3678 // Integer Register
3679 operand rRegI() %{
3680   constraint(ALLOC_IN_RC(int_reg));
3681   match(RegI);
3682   match(xRegI);
3683   match(eAXRegI);
3684   match(eBXRegI);
3685   match(eCXRegI);
3686   match(eDXRegI);
3687   match(eDIRegI);
3688   match(eSIRegI);
3689 
3690   format %{ %}
3691   interface(REG_INTER);
3692 %}
3693 
3694 // Subset of Integer Register
3695 operand xRegI(rRegI reg) %{
3696   constraint(ALLOC_IN_RC(int_x_reg));
3697   match(reg);
3698   match(eAXRegI);
3699   match(eBXRegI);
3700   match(eCXRegI);
3701   match(eDXRegI);
3702 
3703   format %{ %}
3704   interface(REG_INTER);
3705 %}
3706 
3707 // Special Registers
3708 operand eAXRegI(xRegI reg) %{
3709   constraint(ALLOC_IN_RC(eax_reg));
3710   match(reg);
3711   match(rRegI);
3712 
3713   format %{ "EAX" %}
3714   interface(REG_INTER);
3715 %}
3716 
3717 // Special Registers
3718 operand eBXRegI(xRegI reg) %{
3719   constraint(ALLOC_IN_RC(ebx_reg));
3720   match(reg);
3721   match(rRegI);
3722 
3723   format %{ "EBX" %}
3724   interface(REG_INTER);
3725 %}
3726 
3727 operand eCXRegI(xRegI reg) %{
3728   constraint(ALLOC_IN_RC(ecx_reg));
3729   match(reg);
3730   match(rRegI);
3731 
3732   format %{ "ECX" %}
3733   interface(REG_INTER);
3734 %}
3735 
3736 operand eDXRegI(xRegI reg) %{
3737   constraint(ALLOC_IN_RC(edx_reg));
3738   match(reg);
3739   match(rRegI);
3740 
3741   format %{ "EDX" %}
3742   interface(REG_INTER);
3743 %}
3744 
3745 operand eDIRegI(xRegI reg) %{
3746   constraint(ALLOC_IN_RC(edi_reg));
3747   match(reg);
3748   match(rRegI);
3749 
3750   format %{ "EDI" %}
3751   interface(REG_INTER);
3752 %}
3753 
3754 operand naxRegI() %{
3755   constraint(ALLOC_IN_RC(nax_reg));
3756   match(RegI);
3757   match(eCXRegI);
3758   match(eDXRegI);
3759   match(eSIRegI);
3760   match(eDIRegI);
3761 
3762   format %{ %}
3763   interface(REG_INTER);
3764 %}
3765 
3766 operand nadxRegI() %{
3767   constraint(ALLOC_IN_RC(nadx_reg));
3768   match(RegI);
3769   match(eBXRegI);
3770   match(eCXRegI);
3771   match(eSIRegI);
3772   match(eDIRegI);
3773 
3774   format %{ %}
3775   interface(REG_INTER);
3776 %}
3777 
3778 operand ncxRegI() %{
3779   constraint(ALLOC_IN_RC(ncx_reg));
3780   match(RegI);
3781   match(eAXRegI);
3782   match(eDXRegI);
3783   match(eSIRegI);
3784   match(eDIRegI);
3785 
3786   format %{ %}
3787   interface(REG_INTER);
3788 %}
3789 
3790 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3791 // //
3792 operand eSIRegI(xRegI reg) %{
3793    constraint(ALLOC_IN_RC(esi_reg));
3794    match(reg);
3795    match(rRegI);
3796 
3797    format %{ "ESI" %}
3798    interface(REG_INTER);
3799 %}
3800 
3801 // Pointer Register
3802 operand anyRegP() %{
3803   constraint(ALLOC_IN_RC(any_reg));
3804   match(RegP);
3805   match(eAXRegP);
3806   match(eBXRegP);
3807   match(eCXRegP);
3808   match(eDIRegP);
3809   match(eRegP);
3810 
3811   format %{ %}
3812   interface(REG_INTER);
3813 %}
3814 
3815 operand eRegP() %{
3816   constraint(ALLOC_IN_RC(int_reg));
3817   match(RegP);
3818   match(eAXRegP);
3819   match(eBXRegP);
3820   match(eCXRegP);
3821   match(eDIRegP);
3822 
3823   format %{ %}
3824   interface(REG_INTER);
3825 %}
3826 
3827 // On Windows 95, EBP is not safe to use for implicit null tests.
3828 operand eRegP_no_EBP() %{
3829   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3830   match(RegP);
3831   match(eAXRegP);
3832   match(eBXRegP);
3833   match(eCXRegP);
3834   match(eDIRegP);
3835 
3836   op_cost(100);
3837   format %{ %}
3838   interface(REG_INTER);
3839 %}
3840 
3841 operand naxRegP() %{
3842   constraint(ALLOC_IN_RC(nax_reg));
3843   match(RegP);
3844   match(eBXRegP);
3845   match(eDXRegP);
3846   match(eCXRegP);
3847   match(eSIRegP);
3848   match(eDIRegP);
3849 
3850   format %{ %}
3851   interface(REG_INTER);
3852 %}
3853 
3854 operand nabxRegP() %{
3855   constraint(ALLOC_IN_RC(nabx_reg));
3856   match(RegP);
3857   match(eCXRegP);
3858   match(eDXRegP);
3859   match(eSIRegP);
3860   match(eDIRegP);
3861 
3862   format %{ %}
3863   interface(REG_INTER);
3864 %}
3865 
3866 operand pRegP() %{
3867   constraint(ALLOC_IN_RC(p_reg));
3868   match(RegP);
3869   match(eBXRegP);
3870   match(eDXRegP);
3871   match(eSIRegP);
3872   match(eDIRegP);
3873 
3874   format %{ %}
3875   interface(REG_INTER);
3876 %}
3877 
3878 // Special Registers
3879 // Return a pointer value
3880 operand eAXRegP(eRegP reg) %{
3881   constraint(ALLOC_IN_RC(eax_reg));
3882   match(reg);
3883   format %{ "EAX" %}
3884   interface(REG_INTER);
3885 %}
3886 
3887 // Used in AtomicAdd
3888 operand eBXRegP(eRegP reg) %{
3889   constraint(ALLOC_IN_RC(ebx_reg));
3890   match(reg);
3891   format %{ "EBX" %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Tail-call (interprocedural jump) to interpreter
3896 operand eCXRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(ecx_reg));
3898   match(reg);
3899   format %{ "ECX" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 operand eSIRegP(eRegP reg) %{
3904   constraint(ALLOC_IN_RC(esi_reg));
3905   match(reg);
3906   format %{ "ESI" %}
3907   interface(REG_INTER);
3908 %}
3909 
3910 // Used in rep stosw
3911 operand eDIRegP(eRegP reg) %{
3912   constraint(ALLOC_IN_RC(edi_reg));
3913   match(reg);
3914   format %{ "EDI" %}
3915   interface(REG_INTER);
3916 %}
3917 
3918 operand eRegL() %{
3919   constraint(ALLOC_IN_RC(long_reg));
3920   match(RegL);
3921   match(eADXRegL);
3922 
3923   format %{ %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 operand eADXRegL( eRegL reg ) %{
3928   constraint(ALLOC_IN_RC(eadx_reg));
3929   match(reg);
3930 
3931   format %{ "EDX:EAX" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eBCXRegL( eRegL reg ) %{
3936   constraint(ALLOC_IN_RC(ebcx_reg));
3937   match(reg);
3938 
3939   format %{ "EBX:ECX" %}
3940   interface(REG_INTER);
3941 %}
3942 
3943 // Special case for integer high multiply
3944 operand eADXRegL_low_only() %{
3945   constraint(ALLOC_IN_RC(eadx_reg));
3946   match(RegL);
3947 
3948   format %{ "EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 // Flags register, used as output of compare instructions
3953 operand eFlagsReg() %{
3954   constraint(ALLOC_IN_RC(int_flags));
3955   match(RegFlags);
3956 
3957   format %{ "EFLAGS" %}
3958   interface(REG_INTER);
3959 %}
3960 
3961 // Flags register, used as output of FLOATING POINT compare instructions
3962 operand eFlagsRegU() %{
3963   constraint(ALLOC_IN_RC(int_flags));
3964   match(RegFlags);
3965 
3966   format %{ "EFLAGS_U" %}
3967   interface(REG_INTER);
3968 %}
3969 
3970 operand eFlagsRegUCF() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973   predicate(false);
3974 
3975   format %{ "EFLAGS_U_CF" %}
3976   interface(REG_INTER);
3977 %}
3978 
3979 // Condition Code Register used by long compare
3980 operand flagsReg_long_LTGE() %{
3981   constraint(ALLOC_IN_RC(int_flags));
3982   match(RegFlags);
3983   format %{ "FLAGS_LTGE" %}
3984   interface(REG_INTER);
3985 %}
3986 operand flagsReg_long_EQNE() %{
3987   constraint(ALLOC_IN_RC(int_flags));
3988   match(RegFlags);
3989   format %{ "FLAGS_EQNE" %}
3990   interface(REG_INTER);
3991 %}
3992 operand flagsReg_long_LEGT() %{
3993   constraint(ALLOC_IN_RC(int_flags));
3994   match(RegFlags);
3995   format %{ "FLAGS_LEGT" %}
3996   interface(REG_INTER);
3997 %}
3998 
3999 // Float register operands
4000 operand regDPR() %{
4001   predicate( UseSSE < 2 );
4002   constraint(ALLOC_IN_RC(fp_dbl_reg));
4003   match(RegD);
4004   match(regDPR1);
4005   match(regDPR2);
4006   format %{ %}
4007   interface(REG_INTER);
4008 %}
4009 
4010 operand regDPR1(regDPR reg) %{
4011   predicate( UseSSE < 2 );
4012   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4013   match(reg);
4014   format %{ "FPR1" %}
4015   interface(REG_INTER);
4016 %}
4017 
4018 operand regDPR2(regDPR reg) %{
4019   predicate( UseSSE < 2 );
4020   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4021   match(reg);
4022   format %{ "FPR2" %}
4023   interface(REG_INTER);
4024 %}
4025 
4026 operand regnotDPR1(regDPR reg) %{
4027   predicate( UseSSE < 2 );
4028   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4029   match(reg);
4030   format %{ %}
4031   interface(REG_INTER);
4032 %}
4033 
4034 // Float register operands
4035 operand regFPR() %{
4036   predicate( UseSSE < 2 );
4037   constraint(ALLOC_IN_RC(fp_flt_reg));
4038   match(RegF);
4039   match(regFPR1);
4040   format %{ %}
4041   interface(REG_INTER);
4042 %}
4043 
4044 // Float register operands
4045 operand regFPR1(regFPR reg) %{
4046   predicate( UseSSE < 2 );
4047   constraint(ALLOC_IN_RC(fp_flt_reg0));
4048   match(reg);
4049   format %{ "FPR1" %}
4050   interface(REG_INTER);
4051 %}
4052 
4053 // XMM Float register operands
4054 operand regF() %{
4055   predicate( UseSSE>=1 );
4056   constraint(ALLOC_IN_RC(float_reg_legacy));
4057   match(RegF);
4058   format %{ %}
4059   interface(REG_INTER);
4060 %}
4061 
4062 // XMM Double register operands
4063 operand regD() %{
4064   predicate( UseSSE>=2 );
4065   constraint(ALLOC_IN_RC(double_reg_legacy));
4066   match(RegD);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Vectors: note, we use legacy registers to avoid the extra runtime code
4072 // generation via reg_class_dynamic (which is not needed in the 32-bit VM).
4073 operand vecS() %{
4074   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4075   match(VecS);
4076 
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 operand vecD() %{
4082   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4083   match(VecD);
4084 
4085   format %{ %}
4086   interface(REG_INTER);
4087 %}
4088 
4089 operand vecX() %{
4090   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4091   match(VecX);
4092 
4093   format %{ %}
4094   interface(REG_INTER);
4095 %}
4096 
4097 operand vecY() %{
4098   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4099   match(VecY);
4100 
4101   format %{ %}
4102   interface(REG_INTER);
4103 %}
4104 
4105 //----------Memory Operands----------------------------------------------------
4106 // Direct Memory Operand
4107 operand direct(immP addr) %{
4108   match(addr);
4109 
4110   format %{ "[$addr]" %}
4111   interface(MEMORY_INTER) %{
4112     base(0xFFFFFFFF);
4113     index(0x4);
4114     scale(0x0);
4115     disp($addr);
4116   %}
4117 %}
4118 
4119 // Indirect Memory Operand
4120 operand indirect(eRegP reg) %{
4121   constraint(ALLOC_IN_RC(int_reg));
4122   match(reg);
4123 
4124   format %{ "[$reg]" %}
4125   interface(MEMORY_INTER) %{
4126     base($reg);
4127     index(0x4);
4128     scale(0x0);
4129     disp(0x0);
4130   %}
4131 %}
4132 
4133 // Indirect Memory Plus Short Offset Operand
4134 operand indOffset8(eRegP reg, immI8 off) %{
4135   match(AddP reg off);
4136 
4137   format %{ "[$reg + $off]" %}
4138   interface(MEMORY_INTER) %{
4139     base($reg);
4140     index(0x4);
4141     scale(0x0);
4142     disp($off);
4143   %}
4144 %}
4145 
4146 // Indirect Memory Plus Long Offset Operand
4147 operand indOffset32(eRegP reg, immI off) %{
4148   match(AddP reg off);
4149 
4150   format %{ "[$reg + $off]" %}
4151   interface(MEMORY_INTER) %{
4152     base($reg);
4153     index(0x4);
4154     scale(0x0);
4155     disp($off);
4156   %}
4157 %}
4158 
4159 // Indirect Memory Plus Long Offset Operand
4160 operand indOffset32X(rRegI reg, immP off) %{
4161   match(AddP off reg);
4162 
4163   format %{ "[$reg + $off]" %}
4164   interface(MEMORY_INTER) %{
4165     base($reg);
4166     index(0x4);
4167     scale(0x0);
4168     disp($off);
4169   %}
4170 %}
4171 
4172 // Indirect Memory Plus Index Register Plus Offset Operand
4173 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4174   match(AddP (AddP reg ireg) off);
4175 
4176   op_cost(10);
4177   format %{"[$reg + $off + $ireg]" %}
4178   interface(MEMORY_INTER) %{
4179     base($reg);
4180     index($ireg);
4181     scale(0x0);
4182     disp($off);
4183   %}
4184 %}
4185 
4186 // Indirect Memory Plus Index Register Plus Offset Operand
4187 operand indIndex(eRegP reg, rRegI ireg) %{
4188   match(AddP reg ireg);
4189 
4190   op_cost(10);
4191   format %{"[$reg + $ireg]" %}
4192   interface(MEMORY_INTER) %{
4193     base($reg);
4194     index($ireg);
4195     scale(0x0);
4196     disp(0x0);
4197   %}
4198 %}
4199 
4200 // // -------------------------------------------------------------------------
4201 // // 486 architecture doesn't support "scale * index + offset" without a base
4202 // // -------------------------------------------------------------------------
4203 // // Scaled Memory Operands
4204 // // Indirect Memory Times Scale Plus Offset Operand
4205 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4206 //   match(AddP off (LShiftI ireg scale));
4207 //
4208 //   op_cost(10);
4209 //   format %{"[$off + $ireg << $scale]" %}
4210 //   interface(MEMORY_INTER) %{
4211 //     base(0x4);
4212 //     index($ireg);
4213 //     scale($scale);
4214 //     disp($off);
4215 //   %}
4216 // %}
4217 
4218 // Indirect Memory Times Scale Plus Index Register
4219 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4220   match(AddP reg (LShiftI ireg scale));
4221 
4222   op_cost(10);
4223   format %{"[$reg + $ireg << $scale]" %}
4224   interface(MEMORY_INTER) %{
4225     base($reg);
4226     index($ireg);
4227     scale($scale);
4228     disp(0x0);
4229   %}
4230 %}
4231 
4232 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4233 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4234   match(AddP (AddP reg (LShiftI ireg scale)) off);
4235 
4236   op_cost(10);
4237   format %{"[$reg + $off + $ireg << $scale]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index($ireg);
4241     scale($scale);
4242     disp($off);
4243   %}
4244 %}
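
// Example (informational): a Java int-array access a[i] typically produces the
// ideal subtree  AddP (AddP array (LShiftI i 2)) offset  -- with 'offset' the
// array-data base offset (12 bytes for an int[] on this 32-bit VM) -- which
// this operand matches and encodes as the single x86 address  [array + i*4 + 12].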
4245 
4246 //----------Load Long Memory Operands------------------------------------------
4247 // The load-long idiom will use its address expression again after loading
4248 // the first word of the long.  If the load-long destination overlaps with
4249 // registers used in the addressing expression, the 2nd half will be loaded
4250 // from a clobbered address.  Fix this by requiring that load-long use
4251 // address registers that do not overlap with the load-long target.
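//
// Example of the hazard (informational): for a 64-bit load such as
//   jlong v = *(jlong*)p;
// if the register pair chosen for v included the register holding p, the first
// 32-bit load would overwrite p and the second load (from p+4) would use a
// clobbered address.  Constraining the base to ESI (below) is how this file
// keeps the address register disjoint from the load-long target.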
4252 
4253 // load-long support
4254 operand load_long_RegP() %{
4255   constraint(ALLOC_IN_RC(esi_reg));
4256   match(RegP);
4257   match(eSIRegP);
4258   op_cost(100);
4259   format %{  %}
4260   interface(REG_INTER);
4261 %}
4262 
4263 // Indirect Memory Operand Long
4264 operand load_long_indirect(load_long_RegP reg) %{
4265   constraint(ALLOC_IN_RC(esi_reg));
4266   match(reg);
4267 
4268   format %{ "[$reg]" %}
4269   interface(MEMORY_INTER) %{
4270     base($reg);
4271     index(0x4);
4272     scale(0x0);
4273     disp(0x0);
4274   %}
4275 %}
4276 
4277 // Indirect Memory Plus Long Offset Operand
4278 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4279   match(AddP reg off);
4280 
4281   format %{ "[$reg + $off]" %}
4282   interface(MEMORY_INTER) %{
4283     base($reg);
4284     index(0x4);
4285     scale(0x0);
4286     disp($off);
4287   %}
4288 %}
4289 
4290 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4291 
4292 
4293 //----------Special Memory Operands--------------------------------------------
4294 // Stack Slot Operand - This operand is used for loading and storing temporary
4295 //                      values on the stack where a match requires a value to
4296 //                      flow through memory.
4297 operand stackSlotP(sRegP reg) %{
4298   constraint(ALLOC_IN_RC(stack_slots));
4299   // No match rule because this operand is only generated in matching
4300   format %{ "[$reg]" %}
4301   interface(MEMORY_INTER) %{
4302     base(0x4);   // ESP
4303     index(0x4);  // No Index
4304     scale(0x0);  // No Scale
4305     disp($reg);  // Stack Offset
4306   %}
4307 %}
4308 
4309 operand stackSlotI(sRegI reg) %{
4310   constraint(ALLOC_IN_RC(stack_slots));
4311   // No match rule because this operand is only generated in matching
4312   format %{ "[$reg]" %}
4313   interface(MEMORY_INTER) %{
4314     base(0x4);   // ESP
4315     index(0x4);  // No Index
4316     scale(0x0);  // No Scale
4317     disp($reg);  // Stack Offset
4318   %}
4319 %}
4320 
4321 operand stackSlotF(sRegF reg) %{
4322   constraint(ALLOC_IN_RC(stack_slots));
4323   // No match rule because this operand is only generated in matching
4324   format %{ "[$reg]" %}
4325   interface(MEMORY_INTER) %{
4326     base(0x4);   // ESP
4327     index(0x4);  // No Index
4328     scale(0x0);  // No Scale
4329     disp($reg);  // Stack Offset
4330   %}
4331 %}
4332 
4333 operand stackSlotD(sRegD reg) %{
4334   constraint(ALLOC_IN_RC(stack_slots));
4335   // No match rule because this operand is only generated in matching
4336   format %{ "[$reg]" %}
4337   interface(MEMORY_INTER) %{
4338     base(0x4);   // ESP
4339     index(0x4);  // No Index
4340     scale(0x0);  // No Scale
4341     disp($reg);  // Stack Offset
4342   %}
4343 %}
4344 
4345 operand stackSlotL(sRegL reg) %{
4346   constraint(ALLOC_IN_RC(stack_slots));
4347   // No match rule because this operand is only generated in matching
4348   format %{ "[$reg]" %}
4349   interface(MEMORY_INTER) %{
4350     base(0x4);   // ESP
4351     index(0x4);  // No Index
4352     scale(0x0);  // No Scale
4353     disp($reg);  // Stack Offset
4354   %}
4355 %}
4356 
4357 //----------Memory Operands - Win95 Implicit Null Variants----------------
4358 // Indirect Memory Operand
4359 operand indirect_win95_safe(eRegP_no_EBP reg)
4360 %{
4361   constraint(ALLOC_IN_RC(int_reg));
4362   match(reg);
4363 
4364   op_cost(100);
4365   format %{ "[$reg]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp(0x0);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Plus Short Offset Operand
4375 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4376 %{
4377   match(AddP reg off);
4378 
4379   op_cost(100);
4380   format %{ "[$reg + $off]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index(0x4);
4384     scale(0x0);
4385     disp($off);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Plus Long Offset Operand
4390 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4391 %{
4392   match(AddP reg off);
4393 
4394   op_cost(100);
4395   format %{ "[$reg + $off]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index(0x4);
4399     scale(0x0);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 // Indirect Memory Plus Index Register Plus Offset Operand
4405 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4406 %{
4407   match(AddP (AddP reg ireg) off);
4408 
4409   op_cost(100);
4410   format %{"[$reg + $off + $ireg]" %}
4411   interface(MEMORY_INTER) %{
4412     base($reg);
4413     index($ireg);
4414     scale(0x0);
4415     disp($off);
4416   %}
4417 %}
4418 
4419 // Indirect Memory Times Scale Plus Index Register
4420 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4421 %{
4422   match(AddP reg (LShiftI ireg scale));
4423 
4424   op_cost(100);
4425   format %{"[$reg + $ireg << $scale]" %}
4426   interface(MEMORY_INTER) %{
4427     base($reg);
4428     index($ireg);
4429     scale($scale);
4430     disp(0x0);
4431   %}
4432 %}
4433 
4434 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4435 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4436 %{
4437   match(AddP (AddP reg (LShiftI ireg scale)) off);
4438 
4439   op_cost(100);
4440   format %{"[$reg + $off + $ireg << $scale]" %}
4441   interface(MEMORY_INTER) %{
4442     base($reg);
4443     index($ireg);
4444     scale($scale);
4445     disp($off);
4446   %}
4447 %}
4448 
4449 //----------Conditional Branch Operands----------------------------------------
4450 // Comparison Op  - This is the operation of the comparison, and is limited to
4451 //                  the following set of codes:
4452 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4453 //
4454 // Other attributes of the comparison, such as unsignedness, are specified
4455 // by the comparison instruction that sets a condition code flags register.
4456 // That result is represented by a flags operand whose subtype is appropriate
4457 // to the unsignedness (etc.) of the comparison.
4458 //
4459 // Later, the instruction which matches both the Comparison Op (a Bool) and
4460 // the flags (produced by the Cmp) specifies the coding of the comparison op
4461 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4462 
4463 // Comparison Code
4464 operand cmpOp() %{
4465   match(Bool);
4466 
4467   format %{ "" %}
4468   interface(COND_INTER) %{
4469     equal(0x4, "e");
4470     not_equal(0x5, "ne");
4471     less(0xC, "l");
4472     greater_equal(0xD, "ge");
4473     less_equal(0xE, "le");
4474     greater(0xF, "g");
4475     overflow(0x0, "o");
4476     no_overflow(0x1, "no");
4477   %}
4478 %}
4479 
4480 // Comparison Code, unsigned compare.  Used by FP also, with
4481 // C2 (unordered) turned into GT or LT already.  The other bits
4482 // C0 and C3 are turned into Carry & Zero flags.
4483 operand cmpOpU() %{
4484   match(Bool);
4485 
4486   format %{ "" %}
4487   interface(COND_INTER) %{
4488     equal(0x4, "e");
4489     not_equal(0x5, "ne");
4490     less(0x2, "b");
4491     greater_equal(0x3, "nb");
4492     less_equal(0x6, "be");
4493     greater(0x7, "nbe");
4494     overflow(0x0, "o");
4495     no_overflow(0x1, "no");
4496   %}
4497 %}
4498 
4499 // Floating comparisons that don't require any fixup for the unordered case
4500 operand cmpOpUCF() %{
4501   match(Bool);
4502   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4503             n->as_Bool()->_test._test == BoolTest::ge ||
4504             n->as_Bool()->_test._test == BoolTest::le ||
4505             n->as_Bool()->_test._test == BoolTest::gt);
4506   format %{ "" %}
4507   interface(COND_INTER) %{
4508     equal(0x4, "e");
4509     not_equal(0x5, "ne");
4510     less(0x2, "b");
4511     greater_equal(0x3, "nb");
4512     less_equal(0x6, "be");
4513     greater(0x7, "nbe");
4514     overflow(0x0, "o");
4515     no_overflow(0x1, "no");
4516   %}
4517 %}
4518 
4519 
4520 // Floating comparisons that can be fixed up with extra conditional jumps
4521 operand cmpOpUCF2() %{
4522   match(Bool);
4523   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4524             n->as_Bool()->_test._test == BoolTest::eq);
4525   format %{ "" %}
4526   interface(COND_INTER) %{
4527     equal(0x4, "e");
4528     not_equal(0x5, "ne");
4529     less(0x2, "b");
4530     greater_equal(0x3, "nb");
4531     less_equal(0x6, "be");
4532     greater(0x7, "nbe");
4533     overflow(0x0, "o");
4534     no_overflow(0x1, "no");
4535   %}
4536 %}
4537 
4538 // Comparison Code for FP conditional move
4539 operand cmpOp_fcmov() %{
4540   match(Bool);
4541 
4542   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4543             n->as_Bool()->_test._test != BoolTest::no_overflow);
4544   format %{ "" %}
4545   interface(COND_INTER) %{
4546     equal        (0x0C8);
4547     not_equal    (0x1C8);
4548     less         (0x0C0);
4549     greater_equal(0x1C0);
4550     less_equal   (0x0D0);
4551     greater      (0x1D0);
4552     overflow(0x0, "o"); // not really supported by the instruction
4553     no_overflow(0x1, "no"); // not really supported by the instruction
4554   %}
4555 %}
4556 
4557 // Comparison Code used in long compares
4558 operand cmpOp_commute() %{
4559   match(Bool);
4560 
4561   format %{ "" %}
4562   interface(COND_INTER) %{
4563     equal(0x4, "e");
4564     not_equal(0x5, "ne");
4565     less(0xF, "g");
4566     greater_equal(0xE, "le");
4567     less_equal(0xD, "ge");
4568     greater(0xC, "l");
4569     overflow(0x0, "o");
4570     no_overflow(0x1, "no");
4571   %}
4572 %}
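// Note that the ordering tests above carry the commuted encodings relative to
// cmpOp (less emits "g", greater emits "l", and so on), which is what a
// long-compare instruct needs when it swaps the operands of the underlying
// compare before testing the flags.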
4573 
4574 //----------OPERAND CLASSES----------------------------------------------------
4575 // Operand Classes are groups of operands that are used to simplify
4576 // instruction definitions by not requiring the AD writer to specify separate
4577 // instructions for every form of operand when the instruction accepts
4578 // multiple operand types with the same basic encoding and format.  The classic
4579 // case of this is memory operands.
4580 
4581 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4582                indIndex, indIndexScale, indIndexScaleOffset);
4583 
4584 // Long memory operations are encoded as two instructions, the second using
4585 // a +4 offset.  This means some kind of offset is always required, and you
4586 // cannot use an oop as the offset (as is done when working on static globals).
4587 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4588                     indIndex, indIndexScale, indIndexScaleOffset);
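// A single instruct written against one of these classes covers every member
// operand.  For instance, loadI below takes a "memory" operand, so that one
// definition matches direct, register-indirect, and all of the indexed/scaled
// addressing forms listed above without being repeated per form.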
4589 
4590 
4591 //----------PIPELINE-----------------------------------------------------------
4592 // Rules which define the behavior of the target architecture's pipeline.
4593 pipeline %{
4594 
4595 //----------ATTRIBUTES---------------------------------------------------------
4596 attributes %{
4597   variable_size_instructions;        // Variable-sized instructions
4598   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4599   instruction_unit_size = 1;         // An instruction is 1 byte long
4600   instruction_fetch_unit_size = 16;  // The processor fetches one line
4601   instruction_fetch_units = 1;       // of 16 bytes
4602 
4603   // List of nop instructions
4604   nops( MachNop );
4605 %}
4606 
4607 //----------RESOURCES----------------------------------------------------------
4608 // Resources are the functional units available to the machine
4609 
4610 // Generic P2/P3 pipeline
4611 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4612 // 3 instructions decoded per cycle.
4613 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4614 // 2 ALU ops, only ALU0 handles mul/div instructions.
4615 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4616            MS0, MS1, MEM = MS0 | MS1,
4617            BR, FPU,
4618            ALU0, ALU1, ALU = ALU0 | ALU1 );
4619 
4620 //----------PIPELINE DESCRIPTION-----------------------------------------------
4621 // Pipeline Description specifies the stages in the machine's pipeline
4622 
4623 // Generic P2/P3 pipeline
4624 pipe_desc(S0, S1, S2, S3, S4, S5);
4625 
4626 //----------PIPELINE CLASSES---------------------------------------------------
4627 // Pipeline Classes describe the stages in which input and output are
4628 // referenced by the hardware pipeline.
4629 
4630 // Naming convention: ialu or fpu
4631 // Then: _reg
4632 // Then: _reg if there is a 2nd register
4633 // Then: _long if it's a pair of instructions implementing a long operation
4634 // Then: _fat if it requires the big decoder
4635 //   Or: _mem if it requires the big decoder and a memory unit.
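// Reading a name under this convention: ialu_reg_mem (defined below) is an
// integer ALU operation with a register destination and a memory source, so
// it claims the big decoder (D0) and a MEM unit in addition to an ALU.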
4636 
4637 // Integer ALU reg operation
4638 pipe_class ialu_reg(rRegI dst) %{
4639     single_instruction;
4640     dst    : S4(write);
4641     dst    : S3(read);
4642     DECODE : S0;        // any decoder
4643     ALU    : S3;        // any alu
4644 %}
4645 
4646 // Long ALU reg operation
4647 pipe_class ialu_reg_long(eRegL dst) %{
4648     instruction_count(2);
4649     dst    : S4(write);
4650     dst    : S3(read);
4651     DECODE : S0(2);     // any 2 decoders
4652     ALU    : S3(2);     // both alus
4653 %}
4654 
4655 // Integer ALU reg operation using big decoder
4656 pipe_class ialu_reg_fat(rRegI dst) %{
4657     single_instruction;
4658     dst    : S4(write);
4659     dst    : S3(read);
4660     D0     : S0;        // big decoder only
4661     ALU    : S3;        // any alu
4662 %}
4663 
4664 // Long ALU reg operation using big decoder
4665 pipe_class ialu_reg_long_fat(eRegL dst) %{
4666     instruction_count(2);
4667     dst    : S4(write);
4668     dst    : S3(read);
4669     D0     : S0(2);     // big decoder only; twice
4670     ALU    : S3(2);     // any 2 alus
4671 %}
4672 
4673 // Integer ALU reg-reg operation
4674 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4675     single_instruction;
4676     dst    : S4(write);
4677     src    : S3(read);
4678     DECODE : S0;        // any decoder
4679     ALU    : S3;        // any alu
4680 %}
4681 
4682 // Long ALU reg-reg operation
4683 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4684     instruction_count(2);
4685     dst    : S4(write);
4686     src    : S3(read);
4687     DECODE : S0(2);     // any 2 decoders
4688     ALU    : S3(2);     // both alus
4689 %}
4690 
4691 // Integer ALU reg-reg operation
4692 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4693     single_instruction;
4694     dst    : S4(write);
4695     src    : S3(read);
4696     D0     : S0;        // big decoder only
4697     ALU    : S3;        // any alu
4698 %}
4699 
4700 // Long ALU reg-reg operation
4701 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4702     instruction_count(2);
4703     dst    : S4(write);
4704     src    : S3(read);
4705     D0     : S0(2);     // big decoder only; twice
4706     ALU    : S3(2);     // both alus
4707 %}
4708 
4709 // Integer ALU reg-mem operation
4710 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4711     single_instruction;
4712     dst    : S5(write);
4713     mem    : S3(read);
4714     D0     : S0;        // big decoder only
4715     ALU    : S4;        // any alu
4716     MEM    : S3;        // any mem
4717 %}
4718 
4719 // Long ALU reg-mem operation
4720 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4721     instruction_count(2);
4722     dst    : S5(write);
4723     mem    : S3(read);
4724     D0     : S0(2);     // big decoder only; twice
4725     ALU    : S4(2);     // any 2 alus
4726     MEM    : S3(2);     // both mems
4727 %}
4728 
4729 // Integer mem operation (prefetch)
4730 pipe_class ialu_mem(memory mem)
4731 %{
4732     single_instruction;
4733     mem    : S3(read);
4734     D0     : S0;        // big decoder only
4735     MEM    : S3;        // any mem
4736 %}
4737 
4738 // Integer Store to Memory
4739 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4740     single_instruction;
4741     mem    : S3(read);
4742     src    : S5(read);
4743     D0     : S0;        // big decoder only
4744     ALU    : S4;        // any alu
4745     MEM    : S3;
4746 %}
4747 
4748 // Long Store to Memory
4749 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4750     instruction_count(2);
4751     mem    : S3(read);
4752     src    : S5(read);
4753     D0     : S0(2);     // big decoder only; twice
4754     ALU    : S4(2);     // any 2 alus
4755     MEM    : S3(2);     // Both mems
4756 %}
4757 
4758 // Integer Store to Memory
4759 pipe_class ialu_mem_imm(memory mem) %{
4760     single_instruction;
4761     mem    : S3(read);
4762     D0     : S0;        // big decoder only
4763     ALU    : S4;        // any alu
4764     MEM    : S3;
4765 %}
4766 
4767 // Integer ALU0 reg-reg operation
4768 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4769     single_instruction;
4770     dst    : S4(write);
4771     src    : S3(read);
4772     D0     : S0;        // Big decoder only
4773     ALU0   : S3;        // only alu0
4774 %}
4775 
4776 // Integer ALU0 reg-mem operation
4777 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4778     single_instruction;
4779     dst    : S5(write);
4780     mem    : S3(read);
4781     D0     : S0;        // big decoder only
4782     ALU0   : S4;        // ALU0 only
4783     MEM    : S3;        // any mem
4784 %}
4785 
4786 // Integer ALU reg-reg operation
4787 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4788     single_instruction;
4789     cr     : S4(write);
4790     src1   : S3(read);
4791     src2   : S3(read);
4792     DECODE : S0;        // any decoder
4793     ALU    : S3;        // any alu
4794 %}
4795 
4796 // Integer ALU reg-imm operation
4797 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4798     single_instruction;
4799     cr     : S4(write);
4800     src1   : S3(read);
4801     DECODE : S0;        // any decoder
4802     ALU    : S3;        // any alu
4803 %}
4804 
4805 // Integer ALU reg-mem operation
4806 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4807     single_instruction;
4808     cr     : S4(write);
4809     src1   : S3(read);
4810     src2   : S3(read);
4811     D0     : S0;        // big decoder only
4812     ALU    : S4;        // any alu
4813     MEM    : S3;
4814 %}
4815 
4816 // Conditional move reg-reg
4817 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4818     instruction_count(4);
4819     y      : S4(read);
4820     q      : S3(read);
4821     p      : S3(read);
4822     DECODE : S0(4);     // any decoder
4823 %}
4824 
4825 // Conditional move reg-reg
4826 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4827     single_instruction;
4828     dst    : S4(write);
4829     src    : S3(read);
4830     cr     : S3(read);
4831     DECODE : S0;        // any decoder
4832 %}
4833 
4834 // Conditional move reg-mem
4835 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4836     single_instruction;
4837     dst    : S4(write);
4838     src    : S3(read);
4839     cr     : S3(read);
4840     DECODE : S0;        // any decoder
4841     MEM    : S3;
4842 %}
4843 
4844 // Conditional move reg-reg long
4845 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4846     single_instruction;
4847     dst    : S4(write);
4848     src    : S3(read);
4849     cr     : S3(read);
4850     DECODE : S0(2);     // any 2 decoders
4851 %}
4852 
4853 // Conditional move double reg-reg
4854 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4855     single_instruction;
4856     dst    : S4(write);
4857     src    : S3(read);
4858     cr     : S3(read);
4859     DECODE : S0;        // any decoder
4860 %}
4861 
4862 // Float reg-reg operation
4863 pipe_class fpu_reg(regDPR dst) %{
4864     instruction_count(2);
4865     dst    : S3(read);
4866     DECODE : S0(2);     // any 2 decoders
4867     FPU    : S3;
4868 %}
4869 
4870 // Float reg-reg operation
4871 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4872     instruction_count(2);
4873     dst    : S4(write);
4874     src    : S3(read);
4875     DECODE : S0(2);     // any 2 decoders
4876     FPU    : S3;
4877 %}
4878 
4879 // Float reg-reg operation
4880 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4881     instruction_count(3);
4882     dst    : S4(write);
4883     src1   : S3(read);
4884     src2   : S3(read);
4885     DECODE : S0(3);     // any 3 decoders
4886     FPU    : S3(2);
4887 %}
4888 
4889 // Float reg-reg operation
4890 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4891     instruction_count(4);
4892     dst    : S4(write);
4893     src1   : S3(read);
4894     src2   : S3(read);
4895     src3   : S3(read);
4896     DECODE : S0(4);     // any decoder
4897     FPU    : S3(2);
4898 %}
4899 
4900 // Float reg-reg operation
4901 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4902     instruction_count(4);
4903     dst    : S4(write);
4904     src1   : S3(read);
4905     src2   : S3(read);
4906     src3   : S3(read);
4907     DECODE : S1(3);     // any 3 decoders
4908     D0     : S0;        // Big decoder only
4909     FPU    : S3(2);
4910     MEM    : S3;
4911 %}
4912 
4913 // Float reg-mem operation
4914 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4915     instruction_count(2);
4916     dst    : S5(write);
4917     mem    : S3(read);
4918     D0     : S0;        // big decoder only
4919     DECODE : S1;        // any decoder for FPU POP
4920     FPU    : S4;
4921     MEM    : S3;        // any mem
4922 %}
4923 
4924 // Float reg-mem operation
4925 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4926     instruction_count(3);
4927     dst    : S5(write);
4928     src1   : S3(read);
4929     mem    : S3(read);
4930     D0     : S0;        // big decoder only
4931     DECODE : S1(2);     // any decoder for FPU POP
4932     FPU    : S4;
4933     MEM    : S3;        // any mem
4934 %}
4935 
4936 // Float mem-reg operation
4937 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4938     instruction_count(2);
4939     src    : S5(read);
4940     mem    : S3(read);
4941     DECODE : S0;        // any decoder for FPU PUSH
4942     D0     : S1;        // big decoder only
4943     FPU    : S4;
4944     MEM    : S3;        // any mem
4945 %}
4946 
4947 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4948     instruction_count(3);
4949     src1   : S3(read);
4950     src2   : S3(read);
4951     mem    : S3(read);
4952     DECODE : S0(2);     // any decoder for FPU PUSH
4953     D0     : S1;        // big decoder only
4954     FPU    : S4;
4955     MEM    : S3;        // any mem
4956 %}
4957 
4958 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4959     instruction_count(3);
4960     src1   : S3(read);
4961     src2   : S3(read);
4962     mem    : S4(read);
4963     DECODE : S0;        // any decoder for FPU PUSH
4964     D0     : S0(2);     // big decoder only
4965     FPU    : S4;
4966     MEM    : S3(2);     // any mem
4967 %}
4968 
4969 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4970     instruction_count(2);
4971     src1   : S3(read);
4972     dst    : S4(read);
4973     D0     : S0(2);     // big decoder only
4974     MEM    : S3(2);     // any mem
4975 %}
4976 
4977 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4978     instruction_count(3);
4979     src1   : S3(read);
4980     src2   : S3(read);
4981     dst    : S4(read);
4982     D0     : S0(3);     // big decoder only
4983     FPU    : S4;
4984     MEM    : S3(3);     // any mem
4985 %}
4986 
4987 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4988     instruction_count(3);
4989     src1   : S4(read);
4990     mem    : S4(read);
4991     DECODE : S0;        // any decoder for FPU PUSH
4992     D0     : S0(2);     // big decoder only
4993     FPU    : S4;
4994     MEM    : S3(2);     // any mem
4995 %}
4996 
4997 // Float load constant
4998 pipe_class fpu_reg_con(regDPR dst) %{
4999     instruction_count(2);
5000     dst    : S5(write);
5001     D0     : S0;        // big decoder only for the load
5002     DECODE : S1;        // any decoder for FPU POP
5003     FPU    : S4;
5004     MEM    : S3;        // any mem
5005 %}
5006 
5007 // Float load constant
5008 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5009     instruction_count(3);
5010     dst    : S5(write);
5011     src    : S3(read);
5012     D0     : S0;        // big decoder only for the load
5013     DECODE : S1(2);     // any decoder for FPU POP
5014     FPU    : S4;
5015     MEM    : S3;        // any mem
5016 %}
5017 
5018 // Unconditional branch
5019 pipe_class pipe_jmp( label labl ) %{
5020     single_instruction;
5021     BR   : S3;
5022 %}
5023 
5024 // Conditional branch
5025 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5026     single_instruction;
5027     cr    : S1(read);
5028     BR    : S3;
5029 %}
5030 
5031 // Allocation idiom
5032 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5033     instruction_count(1); force_serialization;
5034     fixed_latency(6);
5035     heap_ptr : S3(read);
5036     DECODE   : S0(3);
5037     D0       : S2;
5038     MEM      : S3;
5039     ALU      : S3(2);
5040     dst      : S5(write);
5041     BR       : S5;
5042 %}
5043 
5044 // Generic big/slow expanded idiom
5045 pipe_class pipe_slow(  ) %{
5046     instruction_count(10); multiple_bundles; force_serialization;
5047     fixed_latency(100);
5048     D0  : S0(2);
5049     MEM : S3(2);
5050 %}
5051 
5052 // The real do-nothing guy
5053 pipe_class empty( ) %{
5054     instruction_count(0);
5055 %}
5056 
5057 // Define the class for the Nop node
5058 define %{
5059    MachNop = empty;
5060 %}
5061 
5062 %}
5063 
5064 //----------INSTRUCTIONS-------------------------------------------------------
5065 //
5066 // match      -- States which machine-independent subtree may be replaced
5067 //               by this instruction.
5068 // ins_cost   -- The estimated cost of this instruction is used by instruction
5069 //               selection to identify a minimum cost tree of machine
5070 //               instructions that matches a tree of machine-independent
5071 //               instructions.
5072 // format     -- A string providing the disassembly for this instruction.
5073 //               The value of an instruction's operand may be inserted
5074 //               by referring to it with a '$' prefix.
5075 // opcode     -- Three instruction opcodes may be provided.  These are referred
5076 //               to within an encode class as $primary, $secondary, and $tertiary
5077 //               respectively.  The primary opcode is commonly used to
5078 //               indicate the type of machine instruction, while secondary
5079 //               and tertiary are often used for prefix options or addressing
5080 //               modes.
5081 // ins_encode -- A list of encode classes with parameters. The encode class
5082 //               name must have been defined in an 'enc_class' specification
5083 //               in the encode section of the architecture description.
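// The bytes_reverse_int instruct just below is a small example of the pieces
// fitting together: its opcode(0x0F, 0xC8) supplies $primary and $secondary,
// and the OpcP/OpcSReg encode classes (from the encode section of this file)
// emit them as the BSWAP opcode bytes with the register number folded in.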
5084 
5085 //----------BSWAP-Instruction--------------------------------------------------
5086 instruct bytes_reverse_int(rRegI dst) %{
5087   match(Set dst (ReverseBytesI dst));
5088 
5089   format %{ "BSWAP  $dst" %}
5090   opcode(0x0F, 0xC8);
5091   ins_encode( OpcP, OpcSReg(dst) );
5092   ins_pipe( ialu_reg );
5093 %}
5094 
5095 instruct bytes_reverse_long(eRegL dst) %{
5096   match(Set dst (ReverseBytesL dst));
5097 
5098   format %{ "BSWAP  $dst.lo\n\t"
5099             "BSWAP  $dst.hi\n\t"
5100             "XCHG   $dst.lo $dst.hi" %}
5101 
5102   ins_cost(125);
5103   ins_encode( bswap_long_bytes(dst) );
5104   ins_pipe( ialu_reg_reg);
5105 %}
5106 
5107 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5108   match(Set dst (ReverseBytesUS dst));
5109   effect(KILL cr);
5110 
5111   format %{ "BSWAP  $dst\n\t"
5112             "SHR    $dst,16\n\t" %}
5113   ins_encode %{
5114     __ bswapl($dst$$Register);
5115     __ shrl($dst$$Register, 16);
5116   %}
5117   ins_pipe( ialu_reg );
5118 %}
5119 
5120 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5121   match(Set dst (ReverseBytesS dst));
5122   effect(KILL cr);
5123 
5124   format %{ "BSWAP  $dst\n\t"
5125             "SAR    $dst,16\n\t" %}
5126   ins_encode %{
5127     __ bswapl($dst$$Register);
5128     __ sarl($dst$$Register, 16);
5129   %}
5130   ins_pipe( ialu_reg );
5131 %}
5132 
5133 
5134 //---------- Zeros Count Instructions ------------------------------------------
5135 
5136 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5137   predicate(UseCountLeadingZerosInstruction);
5138   match(Set dst (CountLeadingZerosI src));
5139   effect(KILL cr);
5140 
5141   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5142   ins_encode %{
5143     __ lzcntl($dst$$Register, $src$$Register);
5144   %}
5145   ins_pipe(ialu_reg);
5146 %}
5147 
5148 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5149   predicate(!UseCountLeadingZerosInstruction);
5150   match(Set dst (CountLeadingZerosI src));
5151   effect(KILL cr);
5152 
5153   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5154             "JNZ    skip\n\t"
5155             "MOV    $dst, -1\n"
5156       "skip:\n\t"
5157             "NEG    $dst\n\t"
5158             "ADD    $dst, 31" %}
5159   ins_encode %{
5160     Register Rdst = $dst$$Register;
5161     Register Rsrc = $src$$Register;
5162     Label skip;
5163     __ bsrl(Rdst, Rsrc);
5164     __ jccb(Assembler::notZero, skip);
5165     __ movl(Rdst, -1);
5166     __ bind(skip);
5167     __ negl(Rdst);
5168     __ addl(Rdst, BitsPerInt - 1);
5169   %}
5170   ins_pipe(ialu_reg);
5171 %}
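// A quick check of the BSR-based expansion above: for a nonzero input whose
// highest set bit is at position p, BSR writes p, so NEG followed by ADD 31
// yields 31 - p, exactly the leading-zero count.  For a zero input BSR sets
// ZF (leaving the destination unspecified), the MOV installs -1, and
// NEG + ADD 31 gives 1 + 31 = 32.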
5172 
5173 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5174   predicate(UseCountLeadingZerosInstruction);
5175   match(Set dst (CountLeadingZerosL src));
5176   effect(TEMP dst, KILL cr);
5177 
5178   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5179             "JNC    done\n\t"
5180             "LZCNT  $dst, $src.lo\n\t"
5181             "ADD    $dst, 32\n"
5182       "done:" %}
5183   ins_encode %{
5184     Register Rdst = $dst$$Register;
5185     Register Rsrc = $src$$Register;
5186     Label done;
5187     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5188     __ jccb(Assembler::carryClear, done);
5189     __ lzcntl(Rdst, Rsrc);
5190     __ addl(Rdst, BitsPerInt);
5191     __ bind(done);
5192   %}
5193   ins_pipe(ialu_reg);
5194 %}
5195 
5196 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5197   predicate(!UseCountLeadingZerosInstruction);
5198   match(Set dst (CountLeadingZerosL src));
5199   effect(TEMP dst, KILL cr);
5200 
5201   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5202             "JZ     msw_is_zero\n\t"
5203             "ADD    $dst, 32\n\t"
5204             "JMP    not_zero\n"
5205       "msw_is_zero:\n\t"
5206             "BSR    $dst, $src.lo\n\t"
5207             "JNZ    not_zero\n\t"
5208             "MOV    $dst, -1\n"
5209       "not_zero:\n\t"
5210             "NEG    $dst\n\t"
5211             "ADD    $dst, 63\n" %}
5212   ins_encode %{
5213     Register Rdst = $dst$$Register;
5214     Register Rsrc = $src$$Register;
5215     Label msw_is_zero;
5216     Label not_zero;
5217     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5218     __ jccb(Assembler::zero, msw_is_zero);
5219     __ addl(Rdst, BitsPerInt);
5220     __ jmpb(not_zero);
5221     __ bind(msw_is_zero);
5222     __ bsrl(Rdst, Rsrc);
5223     __ jccb(Assembler::notZero, not_zero);
5224     __ movl(Rdst, -1);
5225     __ bind(not_zero);
5226     __ negl(Rdst);
5227     __ addl(Rdst, BitsPerLong - 1);
5228   %}
5229   ins_pipe(ialu_reg);
5230 %}
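// The same arithmetic covers the long form: a highest set bit at position p of
// $src.hi gives p, then +32, NEG, +63 => 31 - p; a highest set bit at position
// p of $src.lo (high word zero) gives NEG, +63 => 63 - p; an all-zero input
// takes the MOV -1 path and ends at 1 + 63 = 64.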
5231 
5232 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5233   predicate(UseCountTrailingZerosInstruction);
5234   match(Set dst (CountTrailingZerosI src));
5235   effect(KILL cr);
5236 
5237   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5238   ins_encode %{
5239     __ tzcntl($dst$$Register, $src$$Register);
5240   %}
5241   ins_pipe(ialu_reg);
5242 %}
5243 
5244 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5245   predicate(!UseCountTrailingZerosInstruction);
5246   match(Set dst (CountTrailingZerosI src));
5247   effect(KILL cr);
5248 
5249   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5250             "JNZ    done\n\t"
5251             "MOV    $dst, 32\n"
5252       "done:" %}
5253   ins_encode %{
5254     Register Rdst = $dst$$Register;
5255     Label done;
5256     __ bsfl(Rdst, $src$$Register);
5257     __ jccb(Assembler::notZero, done);
5258     __ movl(Rdst, BitsPerInt);
5259     __ bind(done);
5260   %}
5261   ins_pipe(ialu_reg);
5262 %}
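// BSF already returns the trailing-zero count (the index of the lowest set
// bit) for any nonzero input; the JNZ/MOV pair only patches the zero case,
// where the result must be 32.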
5263 
5264 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5265   predicate(UseCountTrailingZerosInstruction);
5266   match(Set dst (CountTrailingZerosL src));
5267   effect(TEMP dst, KILL cr);
5268 
5269   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
5270             "JNC    done\n\t"
5271             "TZCNT  $dst, $src.hi\n\t"
5272             "ADD    $dst, 32\n"
5273       "done:" %}
5274   ins_encode %{
5275     Register Rdst = $dst$$Register;
5276     Register Rsrc = $src$$Register;
5277     Label done;
5278     __ tzcntl(Rdst, Rsrc);
5279     __ jccb(Assembler::carryClear, done);
5280     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5281     __ addl(Rdst, BitsPerInt);
5282     __ bind(done);
5283   %}
5284   ins_pipe(ialu_reg);
5285 %}
5286 
5287 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5288   predicate(!UseCountTrailingZerosInstruction);
5289   match(Set dst (CountTrailingZerosL src));
5290   effect(TEMP dst, KILL cr);
5291 
5292   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5293             "JNZ    done\n\t"
5294             "BSF    $dst, $src.hi\n\t"
5295             "JNZ    msw_not_zero\n\t"
5296             "MOV    $dst, 32\n"
5297       "msw_not_zero:\n\t"
5298             "ADD    $dst, 32\n"
5299       "done:" %}
5300   ins_encode %{
5301     Register Rdst = $dst$$Register;
5302     Register Rsrc = $src$$Register;
5303     Label msw_not_zero;
5304     Label done;
5305     __ bsfl(Rdst, Rsrc);
5306     __ jccb(Assembler::notZero, done);
5307     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5308     __ jccb(Assembler::notZero, msw_not_zero);
5309     __ movl(Rdst, BitsPerInt);
5310     __ bind(msw_not_zero);
5311     __ addl(Rdst, BitsPerInt);
5312     __ bind(done);
5313   %}
5314   ins_pipe(ialu_reg);
5315 %}
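// For the long form: a lowest set bit at position p of $src.lo gives p
// directly; if the low word is zero, BSF of $src.hi gives p and the trailing
// ADD makes it p + 32; if both words are zero, the MOV 32 plus the same ADD
// yields 64.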
5316 
5317 
5318 //---------- Population Count Instructions -------------------------------------
5319 
5320 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5321   predicate(UsePopCountInstruction);
5322   match(Set dst (PopCountI src));
5323   effect(KILL cr);
5324 
5325   format %{ "POPCNT $dst, $src" %}
5326   ins_encode %{
5327     __ popcntl($dst$$Register, $src$$Register);
5328   %}
5329   ins_pipe(ialu_reg);
5330 %}
5331 
5332 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5333   predicate(UsePopCountInstruction);
5334   match(Set dst (PopCountI (LoadI mem)));
5335   effect(KILL cr);
5336 
5337   format %{ "POPCNT $dst, $mem" %}
5338   ins_encode %{
5339     __ popcntl($dst$$Register, $mem$$Address);
5340   %}
5341   ins_pipe(ialu_reg);
5342 %}
5343 
5344 // Note: Long.bitCount(long) returns an int.
5345 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5346   predicate(UsePopCountInstruction);
5347   match(Set dst (PopCountL src));
5348   effect(KILL cr, TEMP tmp, TEMP dst);
5349 
5350   format %{ "POPCNT $dst, $src.lo\n\t"
5351             "POPCNT $tmp, $src.hi\n\t"
5352             "ADD    $dst, $tmp" %}
5353   ins_encode %{
5354     __ popcntl($dst$$Register, $src$$Register);
5355     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5356     __ addl($dst$$Register, $tmp$$Register);
5357   %}
5358   ins_pipe(ialu_reg);
5359 %}
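// The split is exact: popcount(lo) + popcount(hi) equals the 64-bit population
// count, which is why the two POPCNTs feed a plain ADD and the result fits in
// a single int register.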
5360 
5361 // Note: Long.bitCount(long) returns an int.
5362 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5363   predicate(UsePopCountInstruction);
5364   match(Set dst (PopCountL (LoadL mem)));
5365   effect(KILL cr, TEMP tmp, TEMP dst);
5366 
5367   format %{ "POPCNT $dst, $mem\n\t"
5368             "POPCNT $tmp, $mem+4\n\t"
5369             "ADD    $dst, $tmp" %}
5370   ins_encode %{
5371     //__ popcntl($dst$$Register, $mem$$Address$$first);
5372     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5373     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5374     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5375     __ addl($dst$$Register, $tmp$$Register);
5376   %}
5377   ins_pipe(ialu_reg);
5378 %}
5379 
5380 
5381 //----------Load/Store/Move Instructions---------------------------------------
5382 //----------Load Instructions--------------------------------------------------
5383 // Load Byte (8bit signed)
5384 instruct loadB(xRegI dst, memory mem) %{
5385   match(Set dst (LoadB mem));
5386 
5387   ins_cost(125);
5388   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5389 
5390   ins_encode %{
5391     __ movsbl($dst$$Register, $mem$$Address);
5392   %}
5393 
5394   ins_pipe(ialu_reg_mem);
5395 %}
5396 
5397 // Load Byte (8bit signed) into Long Register
5398 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5399   match(Set dst (ConvI2L (LoadB mem)));
5400   effect(KILL cr);
5401 
5402   ins_cost(375);
5403   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5404             "MOV    $dst.hi,$dst.lo\n\t"
5405             "SAR    $dst.hi,7" %}
5406 
5407   ins_encode %{
5408     __ movsbl($dst$$Register, $mem$$Address);
5409     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5410     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5411   %}
5412 
5413   ins_pipe(ialu_reg_mem);
5414 %}
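// Spot check of the sign extension above: a stored byte 0x80 (-128) loads via
// MOVSX8 as 0xFFFFFF80; after the copy, bits 7..31 of the high word are
// already sign bits, so SAR by 7 smears them across the word, giving
// 0xFFFFFFFF:0xFFFFFF80.  A positive byte such as 0x7F shifts down to zero in
// the high word.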
5415 
5416 // Load Unsigned Byte (8bit UNsigned)
5417 instruct loadUB(xRegI dst, memory mem) %{
5418   match(Set dst (LoadUB mem));
5419 
5420   ins_cost(125);
5421   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5422 
5423   ins_encode %{
5424     __ movzbl($dst$$Register, $mem$$Address);
5425   %}
5426 
5427   ins_pipe(ialu_reg_mem);
5428 %}
5429 
5430 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5431 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5432   match(Set dst (ConvI2L (LoadUB mem)));
5433   effect(KILL cr);
5434 
5435   ins_cost(250);
5436   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5437             "XOR    $dst.hi,$dst.hi" %}
5438 
5439   ins_encode %{
5440     Register Rdst = $dst$$Register;
5441     __ movzbl(Rdst, $mem$$Address);
5442     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5443   %}
5444 
5445   ins_pipe(ialu_reg_mem);
5446 %}
5447 
5448 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5449 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5450   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5451   effect(KILL cr);
5452 
5453   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5454             "XOR    $dst.hi,$dst.hi\n\t"
5455             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5456   ins_encode %{
5457     Register Rdst = $dst$$Register;
5458     __ movzbl(Rdst, $mem$$Address);
5459     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5460     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5461   %}
5462   ins_pipe(ialu_reg_mem);
5463 %}
5464 
5465 // Load Short (16bit signed)
5466 instruct loadS(rRegI dst, memory mem) %{
5467   match(Set dst (LoadS mem));
5468 
5469   ins_cost(125);
5470   format %{ "MOVSX  $dst,$mem\t# short" %}
5471 
5472   ins_encode %{
5473     __ movswl($dst$$Register, $mem$$Address);
5474   %}
5475 
5476   ins_pipe(ialu_reg_mem);
5477 %}
5478 
5479 // Load Short (16 bit signed) to Byte (8 bit signed)
5480 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5481   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5482 
5483   ins_cost(125);
5484   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5485   ins_encode %{
5486     __ movsbl($dst$$Register, $mem$$Address);
5487   %}
5488   ins_pipe(ialu_reg_mem);
5489 %}
5490 
5491 // Load Short (16bit signed) into Long Register
5492 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5493   match(Set dst (ConvI2L (LoadS mem)));
5494   effect(KILL cr);
5495 
5496   ins_cost(375);
5497   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5498             "MOV    $dst.hi,$dst.lo\n\t"
5499             "SAR    $dst.hi,15" %}
5500 
5501   ins_encode %{
5502     __ movswl($dst$$Register, $mem$$Address);
5503     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5504     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5505   %}
5506 
5507   ins_pipe(ialu_reg_mem);
5508 %}
5509 
5510 // Load Unsigned Short/Char (16bit unsigned)
5511 instruct loadUS(rRegI dst, memory mem) %{
5512   match(Set dst (LoadUS mem));
5513 
5514   ins_cost(125);
5515   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5516 
5517   ins_encode %{
5518     __ movzwl($dst$$Register, $mem$$Address);
5519   %}
5520 
5521   ins_pipe(ialu_reg_mem);
5522 %}
5523 
5524 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5525 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5526   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5527 
5528   ins_cost(125);
5529   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5530   ins_encode %{
5531     __ movsbl($dst$$Register, $mem$$Address);
5532   %}
5533   ins_pipe(ialu_reg_mem);
5534 %}
5535 
5536 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5537 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5538   match(Set dst (ConvI2L (LoadUS mem)));
5539   effect(KILL cr);
5540 
5541   ins_cost(250);
5542   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5543             "XOR    $dst.hi,$dst.hi" %}
5544 
5545   ins_encode %{
5546     __ movzwl($dst$$Register, $mem$$Address);
5547     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5548   %}
5549 
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5554 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5555   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5556   effect(KILL cr);
5557 
5558   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5559             "XOR    $dst.hi,$dst.hi" %}
5560   ins_encode %{
5561     Register Rdst = $dst$$Register;
5562     __ movzbl(Rdst, $mem$$Address);
5563     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5564   %}
5565   ins_pipe(ialu_reg_mem);
5566 %}
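// Because the 0xFF mask keeps only the low byte, the ushort load collapses to
// a byte zero-extend (MOVZX8) and no explicit AND is needed.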
5567 
5568 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5569 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5570   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5571   effect(KILL cr);
5572 
5573   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5574             "XOR    $dst.hi,$dst.hi\n\t"
5575             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5576   ins_encode %{
5577     Register Rdst = $dst$$Register;
5578     __ movzwl(Rdst, $mem$$Address);
5579     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5580     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5581   %}
5582   ins_pipe(ialu_reg_mem);
5583 %}
5584 
5585 // Load Integer
5586 instruct loadI(rRegI dst, memory mem) %{
5587   match(Set dst (LoadI mem));
5588 
5589   ins_cost(125);
5590   format %{ "MOV    $dst,$mem\t# int" %}
5591 
5592   ins_encode %{
5593     __ movl($dst$$Register, $mem$$Address);
5594   %}
5595 
5596   ins_pipe(ialu_reg_mem);
5597 %}
5598 
5599 // Load Integer (32 bit signed) to Byte (8 bit signed)
5600 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5601   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5602 
5603   ins_cost(125);
5604   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5605   ins_encode %{
5606     __ movsbl($dst$$Register, $mem$$Address);
5607   %}
5608   ins_pipe(ialu_reg_mem);
5609 %}
5610 
5611 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5612 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5613   match(Set dst (AndI (LoadI mem) mask));
5614 
5615   ins_cost(125);
5616   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5617   ins_encode %{
5618     __ movzbl($dst$$Register, $mem$$Address);
5619   %}
5620   ins_pipe(ialu_reg_mem);
5621 %}
5622 
5623 // Load Integer (32 bit signed) to Short (16 bit signed)
5624 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5625   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5626 
5627   ins_cost(125);
5628   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5629   ins_encode %{
5630     __ movswl($dst$$Register, $mem$$Address);
5631   %}
5632   ins_pipe(ialu_reg_mem);
5633 %}
5634 
5635 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5636 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5637   match(Set dst (AndI (LoadI mem) mask));
5638 
5639   ins_cost(125);
5640   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5641   ins_encode %{
5642     __ movzwl($dst$$Register, $mem$$Address);
5643   %}
5644   ins_pipe(ialu_reg_mem);
5645 %}
5646 
5647 // Load Integer into Long Register
5648 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5649   match(Set dst (ConvI2L (LoadI mem)));
5650   effect(KILL cr);
5651 
5652   ins_cost(375);
5653   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5654             "MOV    $dst.hi,$dst.lo\n\t"
5655             "SAR    $dst.hi,31" %}
5656 
5657   ins_encode %{
5658     __ movl($dst$$Register, $mem$$Address);
5659     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5660     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5661   %}
5662 
5663   ins_pipe(ialu_reg_mem);
5664 %}
5665 
5666 // Load Integer with mask 0xFF into Long Register
5667 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5668   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5669   effect(KILL cr);
5670 
5671   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5672             "XOR    $dst.hi,$dst.hi" %}
5673   ins_encode %{
5674     Register Rdst = $dst$$Register;
5675     __ movzbl(Rdst, $mem$$Address);
5676     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5677   %}
5678   ins_pipe(ialu_reg_mem);
5679 %}
5680 
5681 // Load Integer with mask 0xFFFF into Long Register
5682 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5683   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5684   effect(KILL cr);
5685 
5686   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5687             "XOR    $dst.hi,$dst.hi" %}
5688   ins_encode %{
5689     Register Rdst = $dst$$Register;
5690     __ movzwl(Rdst, $mem$$Address);
5691     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5692   %}
5693   ins_pipe(ialu_reg_mem);
5694 %}
5695 
5696 // Load Integer with 31-bit mask into Long Register
5697 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5698   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5699   effect(KILL cr);
5700 
5701   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5702             "XOR    $dst.hi,$dst.hi\n\t"
5703             "AND    $dst.lo,$mask" %}
5704   ins_encode %{
5705     Register Rdst = $dst$$Register;
5706     __ movl(Rdst, $mem$$Address);
5707     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5708     __ andl(Rdst, $mask$$constant);
5709   %}
5710   ins_pipe(ialu_reg_mem);
5711 %}
5712 
5713 // Load Unsigned Integer into Long Register
5714 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5715   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5716   effect(KILL cr);
5717 
5718   ins_cost(250);
5719   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5720             "XOR    $dst.hi,$dst.hi" %}
5721 
5722   ins_encode %{
5723     __ movl($dst$$Register, $mem$$Address);
5724     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5725   %}
5726 
5727   ins_pipe(ialu_reg_mem);
5728 %}
5729 
5730 // Load Long.  Cannot clobber address while loading, so restrict address
5731 // register to ESI
5732 instruct loadL(eRegL dst, load_long_memory mem) %{
5733   predicate(!((LoadLNode*)n)->require_atomic_access());
5734   match(Set dst (LoadL mem));
5735 
5736   ins_cost(250);
5737   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5738             "MOV    $dst.hi,$mem+4" %}
5739 
5740   ins_encode %{
5741     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5742     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5743     __ movl($dst$$Register, Amemlo);
5744     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5745   %}
5746 
5747   ins_pipe(ialu_reg_long_mem);
5748 %}
5749 
5750 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5751 // then store it down to the stack and reload on the int
5752 // side.
5753 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5754   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5755   match(Set dst (LoadL mem));
5756 
5757   ins_cost(200);
5758   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5759             "FISTp  $dst" %}
5760   ins_encode(enc_loadL_volatile(mem,dst));
5761   ins_pipe( fpu_reg_mem );
5762 %}
5763 
5764 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5765   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5766   match(Set dst (LoadL mem));
5767   effect(TEMP tmp);
5768   ins_cost(180);
5769   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5770             "MOVSD  $dst,$tmp" %}
5771   ins_encode %{
5772     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5773     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5774   %}
5775   ins_pipe( pipe_slow );
5776 %}
5777 
5778 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5779   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5780   match(Set dst (LoadL mem));
5781   effect(TEMP tmp);
5782   ins_cost(160);
5783   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5784             "MOVD   $dst.lo,$tmp\n\t"
5785             "PSRLQ  $tmp,32\n\t"
5786             "MOVD   $dst.hi,$tmp" %}
5787   ins_encode %{
5788     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5789     __ movdl($dst$$Register, $tmp$$XMMRegister);
5790     __ psrlq($tmp$$XMMRegister, 32);
5791     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5792   %}
5793   ins_pipe( pipe_slow );
5794 %}
5795 
5796 // Load Range
5797 instruct loadRange(rRegI dst, memory mem) %{
5798   match(Set dst (LoadRange mem));
5799 
5800   ins_cost(125);
5801   format %{ "MOV    $dst,$mem" %}
5802   opcode(0x8B);
5803   ins_encode( OpcP, RegMem(dst,mem));
5804   ins_pipe( ialu_reg_mem );
5805 %}
5806 
5807 
5808 // Load Pointer
5809 instruct loadP(eRegP dst, memory mem) %{
5810   match(Set dst (LoadP mem));
5811 
5812   ins_cost(125);
5813   format %{ "MOV    $dst,$mem" %}
5814   opcode(0x8B);
5815   ins_encode( OpcP, RegMem(dst,mem));
5816   ins_pipe( ialu_reg_mem );
5817 %}
5818 
5819 // Load Klass Pointer
5820 instruct loadKlass(eRegP dst, memory mem) %{
5821   match(Set dst (LoadKlass mem));
5822 
5823   ins_cost(125);
5824   format %{ "MOV    $dst,$mem" %}
5825   opcode(0x8B);
5826   ins_encode( OpcP, RegMem(dst,mem));
5827   ins_pipe( ialu_reg_mem );
5828 %}
5829 
5830 // Load Double
5831 instruct loadDPR(regDPR dst, memory mem) %{
5832   predicate(UseSSE<=1);
5833   match(Set dst (LoadD mem));
5834 
5835   ins_cost(150);
5836   format %{ "FLD_D  ST,$mem\n\t"
5837             "FSTP   $dst" %}
5838   opcode(0xDD);               /* DD /0 */
5839   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5840               Pop_Reg_DPR(dst) );
5841   ins_pipe( fpu_reg_mem );
5842 %}
5843 
5844 // Load Double to XMM
5845 instruct loadD(regD dst, memory mem) %{
5846   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5847   match(Set dst (LoadD mem));
5848   ins_cost(145);
5849   format %{ "MOVSD  $dst,$mem" %}
5850   ins_encode %{
5851     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5852   %}
5853   ins_pipe( pipe_slow );
5854 %}
5855 
5856 instruct loadD_partial(regD dst, memory mem) %{
5857   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5858   match(Set dst (LoadD mem));
5859   ins_cost(145);
5860   format %{ "MOVLPD $dst,$mem" %}
5861   ins_encode %{
5862     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5863   %}
5864   ins_pipe( pipe_slow );
5865 %}
5866 
5867 // Load to XMM register (single-precision floating point)
5868 // MOVSS instruction
5869 instruct loadF(regF dst, memory mem) %{
5870   predicate(UseSSE>=1);
5871   match(Set dst (LoadF mem));
5872   ins_cost(145);
5873   format %{ "MOVSS  $dst,$mem" %}
5874   ins_encode %{
5875     __ movflt ($dst$$XMMRegister, $mem$$Address);
5876   %}
5877   ins_pipe( pipe_slow );
5878 %}
5879 
5880 // Load Float
5881 instruct loadFPR(regFPR dst, memory mem) %{
5882   predicate(UseSSE==0);
5883   match(Set dst (LoadF mem));
5884 
5885   ins_cost(150);
5886   format %{ "FLD_S  ST,$mem\n\t"
5887             "FSTP   $dst" %}
5888   opcode(0xD9);               /* D9 /0 */
5889   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5890               Pop_Reg_FPR(dst) );
5891   ins_pipe( fpu_reg_mem );
5892 %}
5893 
5894 // Load Effective Address
5895 instruct leaP8(eRegP dst, indOffset8 mem) %{
5896   match(Set dst mem);
5897 
5898   ins_cost(110);
5899   format %{ "LEA    $dst,$mem" %}
5900   opcode(0x8D);
5901   ins_encode( OpcP, RegMem(dst,mem));
5902   ins_pipe( ialu_reg_reg_fat );
5903 %}
5904 
5905 instruct leaP32(eRegP dst, indOffset32 mem) %{
5906   match(Set dst mem);
5907 
5908   ins_cost(110);
5909   format %{ "LEA    $dst,$mem" %}
5910   opcode(0x8D);
5911   ins_encode( OpcP, RegMem(dst,mem));
5912   ins_pipe( ialu_reg_reg_fat );
5913 %}
5914 
5915 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5916   match(Set dst mem);
5917 
5918   ins_cost(110);
5919   format %{ "LEA    $dst,$mem" %}
5920   opcode(0x8D);
5921   ins_encode( OpcP, RegMem(dst,mem));
5922   ins_pipe( ialu_reg_reg_fat );
5923 %}
5924 
5925 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5926   match(Set dst mem);
5927 
5928   ins_cost(110);
5929   format %{ "LEA    $dst,$mem" %}
5930   opcode(0x8D);
5931   ins_encode( OpcP, RegMem(dst,mem));
5932   ins_pipe( ialu_reg_reg_fat );
5933 %}
5934 
5935 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5936   match(Set dst mem);
5937 
5938   ins_cost(110);
5939   format %{ "LEA    $dst,$mem" %}
5940   opcode(0x8D);
5941   ins_encode( OpcP, RegMem(dst,mem));
5942   ins_pipe( ialu_reg_reg_fat );
5943 %}
5944 
5945 // Load Constant
5946 instruct loadConI(rRegI dst, immI src) %{
5947   match(Set dst src);
5948 
5949   format %{ "MOV    $dst,$src" %}
5950   ins_encode( LdImmI(dst, src) );
5951   ins_pipe( ialu_reg_fat );
5952 %}
5953 
5954 // Load Constant zero
5955 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5956   match(Set dst src);
5957   effect(KILL cr);
5958 
5959   ins_cost(50);
5960   format %{ "XOR    $dst,$dst" %}
5961   opcode(0x33);  /* + rd */
5962   ins_encode( OpcP, RegReg( dst, dst ) );
5963   ins_pipe( ialu_reg );
5964 %}
5965 
5966 instruct loadConP(eRegP dst, immP src) %{
5967   match(Set dst src);
5968 
5969   format %{ "MOV    $dst,$src" %}
5970   opcode(0xB8);  /* + rd */
5971   ins_encode( LdImmP(dst, src) );
5972   ins_pipe( ialu_reg_fat );
5973 %}
5974 
5975 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5976   match(Set dst src);
5977   effect(KILL cr);
5978   ins_cost(200);
5979   format %{ "MOV    $dst.lo,$src.lo\n\t"
5980             "MOV    $dst.hi,$src.hi" %}
5981   opcode(0xB8);
5982   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5983   ins_pipe( ialu_reg_long_fat );
5984 %}
5985 
5986 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5987   match(Set dst src);
5988   effect(KILL cr);
5989   ins_cost(150);
5990   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5991             "XOR    $dst.hi,$dst.hi" %}
5992   opcode(0x33,0x33);
5993   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5994   ins_pipe( ialu_reg_long );
5995 %}
5996 
5997 // The instruction usage is guarded by predicate in operand immFPR().
5998 instruct loadConFPR(regFPR dst, immFPR con) %{
5999   match(Set dst con);
6000   ins_cost(125);
6001   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6002             "FSTP   $dst" %}
6003   ins_encode %{
6004     __ fld_s($constantaddress($con));
6005     __ fstp_d($dst$$reg);
6006   %}
6007   ins_pipe(fpu_reg_con);
6008 %}
6009 
6010 // The instruction usage is guarded by predicate in operand immFPR0().
6011 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6012   match(Set dst con);
6013   ins_cost(125);
6014   format %{ "FLDZ   ST\n\t"
6015             "FSTP   $dst" %}
6016   ins_encode %{
6017     __ fldz();
6018     __ fstp_d($dst$$reg);
6019   %}
6020   ins_pipe(fpu_reg_con);
6021 %}
6022 
6023 // The instruction usage is guarded by predicate in operand immFPR1().
6024 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6025   match(Set dst con);
6026   ins_cost(125);
6027   format %{ "FLD1   ST\n\t"
6028             "FSTP   $dst" %}
6029   ins_encode %{
6030     __ fld1();
6031     __ fstp_d($dst$$reg);
6032   %}
6033   ins_pipe(fpu_reg_con);
6034 %}
6035 
6036 // The instruction usage is guarded by predicate in operand immF().
6037 instruct loadConF(regF dst, immF con) %{
6038   match(Set dst con);
6039   ins_cost(125);
6040   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6041   ins_encode %{
6042     __ movflt($dst$$XMMRegister, $constantaddress($con));
6043   %}
6044   ins_pipe(pipe_slow);
6045 %}
6046 
6047 // The instruction usage is guarded by predicate in operand immF0().
6048 instruct loadConF0(regF dst, immF0 src) %{
6049   match(Set dst src);
6050   ins_cost(100);
6051   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6052   ins_encode %{
6053     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6054   %}
6055   ins_pipe(pipe_slow);
6056 %}
6057 
6058 // The instruction usage is guarded by predicate in operand immDPR().
6059 instruct loadConDPR(regDPR dst, immDPR con) %{
6060   match(Set dst con);
6061   ins_cost(125);
6062 
6063   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6064             "FSTP   $dst" %}
6065   ins_encode %{
6066     __ fld_d($constantaddress($con));
6067     __ fstp_d($dst$$reg);
6068   %}
6069   ins_pipe(fpu_reg_con);
6070 %}
6071 
6072 // The instruction usage is guarded by predicate in operand immDPR0().
6073 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6074   match(Set dst con);
6075   ins_cost(125);
6076 
6077   format %{ "FLDZ   ST\n\t"
6078             "FSTP   $dst" %}
6079   ins_encode %{
6080     __ fldz();
6081     __ fstp_d($dst$$reg);
6082   %}
6083   ins_pipe(fpu_reg_con);
6084 %}
6085 
6086 // The instruction usage is guarded by predicate in operand immDPR1().
6087 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6088   match(Set dst con);
6089   ins_cost(125);
6090 
6091   format %{ "FLD1   ST\n\t"
6092             "FSTP   $dst" %}
6093   ins_encode %{
6094     __ fld1();
6095     __ fstp_d($dst$$reg);
6096   %}
6097   ins_pipe(fpu_reg_con);
6098 %}
6099 
6100 // The instruction usage is guarded by predicate in operand immD().
6101 instruct loadConD(regD dst, immD con) %{
6102   match(Set dst con);
6103   ins_cost(125);
6104   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6105   ins_encode %{
6106     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6107   %}
6108   ins_pipe(pipe_slow);
6109 %}
6110 
6111 // The instruction usage is guarded by predicate in operand immD0().
6112 instruct loadConD0(regD dst, immD0 src) %{
6113   match(Set dst src);
6114   ins_cost(100);
6115   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6116   ins_encode %{
6117     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6118   %}
6119   ins_pipe( pipe_slow );
6120 %}
6121 
6122 // Load Stack Slot
6123 instruct loadSSI(rRegI dst, stackSlotI src) %{
6124   match(Set dst src);
6125   ins_cost(125);
6126 
6127   format %{ "MOV    $dst,$src" %}
6128   opcode(0x8B);
6129   ins_encode( OpcP, RegMem(dst,src));
6130   ins_pipe( ialu_reg_mem );
6131 %}
6132 
6133 instruct loadSSL(eRegL dst, stackSlotL src) %{
6134   match(Set dst src);
6135 
6136   ins_cost(200);
6137   format %{ "MOV    $dst.lo,$src\n\t"
6138             "MOV    $dst.hi,$src+4" %}
6139   opcode(0x8B, 0x8B);
6140   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6141   ins_pipe( ialu_mem_long_reg );
6142 %}
6143 
6144 // Load Stack Slot
6145 instruct loadSSP(eRegP dst, stackSlotP src) %{
6146   match(Set dst src);
6147   ins_cost(125);
6148 
6149   format %{ "MOV    $dst,$src" %}
6150   opcode(0x8B);
6151   ins_encode( OpcP, RegMem(dst,src));
6152   ins_pipe( ialu_reg_mem );
6153 %}
6154 
6155 // Load Stack Slot
6156 instruct loadSSF(regFPR dst, stackSlotF src) %{
6157   match(Set dst src);
6158   ins_cost(125);
6159 
6160   format %{ "FLD_S  $src\n\t"
6161             "FSTP   $dst" %}
6162   opcode(0xD9);               /* D9 /0, FLD m32real */
6163   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6164               Pop_Reg_FPR(dst) );
6165   ins_pipe( fpu_reg_mem );
6166 %}
6167 
6168 // Load Stack Slot
6169 instruct loadSSD(regDPR dst, stackSlotD src) %{
6170   match(Set dst src);
6171   ins_cost(125);
6172 
6173   format %{ "FLD_D  $src\n\t"
6174             "FSTP   $dst" %}
6175   opcode(0xDD);               /* DD /0, FLD m64real */
6176   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6177               Pop_Reg_DPR(dst) );
6178   ins_pipe( fpu_reg_mem );
6179 %}
6180 
6181 // Prefetch instructions for allocation.
6182 // Must be safe to execute with invalid address (cannot fault).
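// AllocatePrefetchInstr selects the flavor below: with SSE, 0, 1 and 2 emit
// PREFETCHNTA, PREFETCHT0 and PREFETCHT2 respectively; 3 emits PREFETCHW; and
// with no SSE and a setting other than 3 the prefetch degenerates to an empty
// encoding.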
6183 
6184 instruct prefetchAlloc0( memory mem ) %{
6185   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6186   match(PrefetchAllocation mem);
6187   ins_cost(0);
6188   size(0);
6189   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6190   ins_encode();
6191   ins_pipe(empty);
6192 %}
6193 
6194 instruct prefetchAlloc( memory mem ) %{
6195   predicate(AllocatePrefetchInstr==3);
6196   match( PrefetchAllocation mem );
6197   ins_cost(100);
6198 
6199   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6200   ins_encode %{
6201     __ prefetchw($mem$$Address);
6202   %}
6203   ins_pipe(ialu_mem);
6204 %}
6205 
6206 instruct prefetchAllocNTA( memory mem ) %{
6207   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6208   match(PrefetchAllocation mem);
6209   ins_cost(100);
6210 
6211   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6212   ins_encode %{
6213     __ prefetchnta($mem$$Address);
6214   %}
6215   ins_pipe(ialu_mem);
6216 %}
6217 
6218 instruct prefetchAllocT0( memory mem ) %{
6219   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6220   match(PrefetchAllocation mem);
6221   ins_cost(100);
6222 
6223   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6224   ins_encode %{
6225     __ prefetcht0($mem$$Address);
6226   %}
6227   ins_pipe(ialu_mem);
6228 %}
6229 
6230 instruct prefetchAllocT2( memory mem ) %{
6231   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6232   match(PrefetchAllocation mem);
6233   ins_cost(100);
6234 
6235   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6236   ins_encode %{
6237     __ prefetcht2($mem$$Address);
6238   %}
6239   ins_pipe(ialu_mem);
6240 %}
6241 
6242 //----------Store Instructions-------------------------------------------------
6243 
6244 // Store Byte
6245 instruct storeB(memory mem, xRegI src) %{
6246   match(Set mem (StoreB mem src));
6247 
6248   ins_cost(125);
6249   format %{ "MOV8   $mem,$src" %}
6250   opcode(0x88);
6251   ins_encode( OpcP, RegMem( src, mem ) );
6252   ins_pipe( ialu_mem_reg );
6253 %}
6254 
6255 // Store Char/Short
6256 instruct storeC(memory mem, rRegI src) %{
6257   match(Set mem (StoreC mem src));
6258 
6259   ins_cost(125);
6260   format %{ "MOV16  $mem,$src" %}
6261   opcode(0x89, 0x66);
6262   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6263   ins_pipe( ialu_mem_reg );
6264 %}
6265 
6266 // Store Integer
6267 instruct storeI(memory mem, rRegI src) %{
6268   match(Set mem (StoreI mem src));
6269 
6270   ins_cost(125);
6271   format %{ "MOV    $mem,$src" %}
6272   opcode(0x89);
6273   ins_encode( OpcP, RegMem( src, mem ) );
6274   ins_pipe( ialu_mem_reg );
6275 %}
6276 
6277 // Store Long
6278 instruct storeL(long_memory mem, eRegL src) %{
6279   predicate(!((StoreLNode*)n)->require_atomic_access());
6280   match(Set mem (StoreL mem src));
6281 
6282   ins_cost(200);
6283   format %{ "MOV    $mem,$src.lo\n\t"
6284             "MOV    $mem+4,$src.hi" %}
6285   opcode(0x89, 0x89);
6286   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6287   ins_pipe( ialu_mem_long_reg );
6288 %}
6289 
6290 // Store Long to Integer
6291 instruct storeL2I(memory mem, eRegL src) %{
6292   match(Set mem (StoreI mem (ConvL2I src)));
6293 
6294   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6295   ins_encode %{
6296     __ movl($mem$$Address, $src$$Register);
6297   %}
6298   ins_pipe(ialu_mem_reg);
6299 %}
6300 
6301 // Volatile Store Long.  Must be atomic, so move it into
6302 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6303 // target address before the store (for null-ptr checks)
6304 // so the memory operand is used twice in the encoding.
6305 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6306   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6307   match(Set mem (StoreL mem src));
6308   effect( KILL cr );
6309   ins_cost(400);
6310   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6311             "FILD   $src\n\t"
6312             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6313   opcode(0x3B);
6314   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6315   ins_pipe( fpu_reg_mem );
6316 %}
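
// A rough sketch of what the enc_storeL_volatile encoding class is assumed to
// emit for the FILD/FISTP pair above (illustrative only; src_disp and
// mem_address stand for the operands' resolved addresses):
//
//   __ fild_d(Address(rsp, src_disp));   // push the 64-bit source onto the FPU stack
//   __ fistp_d(mem_address);             // pop it into $mem as a single 64-bit store
//
// Going through the FPU is what makes the 64-bit store single-copy atomic on
// 32-bit x86.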
6317 
6318 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6319   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6320   match(Set mem (StoreL mem src));
6321   effect( TEMP tmp, KILL cr );
6322   ins_cost(380);
6323   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6324             "MOVSD  $tmp,$src\n\t"
6325             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6326   ins_encode %{
6327     __ cmpl(rax, $mem$$Address);
6328     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6329     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6330   %}
6331   ins_pipe( pipe_slow );
6332 %}
6333 
6334 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6335   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6336   match(Set mem (StoreL mem src));
6337   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6338   ins_cost(360);
6339   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6340             "MOVD   $tmp,$src.lo\n\t"
6341             "MOVD   $tmp2,$src.hi\n\t"
6342             "PUNPCKLDQ $tmp,$tmp2\n\t"
6343             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6344   ins_encode %{
6345     __ cmpl(rax, $mem$$Address);
6346     __ movdl($tmp$$XMMRegister, $src$$Register);
6347     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6348     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6349     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6350   %}
6351   ins_pipe( pipe_slow );
6352 %}
6353 
6354 // Store Pointer; for storing unknown oops and raw pointers
6355 instruct storeP(memory mem, anyRegP src) %{
6356   match(Set mem (StoreP mem src));
6357 
6358   ins_cost(125);
6359   format %{ "MOV    $mem,$src" %}
6360   opcode(0x89);
6361   ins_encode( OpcP, RegMem( src, mem ) );
6362   ins_pipe( ialu_mem_reg );
6363 %}
6364 
6365 // Store Integer Immediate
6366 instruct storeImmI(memory mem, immI src) %{
6367   match(Set mem (StoreI mem src));
6368 
6369   ins_cost(150);
6370   format %{ "MOV    $mem,$src" %}
6371   opcode(0xC7);               /* C7 /0 */
6372   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6373   ins_pipe( ialu_mem_imm );
6374 %}
6375 
6376 // Store Short/Char Immediate
6377 instruct storeImmI16(memory mem, immI16 src) %{
6378   predicate(UseStoreImmI16);
6379   match(Set mem (StoreC mem src));
6380 
6381   ins_cost(150);
6382   format %{ "MOV16  $mem,$src" %}
6383   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6384   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6385   ins_pipe( ialu_mem_imm );
6386 %}
6387 
6388 // Store Pointer Immediate; null pointers or constant oops that do not
6389 // need card-mark barriers.
6390 instruct storeImmP(memory mem, immP src) %{
6391   match(Set mem (StoreP mem src));
6392 
6393   ins_cost(150);
6394   format %{ "MOV    $mem,$src" %}
6395   opcode(0xC7);               /* C7 /0 */
6396   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6397   ins_pipe( ialu_mem_imm );
6398 %}
6399 
6400 // Store Byte Immediate
6401 instruct storeImmB(memory mem, immI8 src) %{
6402   match(Set mem (StoreB mem src));
6403 
6404   ins_cost(150);
6405   format %{ "MOV8   $mem,$src" %}
6406   opcode(0xC6);               /* C6 /0 */
6407   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6408   ins_pipe( ialu_mem_imm );
6409 %}
6410 
6411 // Store CMS card-mark Immediate
6412 instruct storeImmCM(memory mem, immI8 src) %{
6413   match(Set mem (StoreCM mem src));
6414 
6415   ins_cost(150);
6416   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6417   opcode(0xC6);               /* C6 /0 */
6418   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6419   ins_pipe( ialu_mem_imm );
6420 %}
6421 
6422 // Store Double
6423 instruct storeDPR( memory mem, regDPR1 src) %{
6424   predicate(UseSSE<=1);
6425   match(Set mem (StoreD mem src));
6426 
6427   ins_cost(100);
6428   format %{ "FST_D  $mem,$src" %}
6429   opcode(0xDD);       /* DD /2 */
6430   ins_encode( enc_FPR_store(mem,src) );
6431   ins_pipe( fpu_mem_reg );
6432 %}
6433 
6434 // Store double does rounding on x86
6435 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6436   predicate(UseSSE<=1);
6437   match(Set mem (StoreD mem (RoundDouble src)));
6438 
6439   ins_cost(100);
6440   format %{ "FST_D  $mem,$src\t# round" %}
6441   opcode(0xDD);       /* DD /2 */
6442   ins_encode( enc_FPR_store(mem,src) );
6443   ins_pipe( fpu_mem_reg );
6444 %}
6445 
6446 // Store XMM register to memory (double-precision floating point)
6447 // MOVSD instruction
6448 instruct storeD(memory mem, regD src) %{
6449   predicate(UseSSE>=2);
6450   match(Set mem (StoreD mem src));
6451   ins_cost(95);
6452   format %{ "MOVSD  $mem,$src" %}
6453   ins_encode %{
6454     __ movdbl($mem$$Address, $src$$XMMRegister);
6455   %}
6456   ins_pipe( pipe_slow );
6457 %}
6458 
6459 // Store XMM register to memory (single-precision floating point)
6460 // MOVSS instruction
6461 instruct storeF(memory mem, regF src) %{
6462   predicate(UseSSE>=1);
6463   match(Set mem (StoreF mem src));
6464   ins_cost(95);
6465   format %{ "MOVSS  $mem,$src" %}
6466   ins_encode %{
6467     __ movflt($mem$$Address, $src$$XMMRegister);
6468   %}
6469   ins_pipe( pipe_slow );
6470 %}
6471 
6472 // Store Float
6473 instruct storeFPR( memory mem, regFPR1 src) %{
6474   predicate(UseSSE==0);
6475   match(Set mem (StoreF mem src));
6476 
6477   ins_cost(100);
6478   format %{ "FST_S  $mem,$src" %}
6479   opcode(0xD9);       /* D9 /2 */
6480   ins_encode( enc_FPR_store(mem,src) );
6481   ins_pipe( fpu_mem_reg );
6482 %}
6483 
6484 // Store Float does rounding on x86
6485 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6486   predicate(UseSSE==0);
6487   match(Set mem (StoreF mem (RoundFloat src)));
6488 
6489   ins_cost(100);
6490   format %{ "FST_S  $mem,$src\t# round" %}
6491   opcode(0xD9);       /* D9 /2 */
6492   ins_encode( enc_FPR_store(mem,src) );
6493   ins_pipe( fpu_mem_reg );
6494 %}
6495 
6496 // Store Float from a Double (ConvD2F) does rounding on x86
6497 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6498   predicate(UseSSE<=1);
6499   match(Set mem (StoreF mem (ConvD2F src)));
6500 
6501   ins_cost(100);
6502   format %{ "FST_S  $mem,$src\t# D-round" %}
6503   opcode(0xD9);       /* D9 /2 */
6504   ins_encode( enc_FPR_store(mem,src) );
6505   ins_pipe( fpu_mem_reg );
6506 %}
6507 
6508 // Store immediate Float value (it is faster than store from FPU register)
6509 // The instruction usage is guarded by predicate in operand immFPR().
6510 instruct storeFPR_imm( memory mem, immFPR src) %{
6511   match(Set mem (StoreF mem src));
6512 
6513   ins_cost(50);
6514   format %{ "MOV    $mem,$src\t# store float" %}
6515   opcode(0xC7);               /* C7 /0 */
6516   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6517   ins_pipe( ialu_mem_imm );
6518 %}
6519 
6520 // Store immediate Float value (it is faster than store from XMM register)
6521 // The instruction usage is guarded by predicate in operand immF().
6522 instruct storeF_imm( memory mem, immF src) %{
6523   match(Set mem (StoreF mem src));
6524 
6525   ins_cost(50);
6526   format %{ "MOV    $mem,$src\t# store float" %}
6527   opcode(0xC7);               /* C7 /0 */
6528   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6529   ins_pipe( ialu_mem_imm );
6530 %}
6531 
6532 // Store Integer to stack slot
6533 instruct storeSSI(stackSlotI dst, rRegI src) %{
6534   match(Set dst src);
6535 
6536   ins_cost(100);
6537   format %{ "MOV    $dst,$src" %}
6538   opcode(0x89);
6539   ins_encode( OpcPRegSS( dst, src ) );
6540   ins_pipe( ialu_mem_reg );
6541 %}
6542 
6543 // Store Pointer to stack slot
6544 instruct storeSSP(stackSlotP dst, eRegP src) %{
6545   match(Set dst src);
6546 
6547   ins_cost(100);
6548   format %{ "MOV    $dst,$src" %}
6549   opcode(0x89);
6550   ins_encode( OpcPRegSS( dst, src ) );
6551   ins_pipe( ialu_mem_reg );
6552 %}
6553 
6554 // Store Long to stack slot
6555 instruct storeSSL(stackSlotL dst, eRegL src) %{
6556   match(Set dst src);
6557 
6558   ins_cost(200);
6559   format %{ "MOV    $dst,$src.lo\n\t"
6560             "MOV    $dst+4,$src.hi" %}
6561   opcode(0x89, 0x89);
6562   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6563   ins_pipe( ialu_mem_long_reg );
6564 %}
6565 
6566 //----------MemBar Instructions-----------------------------------------------
6567 // Memory barrier flavors
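//
// On IA32 only StoreLoad ordering needs an explicit fence: the hardware does
// not reorder load-load, load-store or store-store, so the acquire, release
// and storestore flavors below are empty encodings and only membar_volatile
// emits a locked ADDL [ESP], 0 (used here in place of MFENCE).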
6568 
6569 instruct membar_acquire() %{
6570   match(MemBarAcquire);
6571   match(LoadFence);
6572   ins_cost(400);
6573 
6574   size(0);
6575   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6576   ins_encode();
6577   ins_pipe(empty);
6578 %}
6579 
6580 instruct membar_acquire_lock() %{
6581   match(MemBarAcquireLock);
6582   ins_cost(0);
6583 
6584   size(0);
6585   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6586   ins_encode( );
6587   ins_pipe(empty);
6588 %}
6589 
6590 instruct membar_release() %{
6591   match(MemBarRelease);
6592   match(StoreFence);
6593   ins_cost(400);
6594 
6595   size(0);
6596   format %{ "MEMBAR-release ! (empty encoding)" %}
6597   ins_encode( );
6598   ins_pipe(empty);
6599 %}
6600 
6601 instruct membar_release_lock() %{
6602   match(MemBarReleaseLock);
6603   ins_cost(0);
6604 
6605   size(0);
6606   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6607   ins_encode( );
6608   ins_pipe(empty);
6609 %}
6610 
6611 instruct membar_volatile(eFlagsReg cr) %{
6612   match(MemBarVolatile);
6613   effect(KILL cr);
6614   ins_cost(400);
6615 
6616   format %{
6617     $$template
6618     if (os::is_MP()) {
6619       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6620     } else {
6621       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6622     }
6623   %}
6624   ins_encode %{
6625     __ membar(Assembler::StoreLoad);
6626   %}
6627   ins_pipe(pipe_slow);
6628 %}
6629 
6630 instruct unnecessary_membar_volatile() %{
6631   match(MemBarVolatile);
6632   predicate(Matcher::post_store_load_barrier(n));
6633   ins_cost(0);
6634 
6635   size(0);
6636   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6637   ins_encode( );
6638   ins_pipe(empty);
6639 %}
6640 
6641 instruct membar_storestore() %{
6642   match(MemBarStoreStore);
6643   ins_cost(0);
6644 
6645   size(0);
6646   format %{ "MEMBAR-storestore (empty encoding)" %}
6647   ins_encode( );
6648   ins_pipe(empty);
6649 %}
6650 
6651 //----------Move Instructions--------------------------------------------------
6652 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6653   match(Set dst (CastX2P src));
6654   format %{ "# X2P  $dst, $src" %}
6655   ins_encode( /*empty encoding*/ );
6656   ins_cost(0);
6657   ins_pipe(empty);
6658 %}
6659 
6660 instruct castP2X(rRegI dst, eRegP src ) %{
6661   match(Set dst (CastP2X src));
6662   ins_cost(50);
6663   format %{ "MOV    $dst, $src\t# CastP2X" %}
6664   ins_encode( enc_Copy( dst, src) );
6665   ins_pipe( ialu_reg_reg );
6666 %}
6667 
6668 //----------Conditional Move---------------------------------------------------
6669 // Conditional move
6670 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6671   predicate(!VM_Version::supports_cmov() );
6672   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6673   ins_cost(200);
6674   format %{ "J$cop,us skip\t# signed cmove\n\t"
6675             "MOV    $dst,$src\n"
6676       "skip:" %}
6677   ins_encode %{
6678     Label Lskip;
6679     // Invert sense of branch from sense of CMOV
6680     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6681     __ movl($dst$$Register, $src$$Register);
6682     __ bind(Lskip);
6683   %}
6684   ins_pipe( pipe_cmov_reg );
6685 %}
6686 
6687 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6688   predicate(!VM_Version::supports_cmov() );
6689   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6690   ins_cost(200);
6691   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6692             "MOV    $dst,$src\n"
6693       "skip:" %}
6694   ins_encode %{
6695     Label Lskip;
6696     // Invert sense of branch from sense of CMOV
6697     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6698     __ movl($dst$$Register, $src$$Register);
6699     __ bind(Lskip);
6700   %}
6701   ins_pipe( pipe_cmov_reg );
6702 %}
6703 
6704 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6705   predicate(VM_Version::supports_cmov() );
6706   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6707   ins_cost(200);
6708   format %{ "CMOV$cop $dst,$src" %}
6709   opcode(0x0F,0x40);
6710   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6711   ins_pipe( pipe_cmov_reg );
6712 %}
6713 
6714 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6715   predicate(VM_Version::supports_cmov() );
6716   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6717   ins_cost(200);
6718   format %{ "CMOV$cop $dst,$src" %}
6719   opcode(0x0F,0x40);
6720   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6721   ins_pipe( pipe_cmov_reg );
6722 %}
6723 
6724 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6725   predicate(VM_Version::supports_cmov() );
6726   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6727   ins_cost(200);
6728   expand %{
6729     cmovI_regU(cop, cr, dst, src);
6730   %}
6731 %}
6732 
6733 // Conditional move
6734 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6735   predicate(VM_Version::supports_cmov() );
6736   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6737   ins_cost(250);
6738   format %{ "CMOV$cop $dst,$src" %}
6739   opcode(0x0F,0x40);
6740   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6741   ins_pipe( pipe_cmov_mem );
6742 %}
6743 
6744 // Conditional move
6745 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6746   predicate(VM_Version::supports_cmov() );
6747   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6748   ins_cost(250);
6749   format %{ "CMOV$cop $dst,$src" %}
6750   opcode(0x0F,0x40);
6751   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6752   ins_pipe( pipe_cmov_mem );
6753 %}
6754 
6755 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6756   predicate(VM_Version::supports_cmov() );
6757   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6758   ins_cost(250);
6759   expand %{
6760     cmovI_memU(cop, cr, dst, src);
6761   %}
6762 %}
6763 
6764 // Conditional move
6765 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6766   predicate(VM_Version::supports_cmov() );
6767   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6768   ins_cost(200);
6769   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6770   opcode(0x0F,0x40);
6771   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6772   ins_pipe( pipe_cmov_reg );
6773 %}
6774 
6775 // Conditional move (non-P6 version)
6776 // Note: a CMoveP is generated for stubs and native wrappers
6777 //        regardless of whether we are on a P6, so we
6778 //        emulate a cmov here
6779 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6780   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6781   ins_cost(300);
6782   format %{ "Jn$cop   skip\n\t"
6783           "MOV    $dst,$src\t# pointer\n"
6784       "skip:" %}
6785   opcode(0x8b);
6786   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6787   ins_pipe( pipe_cmov_reg );
6788 %}
6789 
6790 // Conditional move
6791 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6792   predicate(VM_Version::supports_cmov() );
6793   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6794   ins_cost(200);
6795   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6796   opcode(0x0F,0x40);
6797   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6798   ins_pipe( pipe_cmov_reg );
6799 %}
6800 
6801 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6802   predicate(VM_Version::supports_cmov() );
6803   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6804   ins_cost(200);
6805   expand %{
6806     cmovP_regU(cop, cr, dst, src);
6807   %}
6808 %}
6809 
6810 // DISABLED: Requires the ADLC to emit a bottom_type call that
6811 // correctly meets the two pointer arguments; one is an incoming
6812 // register but the other is a memory operand.  ALSO appears to
6813 // be buggy with implicit null checks.
6814 //
6815 //// Conditional move
6816 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6817 //  predicate(VM_Version::supports_cmov() );
6818 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6819 //  ins_cost(250);
6820 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6821 //  opcode(0x0F,0x40);
6822 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6823 //  ins_pipe( pipe_cmov_mem );
6824 //%}
6825 //
6826 //// Conditional move
6827 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6828 //  predicate(VM_Version::supports_cmov() );
6829 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6830 //  ins_cost(250);
6831 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6832 //  opcode(0x0F,0x40);
6833 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6834 //  ins_pipe( pipe_cmov_mem );
6835 //%}
6836 
6837 // Conditional move
6838 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6839   predicate(UseSSE<=1);
6840   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6841   ins_cost(200);
6842   format %{ "FCMOV$cop $dst,$src\t# double" %}
6843   opcode(0xDA);
6844   ins_encode( enc_cmov_dpr(cop,src) );
6845   ins_pipe( pipe_cmovDPR_reg );
6846 %}
6847 
6848 // Conditional move
6849 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6850   predicate(UseSSE==0);
6851   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6852   ins_cost(200);
6853   format %{ "FCMOV$cop $dst,$src\t# float" %}
6854   opcode(0xDA);
6855   ins_encode( enc_cmov_dpr(cop,src) );
6856   ins_pipe( pipe_cmovDPR_reg );
6857 %}
6858 
6859 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
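// (FCMOVcc can only test CF/ZF/PF -- the B/E/BE/U conditions and their
// negations -- not the SF/OF-based signed conditions, so the two rules below
// emulate a signed-compare conditional move with a short branch around a
// plain FPU move.)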
6860 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6861   predicate(UseSSE<=1);
6862   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6863   ins_cost(200);
6864   format %{ "Jn$cop   skip\n\t"
6865             "MOV    $dst,$src\t# double\n"
6866       "skip:" %}
6867   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6868   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6869   ins_pipe( pipe_cmovDPR_reg );
6870 %}
6871 
6872 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6873 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6874   predicate(UseSSE==0);
6875   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6876   ins_cost(200);
6877   format %{ "Jn$cop    skip\n\t"
6878             "MOV    $dst,$src\t# float\n"
6879       "skip:" %}
6880   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6881   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6882   ins_pipe( pipe_cmovDPR_reg );
6883 %}
6884 
6885 // No CMOVE with SSE/SSE2
6886 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6887   predicate (UseSSE>=1);
6888   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6889   ins_cost(200);
6890   format %{ "Jn$cop   skip\n\t"
6891             "MOVSS  $dst,$src\t# float\n"
6892       "skip:" %}
6893   ins_encode %{
6894     Label skip;
6895     // Invert sense of branch from sense of CMOV
6896     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6897     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6898     __ bind(skip);
6899   %}
6900   ins_pipe( pipe_slow );
6901 %}
6902 
6903 // No CMOVE with SSE/SSE2
6904 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6905   predicate (UseSSE>=2);
6906   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6907   ins_cost(200);
6908   format %{ "Jn$cop   skip\n\t"
6909             "MOVSD  $dst,$src\t# double\n"
6910       "skip:" %}
6911   ins_encode %{
6912     Label skip;
6913     // Invert sense of branch from sense of CMOV
6914     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6915     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6916     __ bind(skip);
6917   %}
6918   ins_pipe( pipe_slow );
6919 %}
6920 
6921 // unsigned version
6922 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6923   predicate (UseSSE>=1);
6924   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6925   ins_cost(200);
6926   format %{ "Jn$cop   skip\n\t"
6927             "MOVSS  $dst,$src\t# float\n"
6928       "skip:" %}
6929   ins_encode %{
6930     Label skip;
6931     // Invert sense of branch from sense of CMOV
6932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6933     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6934     __ bind(skip);
6935   %}
6936   ins_pipe( pipe_slow );
6937 %}
6938 
6939 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6940   predicate (UseSSE>=1);
6941   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6942   ins_cost(200);
6943   expand %{
6944     fcmovF_regU(cop, cr, dst, src);
6945   %}
6946 %}
6947 
6948 // unsigned version
6949 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6950   predicate (UseSSE>=2);
6951   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6952   ins_cost(200);
6953   format %{ "Jn$cop   skip\n\t"
6954             "MOVSD  $dst,$src\t# double\n"
6955       "skip:" %}
6956   ins_encode %{
6957     Label skip;
6958     // Invert sense of branch from sense of CMOV
6959     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6960     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6961     __ bind(skip);
6962   %}
6963   ins_pipe( pipe_slow );
6964 %}
6965 
6966 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6967   predicate (UseSSE>=2);
6968   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6969   ins_cost(200);
6970   expand %{
6971     fcmovD_regU(cop, cr, dst, src);
6972   %}
6973 %}
6974 
6975 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6976   predicate(VM_Version::supports_cmov() );
6977   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6978   ins_cost(200);
6979   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6980             "CMOV$cop $dst.hi,$src.hi" %}
6981   opcode(0x0F,0x40);
6982   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6983   ins_pipe( pipe_cmov_reg_long );
6984 %}
6985 
6986 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6987   predicate(VM_Version::supports_cmov() );
6988   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6989   ins_cost(200);
6990   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6991             "CMOV$cop $dst.hi,$src.hi" %}
6992   opcode(0x0F,0x40);
6993   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6994   ins_pipe( pipe_cmov_reg_long );
6995 %}
6996 
6997 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6998   predicate(VM_Version::supports_cmov() );
6999   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7000   ins_cost(200);
7001   expand %{
7002     cmovL_regU(cop, cr, dst, src);
7003   %}
7004 %}
7005 
7006 //----------Arithmetic Instructions--------------------------------------------
7007 //----------Addition Instructions----------------------------------------------
7008 
7009 // Integer Addition Instructions
7010 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7011   match(Set dst (AddI dst src));
7012   effect(KILL cr);
7013 
7014   size(2);
7015   format %{ "ADD    $dst,$src" %}
7016   opcode(0x03);
7017   ins_encode( OpcP, RegReg( dst, src) );
7018   ins_pipe( ialu_reg_reg );
7019 %}
7020 
7021 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7022   match(Set dst (AddI dst src));
7023   effect(KILL cr);
7024 
7025   format %{ "ADD    $dst,$src" %}
7026   opcode(0x81, 0x00); /* /0 id */
7027   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7028   ins_pipe( ialu_reg );
7029 %}
7030 
7031 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7032   predicate(UseIncDec);
7033   match(Set dst (AddI dst src));
7034   effect(KILL cr);
7035 
7036   size(1);
7037   format %{ "INC    $dst" %}
7038   opcode(0x40); /*  */
7039   ins_encode( Opc_plus( primary, dst ) );
7040   ins_pipe( ialu_reg );
7041 %}
7042 
7043 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7044   match(Set dst (AddI src0 src1));
7045   ins_cost(110);
7046 
7047   format %{ "LEA    $dst,[$src0 + $src1]" %}
7048   opcode(0x8D); /* 0x8D /r */
7049   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7050   ins_pipe( ialu_reg_reg );
7051 %}
7052 
7053 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7054   match(Set dst (AddP src0 src1));
7055   ins_cost(110);
7056 
7057   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7058   opcode(0x8D); /* 0x8D /r */
7059   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7060   ins_pipe( ialu_reg_reg );
7061 %}
7062 
7063 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7064   predicate(UseIncDec);
7065   match(Set dst (AddI dst src));
7066   effect(KILL cr);
7067 
7068   size(1);
7069   format %{ "DEC    $dst" %}
7070   opcode(0x48); /*  */
7071   ins_encode( Opc_plus( primary, dst ) );
7072   ins_pipe( ialu_reg );
7073 %}
7074 
7075 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7076   match(Set dst (AddP dst src));
7077   effect(KILL cr);
7078 
7079   size(2);
7080   format %{ "ADD    $dst,$src" %}
7081   opcode(0x03);
7082   ins_encode( OpcP, RegReg( dst, src) );
7083   ins_pipe( ialu_reg_reg );
7084 %}
7085 
7086 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7087   match(Set dst (AddP dst src));
7088   effect(KILL cr);
7089 
7090   format %{ "ADD    $dst,$src" %}
7091   opcode(0x81,0x00); /* Opcode 81 /0 id */
7092   // ins_encode( RegImm( dst, src) );
7093   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7094   ins_pipe( ialu_reg );
7095 %}
7096 
7097 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7098   match(Set dst (AddI dst (LoadI src)));
7099   effect(KILL cr);
7100 
7101   ins_cost(125);
7102   format %{ "ADD    $dst,$src" %}
7103   opcode(0x03);
7104   ins_encode( OpcP, RegMem( dst, src) );
7105   ins_pipe( ialu_reg_mem );
7106 %}
7107 
7108 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7109   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7110   effect(KILL cr);
7111 
7112   ins_cost(150);
7113   format %{ "ADD    $dst,$src" %}
7114   opcode(0x01);  /* Opcode 01 /r */
7115   ins_encode( OpcP, RegMem( src, dst ) );
7116   ins_pipe( ialu_mem_reg );
7117 %}
7118 
7119 // Add Memory with Immediate
7120 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7121   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7122   effect(KILL cr);
7123 
7124   ins_cost(125);
7125   format %{ "ADD    $dst,$src" %}
7126   opcode(0x81);               /* Opcode 81 /0 id */
7127   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7128   ins_pipe( ialu_mem_imm );
7129 %}
7130 
7131 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7132   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7133   effect(KILL cr);
7134 
7135   ins_cost(125);
7136   format %{ "INC    $dst" %}
7137   opcode(0xFF);               /* Opcode FF /0 */
7138   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7139   ins_pipe( ialu_mem_imm );
7140 %}
7141 
7142 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7143   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7144   effect(KILL cr);
7145 
7146   ins_cost(125);
7147   format %{ "DEC    $dst" %}
7148   opcode(0xFF);               /* Opcode FF /1 */
7149   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7150   ins_pipe( ialu_mem_imm );
7151 %}
7152 
7153 
7154 instruct checkCastPP( eRegP dst ) %{
7155   match(Set dst (CheckCastPP dst));
7156 
7157   size(0);
7158   format %{ "#checkcastPP of $dst" %}
7159   ins_encode( /*empty encoding*/ );
7160   ins_pipe( empty );
7161 %}
7162 
7163 instruct castPP( eRegP dst ) %{
7164   match(Set dst (CastPP dst));
7165   format %{ "#castPP of $dst" %}
7166   ins_encode( /*empty encoding*/ );
7167   ins_pipe( empty );
7168 %}
7169 
7170 instruct castII( rRegI dst ) %{
7171   match(Set dst (CastII dst));
7172   format %{ "#castII of $dst" %}
7173   ins_encode( /*empty encoding*/ );
7174   ins_cost(0);
7175   ins_pipe( empty );
7176 %}
7177 
7178 
7179 // Load-locked - same as a regular pointer load when used with compare-swap
7180 instruct loadPLocked(eRegP dst, memory mem) %{
7181   match(Set dst (LoadPLocked mem));
7182 
7183   ins_cost(125);
7184   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7185   opcode(0x8B);
7186   ins_encode( OpcP, RegMem(dst,mem));
7187   ins_pipe( ialu_reg_mem );
7188 %}
7189 
7190 // Conditional-store of the updated heap-top.
7191 // Used during allocation of the shared heap.
7192 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7193 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7194   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7195   // EAX is killed if there is contention, but then it's also unused.
7196   // In the common case of no contention, EAX holds the new oop address.
7197   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7198   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7199   ins_pipe( pipe_cmpxchg );
7200 %}
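
// In outline, the allocation fast path built from loadPLocked and
// storePConditional is a CAS retry loop (pseudocode only; the names below are
// illustrative, the real sequence is emitted by the macro assembler):
//
//   do {
//     HeapWord* top     = *heap_top_addr;          // loadPLocked
//     HeapWord* new_top = top + size_in_words;
//     if (new_top > heap_end) goto slow_path;      // not enough space left
//     // storePConditional: LOCK CMPXCHG with EAX == top; ZF (EQ) is set iff
//     // the heap top was still 'top' and the new value was stored.
//   } while (!compare_and_set(heap_top_addr, top, new_top));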
7201 
7202 // Conditional-store of an int value.
7203 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7204 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7205   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7206   effect(KILL oldval);
7207   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7208   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7209   ins_pipe( pipe_cmpxchg );
7210 %}
7211 
7212 // Conditional-store of a long value.
7213 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7214 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7215   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7216   effect(KILL oldval);
7217   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7218             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7219             "XCHG   EBX,ECX"
7220   %}
7221   ins_encode %{
7222     // Note: we need to swap rbx and rcx before and after the
7223     //       cmpxchg8 instruction because the instruction uses
7224     //       rcx as the high order word of the new value to store but
7225     //       our register encoding uses rbx.
7226     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7227     if( os::is_MP() )
7228       __ lock();
7229     __ cmpxchg8($mem$$Address);
7230     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7231   %}
7232   ins_pipe( pipe_cmpxchg );
7233 %}
7234 
7235 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7236 
7237 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7238   predicate(VM_Version::supports_cx8());
7239   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7240   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7241   effect(KILL cr, KILL oldval);
7242   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7243             "MOV    $res,0\n\t"
7244             "JNE,s  fail\n\t"
7245             "MOV    $res,1\n"
7246           "fail:" %}
7247   ins_encode( enc_cmpxchg8(mem_ptr),
7248               enc_flags_ne_to_boolean(res) );
7249   ins_pipe( pipe_cmpxchg );
7250 %}
7251 
7252 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7253   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7254   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7255   effect(KILL cr, KILL oldval);
7256   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7257             "MOV    $res,0\n\t"
7258             "JNE,s  fail\n\t"
7259             "MOV    $res,1\n"
7260           "fail:" %}
7261   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7262   ins_pipe( pipe_cmpxchg );
7263 %}
7264 
7265 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7266   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7267   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7268   effect(KILL cr, KILL oldval);
7269   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7270             "MOV    $res,0\n\t"
7271             "JNE,s  fail\n\t"
7272             "MOV    $res,1\n"
7273           "fail:" %}
7274   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7275   ins_pipe( pipe_cmpxchg );
7276 %}
7277 
7278 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7279   predicate(VM_Version::supports_cx8());
7280   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7281   effect(KILL cr);
7282   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7283   ins_encode( enc_cmpxchg8(mem_ptr) );
7284   ins_pipe( pipe_cmpxchg );
7285 %}
7286 
7287 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7288   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7289   effect(KILL cr);
7290   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7291   ins_encode( enc_cmpxchg(mem_ptr) );
7292   ins_pipe( pipe_cmpxchg );
7293 %}
7294 
7295 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7296   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7297   effect(KILL cr);
7298   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7299   ins_encode( enc_cmpxchg(mem_ptr) );
7300   ins_pipe( pipe_cmpxchg );
7301 %}
7302 
7303 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7304   predicate(n->as_LoadStore()->result_not_used());
7305   match(Set dummy (GetAndAddI mem add));
7306   effect(KILL cr);
7307   format %{ "ADDL  [$mem],$add" %}
7308   ins_encode %{
7309     if (os::is_MP()) { __ lock(); }
7310     __ addl($mem$$Address, $add$$constant);
7311   %}
7312   ins_pipe( pipe_cmpxchg );
7313 %}
7314 
7315 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7316   match(Set newval (GetAndAddI mem newval));
7317   effect(KILL cr);
7318   format %{ "XADDL  [$mem],$newval" %}
7319   ins_encode %{
7320     if (os::is_MP()) { __ lock(); }
7321     __ xaddl($mem$$Address, $newval$$Register);
7322   %}
7323   ins_pipe( pipe_cmpxchg );
7324 %}
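
// Note: XCHG with a memory operand is implicitly locked by the hardware, so
// the two exchange rules below need no explicit LOCK prefix (and no MP check).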
7325 
7326 instruct xchgI( memory mem, rRegI newval) %{
7327   match(Set newval (GetAndSetI mem newval));
7328   format %{ "XCHGL  $newval,[$mem]" %}
7329   ins_encode %{
7330     __ xchgl($newval$$Register, $mem$$Address);
7331   %}
7332   ins_pipe( pipe_cmpxchg );
7333 %}
7334 
7335 instruct xchgP( memory mem, pRegP newval) %{
7336   match(Set newval (GetAndSetP mem newval));
7337   format %{ "XCHGL  $newval,[$mem]" %}
7338   ins_encode %{
7339     __ xchgl($newval$$Register, $mem$$Address);
7340   %}
7341   ins_pipe( pipe_cmpxchg );
7342 %}
7343 
7344 //----------Subtraction Instructions-------------------------------------------
7345 
7346 // Integer Subtraction Instructions
7347 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7348   match(Set dst (SubI dst src));
7349   effect(KILL cr);
7350 
7351   size(2);
7352   format %{ "SUB    $dst,$src" %}
7353   opcode(0x2B);
7354   ins_encode( OpcP, RegReg( dst, src) );
7355   ins_pipe( ialu_reg_reg );
7356 %}
7357 
7358 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7359   match(Set dst (SubI dst src));
7360   effect(KILL cr);
7361 
7362   format %{ "SUB    $dst,$src" %}
7363   opcode(0x81,0x05);  /* Opcode 81 /5 */
7364   // ins_encode( RegImm( dst, src) );
7365   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7366   ins_pipe( ialu_reg );
7367 %}
7368 
7369 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7370   match(Set dst (SubI dst (LoadI src)));
7371   effect(KILL cr);
7372 
7373   ins_cost(125);
7374   format %{ "SUB    $dst,$src" %}
7375   opcode(0x2B);
7376   ins_encode( OpcP, RegMem( dst, src) );
7377   ins_pipe( ialu_reg_mem );
7378 %}
7379 
7380 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7381   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7382   effect(KILL cr);
7383 
7384   ins_cost(150);
7385   format %{ "SUB    $dst,$src" %}
7386   opcode(0x29);  /* Opcode 29 /r */
7387   ins_encode( OpcP, RegMem( src, dst ) );
7388   ins_pipe( ialu_mem_reg );
7389 %}
7390 
7391 // Subtract from a pointer
7392 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7393   match(Set dst (AddP dst (SubI zero src)));
7394   effect(KILL cr);
7395 
7396   size(2);
7397   format %{ "SUB    $dst,$src" %}
7398   opcode(0x2B);
7399   ins_encode( OpcP, RegReg( dst, src) );
7400   ins_pipe( ialu_reg_reg );
7401 %}
7402 
7403 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7404   match(Set dst (SubI zero dst));
7405   effect(KILL cr);
7406 
7407   size(2);
7408   format %{ "NEG    $dst" %}
7409   opcode(0xF7,0x03);  // Opcode F7 /3
7410   ins_encode( OpcP, RegOpc( dst ) );
7411   ins_pipe( ialu_reg );
7412 %}
7413 
7414 //----------Multiplication/Division Instructions-------------------------------
7415 // Integer Multiplication Instructions
7416 // Multiply Register
7417 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7418   match(Set dst (MulI dst src));
7419   effect(KILL cr);
7420 
7421   size(3);
7422   ins_cost(300);
7423   format %{ "IMUL   $dst,$src" %}
7424   opcode(0xAF, 0x0F);
7425   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7426   ins_pipe( ialu_reg_reg_alu0 );
7427 %}
7428 
7429 // Multiply 32-bit Immediate
7430 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7431   match(Set dst (MulI src imm));
7432   effect(KILL cr);
7433 
7434   ins_cost(300);
7435   format %{ "IMUL   $dst,$src,$imm" %}
7436   opcode(0x69);  /* 69 /r id */
7437   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7438   ins_pipe( ialu_reg_reg_alu0 );
7439 %}
7440 
7441 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7442   match(Set dst src);
7443   effect(KILL cr);
7444 
7445   // Note that this is artificially increased to make it more expensive than loadConL
7446   ins_cost(250);
7447   format %{ "MOV    EAX,$src\t// low word only" %}
7448   opcode(0xB8);
7449   ins_encode( LdImmL_Lo(dst, src) );
7450   ins_pipe( ialu_reg_fat );
7451 %}
7452 
7453 // Multiply by 32-bit Immediate, taking the shifted high order results
7454 //  (special case for shift by 32)
7455 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7456   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7457   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7458              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7459              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7460   effect(USE src1, KILL cr);
7461 
7462   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7463   ins_cost(0*100 + 1*400 - 150);
7464   format %{ "IMUL   EDX:EAX,$src1" %}
7465   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7466   ins_pipe( pipe_slow );
7467 %}
7468 
7469 // Multiply by 32-bit Immediate, taking the shifted high order results
7470 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7471   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7472   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7473              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7474              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7475   effect(USE src1, KILL cr);
7476 
7477   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7478   ins_cost(1*100 + 1*400 - 150);
7479   format %{ "IMUL   EDX:EAX,$src1\n\t"
7480             "SAR    EDX,$cnt-32" %}
7481   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7482   ins_pipe( pipe_slow );
7483 %}
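
// The two rules above match the "high half of a 32x32->64 multiply" shape,
// e.g. what the optimizer produces when strength-reducing a division by a
// constant.  A minimal C++ sketch of the matched expression (illustrative
// names, types from <cstdint>):
//
//   int32_t mul_hi_shift(int32_t x, int64_t con, int cnt /* 32..63 */) {
//     // ConvL2I( RShiftL( MulL( ConvI2L(x), ConL(con) ), cnt ) )
//     return (int32_t)(((int64_t)x * con) >> cnt);
//   }
//
// IMUL leaves the full product in EDX:EAX, so the upper half is already in
// EDX and only the residual SAR by (cnt - 32) remains (none at all in the
// cnt == 32 case).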
7484 
7485 // Multiply Memory 32-bit Immediate
7486 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7487   match(Set dst (MulI (LoadI src) imm));
7488   effect(KILL cr);
7489 
7490   ins_cost(300);
7491   format %{ "IMUL   $dst,$src,$imm" %}
7492   opcode(0x69);  /* 69 /r id */
7493   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7494   ins_pipe( ialu_reg_mem_alu0 );
7495 %}
7496 
7497 // Multiply Memory
7498 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7499   match(Set dst (MulI dst (LoadI src)));
7500   effect(KILL cr);
7501 
7502   ins_cost(350);
7503   format %{ "IMUL   $dst,$src" %}
7504   opcode(0xAF, 0x0F);
7505   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7506   ins_pipe( ialu_reg_mem_alu0 );
7507 %}
7508 
7509 // Multiply Register Int to Long
7510 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7511   // Basic Idea: long = (long)int * (long)int
7512   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7513   effect(DEF dst, USE src, USE src1, KILL flags);
7514 
7515   ins_cost(300);
7516   format %{ "IMUL   $dst,$src1" %}
7517 
7518   ins_encode( long_int_multiply( dst, src1 ) );
7519   ins_pipe( ialu_reg_reg_alu0 );
7520 %}
7521 
7522 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7523   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7524   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7525   effect(KILL flags);
7526 
7527   ins_cost(300);
7528   format %{ "MUL    $dst,$src1" %}
7529 
7530   ins_encode( long_uint_multiply(dst, src1) );
7531   ins_pipe( ialu_reg_reg_alu0 );
7532 %}
7533 
7534 // Multiply Register Long
7535 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7536   match(Set dst (MulL dst src));
7537   effect(KILL cr, TEMP tmp);
7538   ins_cost(4*100+3*400);
7539 // Basic idea: lo(result) = lo(x_lo * y_lo)
7540 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7541   format %{ "MOV    $tmp,$src.lo\n\t"
7542             "IMUL   $tmp,EDX\n\t"
7543             "MOV    EDX,$src.hi\n\t"
7544             "IMUL   EDX,EAX\n\t"
7545             "ADD    $tmp,EDX\n\t"
7546             "MUL    EDX:EAX,$src.lo\n\t"
7547             "ADD    EDX,$tmp" %}
7548   ins_encode( long_multiply( dst, src, tmp ) );
7549   ins_pipe( pipe_slow );
7550 %}
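
// A self-contained C++ sketch of that decomposition (illustration only, types
// from <cstdint>):
//
//   uint64_t mul64(uint64_t x, uint64_t y) {
//     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
//     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
//     uint64_t p  = (uint64_t)x_lo * y_lo;            // the MUL: EDX:EAX = x_lo * y_lo
//     uint32_t hi = (uint32_t)(p >> 32)
//                 + x_hi * y_lo + x_lo * y_hi;        // the cross terms (the two IMULs)
//     return ((uint64_t)hi << 32) | (uint32_t)p;      // == x * y mod 2^64
//   }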
7551 
7552 // Multiply Register Long where the left operand's high 32 bits are zero
7553 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7554   predicate(is_operand_hi32_zero(n->in(1)));
7555   match(Set dst (MulL dst src));
7556   effect(KILL cr, TEMP tmp);
7557   ins_cost(2*100+2*400);
7558 // Basic idea: lo(result) = lo(x_lo * y_lo)
7559 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7560   format %{ "MOV    $tmp,$src.hi\n\t"
7561             "IMUL   $tmp,EAX\n\t"
7562             "MUL    EDX:EAX,$src.lo\n\t"
7563             "ADD    EDX,$tmp" %}
7564   ins_encode %{
7565     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7566     __ imull($tmp$$Register, rax);
7567     __ mull($src$$Register);
7568     __ addl(rdx, $tmp$$Register);
7569   %}
7570   ins_pipe( pipe_slow );
7571 %}
7572 
7573 // Multiply Register Long where the right operand's high 32 bits are zero
7574 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7575   predicate(is_operand_hi32_zero(n->in(2)));
7576   match(Set dst (MulL dst src));
7577   effect(KILL cr, TEMP tmp);
7578   ins_cost(2*100+2*400);
7579 // Basic idea: lo(result) = lo(x_lo * y_lo)
7580 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7581   format %{ "MOV    $tmp,$src.lo\n\t"
7582             "IMUL   $tmp,EDX\n\t"
7583             "MUL    EDX:EAX,$src.lo\n\t"
7584             "ADD    EDX,$tmp" %}
7585   ins_encode %{
7586     __ movl($tmp$$Register, $src$$Register);
7587     __ imull($tmp$$Register, rdx);
7588     __ mull($src$$Register);
7589     __ addl(rdx, $tmp$$Register);
7590   %}
7591   ins_pipe( pipe_slow );
7592 %}
7593 
7594 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7595 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7596   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7597   match(Set dst (MulL dst src));
7598   effect(KILL cr);
7599   ins_cost(1*400);
7600 // Basic idea: lo(result) = lo(x_lo * y_lo)
7601 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7602   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7603   ins_encode %{
7604     __ mull($src$$Register);
7605   %}
7606   ins_pipe( pipe_slow );
7607 %}
7608 
7609 // Multiply Register Long by small constant
7610 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7611   match(Set dst (MulL dst src));
7612   effect(KILL cr, TEMP tmp);
7613   ins_cost(2*100+2*400);
7614   size(12);
7615 // Basic idea: lo(result) = lo(src * EAX)
7616 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7617   format %{ "IMUL   $tmp,EDX,$src\n\t"
7618             "MOV    EDX,$src\n\t"
7619             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7620             "ADD    EDX,$tmp" %}
7621   ins_encode( long_multiply_con( dst, src, tmp ) );
7622   ins_pipe( pipe_slow );
7623 %}
7624 
7625 // Integer DIV with Register
7626 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7627   match(Set rax (DivI rax div));
7628   effect(KILL rdx, KILL cr);
7629   size(26);
7630   ins_cost(30*100+10*100);
7631   format %{ "CMP    EAX,0x80000000\n\t"
7632             "JNE,s  normal\n\t"
7633             "XOR    EDX,EDX\n\t"
7634             "CMP    ECX,-1\n\t"
7635             "JE,s   done\n"
7636     "normal: CDQ\n\t"
7637             "IDIV   $div\n\t"
7638     "done:"        %}
7639   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7640   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7641   ins_pipe( ialu_reg_reg_alu0 );
7642 %}
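
// The special case above exists because IDIV raises a divide error (#DE) on
// the one overflowing input pair, while Java defines the result:
//
//   min_jint / -1 == min_jint    and    min_jint % -1 == 0
//
// so that pair is filtered out (EDX is simply zeroed) before IDIV runs.  The
// DIVMOD and MOD rules below reuse the same cdq_enc encoding.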
7643 
7644 // Divide Register Long
7645 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7646   match(Set dst (DivL src1 src2));
7647   effect( KILL cr, KILL cx, KILL bx );
7648   ins_cost(10000);
7649   format %{ "PUSH   $src1.hi\n\t"
7650             "PUSH   $src1.lo\n\t"
7651             "PUSH   $src2.hi\n\t"
7652             "PUSH   $src2.lo\n\t"
7653             "CALL   SharedRuntime::ldiv\n\t"
7654             "ADD    ESP,16" %}
7655   ins_encode( long_div(src1,src2) );
7656   ins_pipe( pipe_slow );
7657 %}
7658 
7659 // Integer DIVMOD with Register, both quotient and mod results
7660 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7661   match(DivModI rax div);
7662   effect(KILL cr);
7663   size(26);
7664   ins_cost(30*100+10*100);
7665   format %{ "CMP    EAX,0x80000000\n\t"
7666             "JNE,s  normal\n\t"
7667             "XOR    EDX,EDX\n\t"
7668             "CMP    ECX,-1\n\t"
7669             "JE,s   done\n"
7670     "normal: CDQ\n\t"
7671             "IDIV   $div\n\t"
7672     "done:"        %}
7673   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7674   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7675   ins_pipe( pipe_slow );
7676 %}
7677 
7678 // Integer MOD with Register
7679 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7680   match(Set rdx (ModI rax div));
7681   effect(KILL rax, KILL cr);
7682 
7683   size(26);
7684   ins_cost(300);
7685   format %{ "CDQ\n\t"
7686             "IDIV   $div" %}
7687   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7688   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7689   ins_pipe( ialu_reg_reg_alu0 );
7690 %}
7691 
7692 // Remainder Register Long
7693 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7694   match(Set dst (ModL src1 src2));
7695   effect( KILL cr, KILL cx, KILL bx );
7696   ins_cost(10000);
7697   format %{ "PUSH   $src1.hi\n\t"
7698             "PUSH   $src1.lo\n\t"
7699             "PUSH   $src2.hi\n\t"
7700             "PUSH   $src2.lo\n\t"
7701             "CALL   SharedRuntime::lrem\n\t"
7702             "ADD    ESP,16" %}
7703   ins_encode( long_mod(src1,src2) );
7704   ins_pipe( pipe_slow );
7705 %}
7706 
7707 // Divide Register Long (no special case since divisor != -1)
7708 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7709   match(Set dst (DivL dst imm));
7710   effect( TEMP tmp, TEMP tmp2, KILL cr );
7711   ins_cost(1000);
7712   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7713             "XOR    $tmp2,$tmp2\n\t"
7714             "CMP    $tmp,EDX\n\t"
7715             "JA,s   fast\n\t"
7716             "MOV    $tmp2,EAX\n\t"
7717             "MOV    EAX,EDX\n\t"
7718             "MOV    EDX,0\n\t"
7719             "JLE,s  pos\n\t"
7720             "LNEG   EAX : $tmp2\n\t"
7721             "DIV    $tmp # unsigned division\n\t"
7722             "XCHG   EAX,$tmp2\n\t"
7723             "DIV    $tmp\n\t"
7724             "LNEG   $tmp2 : EAX\n\t"
7725             "JMP,s  done\n"
7726     "pos:\n\t"
7727             "DIV    $tmp\n\t"
7728             "XCHG   EAX,$tmp2\n"
7729     "fast:\n\t"
7730             "DIV    $tmp\n"
7731     "done:\n\t"
7732             "MOV    EDX,$tmp2\n\t"
7733             "NEG    EDX:EAX # if $imm < 0" %}
7734   ins_encode %{
7735     int con = (int)$imm$$constant;
7736     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7737     int pcon = (con > 0) ? con : -con;
7738     Label Lfast, Lpos, Ldone;
7739 
7740     __ movl($tmp$$Register, pcon);
7741     __ xorl($tmp2$$Register,$tmp2$$Register);
7742     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7743     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7744 
7745     __ movl($tmp2$$Register, $dst$$Register); // save
7746     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7747     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7748     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7749 
7750     // Negative dividend.
7751     // convert value to positive to use unsigned division
7752     __ lneg($dst$$Register, $tmp2$$Register);
7753     __ divl($tmp$$Register);
7754     __ xchgl($dst$$Register, $tmp2$$Register);
7755     __ divl($tmp$$Register);
7756     // revert result back to negative
7757     __ lneg($tmp2$$Register, $dst$$Register);
7758     __ jmpb(Ldone);
7759 
7760     __ bind(Lpos);
7761     __ divl($tmp$$Register); // Use unsigned division
7762     __ xchgl($dst$$Register, $tmp2$$Register);
7763     // Fall through to the final divide; tmp2 has the 32-bit hi result
7764 
7765     __ bind(Lfast);
7766     // fast path: src is positive
7767     __ divl($tmp$$Register); // Use unsigned division
7768 
7769     __ bind(Ldone);
7770     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7771     if (con < 0) {
7772       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7773     }
7774   %}
7775   ins_pipe( pipe_slow );
7776 %}
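
// The magnitude division above is schoolbook long division with a 32-bit
// "digit": at most two DIVs, and only one on the fast path.  A C++ sketch of
// the unsigned core (the LNEG-based sign handling is omitted; types from
// <cstdint>):
//
//   uint64_t udiv64_by_32(uint64_t n, uint32_t d) {   // d != 0
//     uint32_t n_hi = (uint32_t)(n >> 32), n_lo = (uint32_t)n;
//     if (n_hi < d) {
//       return n / d;                  // fast path: one DIV, quotient fits in 32 bits
//     }
//     uint32_t q_hi = n_hi / d;        // first DIV: quotient high word
//     uint32_t r    = n_hi % d;        //            and remainder
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | n_lo) / d);  // second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;
//   }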
7777 
7778 // Remainder Register Long (remainder fits into 32 bits)
7779 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7780   match(Set dst (ModL dst imm));
7781   effect( TEMP tmp, TEMP tmp2, KILL cr );
7782   ins_cost(1000);
7783   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7784             "CMP    $tmp,EDX\n\t"
7785             "JA,s   fast\n\t"
7786             "MOV    $tmp2,EAX\n\t"
7787             "MOV    EAX,EDX\n\t"
7788             "MOV    EDX,0\n\t"
7789             "JLE,s  pos\n\t"
7790             "LNEG   EAX : $tmp2\n\t"
7791             "DIV    $tmp # unsigned division\n\t"
7792             "MOV    EAX,$tmp2\n\t"
7793             "DIV    $tmp\n\t"
7794             "NEG    EDX\n\t"
7795             "JMP,s  done\n"
7796     "pos:\n\t"
7797             "DIV    $tmp\n\t"
7798             "MOV    EAX,$tmp2\n"
7799     "fast:\n\t"
7800             "DIV    $tmp\n"
7801     "done:\n\t"
7802             "MOV    EAX,EDX\n\t"
7803             "SAR    EDX,31\n\t" %}
7804   ins_encode %{
7805     int con = (int)$imm$$constant;
7806     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7807     int pcon = (con > 0) ? con : -con;
7808     Label  Lfast, Lpos, Ldone;
7809 
7810     __ movl($tmp$$Register, pcon);
7811     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7812     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
7813 
7814     __ movl($tmp2$$Register, $dst$$Register); // save
7815     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7816     __ movl(HIGH_FROM_LOW($dst$$Register),0); // zero via MOV (not XOR) to preserve the flags from the CMP above
7817     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7818 
7819     // Negative dividend.
7820     // convert value to positive to use unsigned division
7821     __ lneg($dst$$Register, $tmp2$$Register);
7822     __ divl($tmp$$Register);
7823     __ movl($dst$$Register, $tmp2$$Register);
7824     __ divl($tmp$$Register);
7825     // revert remainder back to negative
7826     __ negl(HIGH_FROM_LOW($dst$$Register));
7827     __ jmpb(Ldone);
7828 
7829     __ bind(Lpos);
7830     __ divl($tmp$$Register);
7831     __ movl($dst$$Register, $tmp2$$Register);
7832 
7833     __ bind(Lfast);
7834     // fast path: src is positive
7835     __ divl($tmp$$Register);
7836 
7837     __ bind(Ldone);
7838     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7839     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7840 
7841   %}
7842   ins_pipe( pipe_slow );
7843 %}
7844 
7845 // Integer Shift Instructions
7846 // Shift Left by one
7847 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7848   match(Set dst (LShiftI dst shift));
7849   effect(KILL cr);
7850 
7851   size(2);
7852   format %{ "SHL    $dst,$shift" %}
7853   opcode(0xD1, 0x4);  /* D1 /4 */
7854   ins_encode( OpcP, RegOpc( dst ) );
7855   ins_pipe( ialu_reg );
7856 %}
7857 
7858 // Shift Left by 8-bit immediate
7859 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7860   match(Set dst (LShiftI dst shift));
7861   effect(KILL cr);
7862 
7863   size(3);
7864   format %{ "SHL    $dst,$shift" %}
7865   opcode(0xC1, 0x4);  /* C1 /4 ib */
7866   ins_encode( RegOpcImm( dst, shift) );
7867   ins_pipe( ialu_reg );
7868 %}
7869 
7870 // Shift Left by variable
7871 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7872   match(Set dst (LShiftI dst shift));
7873   effect(KILL cr);
7874 
7875   size(2);
7876   format %{ "SHL    $dst,$shift" %}
7877   opcode(0xD3, 0x4);  /* D3 /4 */
7878   ins_encode( OpcP, RegOpc( dst ) );
7879   ins_pipe( ialu_reg_reg );
7880 %}
7881 
7882 // Arithmetic shift right by one
7883 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7884   match(Set dst (RShiftI dst shift));
7885   effect(KILL cr);
7886 
7887   size(2);
7888   format %{ "SAR    $dst,$shift" %}
7889   opcode(0xD1, 0x7);  /* D1 /7 */
7890   ins_encode( OpcP, RegOpc( dst ) );
7891   ins_pipe( ialu_reg );
7892 %}
7893 
7894 // Arithmetic shift right by one
7895 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7896   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7897   effect(KILL cr);
7898   format %{ "SAR    $dst,$shift" %}
7899   opcode(0xD1, 0x7);  /* D1 /7 */
7900   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7901   ins_pipe( ialu_mem_imm );
7902 %}
7903 
7904 // Arithmetic Shift Right by 8-bit immediate
7905 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7906   match(Set dst (RShiftI dst shift));
7907   effect(KILL cr);
7908 
7909   size(3);
7910   format %{ "SAR    $dst,$shift" %}
7911   opcode(0xC1, 0x7);  /* C1 /7 ib */
7912   ins_encode( RegOpcImm( dst, shift ) );
7913   ins_pipe( ialu_mem_imm );
7914 %}
7915 
7916 // Arithmetic Shift Right by 8-bit immediate
7917 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7918   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7919   effect(KILL cr);
7920 
7921   format %{ "SAR    $dst,$shift" %}
7922   opcode(0xC1, 0x7);  /* C1 /7 ib */
7923   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7924   ins_pipe( ialu_mem_imm );
7925 %}
7926 
7927 // Arithmetic Shift Right by variable
7928 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7929   match(Set dst (RShiftI dst shift));
7930   effect(KILL cr);
7931 
7932   size(2);
7933   format %{ "SAR    $dst,$shift" %}
7934   opcode(0xD3, 0x7);  /* D3 /7 */
7935   ins_encode( OpcP, RegOpc( dst ) );
7936   ins_pipe( ialu_reg_reg );
7937 %}
7938 
7939 // Logical shift right by one
7940 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7941   match(Set dst (URShiftI dst shift));
7942   effect(KILL cr);
7943 
7944   size(2);
7945   format %{ "SHR    $dst,$shift" %}
7946   opcode(0xD1, 0x5);  /* D1 /5 */
7947   ins_encode( OpcP, RegOpc( dst ) );
7948   ins_pipe( ialu_reg );
7949 %}
7950 
7951 // Logical Shift Right by 8-bit immediate
7952 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7953   match(Set dst (URShiftI dst shift));
7954   effect(KILL cr);
7955 
7956   size(3);
7957   format %{ "SHR    $dst,$shift" %}
7958   opcode(0xC1, 0x5);  /* C1 /5 ib */
7959   ins_encode( RegOpcImm( dst, shift) );
7960   ins_pipe( ialu_reg );
7961 %}
7962 
7963 
7964 // Shift Left by 24, followed by Arithmetic Shift Right by 24: (x << 24) >> 24
7965 // sign-extends the low byte.  This idiom is used by the compiler for the i2b bytecode.
7966 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7967   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7968 
7969   size(3);
7970   format %{ "MOVSX  $dst,$src :8" %}
7971   ins_encode %{
7972     __ movsbl($dst$$Register, $src$$Register);
7973   %}
7974   ins_pipe(ialu_reg_reg);
7975 %}
7976 
7977 // Shift Left by 16, followed by Arithmetic Shift Right by 16: (x << 16) >> 16
7978 // sign-extends the low 16 bits.  This idiom is used by the compiler for the i2s bytecode.
7979 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7980   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7981 
7982   size(3);
7983   format %{ "MOVSX  $dst,$src :16" %}
7984   ins_encode %{
7985     __ movswl($dst$$Register, $src$$Register);
7986   %}
7987   ins_pipe(ialu_reg_reg);
7988 %}
7989 
7990 
7991 // Logical Shift Right by variable
7992 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7993   match(Set dst (URShiftI dst shift));
7994   effect(KILL cr);
7995 
7996   size(2);
7997   format %{ "SHR    $dst,$shift" %}
7998   opcode(0xD3, 0x5);  /* D3 /5 */
7999   ins_encode( OpcP, RegOpc( dst ) );
8000   ins_pipe( ialu_reg_reg );
8001 %}
8002 
8003 
8004 //----------Logical Instructions-----------------------------------------------
8005 //----------Integer Logical Instructions---------------------------------------
8006 // And Instructions
8007 // And Register with Register
8008 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8009   match(Set dst (AndI dst src));
8010   effect(KILL cr);
8011 
8012   size(2);
8013   format %{ "AND    $dst,$src" %}
8014   opcode(0x23);
8015   ins_encode( OpcP, RegReg( dst, src) );
8016   ins_pipe( ialu_reg_reg );
8017 %}
8018 
8019 // And Register with Immediate
8020 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8021   match(Set dst (AndI dst src));
8022   effect(KILL cr);
8023 
8024   format %{ "AND    $dst,$src" %}
8025   opcode(0x81,0x04);  /* Opcode 81 /4 */
8026   // ins_encode( RegImm( dst, src) );
8027   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8028   ins_pipe( ialu_reg );
8029 %}
8030 
8031 // And Register with Memory
8032 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8033   match(Set dst (AndI dst (LoadI src)));
8034   effect(KILL cr);
8035 
8036   ins_cost(125);
8037   format %{ "AND    $dst,$src" %}
8038   opcode(0x23);
8039   ins_encode( OpcP, RegMem( dst, src) );
8040   ins_pipe( ialu_reg_mem );
8041 %}
8042 
8043 // And Memory with Register
8044 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8045   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8046   effect(KILL cr);
8047 
8048   ins_cost(150);
8049   format %{ "AND    $dst,$src" %}
8050   opcode(0x21);  /* Opcode 21 /r */
8051   ins_encode( OpcP, RegMem( src, dst ) );
8052   ins_pipe( ialu_mem_reg );
8053 %}
8054 
8055 // And Memory with Immediate
8056 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8057   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8058   effect(KILL cr);
8059 
8060   ins_cost(125);
8061   format %{ "AND    $dst,$src" %}
8062   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8063   // ins_encode( MemImm( dst, src) );
8064   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8065   ins_pipe( ialu_mem_imm );
8066 %}
8067 
8068 // BMI1 instructions
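// The matched ideal subtrees are the standard BMI1 identities (x, y ints):
//   ANDN   dst, x, y   computes  ~x & y
//   BLSI   dst, x      computes  -x & x        (isolate lowest set bit)
//   BLSMSK dst, x      computes  (x - 1) ^ x   (mask up to lowest set bit)
//   BLSR   dst, x      computes  (x - 1) & x   (clear lowest set bit)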
8069 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8070   match(Set dst (AndI (XorI src1 minus_1) src2));
8071   predicate(UseBMI1Instructions);
8072   effect(KILL cr);
8073 
8074   format %{ "ANDNL  $dst, $src1, $src2" %}
8075 
8076   ins_encode %{
8077     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8078   %}
8079   ins_pipe(ialu_reg);
8080 %}
8081 
8082 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8083   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8084   predicate(UseBMI1Instructions);
8085   effect(KILL cr);
8086 
8087   ins_cost(125);
8088   format %{ "ANDNL  $dst, $src1, $src2" %}
8089 
8090   ins_encode %{
8091     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8092   %}
8093   ins_pipe(ialu_reg_mem);
8094 %}
8095 
8096 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8097   match(Set dst (AndI (SubI imm_zero src) src));
8098   predicate(UseBMI1Instructions);
8099   effect(KILL cr);
8100 
8101   format %{ "BLSIL  $dst, $src" %}
8102 
8103   ins_encode %{
8104     __ blsil($dst$$Register, $src$$Register);
8105   %}
8106   ins_pipe(ialu_reg);
8107 %}
8108 
8109 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8110   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8111   predicate(UseBMI1Instructions);
8112   effect(KILL cr);
8113 
8114   ins_cost(125);
8115   format %{ "BLSIL  $dst, $src" %}
8116 
8117   ins_encode %{
8118     __ blsil($dst$$Register, $src$$Address);
8119   %}
8120   ins_pipe(ialu_reg_mem);
8121 %}
8122 
8123 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8124 %{
8125   match(Set dst (XorI (AddI src minus_1) src));
8126   predicate(UseBMI1Instructions);
8127   effect(KILL cr);
8128 
8129   format %{ "BLSMSKL $dst, $src" %}
8130 
8131   ins_encode %{
8132     __ blsmskl($dst$$Register, $src$$Register);
8133   %}
8134 
8135   ins_pipe(ialu_reg);
8136 %}
8137 
8138 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8139 %{
8140   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8141   predicate(UseBMI1Instructions);
8142   effect(KILL cr);
8143 
8144   ins_cost(125);
8145   format %{ "BLSMSKL $dst, $src" %}
8146 
8147   ins_encode %{
8148     __ blsmskl($dst$$Register, $src$$Address);
8149   %}
8150 
8151   ins_pipe(ialu_reg_mem);
8152 %}
8153 
8154 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8155 %{
8156   match(Set dst (AndI (AddI src minus_1) src) );
8157   predicate(UseBMI1Instructions);
8158   effect(KILL cr);
8159 
8160   format %{ "BLSRL  $dst, $src" %}
8161 
8162   ins_encode %{
8163     __ blsrl($dst$$Register, $src$$Register);
8164   %}
8165 
8166   ins_pipe(ialu_reg);
8167 %}
8168 
8169 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8170 %{
8171   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8172   predicate(UseBMI1Instructions);
8173   effect(KILL cr);
8174 
8175   ins_cost(125);
8176   format %{ "BLSRL  $dst, $src" %}
8177 
8178   ins_encode %{
8179     __ blsrl($dst$$Register, $src$$Address);
8180   %}
8181 
8182   ins_pipe(ialu_reg_mem);
8183 %}
8184 
8185 // Or Instructions
8186 // Or Register with Register
8187 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8188   match(Set dst (OrI dst src));
8189   effect(KILL cr);
8190 
8191   size(2);
8192   format %{ "OR     $dst,$src" %}
8193   opcode(0x0B);
8194   ins_encode( OpcP, RegReg( dst, src) );
8195   ins_pipe( ialu_reg_reg );
8196 %}
8197 
8198 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8199   match(Set dst (OrI dst (CastP2X src)));
8200   effect(KILL cr);
8201 
8202   size(2);
8203   format %{ "OR     $dst,$src" %}
8204   opcode(0x0B);
8205   ins_encode( OpcP, RegReg( dst, src) );
8206   ins_pipe( ialu_reg_reg );
8207 %}
8208 
8209 
8210 // Or Register with Immediate
8211 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8212   match(Set dst (OrI dst src));
8213   effect(KILL cr);
8214 
8215   format %{ "OR     $dst,$src" %}
8216   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8217   // ins_encode( RegImm( dst, src) );
8218   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8219   ins_pipe( ialu_reg );
8220 %}
8221 
8222 // Or Register with Memory
8223 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8224   match(Set dst (OrI dst (LoadI src)));
8225   effect(KILL cr);
8226 
8227   ins_cost(125);
8228   format %{ "OR     $dst,$src" %}
8229   opcode(0x0B);
8230   ins_encode( OpcP, RegMem( dst, src) );
8231   ins_pipe( ialu_reg_mem );
8232 %}
8233 
8234 // Or Memory with Register
8235 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8236   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8237   effect(KILL cr);
8238 
8239   ins_cost(150);
8240   format %{ "OR     $dst,$src" %}
8241   opcode(0x09);  /* Opcode 09 /r */
8242   ins_encode( OpcP, RegMem( src, dst ) );
8243   ins_pipe( ialu_mem_reg );
8244 %}
8245 
8246 // Or Memory with Immediate
8247 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8248   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8249   effect(KILL cr);
8250 
8251   ins_cost(125);
8252   format %{ "OR     $dst,$src" %}
8253   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8254   // ins_encode( MemImm( dst, src) );
8255   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8256   ins_pipe( ialu_mem_imm );
8257 %}
8258 
8259 // ROL/ROR
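// A constant rotate is matched as (x << s) | (x >>> t): the imm8 forms carry a
// predicate requiring s + t == 0 (mod 32), and the one-bit forms pair a shift
// by 1 with a shift by -1, which 5-bit shift-count masking turns into 31.  The
// variable forms match a right shift by (0 - s) or (32 - s), equivalent under
// the same mask, so a single ROL/ROR by CL covers the whole expression.  The
// ROR expansions below are the mirror image.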
8260 // ROL expand
8261 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8262   effect(USE_DEF dst, USE shift, KILL cr);
8263 
8264   format %{ "ROL    $dst, $shift" %}
8265   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8266   ins_encode( OpcP, RegOpc( dst ));
8267   ins_pipe( ialu_reg );
8268 %}
8269 
8270 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8271   effect(USE_DEF dst, USE shift, KILL cr);
8272 
8273   format %{ "ROL    $dst, $shift" %}
8274   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8275   ins_encode( RegOpcImm(dst, shift) );
8276   ins_pipe(ialu_reg);
8277 %}
8278 
8279 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8280   effect(USE_DEF dst, USE shift, KILL cr);
8281 
8282   format %{ "ROL    $dst, $shift" %}
8283   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8284   ins_encode(OpcP, RegOpc(dst));
8285   ins_pipe( ialu_reg_reg );
8286 %}
8287 // end of ROL expand
8288 
8289 // ROL 32bit by one once
8290 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8291   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8292 
8293   expand %{
8294     rolI_eReg_imm1(dst, lshift, cr);
8295   %}
8296 %}
8297 
8298 // ROL 32bit var by imm8 once
8299 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8300   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8301   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8302 
8303   expand %{
8304     rolI_eReg_imm8(dst, lshift, cr);
8305   %}
8306 %}
8307 
8308 // ROL 32bit var by var once
8309 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8310   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8311 
8312   expand %{
8313     rolI_eReg_CL(dst, shift, cr);
8314   %}
8315 %}
8316 
8317 // ROL 32bit var by var once
8318 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8319   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8320 
8321   expand %{
8322     rolI_eReg_CL(dst, shift, cr);
8323   %}
8324 %}
8325 
8326 // ROR expand
8327 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8328   effect(USE_DEF dst, USE shift, KILL cr);
8329 
8330   format %{ "ROR    $dst, $shift" %}
8331   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8332   ins_encode( OpcP, RegOpc( dst ) );
8333   ins_pipe( ialu_reg );
8334 %}
8335 
8336 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8337   effect (USE_DEF dst, USE shift, KILL cr);
8338 
8339   format %{ "ROR    $dst, $shift" %}
8340   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8341   ins_encode( RegOpcImm(dst, shift) );
8342   ins_pipe( ialu_reg );
8343 %}
8344 
8345 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8346   effect(USE_DEF dst, USE shift, KILL cr);
8347 
8348   format %{ "ROR    $dst, $shift" %}
8349   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8350   ins_encode(OpcP, RegOpc(dst));
8351   ins_pipe( ialu_reg_reg );
8352 %}
8353 // end of ROR expand
8354 
8355 // ROR right once
8356 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8357   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8358 
8359   expand %{
8360     rorI_eReg_imm1(dst, rshift, cr);
8361   %}
8362 %}
8363 
8364 // ROR 32bit by immI8 once
8365 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8366   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8367   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8368 
8369   expand %{
8370     rorI_eReg_imm8(dst, rshift, cr);
8371   %}
8372 %}
8373 
8374 // ROR 32bit var by var once
8375 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8376   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8377 
8378   expand %{
8379     rorI_eReg_CL(dst, shift, cr);
8380   %}
8381 %}
8382 
8383 // ROR 32bit var by var once
8384 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8385   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8386 
8387   expand %{
8388     rorI_eReg_CL(dst, shift, cr);
8389   %}
8390 %}
8391 
8392 // Xor Instructions
8393 // Xor Register with Register
8394 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8395   match(Set dst (XorI dst src));
8396   effect(KILL cr);
8397 
8398   size(2);
8399   format %{ "XOR    $dst,$src" %}
8400   opcode(0x33);
8401   ins_encode( OpcP, RegReg( dst, src) );
8402   ins_pipe( ialu_reg_reg );
8403 %}
8404 
8405 // Xor Register with Immediate -1
8406 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8407   match(Set dst (XorI dst imm));
8408 
8409   size(2);
8410   format %{ "NOT    $dst" %}
8411   ins_encode %{
8412      __ notl($dst$$Register);
8413   %}
8414   ins_pipe( ialu_reg );
8415 %}
8416 
8417 // Xor Register with Immediate
8418 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8419   match(Set dst (XorI dst src));
8420   effect(KILL cr);
8421 
8422   format %{ "XOR    $dst,$src" %}
8423   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8424   // ins_encode( RegImm( dst, src) );
8425   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8426   ins_pipe( ialu_reg );
8427 %}
8428 
8429 // Xor Register with Memory
8430 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8431   match(Set dst (XorI dst (LoadI src)));
8432   effect(KILL cr);
8433 
8434   ins_cost(125);
8435   format %{ "XOR    $dst,$src" %}
8436   opcode(0x33);
8437   ins_encode( OpcP, RegMem(dst, src) );
8438   ins_pipe( ialu_reg_mem );
8439 %}
8440 
8441 // Xor Memory with Register
8442 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8443   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8444   effect(KILL cr);
8445 
8446   ins_cost(150);
8447   format %{ "XOR    $dst,$src" %}
8448   opcode(0x31);  /* Opcode 31 /r */
8449   ins_encode( OpcP, RegMem( src, dst ) );
8450   ins_pipe( ialu_mem_reg );
8451 %}
8452 
8453 // Xor Memory with Immediate
8454 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8455   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8456   effect(KILL cr);
8457 
8458   ins_cost(125);
8459   format %{ "XOR    $dst,$src" %}
8460   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8461   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8462   ins_pipe( ialu_mem_imm );
8463 %}
8464 
8465 //----------Convert Int to Boolean---------------------------------------------
8466 
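// Conv2B turns a non-zero int or pointer into 1 and zero into 0 without a
// branch: after copying src into dst, NEG dst sets CF exactly when src != 0,
// and ADC dst,src then leaves dst = -src + src + CF = CF, i.e. the 0/1 value.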
8467 instruct movI_nocopy(rRegI dst, rRegI src) %{
8468   effect( DEF dst, USE src );
8469   format %{ "MOV    $dst,$src" %}
8470   ins_encode( enc_Copy( dst, src) );
8471   ins_pipe( ialu_reg_reg );
8472 %}
8473 
8474 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8475   effect( USE_DEF dst, USE src, KILL cr );
8476 
8477   size(4);
8478   format %{ "NEG    $dst\n\t"
8479             "ADC    $dst,$src" %}
8480   ins_encode( neg_reg(dst),
8481               OpcRegReg(0x13,dst,src) );
8482   ins_pipe( ialu_reg_reg_long );
8483 %}
8484 
8485 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8486   match(Set dst (Conv2B src));
8487 
8488   expand %{
8489     movI_nocopy(dst,src);
8490     ci2b(dst,src,cr);
8491   %}
8492 %}
8493 
8494 instruct movP_nocopy(rRegI dst, eRegP src) %{
8495   effect( DEF dst, USE src );
8496   format %{ "MOV    $dst,$src" %}
8497   ins_encode( enc_Copy( dst, src) );
8498   ins_pipe( ialu_reg_reg );
8499 %}
8500 
8501 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8502   effect( USE_DEF dst, USE src, KILL cr );
8503   format %{ "NEG    $dst\n\t"
8504             "ADC    $dst,$src" %}
8505   ins_encode( neg_reg(dst),
8506               OpcRegReg(0x13,dst,src) );
8507   ins_pipe( ialu_reg_reg_long );
8508 %}
8509 
8510 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8511   match(Set dst (Conv2B src));
8512 
8513   expand %{
8514     movP_nocopy(dst,src);
8515     cp2b(dst,src,cr);
8516   %}
8517 %}
8518 
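// CmpLTMask yields an all-ones mask when p < q (signed) and zero otherwise:
// SETlt materializes the comparison as 0/1 in the (pre-zeroed) low byte and
// NEG turns the 1 into 0xFFFFFFFF.  The compare-against-zero form below gets
// the same mask directly with SAR dst,31.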
8519 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8520   match(Set dst (CmpLTMask p q));
8521   effect(KILL cr);
8522   ins_cost(400);
8523 
8524   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8525   format %{ "XOR    $dst,$dst\n\t"
8526             "CMP    $p,$q\n\t"
8527             "SETlt  $dst\n\t"
8528             "NEG    $dst" %}
8529   ins_encode %{
8530     Register Rp = $p$$Register;
8531     Register Rq = $q$$Register;
8532     Register Rd = $dst$$Register;
8534     __ xorl(Rd, Rd);
8535     __ cmpl(Rp, Rq);
8536     __ setb(Assembler::less, Rd);
8537     __ negl(Rd);
8538   %}
8539 
8540   ins_pipe(pipe_slow);
8541 %}
8542 
8543 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8544   match(Set dst (CmpLTMask dst zero));
8545   effect(DEF dst, KILL cr);
8546   ins_cost(100);
8547 
8548   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8549   ins_encode %{
8550   __ sarl($dst$$Register, 31);
8551   %}
8552   ins_pipe(ialu_reg);
8553 %}
8554 
8555 /* better to save a register than avoid a branch */
8556 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8557   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8558   effect(KILL cr);
8559   ins_cost(400);
8560   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8561             "JGE    done\n\t"
8562             "ADD    $p,$y\n"
8563             "done:  " %}
8564   ins_encode %{
8565     Register Rp = $p$$Register;
8566     Register Rq = $q$$Register;
8567     Register Ry = $y$$Register;
8568     Label done;
8569     __ subl(Rp, Rq);
8570     __ jccb(Assembler::greaterEqual, done);
8571     __ addl(Rp, Ry);
8572     __ bind(done);
8573   %}
8574 
8575   ins_pipe(pipe_cmplt);
8576 %}
8577 
8578 /* better to save a register than avoid a branch */
8579 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8580   match(Set y (AndI (CmpLTMask p q) y));
8581   effect(KILL cr);
8582 
8583   ins_cost(300);
8584 
8585   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8586             "JLT      done\n\t"
8587             "XORL     $y, $y\n"
8588             "done:  " %}
8589   ins_encode %{
8590     Register Rp = $p$$Register;
8591     Register Rq = $q$$Register;
8592     Register Ry = $y$$Register;
8593     Label done;
8594     __ cmpl(Rp, Rq);
8595     __ jccb(Assembler::less, done);
8596     __ xorl(Ry, Ry);
8597     __ bind(done);
8598   %}
8599 
8600   ins_pipe(pipe_cmplt);
8601 %}
8602 
8603 /* If I enable this, I encourage spilling in the inner loop of compress.
8604 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8605   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8606 */
8607 //----------Overflow Math Instructions-----------------------------------------
8608 
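// These match the Overflow* ideal nodes, whose only result is the flags
// register: the ADD/CMP/NEG/IMUL forms below leave the overflow condition in
// EFLAGS for a following branch-on-overflow (used, for example, by the
// Math.*Exact intrinsics to reach their deoptimizing slow path).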
8609 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8610 %{
8611   match(Set cr (OverflowAddI op1 op2));
8612   effect(DEF cr, USE_KILL op1, USE op2);
8613 
8614   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8615 
8616   ins_encode %{
8617     __ addl($op1$$Register, $op2$$Register);
8618   %}
8619   ins_pipe(ialu_reg_reg);
8620 %}
8621 
8622 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8623 %{
8624   match(Set cr (OverflowAddI op1 op2));
8625   effect(DEF cr, USE_KILL op1, USE op2);
8626 
8627   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8628 
8629   ins_encode %{
8630     __ addl($op1$$Register, $op2$$constant);
8631   %}
8632   ins_pipe(ialu_reg_reg);
8633 %}
8634 
8635 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8636 %{
8637   match(Set cr (OverflowSubI op1 op2));
8638 
8639   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8640   ins_encode %{
8641     __ cmpl($op1$$Register, $op2$$Register);
8642   %}
8643   ins_pipe(ialu_reg_reg);
8644 %}
8645 
8646 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8647 %{
8648   match(Set cr (OverflowSubI op1 op2));
8649 
8650   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8651   ins_encode %{
8652     __ cmpl($op1$$Register, $op2$$constant);
8653   %}
8654   ins_pipe(ialu_reg_reg);
8655 %}
8656 
8657 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8658 %{
8659   match(Set cr (OverflowSubI zero op2));
8660   effect(DEF cr, USE_KILL op2);
8661 
8662   format %{ "NEG    $op2\t# overflow check int" %}
8663   ins_encode %{
8664     __ negl($op2$$Register);
8665   %}
8666   ins_pipe(ialu_reg_reg);
8667 %}
8668 
8669 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8670 %{
8671   match(Set cr (OverflowMulI op1 op2));
8672   effect(DEF cr, USE_KILL op1, USE op2);
8673 
8674   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8675   ins_encode %{
8676     __ imull($op1$$Register, $op2$$Register);
8677   %}
8678   ins_pipe(ialu_reg_reg_alu0);
8679 %}
8680 
8681 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8682 %{
8683   match(Set cr (OverflowMulI op1 op2));
8684   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8685 
8686   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8687   ins_encode %{
8688     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8689   %}
8690   ins_pipe(ialu_reg_reg_alu0);
8691 %}
8692 
8693 //----------Long Instructions------------------------------------------------
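// On 32-bit x86 a long occupies a register pair (e.g. EDX:EAX); HIGH_FROM_LOW
// names the register holding the high half of the pair.  64-bit add/subtract
// is emitted as ADD/SUB of the low halves followed by ADC/SBB of the high
// halves so the carry or borrow propagates.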
8694 // Add Long Register with Register
8695 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8696   match(Set dst (AddL dst src));
8697   effect(KILL cr);
8698   ins_cost(200);
8699   format %{ "ADD    $dst.lo,$src.lo\n\t"
8700             "ADC    $dst.hi,$src.hi" %}
8701   opcode(0x03, 0x13);
8702   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8703   ins_pipe( ialu_reg_reg_long );
8704 %}
8705 
8706 // Add Long Register with Immediate
8707 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8708   match(Set dst (AddL dst src));
8709   effect(KILL cr);
8710   format %{ "ADD    $dst.lo,$src.lo\n\t"
8711             "ADC    $dst.hi,$src.hi" %}
8712   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8713   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8714   ins_pipe( ialu_reg_long );
8715 %}
8716 
8717 // Add Long Register with Memory
8718 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8719   match(Set dst (AddL dst (LoadL mem)));
8720   effect(KILL cr);
8721   ins_cost(125);
8722   format %{ "ADD    $dst.lo,$mem\n\t"
8723             "ADC    $dst.hi,$mem+4" %}
8724   opcode(0x03, 0x13);
8725   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8726   ins_pipe( ialu_reg_long_mem );
8727 %}
8728 
8729 // Subtract Long Register with Register.
8730 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8731   match(Set dst (SubL dst src));
8732   effect(KILL cr);
8733   ins_cost(200);
8734   format %{ "SUB    $dst.lo,$src.lo\n\t"
8735             "SBB    $dst.hi,$src.hi" %}
8736   opcode(0x2B, 0x1B);
8737   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8738   ins_pipe( ialu_reg_reg_long );
8739 %}
8740 
8741 // Subtract Long Register with Immediate
8742 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8743   match(Set dst (SubL dst src));
8744   effect(KILL cr);
8745   format %{ "SUB    $dst.lo,$src.lo\n\t"
8746             "SBB    $dst.hi,$src.hi" %}
8747   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8748   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8749   ins_pipe( ialu_reg_long );
8750 %}
8751 
8752 // Subtract Long Register with Memory
8753 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8754   match(Set dst (SubL dst (LoadL mem)));
8755   effect(KILL cr);
8756   ins_cost(125);
8757   format %{ "SUB    $dst.lo,$mem\n\t"
8758             "SBB    $dst.hi,$mem+4" %}
8759   opcode(0x2B, 0x1B);
8760   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8761   ins_pipe( ialu_reg_long_mem );
8762 %}
8763 
8764 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8765   match(Set dst (SubL zero dst));
8766   effect(KILL cr);
8767   ins_cost(300);
8768   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8769   ins_encode( neg_long(dst) );
8770   ins_pipe( ialu_reg_reg_long );
8771 %}
8772 
8773 // And Long Register with Register
8774 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8775   match(Set dst (AndL dst src));
8776   effect(KILL cr);
8777   format %{ "AND    $dst.lo,$src.lo\n\t"
8778             "AND    $dst.hi,$src.hi" %}
8779   opcode(0x23,0x23);
8780   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8781   ins_pipe( ialu_reg_reg_long );
8782 %}
8783 
8784 // And Long Register with Immediate
8785 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8786   match(Set dst (AndL dst src));
8787   effect(KILL cr);
8788   format %{ "AND    $dst.lo,$src.lo\n\t"
8789             "AND    $dst.hi,$src.hi" %}
8790   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8791   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8792   ins_pipe( ialu_reg_long );
8793 %}
8794 
8795 // And Long Register with Memory
8796 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8797   match(Set dst (AndL dst (LoadL mem)));
8798   effect(KILL cr);
8799   ins_cost(125);
8800   format %{ "AND    $dst.lo,$mem\n\t"
8801             "AND    $dst.hi,$mem+4" %}
8802   opcode(0x23, 0x23);
8803   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8804   ins_pipe( ialu_reg_long_mem );
8805 %}
8806 
8807 // BMI1 instructions
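// The 64-bit BMI1 forms are built from the 32-bit instructions: ANDN is simply
// applied to both halves, while BLSI/BLSMSK/BLSR handle the low half first and
// then use that instruction's flags (ZF of the BLSI result, CF for
// BLSMSK/BLSR) to tell whether the lowest set bit was already in the low word,
// skipping the high-half instruction when it was.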
8808 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8809   match(Set dst (AndL (XorL src1 minus_1) src2));
8810   predicate(UseBMI1Instructions);
8811   effect(KILL cr, TEMP dst);
8812 
8813   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8814             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8815          %}
8816 
8817   ins_encode %{
8818     Register Rdst = $dst$$Register;
8819     Register Rsrc1 = $src1$$Register;
8820     Register Rsrc2 = $src2$$Register;
8821     __ andnl(Rdst, Rsrc1, Rsrc2);
8822     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8823   %}
8824   ins_pipe(ialu_reg_reg_long);
8825 %}
8826 
8827 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8828   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8829   predicate(UseBMI1Instructions);
8830   effect(KILL cr, TEMP dst);
8831 
8832   ins_cost(125);
8833   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8834             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8835          %}
8836 
8837   ins_encode %{
8838     Register Rdst = $dst$$Register;
8839     Register Rsrc1 = $src1$$Register;
8840     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8841 
8842     __ andnl(Rdst, Rsrc1, $src2$$Address);
8843     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8844   %}
8845   ins_pipe(ialu_reg_mem);
8846 %}
8847 
8848 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8849   match(Set dst (AndL (SubL imm_zero src) src));
8850   predicate(UseBMI1Instructions);
8851   effect(KILL cr, TEMP dst);
8852 
8853   format %{ "MOVL   $dst.hi, 0\n\t"
8854             "BLSIL  $dst.lo, $src.lo\n\t"
8855             "JNZ    done\n\t"
8856             "BLSIL  $dst.hi, $src.hi\n"
8857             "done:"
8858          %}
8859 
8860   ins_encode %{
8861     Label done;
8862     Register Rdst = $dst$$Register;
8863     Register Rsrc = $src$$Register;
8864     __ movl(HIGH_FROM_LOW(Rdst), 0);
8865     __ blsil(Rdst, Rsrc);
8866     __ jccb(Assembler::notZero, done);
8867     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8868     __ bind(done);
8869   %}
8870   ins_pipe(ialu_reg);
8871 %}
8872 
8873 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8874   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8875   predicate(UseBMI1Instructions);
8876   effect(KILL cr, TEMP dst);
8877 
8878   ins_cost(125);
8879   format %{ "MOVL   $dst.hi, 0\n\t"
8880             "BLSIL  $dst.lo, $src\n\t"
8881             "JNZ    done\n\t"
8882             "BLSIL  $dst.hi, $src+4\n"
8883             "done:"
8884          %}
8885 
8886   ins_encode %{
8887     Label done;
8888     Register Rdst = $dst$$Register;
8889     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8890 
8891     __ movl(HIGH_FROM_LOW(Rdst), 0);
8892     __ blsil(Rdst, $src$$Address);
8893     __ jccb(Assembler::notZero, done);
8894     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8895     __ bind(done);
8896   %}
8897   ins_pipe(ialu_reg_mem);
8898 %}
8899 
8900 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8901 %{
8902   match(Set dst (XorL (AddL src minus_1) src));
8903   predicate(UseBMI1Instructions);
8904   effect(KILL cr, TEMP dst);
8905 
8906   format %{ "MOVL    $dst.hi, 0\n\t"
8907             "BLSMSKL $dst.lo, $src.lo\n\t"
8908             "JNC     done\n\t"
8909             "BLSMSKL $dst.hi, $src.hi\n"
8910             "done:"
8911          %}
8912 
8913   ins_encode %{
8914     Label done;
8915     Register Rdst = $dst$$Register;
8916     Register Rsrc = $src$$Register;
8917     __ movl(HIGH_FROM_LOW(Rdst), 0);
8918     __ blsmskl(Rdst, Rsrc);
8919     __ jccb(Assembler::carryClear, done);
8920     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8921     __ bind(done);
8922   %}
8923 
8924   ins_pipe(ialu_reg);
8925 %}
8926 
8927 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8928 %{
8929   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8930   predicate(UseBMI1Instructions);
8931   effect(KILL cr, TEMP dst);
8932 
8933   ins_cost(125);
8934   format %{ "MOVL    $dst.hi, 0\n\t"
8935             "BLSMSKL $dst.lo, $src\n\t"
8936             "JNC     done\n\t"
8937             "BLSMSKL $dst.hi, $src+4\n"
8938             "done:"
8939          %}
8940 
8941   ins_encode %{
8942     Label done;
8943     Register Rdst = $dst$$Register;
8944     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8945 
8946     __ movl(HIGH_FROM_LOW(Rdst), 0);
8947     __ blsmskl(Rdst, $src$$Address);
8948     __ jccb(Assembler::carryClear, done);
8949     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8950     __ bind(done);
8951   %}
8952 
8953   ins_pipe(ialu_reg_mem);
8954 %}
8955 
8956 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8957 %{
8958   match(Set dst (AndL (AddL src minus_1) src) );
8959   predicate(UseBMI1Instructions);
8960   effect(KILL cr, TEMP dst);
8961 
8962   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8963             "BLSRL  $dst.lo, $src.lo\n\t"
8964             "JNC    done\n\t"
8965             "BLSRL  $dst.hi, $src.hi\n"
8966             "done:"
8967   %}
8968 
8969   ins_encode %{
8970     Label done;
8971     Register Rdst = $dst$$Register;
8972     Register Rsrc = $src$$Register;
8973     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8974     __ blsrl(Rdst, Rsrc);
8975     __ jccb(Assembler::carryClear, done);
8976     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8977     __ bind(done);
8978   %}
8979 
8980   ins_pipe(ialu_reg);
8981 %}
8982 
8983 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8984 %{
8985   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8986   predicate(UseBMI1Instructions);
8987   effect(KILL cr, TEMP dst);
8988 
8989   ins_cost(125);
8990   format %{ "MOVL   $dst.hi, $src+4\n\t"
8991             "BLSRL  $dst.lo, $src\n\t"
8992             "JNC    done\n\t"
8993             "BLSRL  $dst.hi, $src+4\n"
8994             "done:"
8995   %}
8996 
8997   ins_encode %{
8998     Label done;
8999     Register Rdst = $dst$$Register;
9000     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9001     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9002     __ blsrl(Rdst, $src$$Address);
9003     __ jccb(Assembler::carryClear, done);
9004     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9005     __ bind(done);
9006   %}
9007 
9008   ins_pipe(ialu_reg_mem);
9009 %}
9010 
9011 // Or Long Register with Register
9012 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9013   match(Set dst (OrL dst src));
9014   effect(KILL cr);
9015   format %{ "OR     $dst.lo,$src.lo\n\t"
9016             "OR     $dst.hi,$src.hi" %}
9017   opcode(0x0B,0x0B);
9018   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9019   ins_pipe( ialu_reg_reg_long );
9020 %}
9021 
9022 // Or Long Register with Immediate
9023 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9024   match(Set dst (OrL dst src));
9025   effect(KILL cr);
9026   format %{ "OR     $dst.lo,$src.lo\n\t"
9027             "OR     $dst.hi,$src.hi" %}
9028   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9029   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Or Long Register with Memory
9034 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9035   match(Set dst (OrL dst (LoadL mem)));
9036   effect(KILL cr);
9037   ins_cost(125);
9038   format %{ "OR     $dst.lo,$mem\n\t"
9039             "OR     $dst.hi,$mem+4" %}
9040   opcode(0x0B,0x0B);
9041   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9042   ins_pipe( ialu_reg_long_mem );
9043 %}
9044 
9045 // Xor Long Register with Register
9046 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9047   match(Set dst (XorL dst src));
9048   effect(KILL cr);
9049   format %{ "XOR    $dst.lo,$src.lo\n\t"
9050             "XOR    $dst.hi,$src.hi" %}
9051   opcode(0x33,0x33);
9052   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9053   ins_pipe( ialu_reg_reg_long );
9054 %}
9055 
9056 // Xor Long Register with Immediate -1
9057 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9058   match(Set dst (XorL dst imm));
9059   format %{ "NOT    $dst.lo\n\t"
9060             "NOT    $dst.hi" %}
9061   ins_encode %{
9062      __ notl($dst$$Register);
9063      __ notl(HIGH_FROM_LOW($dst$$Register));
9064   %}
9065   ins_pipe( ialu_reg_long );
9066 %}
9067 
9068 // Xor Long Register with Immediate
9069 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9070   match(Set dst (XorL dst src));
9071   effect(KILL cr);
9072   format %{ "XOR    $dst.lo,$src.lo\n\t"
9073             "XOR    $dst.hi,$src.hi" %}
9074   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9075   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9076   ins_pipe( ialu_reg_long );
9077 %}
9078 
9079 // Xor Long Register with Memory
9080 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9081   match(Set dst (XorL dst (LoadL mem)));
9082   effect(KILL cr);
9083   ins_cost(125);
9084   format %{ "XOR    $dst.lo,$mem\n\t"
9085             "XOR    $dst.hi,$mem+4" %}
9086   opcode(0x33,0x33);
9087   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9088   ins_pipe( ialu_reg_long_mem );
9089 %}
9090 
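// Long shifts: small constant left shifts (1..3) are strength-reduced to
// ADD/ADC chains when UseNewLongLShift is on; constant counts in 1..31 use the
// double-precision SHLD/SHRD forms to move bits between the halves of the
// pair; counts of 32..63 become a move of one half plus a 32-bit shift of the
// other, with the vacated half cleared (or sign-filled for SAR).  The
// variable-count forms test bit 5 of the count at run time to pick between the
// two cases, since the hardware uses only the low 5 bits of CL.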
9091 // Shift Left Long by 1
9092 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9093   predicate(UseNewLongLShift);
9094   match(Set dst (LShiftL dst cnt));
9095   effect(KILL cr);
9096   ins_cost(100);
9097   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9098             "ADC    $dst.hi,$dst.hi" %}
9099   ins_encode %{
9100     __ addl($dst$$Register,$dst$$Register);
9101     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9102   %}
9103   ins_pipe( ialu_reg_long );
9104 %}
9105 
9106 // Shift Left Long by 2
9107 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9108   predicate(UseNewLongLShift);
9109   match(Set dst (LShiftL dst cnt));
9110   effect(KILL cr);
9111   ins_cost(100);
9112   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9113             "ADC    $dst.hi,$dst.hi\n\t"
9114             "ADD    $dst.lo,$dst.lo\n\t"
9115             "ADC    $dst.hi,$dst.hi" %}
9116   ins_encode %{
9117     __ addl($dst$$Register,$dst$$Register);
9118     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9119     __ addl($dst$$Register,$dst$$Register);
9120     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9121   %}
9122   ins_pipe( ialu_reg_long );
9123 %}
9124 
9125 // Shift Left Long by 3
9126 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9127   predicate(UseNewLongLShift);
9128   match(Set dst (LShiftL dst cnt));
9129   effect(KILL cr);
9130   ins_cost(100);
9131   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9132             "ADC    $dst.hi,$dst.hi\n\t"
9133             "ADD    $dst.lo,$dst.lo\n\t"
9134             "ADC    $dst.hi,$dst.hi\n\t"
9135             "ADD    $dst.lo,$dst.lo\n\t"
9136             "ADC    $dst.hi,$dst.hi" %}
9137   ins_encode %{
9138     __ addl($dst$$Register,$dst$$Register);
9139     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9140     __ addl($dst$$Register,$dst$$Register);
9141     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9142     __ addl($dst$$Register,$dst$$Register);
9143     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9144   %}
9145   ins_pipe( ialu_reg_long );
9146 %}
9147 
9148 // Shift Left Long by 1-31
9149 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9150   match(Set dst (LShiftL dst cnt));
9151   effect(KILL cr);
9152   ins_cost(200);
9153   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9154             "SHL    $dst.lo,$cnt" %}
9155   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9156   ins_encode( move_long_small_shift(dst,cnt) );
9157   ins_pipe( ialu_reg_long );
9158 %}
9159 
9160 // Shift Left Long by 32-63
9161 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9162   match(Set dst (LShiftL dst cnt));
9163   effect(KILL cr);
9164   ins_cost(300);
9165   format %{ "MOV    $dst.hi,$dst.lo\n"
9166           "\tSHL    $dst.hi,$cnt-32\n"
9167           "\tXOR    $dst.lo,$dst.lo" %}
9168   opcode(0xC1, 0x4);  /* C1 /4 ib */
9169   ins_encode( move_long_big_shift_clr(dst,cnt) );
9170   ins_pipe( ialu_reg_long );
9171 %}
9172 
9173 // Shift Left Long by variable
9174 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9175   match(Set dst (LShiftL dst shift));
9176   effect(KILL cr);
9177   ins_cost(500+200);
9178   size(17);
9179   format %{ "TEST   $shift,32\n\t"
9180             "JEQ,s  small\n\t"
9181             "MOV    $dst.hi,$dst.lo\n\t"
9182             "XOR    $dst.lo,$dst.lo\n"
9183     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9184             "SHL    $dst.lo,$shift" %}
9185   ins_encode( shift_left_long( dst, shift ) );
9186   ins_pipe( pipe_slow );
9187 %}
9188 
9189 // Shift Right Long by 1-31
9190 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9191   match(Set dst (URShiftL dst cnt));
9192   effect(KILL cr);
9193   ins_cost(200);
9194   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9195             "SHR    $dst.hi,$cnt" %}
9196   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9197   ins_encode( move_long_small_shift(dst,cnt) );
9198   ins_pipe( ialu_reg_long );
9199 %}
9200 
9201 // Shift Right Long by 32-63
9202 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9203   match(Set dst (URShiftL dst cnt));
9204   effect(KILL cr);
9205   ins_cost(300);
9206   format %{ "MOV    $dst.lo,$dst.hi\n"
9207           "\tSHR    $dst.lo,$cnt-32\n"
9208           "\tXOR    $dst.hi,$dst.hi" %}
9209   opcode(0xC1, 0x5);  /* C1 /5 ib */
9210   ins_encode( move_long_big_shift_clr(dst,cnt) );
9211   ins_pipe( ialu_reg_long );
9212 %}
9213 
9214 // Shift Right Long by variable
9215 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9216   match(Set dst (URShiftL dst shift));
9217   effect(KILL cr);
9218   ins_cost(600);
9219   size(17);
9220   format %{ "TEST   $shift,32\n\t"
9221             "JEQ,s  small\n\t"
9222             "MOV    $dst.lo,$dst.hi\n\t"
9223             "XOR    $dst.hi,$dst.hi\n"
9224     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9225             "SHR    $dst.hi,$shift" %}
9226   ins_encode( shift_right_long( dst, shift ) );
9227   ins_pipe( pipe_slow );
9228 %}
9229 
9230 // Shift Right Long by 1-31
9231 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9232   match(Set dst (RShiftL dst cnt));
9233   effect(KILL cr);
9234   ins_cost(200);
9235   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9236             "SAR    $dst.hi,$cnt" %}
9237   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9238   ins_encode( move_long_small_shift(dst,cnt) );
9239   ins_pipe( ialu_reg_long );
9240 %}
9241 
9242 // Shift Right Long by 32-63
9243 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9244   match(Set dst (RShiftL dst cnt));
9245   effect(KILL cr);
9246   ins_cost(300);
9247   format %{ "MOV    $dst.lo,$dst.hi\n"
9248           "\tSAR    $dst.lo,$cnt-32\n"
9249           "\tSAR    $dst.hi,31" %}
9250   opcode(0xC1, 0x7);  /* C1 /7 ib */
9251   ins_encode( move_long_big_shift_sign(dst,cnt) );
9252   ins_pipe( ialu_reg_long );
9253 %}
9254 
9255 // Shift Right arithmetic Long by variable
9256 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9257   match(Set dst (RShiftL dst shift));
9258   effect(KILL cr);
9259   ins_cost(600);
9260   size(18);
9261   format %{ "TEST   $shift,32\n\t"
9262             "JEQ,s  small\n\t"
9263             "MOV    $dst.lo,$dst.hi\n\t"
9264             "SAR    $dst.hi,31\n"
9265     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9266             "SAR    $dst.hi,$shift" %}
9267   ins_encode( shift_right_arith_long( dst, shift ) );
9268   ins_pipe( pipe_slow );
9269 %}
9270 
9271 
9272 //----------Double Instructions------------------------------------------------
9273 // Double Math
9274 
9275 // Compare & branch
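// Unordered (NaN) compares must not leave ambiguous flags: FUCOMIP/UCOMISD
// report "unordered" as ZF=PF=CF=1, and the fixup sequences below (SAHF with
// AH=1, or the PUSHF/AND/POPF pair) rewrite that into a plain "below", so a
// NaN operand is treated as less-than by the following branch.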
9276 
9277 // P6 version of float compare, sets condition codes in EFLAGS
9278 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9279   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9280   match(Set cr (CmpD src1 src2));
9281   effect(KILL rax);
9282   ins_cost(150);
9283   format %{ "FLD    $src1\n\t"
9284             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9285             "JNP    exit\n\t"
9286             "MOV    ah,1       // saw a NaN, set CF\n\t"
9287             "SAHF\n"
9288      "exit:\tNOP               // avoid branch to branch" %}
9289   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9290   ins_encode( Push_Reg_DPR(src1),
9291               OpcP, RegOpc(src2),
9292               cmpF_P6_fixup );
9293   ins_pipe( pipe_slow );
9294 %}
9295 
9296 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9297   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9298   match(Set cr (CmpD src1 src2));
9299   ins_cost(150);
9300   format %{ "FLD    $src1\n\t"
9301             "FUCOMIP ST,$src2  // P6 instruction" %}
9302   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9303   ins_encode( Push_Reg_DPR(src1),
9304               OpcP, RegOpc(src2));
9305   ins_pipe( pipe_slow );
9306 %}
9307 
9308 // Compare & branch
9309 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9310   predicate(UseSSE<=1);
9311   match(Set cr (CmpD src1 src2));
9312   effect(KILL rax);
9313   ins_cost(200);
9314   format %{ "FLD    $src1\n\t"
9315             "FCOMp  $src2\n\t"
9316             "FNSTSW AX\n\t"
9317             "TEST   AX,0x400\n\t"
9318             "JZ,s   flags\n\t"
9319             "MOV    AH,1\t# unordered treat as LT\n"
9320     "flags:\tSAHF" %}
9321   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9322   ins_encode( Push_Reg_DPR(src1),
9323               OpcP, RegOpc(src2),
9324               fpu_flags);
9325   ins_pipe( pipe_slow );
9326 %}
9327 
9328 // Compare vs zero into -1,0,1
9329 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9330   predicate(UseSSE<=1);
9331   match(Set dst (CmpD3 src1 zero));
9332   effect(KILL cr, KILL rax);
9333   ins_cost(280);
9334   format %{ "FTSTD  $dst,$src1" %}
9335   opcode(0xE4, 0xD9);
9336   ins_encode( Push_Reg_DPR(src1),
9337               OpcS, OpcP, PopFPU,
9338               CmpF_Result(dst));
9339   ins_pipe( pipe_slow );
9340 %}
9341 
9342 // Compare into -1,0,1
9343 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9344   predicate(UseSSE<=1);
9345   match(Set dst (CmpD3 src1 src2));
9346   effect(KILL cr, KILL rax);
9347   ins_cost(300);
9348   format %{ "FCMPD  $dst,$src1,$src2" %}
9349   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9350   ins_encode( Push_Reg_DPR(src1),
9351               OpcP, RegOpc(src2),
9352               CmpF_Result(dst));
9353   ins_pipe( pipe_slow );
9354 %}
9355 
9356 // float compare and set condition codes in EFLAGS by XMM regs
9357 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9358   predicate(UseSSE>=2);
9359   match(Set cr (CmpD src1 src2));
9360   ins_cost(145);
9361   format %{ "UCOMISD $src1,$src2\n\t"
9362             "JNP,s   exit\n\t"
9363             "PUSHF\t# saw NaN, set CF\n\t"
9364             "AND     [rsp], #0xffffff2b\n\t"
9365             "POPF\n"
9366     "exit:" %}
9367   ins_encode %{
9368     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9369     emit_cmpfp_fixup(_masm);
9370   %}
9371   ins_pipe( pipe_slow );
9372 %}
9373 
9374 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9375   predicate(UseSSE>=2);
9376   match(Set cr (CmpD src1 src2));
9377   ins_cost(100);
9378   format %{ "UCOMISD $src1,$src2" %}
9379   ins_encode %{
9380     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9381   %}
9382   ins_pipe( pipe_slow );
9383 %}
9384 
9385 // float compare and set condition codes in EFLAGS by XMM regs
9386 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9387   predicate(UseSSE>=2);
9388   match(Set cr (CmpD src1 (LoadD src2)));
9389   ins_cost(145);
9390   format %{ "UCOMISD $src1,$src2\n\t"
9391             "JNP,s   exit\n\t"
9392             "PUSHF\t# saw NaN, set CF\n\t"
9393             "AND     [rsp], #0xffffff2b\n\t"
9394             "POPF\n"
9395     "exit:" %}
9396   ins_encode %{
9397     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9398     emit_cmpfp_fixup(_masm);
9399   %}
9400   ins_pipe( pipe_slow );
9401 %}
9402 
9403 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9404   predicate(UseSSE>=2);
9405   match(Set cr (CmpD src1 (LoadD src2)));
9406   ins_cost(100);
9407   format %{ "UCOMISD $src1,$src2" %}
9408   ins_encode %{
9409     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9410   %}
9411   ins_pipe( pipe_slow );
9412 %}
9413 
9414 // Compare into -1,0,1 in XMM
9415 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9416   predicate(UseSSE>=2);
9417   match(Set dst (CmpD3 src1 src2));
9418   effect(KILL cr);
9419   ins_cost(255);
9420   format %{ "UCOMISD $src1, $src2\n\t"
9421             "MOV     $dst, #-1\n\t"
9422             "JP,s    done\n\t"
9423             "JB,s    done\n\t"
9424             "SETNE   $dst\n\t"
9425             "MOVZB   $dst, $dst\n"
9426     "done:" %}
9427   ins_encode %{
9428     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9429     emit_cmpfp3(_masm, $dst$$Register);
9430   %}
9431   ins_pipe( pipe_slow );
9432 %}
9433 
9434 // Compare into -1,0,1 in XMM and memory
9435 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9436   predicate(UseSSE>=2);
9437   match(Set dst (CmpD3 src1 (LoadD src2)));
9438   effect(KILL cr);
9439   ins_cost(275);
9440   format %{ "UCOMISD $src1, $src2\n\t"
9441             "MOV     $dst, #-1\n\t"
9442             "JP,s    done\n\t"
9443             "JB,s    done\n\t"
9444             "SETNE   $dst\n\t"
9445             "MOVZB   $dst, $dst\n"
9446     "done:" %}
9447   ins_encode %{
9448     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9449     emit_cmpfp3(_masm, $dst$$Register);
9450   %}
9451   ins_pipe( pipe_slow );
9452 %}
9453 
9454 
9455 instruct subDPR_reg(regDPR dst, regDPR src) %{
9456   predicate (UseSSE <=1);
9457   match(Set dst (SubD dst src));
9458 
9459   format %{ "FLD    $src\n\t"
9460             "DSUBp  $dst,ST" %}
9461   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9462   ins_cost(150);
9463   ins_encode( Push_Reg_DPR(src),
9464               OpcP, RegOpc(dst) );
9465   ins_pipe( fpu_reg_reg );
9466 %}
9467 
9468 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9469   predicate (UseSSE <=1);
9470   match(Set dst (RoundDouble (SubD src1 src2)));
9471   ins_cost(250);
9472 
9473   format %{ "FLD    $src2\n\t"
9474             "DSUB   ST,$src1\n\t"
9475             "FSTP_D $dst\t# D-round" %}
9476   opcode(0xD8, 0x5);
9477   ins_encode( Push_Reg_DPR(src2),
9478               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9479   ins_pipe( fpu_mem_reg_reg );
9480 %}
9481 
9482 
9483 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9484   predicate (UseSSE <=1);
9485   match(Set dst (SubD dst (LoadD src)));
9486   ins_cost(150);
9487 
9488   format %{ "FLD    $src\n\t"
9489             "DSUBp  $dst,ST" %}
9490   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9491   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9492               OpcP, RegOpc(dst) );
9493   ins_pipe( fpu_reg_mem );
9494 %}
9495 
9496 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9497   predicate (UseSSE<=1);
9498   match(Set dst (AbsD src));
9499   ins_cost(100);
9500   format %{ "FABS" %}
9501   opcode(0xE1, 0xD9);
9502   ins_encode( OpcS, OpcP );
9503   ins_pipe( fpu_reg_reg );
9504 %}
9505 
9506 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9507   predicate(UseSSE<=1);
9508   match(Set dst (NegD src));
9509   ins_cost(100);
9510   format %{ "FCHS" %}
9511   opcode(0xE0, 0xD9);
9512   ins_encode( OpcS, OpcP );
9513   ins_pipe( fpu_reg_reg );
9514 %}
9515 
9516 instruct addDPR_reg(regDPR dst, regDPR src) %{
9517   predicate(UseSSE<=1);
9518   match(Set dst (AddD dst src));
9519   format %{ "FLD    $src\n\t"
9520             "DADD   $dst,ST" %}
9521   size(4);
9522   ins_cost(150);
9523   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9524   ins_encode( Push_Reg_DPR(src),
9525               OpcP, RegOpc(dst) );
9526   ins_pipe( fpu_reg_reg );
9527 %}
9528 
9529 
9530 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9531   predicate(UseSSE<=1);
9532   match(Set dst (RoundDouble (AddD src1 src2)));
9533   ins_cost(250);
9534 
9535   format %{ "FLD    $src2\n\t"
9536             "DADD   ST,$src1\n\t"
9537             "FSTP_D $dst\t# D-round" %}
9538   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9539   ins_encode( Push_Reg_DPR(src2),
9540               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9541   ins_pipe( fpu_mem_reg_reg );
9542 %}
9543 
9544 
9545 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9546   predicate(UseSSE<=1);
9547   match(Set dst (AddD dst (LoadD src)));
9548   ins_cost(150);
9549 
9550   format %{ "FLD    $src\n\t"
9551             "DADDp  $dst,ST" %}
9552   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9553   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9554               OpcP, RegOpc(dst) );
9555   ins_pipe( fpu_reg_mem );
9556 %}
9557 
9558 // add-to-memory
9559 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9560   predicate(UseSSE<=1);
9561   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9562   ins_cost(150);
9563 
9564   format %{ "FLD_D  $dst\n\t"
9565             "DADD   ST,$src\n\t"
9566             "FST_D  $dst" %}
9567   opcode(0xDD, 0x0);
9568   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9569               Opcode(0xD8), RegOpc(src),
9570               set_instruction_start,
9571               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9572   ins_pipe( fpu_reg_mem );
9573 %}
9574 
9575 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9576   predicate(UseSSE<=1);
9577   match(Set dst (AddD dst con));
9578   ins_cost(125);
9579   format %{ "FLD1\n\t"
9580             "DADDp  $dst,ST" %}
9581   ins_encode %{
9582     __ fld1();
9583     __ faddp($dst$$reg);
9584   %}
9585   ins_pipe(fpu_reg);
9586 %}
9587 
9588 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9589   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9590   match(Set dst (AddD dst con));
9591   ins_cost(200);
9592   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9593             "DADDp  $dst,ST" %}
9594   ins_encode %{
9595     __ fld_d($constantaddress($con));
9596     __ faddp($dst$$reg);
9597   %}
9598   ins_pipe(fpu_reg_mem);
9599 %}
9600 
9601 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9602   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9603   match(Set dst (RoundDouble (AddD src con)));
9604   ins_cost(200);
9605   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9606             "DADD   ST,$src\n\t"
9607             "FSTP_D $dst\t# D-round" %}
9608   ins_encode %{
9609     __ fld_d($constantaddress($con));
9610     __ fadd($src$$reg);
9611     __ fstp_d(Address(rsp, $dst$$disp));
9612   %}
9613   ins_pipe(fpu_mem_reg_con);
9614 %}
9615 
9616 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9617   predicate(UseSSE<=1);
9618   match(Set dst (MulD dst src));
9619   format %{ "FLD    $src\n\t"
9620             "DMULp  $dst,ST" %}
9621   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9622   ins_cost(150);
9623   ins_encode( Push_Reg_DPR(src),
9624               OpcP, RegOpc(dst) );
9625   ins_pipe( fpu_reg_reg );
9626 %}
9627 
9628 // Strict FP instruction biases argument before multiply then
9629 // biases result to avoid double rounding of subnormals.
9630 //
9631 // scale arg1 by multiplying arg1 by 2^(-15360)
9632 // load arg2
9633 // multiply scaled arg1 by arg2
9634 // rescale product by 2^(15360)
9635 //
9636 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9637   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9638   match(Set dst (MulD dst src));
9639   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9640 
9641   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9642             "DMULp  $dst,ST\n\t"
9643             "FLD    $src\n\t"
9644             "DMULp  $dst,ST\n\t"
9645             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9646             "DMULp  $dst,ST\n\t" %}
9647   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9648   ins_encode( strictfp_bias1(dst),
9649               Push_Reg_DPR(src),
9650               OpcP, RegOpc(dst),
9651               strictfp_bias2(dst) );
9652   ins_pipe( fpu_reg_reg );
9653 %}
9654 
9655 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9656   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9657   match(Set dst (MulD dst con));
9658   ins_cost(200);
9659   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9660             "DMULp  $dst,ST" %}
9661   ins_encode %{
9662     __ fld_d($constantaddress($con));
9663     __ fmulp($dst$$reg);
9664   %}
9665   ins_pipe(fpu_reg_mem);
9666 %}
9667 
9668 
9669 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9670   predicate( UseSSE<=1 );
9671   match(Set dst (MulD dst (LoadD src)));
9672   ins_cost(200);
9673   format %{ "FLD_D  $src\n\t"
9674             "DMULp  $dst,ST" %}
9675   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9676   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9677               OpcP, RegOpc(dst) );
9678   ins_pipe( fpu_reg_mem );
9679 %}
9680 
9681 //
9682 // Cisc-alternate to reg-reg multiply
9683 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9684   predicate( UseSSE<=1 );
9685   match(Set dst (MulD src (LoadD mem)));
9686   ins_cost(250);
9687   format %{ "FLD_D  $mem\n\t"
9688             "DMUL   ST,$src\n\t"
9689             "FSTP_D $dst" %}
9690   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9691   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9692               OpcReg_FPR(src),
9693               Pop_Reg_DPR(dst) );
9694   ins_pipe( fpu_reg_reg_mem );
9695 %}
9696 
9697 
9698 // MACRO3 -- addDPR a mulDPR
9699 // This instruction is a '2-address' instruction in that the result goes
9700 // back to src2.  This eliminates a move from the macro; possibly the
9701 // register allocator will have to add it back (and maybe not).
9702 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9703   predicate( UseSSE<=1 );
9704   match(Set src2 (AddD (MulD src0 src1) src2));
9705   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9706             "DMUL   ST,$src1\n\t"
9707             "DADDp  $src2,ST" %}
9708   ins_cost(250);
9709   opcode(0xDD); /* LoadD DD /0 */
9710   ins_encode( Push_Reg_FPR(src0),
9711               FMul_ST_reg(src1),
9712               FAddP_reg_ST(src2) );
9713   ins_pipe( fpu_reg_reg_reg );
9714 %}
9715 
9716 
9717 // MACRO3 -- subDPR a mulDPR
9718 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9719   predicate( UseSSE<=1 );
9720   match(Set src2 (SubD (MulD src0 src1) src2));
9721   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9722             "DMUL   ST,$src1\n\t"
9723             "DSUBRp $src2,ST" %}
9724   ins_cost(250);
9725   ins_encode( Push_Reg_FPR(src0),
9726               FMul_ST_reg(src1),
9727               Opcode(0xDE), Opc_plus(0xE0,src2));
9728   ins_pipe( fpu_reg_reg_reg );
9729 %}
9730 
9731 
9732 instruct divDPR_reg(regDPR dst, regDPR src) %{
9733   predicate( UseSSE<=1 );
9734   match(Set dst (DivD dst src));
9735 
9736   format %{ "FLD    $src\n\t"
9737             "FDIVp  $dst,ST" %}
9738   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9739   ins_cost(150);
9740   ins_encode( Push_Reg_DPR(src),
9741               OpcP, RegOpc(dst) );
9742   ins_pipe( fpu_reg_reg );
9743 %}
9744 
9745 // Strict FP instruction biases argument before division then
9746 // biases result, to avoid double rounding of subnormals.
9747 //
9748 // scale dividend by multiplying dividend by 2^(-15360)
9749 // load divisor
9750 // divide scaled dividend by divisor
9751 // rescale quotient by 2^(15360)
9752 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9758 
9759   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9760             "DMULp  $dst,ST\n\t"
9761             "FLD    $src\n\t"
9762             "FDIVp  $dst,ST\n\t"
9763             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9764             "DMULp  $dst,ST\n\t" %}
9765   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9766   ins_encode( strictfp_bias1(dst),
9767               Push_Reg_DPR(src),
9768               OpcP, RegOpc(dst),
9769               strictfp_bias2(dst) );
9770   ins_pipe( fpu_reg_reg );
9771 %}
9772 
9773 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9774   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9775   match(Set dst (RoundDouble (DivD src1 src2)));
9776 
9777   format %{ "FLD    $src1\n\t"
9778             "FDIV   ST,$src2\n\t"
9779             "FSTP_D $dst\t# D-round" %}
9780   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9781   ins_encode( Push_Reg_DPR(src1),
9782               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9783   ins_pipe( fpu_mem_reg_reg );
9784 %}
9785 
9786 
9787 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9788   predicate(UseSSE<=1);
9789   match(Set dst (ModD dst src));
9790   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9791 
9792   format %{ "DMOD   $dst,$src" %}
9793   ins_cost(250);
9794   ins_encode(Push_Reg_Mod_DPR(dst, src),
9795               emitModDPR(),
9796               Push_Result_Mod_DPR(src),
9797               Pop_Reg_DPR(dst));
9798   ins_pipe( pipe_slow );
9799 %}
9800 
9801 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9802   predicate(UseSSE>=2);
9803   match(Set dst (ModD src0 src1));
9804   effect(KILL rax, KILL cr);
9805 
9806   format %{ "SUB    ESP,8\t # DMOD\n"
9807           "\tMOVSD  [ESP+0],$src1\n"
9808           "\tFLD_D  [ESP+0]\n"
9809           "\tMOVSD  [ESP+0],$src0\n"
9810           "\tFLD_D  [ESP+0]\n"
9811      "loop:\tFPREM\n"
9812           "\tFWAIT\n"
9813           "\tFNSTSW AX\n"
9814           "\tSAHF\n"
9815           "\tJP     loop\n"
9816           "\tFSTP_D [ESP+0]\n"
9817           "\tMOVSD  $dst,[ESP+0]\n"
9818           "\tADD    ESP,8\n"
9819           "\tFSTP   ST0\t # Restore FPU Stack"
9820     %}
9821   ins_cost(250);
9822   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9823   ins_pipe( pipe_slow );
9824 %}
9825 
9826 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9827   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
9829   format %{ "DATA   $dst,$src" %}
9830   opcode(0xD9, 0xF3);
9831   ins_encode( Push_Reg_DPR(src),
9832               OpcP, OpcS, RegOpc(dst) );
9833   ins_pipe( pipe_slow );
9834 %}
9835 
9836 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9837   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
9839   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9840   format %{ "DATA   $dst,$src" %}
9841   opcode(0xD9, 0xF3);
9842   ins_encode( Push_SrcD(src),
9843               OpcP, OpcS, Push_ResultD(dst) );
9844   ins_pipe( pipe_slow );
9845 %}
9846 
9847 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9848   predicate (UseSSE<=1);
9849   match(Set dst (SqrtD src));
9850   format %{ "DSQRT  $dst,$src" %}
9851   opcode(0xFA, 0xD9);
9852   ins_encode( Push_Reg_DPR(src),
9853               OpcS, OpcP, Pop_Reg_DPR(dst) );
9854   ins_pipe( pipe_slow );
9855 %}
9856 
9857 //-------------Float Instructions-------------------------------
9858 // Float Math
9859 
9860 // Code for float compare:
9861 //     fcompp();
9862 //     fwait(); fnstsw_ax();
9863 //     sahf();
9864 //     movl(dst, unordered_result);
9865 //     jcc(Assembler::parity, exit);
9866 //     movl(dst, less_result);
9867 //     jcc(Assembler::below, exit);
9868 //     movl(dst, equal_result);
9869 //     jcc(Assembler::equal, exit);
9870 //     movl(dst, greater_result);
9871 //   exit:
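//
// The pre-P6 forms below get the x87 condition codes into EFLAGS with
// FNSTSW AX / SAHF, which maps C0->CF, C2->PF and C3->ZF so the ordinary
// JB/JP/JE branches apply; the P6 forms use FUCOMIP, which sets those
// flags directly.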
9872 
9873 // P6 version of float compare, sets condition codes in EFLAGS
9874 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9875   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9876   match(Set cr (CmpF src1 src2));
9877   effect(KILL rax);
9878   ins_cost(150);
9879   format %{ "FLD    $src1\n\t"
9880             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9881             "JNP    exit\n\t"
9882             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9883             "SAHF\n"
9884      "exit:\tNOP               // avoid branch to branch" %}
9885   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9886   ins_encode( Push_Reg_DPR(src1),
9887               OpcP, RegOpc(src2),
9888               cmpF_P6_fixup );
9889   ins_pipe( pipe_slow );
9890 %}
9891 
9892 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9893   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9894   match(Set cr (CmpF src1 src2));
9895   ins_cost(100);
9896   format %{ "FLD    $src1\n\t"
9897             "FUCOMIP ST,$src2  // P6 instruction" %}
9898   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9899   ins_encode( Push_Reg_DPR(src1),
9900               OpcP, RegOpc(src2));
9901   ins_pipe( pipe_slow );
9902 %}
9903 
9904 
9905 // Compare & branch
9906 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9907   predicate(UseSSE == 0);
9908   match(Set cr (CmpF src1 src2));
9909   effect(KILL rax);
9910   ins_cost(200);
9911   format %{ "FLD    $src1\n\t"
9912             "FCOMp  $src2\n\t"
9913             "FNSTSW AX\n\t"
9914             "TEST   AX,0x400\n\t"
9915             "JZ,s   flags\n\t"
9916             "MOV    AH,1\t# unordered treat as LT\n"
9917     "flags:\tSAHF" %}
9918   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9919   ins_encode( Push_Reg_DPR(src1),
9920               OpcP, RegOpc(src2),
9921               fpu_flags);
9922   ins_pipe( pipe_slow );
9923 %}
9924 
9925 // Compare vs zero into -1,0,1
9926 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9927   predicate(UseSSE == 0);
9928   match(Set dst (CmpF3 src1 zero));
9929   effect(KILL cr, KILL rax);
9930   ins_cost(280);
9931   format %{ "FTSTF  $dst,$src1" %}
9932   opcode(0xE4, 0xD9);
9933   ins_encode( Push_Reg_DPR(src1),
9934               OpcS, OpcP, PopFPU,
9935               CmpF_Result(dst));
9936   ins_pipe( pipe_slow );
9937 %}
9938 
9939 // Compare into -1,0,1
9940 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9941   predicate(UseSSE == 0);
9942   match(Set dst (CmpF3 src1 src2));
9943   effect(KILL cr, KILL rax);
9944   ins_cost(300);
9945   format %{ "FCMPF  $dst,$src1,$src2" %}
9946   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9947   ins_encode( Push_Reg_DPR(src1),
9948               OpcP, RegOpc(src2),
9949               CmpF_Result(dst));
9950   ins_pipe( pipe_slow );
9951 %}
9952 
9953 // float compare and set condition codes in EFLAGS by XMM regs
9954 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9955   predicate(UseSSE>=1);
9956   match(Set cr (CmpF src1 src2));
9957   ins_cost(145);
9958   format %{ "UCOMISS $src1,$src2\n\t"
9959             "JNP,s   exit\n\t"
9960             "PUSHF\t# saw NaN, set CF\n\t"
9961             "AND     [rsp], #0xffffff2b\n\t"
9962             "POPF\n"
9963     "exit:" %}
9964   ins_encode %{
9965     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9966     emit_cmpfp_fixup(_masm);
9967   %}
9968   ins_pipe( pipe_slow );
9969 %}
9970 
9971 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9972   predicate(UseSSE>=1);
9973   match(Set cr (CmpF src1 src2));
9974   ins_cost(100);
9975   format %{ "UCOMISS $src1,$src2" %}
9976   ins_encode %{
9977     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9978   %}
9979   ins_pipe( pipe_slow );
9980 %}
9981 
9982 // float compare and set condition codes in EFLAGS by XMM regs
9983 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
9984   predicate(UseSSE>=1);
9985   match(Set cr (CmpF src1 (LoadF src2)));
9986   ins_cost(165);
9987   format %{ "UCOMISS $src1,$src2\n\t"
9988             "JNP,s   exit\n\t"
9989             "PUSHF\t# saw NaN, set CF\n\t"
9990             "AND     [rsp], #0xffffff2b\n\t"
9991             "POPF\n"
9992     "exit:" %}
9993   ins_encode %{
9994     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9995     emit_cmpfp_fixup(_masm);
9996   %}
9997   ins_pipe( pipe_slow );
9998 %}
9999 
10000 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10001   predicate(UseSSE>=1);
10002   match(Set cr (CmpF src1 (LoadF src2)));
10003   ins_cost(100);
10004   format %{ "UCOMISS $src1,$src2" %}
10005   ins_encode %{
10006     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10007   %}
10008   ins_pipe( pipe_slow );
10009 %}
10010 
10011 // Compare into -1,0,1 in XMM
10012 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10013   predicate(UseSSE>=1);
10014   match(Set dst (CmpF3 src1 src2));
10015   effect(KILL cr);
10016   ins_cost(255);
10017   format %{ "UCOMISS $src1, $src2\n\t"
10018             "MOV     $dst, #-1\n\t"
10019             "JP,s    done\n\t"
10020             "JB,s    done\n\t"
10021             "SETNE   $dst\n\t"
10022             "MOVZB   $dst, $dst\n"
10023     "done:" %}
10024   ins_encode %{
10025     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10026     emit_cmpfp3(_masm, $dst$$Register);
10027   %}
10028   ins_pipe( pipe_slow );
10029 %}
10030 
10031 // Compare into -1,0,1 in XMM and memory
10032 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10033   predicate(UseSSE>=1);
10034   match(Set dst (CmpF3 src1 (LoadF src2)));
10035   effect(KILL cr);
10036   ins_cost(275);
10037   format %{ "UCOMISS $src1, $src2\n\t"
10038             "MOV     $dst, #-1\n\t"
10039             "JP,s    done\n\t"
10040             "JB,s    done\n\t"
10041             "SETNE   $dst\n\t"
10042             "MOVZB   $dst, $dst\n"
10043     "done:" %}
10044   ins_encode %{
10045     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10046     emit_cmpfp3(_masm, $dst$$Register);
10047   %}
10048   ins_pipe( pipe_slow );
10049 %}
10050 
10051 // Spill to obtain 24-bit precision
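// (The stackSlotF destination makes Pop_Mem_FPR store the result with
// FSTP_S to a 32-bit stack slot; that store is what actually rounds the
// wider x87 result to single precision.)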
10052 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10053   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10054   match(Set dst (SubF src1 src2));
10055 
10056   format %{ "FSUB   $dst,$src1 - $src2" %}
10057   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10058   ins_encode( Push_Reg_FPR(src1),
10059               OpcReg_FPR(src2),
10060               Pop_Mem_FPR(dst) );
10061   ins_pipe( fpu_mem_reg_reg );
10062 %}
10063 //
10064 // This instruction does not round to 24-bits
10065 instruct subFPR_reg(regFPR dst, regFPR src) %{
10066   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10067   match(Set dst (SubF dst src));
10068 
10069   format %{ "FSUB   $dst,$src" %}
10070   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10071   ins_encode( Push_Reg_FPR(src),
10072               OpcP, RegOpc(dst) );
10073   ins_pipe( fpu_reg_reg );
10074 %}
10075 
10076 // Spill to obtain 24-bit precision
10077 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10078   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10079   match(Set dst (AddF src1 src2));
10080 
10081   format %{ "FADD   $dst,$src1,$src2" %}
10082   opcode(0xD8, 0x0); /* D8 C0+i */
10083   ins_encode( Push_Reg_FPR(src2),
10084               OpcReg_FPR(src1),
10085               Pop_Mem_FPR(dst) );
10086   ins_pipe( fpu_mem_reg_reg );
10087 %}
10088 //
10089 // This instruction does not round to 24-bits
10090 instruct addFPR_reg(regFPR dst, regFPR src) %{
10091   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10092   match(Set dst (AddF dst src));
10093 
10094   format %{ "FLD    $src\n\t"
10095             "FADDp  $dst,ST" %}
10096   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10097   ins_encode( Push_Reg_FPR(src),
10098               OpcP, RegOpc(dst) );
10099   ins_pipe( fpu_reg_reg );
10100 %}
10101 
10102 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10103   predicate(UseSSE==0);
10104   match(Set dst (AbsF src));
10105   ins_cost(100);
10106   format %{ "FABS" %}
10107   opcode(0xE1, 0xD9);
10108   ins_encode( OpcS, OpcP );
10109   ins_pipe( fpu_reg_reg );
10110 %}
10111 
10112 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10113   predicate(UseSSE==0);
10114   match(Set dst (NegF src));
10115   ins_cost(100);
10116   format %{ "FCHS" %}
10117   opcode(0xE0, 0xD9);
10118   ins_encode( OpcS, OpcP );
10119   ins_pipe( fpu_reg_reg );
10120 %}
10121 
10122 // Cisc-alternate to addFPR_reg
10123 // Spill to obtain 24-bit precision
10124 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10125   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10126   match(Set dst (AddF src1 (LoadF src2)));
10127 
10128   format %{ "FLD    $src2\n\t"
10129             "FADD   ST,$src1\n\t"
10130             "FSTP_S $dst" %}
10131   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10132   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10133               OpcReg_FPR(src1),
10134               Pop_Mem_FPR(dst) );
10135   ins_pipe( fpu_mem_reg_mem );
10136 %}
10137 //
10138 // Cisc-alternate to addFPR_reg
10139 // This instruction does not round to 24-bits
10140 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10141   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10142   match(Set dst (AddF dst (LoadF src)));
10143 
10144   format %{ "FADD   $dst,$src" %}
10145   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10146   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10147               OpcP, RegOpc(dst) );
10148   ins_pipe( fpu_reg_mem );
10149 %}
10150 
// Following two instructions for _222_mpegaudio
10152 // Spill to obtain 24-bit precision
10153 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10154   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10155   match(Set dst (AddF src1 src2));
10156 
10157   format %{ "FADD   $dst,$src1,$src2" %}
10158   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10159   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10160               OpcReg_FPR(src2),
10161               Pop_Mem_FPR(dst) );
10162   ins_pipe( fpu_mem_reg_mem );
10163 %}
10164 
10165 // Cisc-spill variant
10166 // Spill to obtain 24-bit precision
10167 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10168   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10169   match(Set dst (AddF src1 (LoadF src2)));
10170 
10171   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10172   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10173   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10174               set_instruction_start,
10175               OpcP, RMopc_Mem(secondary,src1),
10176               Pop_Mem_FPR(dst) );
10177   ins_pipe( fpu_mem_mem_mem );
10178 %}
10179 
10180 // Spill to obtain 24-bit precision
10181 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10182   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10183   match(Set dst (AddF src1 src2));
10184 
10185   format %{ "FADD   $dst,$src1,$src2" %}
10186   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10187   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10188               set_instruction_start,
10189               OpcP, RMopc_Mem(secondary,src1),
10190               Pop_Mem_FPR(dst) );
10191   ins_pipe( fpu_mem_mem_mem );
10192 %}
10193 
10194 
10195 // Spill to obtain 24-bit precision
10196 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10197   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10198   match(Set dst (AddF src con));
10199   format %{ "FLD    $src\n\t"
10200             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10201             "FSTP_S $dst"  %}
10202   ins_encode %{
10203     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10204     __ fadd_s($constantaddress($con));
10205     __ fstp_s(Address(rsp, $dst$$disp));
10206   %}
10207   ins_pipe(fpu_mem_reg_con);
10208 %}
10209 //
10210 // This instruction does not round to 24-bits
10211 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10212   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10213   match(Set dst (AddF src con));
10214   format %{ "FLD    $src\n\t"
10215             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10216             "FSTP   $dst"  %}
10217   ins_encode %{
10218     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10219     __ fadd_s($constantaddress($con));
10220     __ fstp_d($dst$$reg);
10221   %}
10222   ins_pipe(fpu_reg_reg_con);
10223 %}
10224 
10225 // Spill to obtain 24-bit precision
10226 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10227   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10228   match(Set dst (MulF src1 src2));
10229 
10230   format %{ "FLD    $src1\n\t"
10231             "FMUL   $src2\n\t"
10232             "FSTP_S $dst"  %}
10233   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10234   ins_encode( Push_Reg_FPR(src1),
10235               OpcReg_FPR(src2),
10236               Pop_Mem_FPR(dst) );
10237   ins_pipe( fpu_mem_reg_reg );
10238 %}
10239 //
10240 // This instruction does not round to 24-bits
10241 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10242   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10243   match(Set dst (MulF src1 src2));
10244 
10245   format %{ "FLD    $src1\n\t"
10246             "FMUL   $src2\n\t"
10247             "FSTP_S $dst"  %}
10248   opcode(0xD8, 0x1); /* D8 C8+i */
10249   ins_encode( Push_Reg_FPR(src2),
10250               OpcReg_FPR(src1),
10251               Pop_Reg_FPR(dst) );
10252   ins_pipe( fpu_reg_reg_reg );
10253 %}
10254 
10255 
10256 // Spill to obtain 24-bit precision
10257 // Cisc-alternate to reg-reg multiply
10258 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10259   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10260   match(Set dst (MulF src1 (LoadF src2)));
10261 
10262   format %{ "FLD_S  $src2\n\t"
10263             "FMUL   $src1\n\t"
10264             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10266   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10267               OpcReg_FPR(src1),
10268               Pop_Mem_FPR(dst) );
10269   ins_pipe( fpu_mem_reg_mem );
10270 %}
10271 //
10272 // This instruction does not round to 24-bits
10273 // Cisc-alternate to reg-reg multiply
10274 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10275   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10276   match(Set dst (MulF src1 (LoadF src2)));
10277 
10278   format %{ "FMUL   $dst,$src1,$src2" %}
10279   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10280   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10281               OpcReg_FPR(src1),
10282               Pop_Reg_FPR(dst) );
10283   ins_pipe( fpu_reg_reg_mem );
10284 %}
10285 
10286 // Spill to obtain 24-bit precision
10287 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10288   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10289   match(Set dst (MulF src1 src2));
10290 
10291   format %{ "FMUL   $dst,$src1,$src2" %}
10292   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10293   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10294               set_instruction_start,
10295               OpcP, RMopc_Mem(secondary,src1),
10296               Pop_Mem_FPR(dst) );
10297   ins_pipe( fpu_mem_mem_mem );
10298 %}
10299 
10300 // Spill to obtain 24-bit precision
10301 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10302   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10303   match(Set dst (MulF src con));
10304 
10305   format %{ "FLD    $src\n\t"
10306             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10307             "FSTP_S $dst"  %}
10308   ins_encode %{
10309     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10310     __ fmul_s($constantaddress($con));
10311     __ fstp_s(Address(rsp, $dst$$disp));
10312   %}
10313   ins_pipe(fpu_mem_reg_con);
10314 %}
10315 //
10316 // This instruction does not round to 24-bits
10317 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10318   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10319   match(Set dst (MulF src con));
10320 
10321   format %{ "FLD    $src\n\t"
10322             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10323             "FSTP   $dst"  %}
10324   ins_encode %{
10325     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10326     __ fmul_s($constantaddress($con));
10327     __ fstp_d($dst$$reg);
10328   %}
10329   ins_pipe(fpu_reg_reg_con);
10330 %}
10331 
10332 
10333 //
10334 // MACRO1 -- subsume unshared load into mulFPR
10335 // This instruction does not round to 24-bits
10336 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10337   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10338   match(Set dst (MulF (LoadF mem1) src));
10339 
10340   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10341             "FMUL   ST,$src\n\t"
10342             "FSTP   $dst" %}
10343   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10344   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10345               OpcReg_FPR(src),
10346               Pop_Reg_FPR(dst) );
10347   ins_pipe( fpu_reg_reg_mem );
10348 %}
10349 //
10350 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10351 // This instruction does not round to 24-bits
10352 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10353   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10354   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10355   ins_cost(95);
10356 
10357   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10358             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10359             "FADD   ST,$src2\n\t"
10360             "FSTP   $dst" %}
10361   opcode(0xD9); /* LoadF D9 /0 */
10362   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10363               FMul_ST_reg(src1),
10364               FAdd_ST_reg(src2),
10365               Pop_Reg_FPR(dst) );
10366   ins_pipe( fpu_reg_mem_reg_reg );
10367 %}
10368 
10369 // MACRO3 -- addFPR a mulFPR
10370 // This instruction does not round to 24-bits.  It is a '2-address'
10371 // instruction in that the result goes back to src2.  This eliminates
10372 // a move from the macro; possibly the register allocator will have
10373 // to add it back (and maybe not).
10374 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10375   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10376   match(Set src2 (AddF (MulF src0 src1) src2));
10377 
10378   format %{ "FLD    $src0     ===MACRO3===\n\t"
10379             "FMUL   ST,$src1\n\t"
10380             "FADDP  $src2,ST" %}
10381   opcode(0xD9); /* LoadF D9 /0 */
10382   ins_encode( Push_Reg_FPR(src0),
10383               FMul_ST_reg(src1),
10384               FAddP_reg_ST(src2) );
10385   ins_pipe( fpu_reg_reg_reg );
10386 %}
10387 
10388 // MACRO4 -- divFPR subFPR
10389 // This instruction does not round to 24-bits
10390 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10391   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10392   match(Set dst (DivF (SubF src2 src1) src3));
10393 
10394   format %{ "FLD    $src2   ===MACRO4===\n\t"
10395             "FSUB   ST,$src1\n\t"
10396             "FDIV   ST,$src3\n\t"
10397             "FSTP  $dst" %}
10398   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10399   ins_encode( Push_Reg_FPR(src2),
10400               subFPR_divFPR_encode(src1,src3),
10401               Pop_Reg_FPR(dst) );
10402   ins_pipe( fpu_reg_reg_reg_reg );
10403 %}
10404 
10405 // Spill to obtain 24-bit precision
10406 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10407   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10408   match(Set dst (DivF src1 src2));
10409 
10410   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10412   ins_encode( Push_Reg_FPR(src1),
10413               OpcReg_FPR(src2),
10414               Pop_Mem_FPR(dst) );
10415   ins_pipe( fpu_mem_reg_reg );
10416 %}
10417 //
10418 // This instruction does not round to 24-bits
10419 instruct divFPR_reg(regFPR dst, regFPR src) %{
10420   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10421   match(Set dst (DivF dst src));
10422 
10423   format %{ "FDIV   $dst,$src" %}
10424   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10425   ins_encode( Push_Reg_FPR(src),
10426               OpcP, RegOpc(dst) );
10427   ins_pipe( fpu_reg_reg );
10428 %}
10429 
10430 
10431 // Spill to obtain 24-bit precision
10432 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10433   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10434   match(Set dst (ModF src1 src2));
10435   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10436 
10437   format %{ "FMOD   $dst,$src1,$src2" %}
10438   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10439               emitModDPR(),
10440               Push_Result_Mod_DPR(src2),
10441               Pop_Mem_FPR(dst));
10442   ins_pipe( pipe_slow );
10443 %}
10444 //
10445 // This instruction does not round to 24-bits
10446 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10447   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10448   match(Set dst (ModF dst src));
10449   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10450 
10451   format %{ "FMOD   $dst,$src" %}
10452   ins_encode(Push_Reg_Mod_DPR(dst, src),
10453               emitModDPR(),
10454               Push_Result_Mod_DPR(src),
10455               Pop_Reg_FPR(dst));
10456   ins_pipe( pipe_slow );
10457 %}
10458 
10459 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10460   predicate(UseSSE>=1);
10461   match(Set dst (ModF src0 src1));
10462   effect(KILL rax, KILL cr);
10463   format %{ "SUB    ESP,4\t # FMOD\n"
10464           "\tMOVSS  [ESP+0],$src1\n"
10465           "\tFLD_S  [ESP+0]\n"
10466           "\tMOVSS  [ESP+0],$src0\n"
10467           "\tFLD_S  [ESP+0]\n"
10468      "loop:\tFPREM\n"
10469           "\tFWAIT\n"
10470           "\tFNSTSW AX\n"
10471           "\tSAHF\n"
10472           "\tJP     loop\n"
10473           "\tFSTP_S [ESP+0]\n"
10474           "\tMOVSS  $dst,[ESP+0]\n"
10475           "\tADD    ESP,4\n"
10476           "\tFSTP   ST0\t # Restore FPU Stack"
10477     %}
10478   ins_cost(250);
10479   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10480   ins_pipe( pipe_slow );
10481 %}
10482 
10483 
10484 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10486 
10487 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10488   predicate(UseSSE==0);
10489   match(Set dst (RoundFloat src));
10490   ins_cost(125);
10491   format %{ "FST_S  $dst,$src\t# F-round" %}
10492   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10493   ins_pipe( fpu_mem_reg );
10494 %}
10495 
10496 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10497   predicate(UseSSE<=1);
10498   match(Set dst (RoundDouble src));
10499   ins_cost(125);
10500   format %{ "FST_D  $dst,$src\t# D-round" %}
10501   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10502   ins_pipe( fpu_mem_reg );
10503 %}
10504 
// Force rounding to 24-bit precision and 8-bit exponent
10506 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10507   predicate(UseSSE==0);
10508   match(Set dst (ConvD2F src));
10509   format %{ "FST_S  $dst,$src\t# F-round" %}
10510   expand %{
10511     roundFloat_mem_reg(dst,src);
10512   %}
10513 %}
10514 
// Force rounding to 24-bit precision and 8-bit exponent
10516 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10517   predicate(UseSSE==1);
10518   match(Set dst (ConvD2F src));
10519   effect( KILL cr );
10520   format %{ "SUB    ESP,4\n\t"
10521             "FST_S  [ESP],$src\t# F-round\n\t"
10522             "MOVSS  $dst,[ESP]\n\t"
10523             "ADD ESP,4" %}
10524   ins_encode %{
10525     __ subptr(rsp, 4);
10526     if ($src$$reg != FPR1L_enc) {
10527       __ fld_s($src$$reg-1);
10528       __ fstp_s(Address(rsp, 0));
10529     } else {
10530       __ fst_s(Address(rsp, 0));
10531     }
10532     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10533     __ addptr(rsp, 4);
10534   %}
10535   ins_pipe( pipe_slow );
10536 %}
10537 
10538 // Force rounding double precision to single precision
10539 instruct convD2F_reg(regF dst, regD src) %{
10540   predicate(UseSSE>=2);
10541   match(Set dst (ConvD2F src));
10542   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10543   ins_encode %{
10544     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10545   %}
10546   ins_pipe( pipe_slow );
10547 %}
10548 
10549 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10550   predicate(UseSSE==0);
10551   match(Set dst (ConvF2D src));
10552   format %{ "FST_S  $dst,$src\t# D-round" %}
10553   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10554   ins_pipe( fpu_reg_reg );
10555 %}
10556 
10557 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10558   predicate(UseSSE==1);
10559   match(Set dst (ConvF2D src));
10560   format %{ "FST_D  $dst,$src\t# D-round" %}
10561   expand %{
10562     roundDouble_mem_reg(dst,src);
10563   %}
10564 %}
10565 
10566 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10567   predicate(UseSSE==1);
10568   match(Set dst (ConvF2D src));
10569   effect( KILL cr );
10570   format %{ "SUB    ESP,4\n\t"
10571             "MOVSS  [ESP] $src\n\t"
10572             "FLD_S  [ESP]\n\t"
10573             "ADD    ESP,4\n\t"
10574             "FSTP   $dst\t# D-round" %}
10575   ins_encode %{
10576     __ subptr(rsp, 4);
10577     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10578     __ fld_s(Address(rsp, 0));
10579     __ addptr(rsp, 4);
10580     __ fstp_d($dst$$reg);
10581   %}
10582   ins_pipe( pipe_slow );
10583 %}
10584 
10585 instruct convF2D_reg(regD dst, regF src) %{
10586   predicate(UseSSE>=2);
10587   match(Set dst (ConvF2D src));
10588   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10589   ins_encode %{
10590     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10591   %}
10592   ins_pipe( pipe_slow );
10593 %}
10594 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
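// The x87 FIST family stores the 32-bit "integer indefinite" value
// 0x80000000 for a NaN or an out-of-range source, so the code below just
// compares the result against 0x80000000 and falls into the d2i_wrapper
// slow path for that one bit pattern (which also covers a legitimate
// Integer.MIN_VALUE result).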
10596 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10597   predicate(UseSSE<=1);
10598   match(Set dst (ConvD2I src));
10599   effect( KILL tmp, KILL cr );
10600   format %{ "FLD    $src\t# Convert double to int \n\t"
10601             "FLDCW  trunc mode\n\t"
10602             "SUB    ESP,4\n\t"
10603             "FISTp  [ESP + #0]\n\t"
10604             "FLDCW  std/24-bit mode\n\t"
10605             "POP    EAX\n\t"
10606             "CMP    EAX,0x80000000\n\t"
10607             "JNE,s  fast\n\t"
10608             "FLD_D  $src\n\t"
10609             "CALL   d2i_wrapper\n"
10610       "fast:" %}
10611   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10612   ins_pipe( pipe_slow );
10613 %}
10614 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10616 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10617   predicate(UseSSE>=2);
10618   match(Set dst (ConvD2I src));
10619   effect( KILL tmp, KILL cr );
10620   format %{ "CVTTSD2SI $dst, $src\n\t"
10621             "CMP    $dst,0x80000000\n\t"
10622             "JNE,s  fast\n\t"
10623             "SUB    ESP, 8\n\t"
10624             "MOVSD  [ESP], $src\n\t"
10625             "FLD_D  [ESP]\n\t"
10626             "ADD    ESP, 8\n\t"
10627             "CALL   d2i_wrapper\n"
10628       "fast:" %}
10629   ins_encode %{
10630     Label fast;
10631     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10632     __ cmpl($dst$$Register, 0x80000000);
10633     __ jccb(Assembler::notEqual, fast);
10634     __ subptr(rsp, 8);
10635     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10636     __ fld_d(Address(rsp, 0));
10637     __ addptr(rsp, 8);
10638     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10639     __ bind(fast);
10640   %}
10641   ins_pipe( pipe_slow );
10642 %}
10643 
10644 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10645   predicate(UseSSE<=1);
10646   match(Set dst (ConvD2L src));
10647   effect( KILL cr );
10648   format %{ "FLD    $src\t# Convert double to long\n\t"
10649             "FLDCW  trunc mode\n\t"
10650             "SUB    ESP,8\n\t"
10651             "FISTp  [ESP + #0]\n\t"
10652             "FLDCW  std/24-bit mode\n\t"
10653             "POP    EAX\n\t"
10654             "POP    EDX\n\t"
10655             "CMP    EDX,0x80000000\n\t"
10656             "JNE,s  fast\n\t"
10657             "TEST   EAX,EAX\n\t"
10658             "JNE,s  fast\n\t"
10659             "FLD    $src\n\t"
10660             "CALL   d2l_wrapper\n"
10661       "fast:" %}
10662   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10663   ins_pipe( pipe_slow );
10664 %}
10665 
10666 // XMM lacks a float/double->long conversion, so use the old FPU stack.
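// (CVTTSD2SI/CVTTSS2SI can only produce a 32-bit result here; the 64-bit
// forms require REX.W, which exists only on x86-64.  So the long paths
// below spill the XMM value and reuse the FISTP sequence.)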
10667 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10668   predicate (UseSSE>=2);
10669   match(Set dst (ConvD2L src));
10670   effect( KILL cr );
10671   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10672             "MOVSD  [ESP],$src\n\t"
10673             "FLD_D  [ESP]\n\t"
10674             "FLDCW  trunc mode\n\t"
10675             "FISTp  [ESP + #0]\n\t"
10676             "FLDCW  std/24-bit mode\n\t"
10677             "POP    EAX\n\t"
10678             "POP    EDX\n\t"
10679             "CMP    EDX,0x80000000\n\t"
10680             "JNE,s  fast\n\t"
10681             "TEST   EAX,EAX\n\t"
10682             "JNE,s  fast\n\t"
10683             "SUB    ESP,8\n\t"
10684             "MOVSD  [ESP],$src\n\t"
10685             "FLD_D  [ESP]\n\t"
10686             "ADD    ESP,8\n\t"
10687             "CALL   d2l_wrapper\n"
10688       "fast:" %}
10689   ins_encode %{
10690     Label fast;
10691     __ subptr(rsp, 8);
10692     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10693     __ fld_d(Address(rsp, 0));
10694     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10695     __ fistp_d(Address(rsp, 0));
10696     // Restore the rounding mode, mask the exception
10697     if (Compile::current()->in_24_bit_fp_mode()) {
10698       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10699     } else {
10700       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10701     }
10702     // Load the converted long, adjust CPU stack
10703     __ pop(rax);
10704     __ pop(rdx);
10705     __ cmpl(rdx, 0x80000000);
10706     __ jccb(Assembler::notEqual, fast);
10707     __ testl(rax, rax);
10708     __ jccb(Assembler::notEqual, fast);
10709     __ subptr(rsp, 8);
10710     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10711     __ fld_d(Address(rsp, 0));
10712     __ addptr(rsp, 8);
10713     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10714     __ bind(fast);
10715   %}
10716   ins_pipe( pipe_slow );
10717 %}
10718 
10719 // Convert a double to an int.  Java semantics require we do complex
10720 // manglations in the corner cases.  So we set the rounding mode to
10721 // 'zero', store the darned double down as an int, and reset the
10722 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
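// The d2i_wrapper/d2l_wrapper stubs supply the Java-mandated results for
// those cases: NaN becomes 0 and out-of-range values saturate to
// MIN_VALUE/MAX_VALUE.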
10725 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10726   predicate(UseSSE==0);
10727   match(Set dst (ConvF2I src));
10728   effect( KILL tmp, KILL cr );
10729   format %{ "FLD    $src\t# Convert float to int \n\t"
10730             "FLDCW  trunc mode\n\t"
10731             "SUB    ESP,4\n\t"
10732             "FISTp  [ESP + #0]\n\t"
10733             "FLDCW  std/24-bit mode\n\t"
10734             "POP    EAX\n\t"
10735             "CMP    EAX,0x80000000\n\t"
10736             "JNE,s  fast\n\t"
10737             "FLD    $src\n\t"
10738             "CALL   d2i_wrapper\n"
10739       "fast:" %}
10740   // DPR2I_encoding works for FPR2I
10741   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10742   ins_pipe( pipe_slow );
10743 %}
10744 
10745 // Convert a float in xmm to an int reg.
10746 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10747   predicate(UseSSE>=1);
10748   match(Set dst (ConvF2I src));
10749   effect( KILL tmp, KILL cr );
10750   format %{ "CVTTSS2SI $dst, $src\n\t"
10751             "CMP    $dst,0x80000000\n\t"
10752             "JNE,s  fast\n\t"
10753             "SUB    ESP, 4\n\t"
10754             "MOVSS  [ESP], $src\n\t"
10755             "FLD    [ESP]\n\t"
10756             "ADD    ESP, 4\n\t"
10757             "CALL   d2i_wrapper\n"
10758       "fast:" %}
10759   ins_encode %{
10760     Label fast;
10761     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10762     __ cmpl($dst$$Register, 0x80000000);
10763     __ jccb(Assembler::notEqual, fast);
10764     __ subptr(rsp, 4);
10765     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10766     __ fld_s(Address(rsp, 0));
10767     __ addptr(rsp, 4);
10768     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10769     __ bind(fast);
10770   %}
10771   ins_pipe( pipe_slow );
10772 %}
10773 
10774 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10775   predicate(UseSSE==0);
10776   match(Set dst (ConvF2L src));
10777   effect( KILL cr );
10778   format %{ "FLD    $src\t# Convert float to long\n\t"
10779             "FLDCW  trunc mode\n\t"
10780             "SUB    ESP,8\n\t"
10781             "FISTp  [ESP + #0]\n\t"
10782             "FLDCW  std/24-bit mode\n\t"
10783             "POP    EAX\n\t"
10784             "POP    EDX\n\t"
10785             "CMP    EDX,0x80000000\n\t"
10786             "JNE,s  fast\n\t"
10787             "TEST   EAX,EAX\n\t"
10788             "JNE,s  fast\n\t"
10789             "FLD    $src\n\t"
10790             "CALL   d2l_wrapper\n"
10791       "fast:" %}
10792   // DPR2L_encoding works for FPR2L
10793   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10794   ins_pipe( pipe_slow );
10795 %}
10796 
10797 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10798 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10799   predicate (UseSSE>=1);
10800   match(Set dst (ConvF2L src));
10801   effect( KILL cr );
10802   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10803             "MOVSS  [ESP],$src\n\t"
10804             "FLD_S  [ESP]\n\t"
10805             "FLDCW  trunc mode\n\t"
10806             "FISTp  [ESP + #0]\n\t"
10807             "FLDCW  std/24-bit mode\n\t"
10808             "POP    EAX\n\t"
10809             "POP    EDX\n\t"
10810             "CMP    EDX,0x80000000\n\t"
10811             "JNE,s  fast\n\t"
10812             "TEST   EAX,EAX\n\t"
10813             "JNE,s  fast\n\t"
10814             "SUB    ESP,4\t# Convert float to long\n\t"
10815             "MOVSS  [ESP],$src\n\t"
10816             "FLD_S  [ESP]\n\t"
10817             "ADD    ESP,4\n\t"
10818             "CALL   d2l_wrapper\n"
10819       "fast:" %}
10820   ins_encode %{
10821     Label fast;
10822     __ subptr(rsp, 8);
10823     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10824     __ fld_s(Address(rsp, 0));
10825     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10826     __ fistp_d(Address(rsp, 0));
10827     // Restore the rounding mode, mask the exception
10828     if (Compile::current()->in_24_bit_fp_mode()) {
10829       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10830     } else {
10831       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10832     }
10833     // Load the converted long, adjust CPU stack
10834     __ pop(rax);
10835     __ pop(rdx);
10836     __ cmpl(rdx, 0x80000000);
10837     __ jccb(Assembler::notEqual, fast);
10838     __ testl(rax, rax);
10839     __ jccb(Assembler::notEqual, fast);
10840     __ subptr(rsp, 4);
10841     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10842     __ fld_s(Address(rsp, 0));
10843     __ addptr(rsp, 4);
10844     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10845     __ bind(fast);
10846   %}
10847   ins_pipe( pipe_slow );
10848 %}
10849 
10850 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10851   predicate( UseSSE<=1 );
10852   match(Set dst (ConvI2D src));
10853   format %{ "FILD   $src\n\t"
10854             "FSTP   $dst" %}
10855   opcode(0xDB, 0x0);  /* DB /0 */
10856   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10857   ins_pipe( fpu_reg_mem );
10858 %}
10859 
10860 instruct convI2D_reg(regD dst, rRegI src) %{
10861   predicate( UseSSE>=2 && !UseXmmI2D );
10862   match(Set dst (ConvI2D src));
10863   format %{ "CVTSI2SD $dst,$src" %}
10864   ins_encode %{
10865     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10866   %}
10867   ins_pipe( pipe_slow );
10868 %}
10869 
10870 instruct convI2D_mem(regD dst, memory mem) %{
10871   predicate( UseSSE>=2 );
10872   match(Set dst (ConvI2D (LoadI mem)));
10873   format %{ "CVTSI2SD $dst,$mem" %}
10874   ins_encode %{
10875     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10876   %}
10877   ins_pipe( pipe_slow );
10878 %}
10879 
10880 instruct convXI2D_reg(regD dst, rRegI src)
10881 %{
10882   predicate( UseSSE>=2 && UseXmmI2D );
10883   match(Set dst (ConvI2D src));
10884 
10885   format %{ "MOVD  $dst,$src\n\t"
10886             "CVTDQ2PD $dst,$dst\t# i2d" %}
10887   ins_encode %{
10888     __ movdl($dst$$XMMRegister, $src$$Register);
10889     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10890   %}
10891   ins_pipe(pipe_slow); // XXX
10892 %}
10893 
10894 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10895   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10896   match(Set dst (ConvI2D (LoadI mem)));
10897   format %{ "FILD   $mem\n\t"
10898             "FSTP   $dst" %}
10899   opcode(0xDB);      /* DB /0 */
10900   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10901               Pop_Reg_DPR(dst));
10902   ins_pipe( fpu_reg_mem );
10903 %}
10904 
10905 // Convert a byte to a float; no rounding step needed.
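// (An 8-bit value always fits exactly in a 24-bit significand, so the
// FILD result is already correctly rounded; the predicate matches the
// (AndI x, 255) zero-extension feeding the conversion.)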
10906 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10907   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10908   match(Set dst (ConvI2F src));
10909   format %{ "FILD   $src\n\t"
10910             "FSTP   $dst" %}
10911 
10912   opcode(0xDB, 0x0);  /* DB /0 */
10913   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10914   ins_pipe( fpu_reg_mem );
10915 %}
10916 
10917 // In 24-bit mode, force exponent rounding by storing back out
10918 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10919   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10920   match(Set dst (ConvI2F src));
10921   ins_cost(200);
10922   format %{ "FILD   $src\n\t"
10923             "FSTP_S $dst" %}
10924   opcode(0xDB, 0x0);  /* DB /0 */
10925   ins_encode( Push_Mem_I(src),
10926               Pop_Mem_FPR(dst));
10927   ins_pipe( fpu_mem_mem );
10928 %}
10929 
10930 // In 24-bit mode, force exponent rounding by storing back out
10931 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10932   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10933   match(Set dst (ConvI2F (LoadI mem)));
10934   ins_cost(200);
10935   format %{ "FILD   $mem\n\t"
10936             "FSTP_S $dst" %}
10937   opcode(0xDB);  /* DB /0 */
10938   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10939               Pop_Mem_FPR(dst));
10940   ins_pipe( fpu_mem_mem );
10941 %}
10942 
10943 // This instruction does not round to 24-bits
10944 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10945   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10946   match(Set dst (ConvI2F src));
10947   format %{ "FILD   $src\n\t"
10948             "FSTP   $dst" %}
10949   opcode(0xDB, 0x0);  /* DB /0 */
10950   ins_encode( Push_Mem_I(src),
10951               Pop_Reg_FPR(dst));
10952   ins_pipe( fpu_reg_mem );
10953 %}
10954 
10955 // This instruction does not round to 24-bits
10956 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10957   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10958   match(Set dst (ConvI2F (LoadI mem)));
10959   format %{ "FILD   $mem\n\t"
10960             "FSTP   $dst" %}
10961   opcode(0xDB);      /* DB /0 */
10962   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10963               Pop_Reg_FPR(dst));
10964   ins_pipe( fpu_reg_mem );
10965 %}
10966 
10967 // Convert an int to a float in xmm; no rounding step needed.
10968 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
10970   match(Set dst (ConvI2F src));
10971   format %{ "CVTSI2SS $dst, $src" %}
10972   ins_encode %{
10973     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10974   %}
10975   ins_pipe( pipe_slow );
10976 %}
10977 
instruct convXI2F_reg(regF dst, rRegI src)
10979 %{
10980   predicate( UseSSE>=2 && UseXmmI2F );
10981   match(Set dst (ConvI2F src));
10982 
10983   format %{ "MOVD  $dst,$src\n\t"
10984             "CVTDQ2PS $dst,$dst\t# i2f" %}
10985   ins_encode %{
10986     __ movdl($dst$$XMMRegister, $src$$Register);
10987     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10988   %}
10989   ins_pipe(pipe_slow); // XXX
10990 %}
10991 
10992 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
10993   match(Set dst (ConvI2L src));
10994   effect(KILL cr);
10995   ins_cost(375);
10996   format %{ "MOV    $dst.lo,$src\n\t"
10997             "MOV    $dst.hi,$src\n\t"
10998             "SAR    $dst.hi,31" %}
10999   ins_encode(convert_int_long(dst,src));
11000   ins_pipe( ialu_reg_reg_long );
11001 %}
11002 
11003 // Zero-extend convert int to long
11004 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11005   match(Set dst (AndL (ConvI2L src) mask) );
11006   effect( KILL flags );
11007   ins_cost(250);
11008   format %{ "MOV    $dst.lo,$src\n\t"
11009             "XOR    $dst.hi,$dst.hi" %}
11010   opcode(0x33); // XOR
11011   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11012   ins_pipe( ialu_reg_reg_long );
11013 %}
11014 
11015 // Zero-extend long
11016 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11017   match(Set dst (AndL src mask) );
11018   effect( KILL flags );
11019   ins_cost(250);
11020   format %{ "MOV    $dst.lo,$src.lo\n\t"
11021             "XOR    $dst.hi,$dst.hi\n\t" %}
11022   opcode(0x33); // XOR
11023   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11024   ins_pipe( ialu_reg_reg_long );
11025 %}
11026 
11027 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11028   predicate (UseSSE<=1);
11029   match(Set dst (ConvL2D src));
11030   effect( KILL cr );
11031   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11032             "PUSH   $src.lo\n\t"
11033             "FILD   ST,[ESP + #0]\n\t"
11034             "ADD    ESP,8\n\t"
11035             "FSTP_D $dst\t# D-round" %}
11036   opcode(0xDF, 0x5);  /* DF /5 */
11037   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11038   ins_pipe( pipe_slow );
11039 %}
11040 
11041 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11042   predicate (UseSSE>=2);
11043   match(Set dst (ConvL2D src));
11044   effect( KILL cr );
11045   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11046             "PUSH   $src.lo\n\t"
11047             "FILD_D [ESP]\n\t"
11048             "FSTP_D [ESP]\n\t"
11049             "MOVSD  $dst,[ESP]\n\t"
11050             "ADD    ESP,8" %}
11051   opcode(0xDF, 0x5);  /* DF /5 */
11052   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11053   ins_pipe( pipe_slow );
11054 %}
11055 
11056 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11057   predicate (UseSSE>=1);
11058   match(Set dst (ConvL2F src));
11059   effect( KILL cr );
11060   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11061             "PUSH   $src.lo\n\t"
11062             "FILD_D [ESP]\n\t"
11063             "FSTP_S [ESP]\n\t"
11064             "MOVSS  $dst,[ESP]\n\t"
11065             "ADD    ESP,8" %}
11066   opcode(0xDF, 0x5);  /* DF /5 */
11067   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11068   ins_pipe( pipe_slow );
11069 %}
11070 
11071 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE==0);  // x87-only form; convL2F_reg above covers UseSSE>=1
11072   match(Set dst (ConvL2F src));
11073   effect( KILL cr );
11074   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11075             "PUSH   $src.lo\n\t"
11076             "FILD   ST,[ESP + #0]\n\t"
11077             "ADD    ESP,8\n\t"
11078             "FSTP_S $dst\t# F-round" %}
11079   opcode(0xDF, 0x5);  /* DF /5 */
11080   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11081   ins_pipe( pipe_slow );
11082 %}
11083 
11084 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11085   match(Set dst (ConvL2I src));
11086   effect( DEF dst, USE src );
11087   format %{ "MOV    $dst,$src.lo" %}
11088   ins_encode(enc_CopyL_Lo(dst,src));
11089   ins_pipe( ialu_reg_reg );
11090 %}
11091 
11092 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11093   match(Set dst (MoveF2I src));
11094   effect( DEF dst, USE src );
11095   ins_cost(100);
11096   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11097   ins_encode %{
11098     __ movl($dst$$Register, Address(rsp, $src$$disp));
11099   %}
11100   ins_pipe( ialu_reg_mem );
11101 %}
11102 
11103 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11104   predicate(UseSSE==0);
11105   match(Set dst (MoveF2I src));
11106   effect( DEF dst, USE src );
11107 
11108   ins_cost(125);
11109   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11110   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11111   ins_pipe( fpu_mem_reg );
11112 %}
11113 
11114 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11115   predicate(UseSSE>=1);
11116   match(Set dst (MoveF2I src));
11117   effect( DEF dst, USE src );
11118 
11119   ins_cost(95);
11120   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11121   ins_encode %{
11122     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11123   %}
11124   ins_pipe( pipe_slow );
11125 %}
11126 
11127 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11128   predicate(UseSSE>=2);
11129   match(Set dst (MoveF2I src));
11130   effect( DEF dst, USE src );
11131   ins_cost(85);
11132   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11133   ins_encode %{
11134     __ movdl($dst$$Register, $src$$XMMRegister);
11135   %}
11136   ins_pipe( pipe_slow );
11137 %}
11138 
11139 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11140   match(Set dst (MoveI2F src));
11141   effect( DEF dst, USE src );
11142 
11143   ins_cost(100);
11144   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11145   ins_encode %{
11146     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11147   %}
11148   ins_pipe( ialu_mem_reg );
11149 %}
11150 
11151 
11152 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11153   predicate(UseSSE==0);
11154   match(Set dst (MoveI2F src));
11155   effect(DEF dst, USE src);
11156 
11157   ins_cost(125);
11158   format %{ "FLD_S  $src\n\t"
11159             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11160   opcode(0xD9);               /* D9 /0, FLD m32real */
11161   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11162               Pop_Reg_FPR(dst) );
11163   ins_pipe( fpu_reg_mem );
11164 %}
11165 
11166 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11167   predicate(UseSSE>=1);
11168   match(Set dst (MoveI2F src));
11169   effect( DEF dst, USE src );
11170 
11171   ins_cost(95);
11172   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11173   ins_encode %{
11174     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11175   %}
11176   ins_pipe( pipe_slow );
11177 %}
11178 
11179 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11180   predicate(UseSSE>=2);
11181   match(Set dst (MoveI2F src));
11182   effect( DEF dst, USE src );
11183 
11184   ins_cost(85);
11185   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11186   ins_encode %{
11187     __ movdl($dst$$XMMRegister, $src$$Register);
11188   %}
11189   ins_pipe( pipe_slow );
11190 %}
11191 
11192 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11193   match(Set dst (MoveD2L src));
11194   effect(DEF dst, USE src);
11195 
11196   ins_cost(250);
11197   format %{ "MOV    $dst.lo,$src\n\t"
11198             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11199   opcode(0x8B, 0x8B);
11200   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11201   ins_pipe( ialu_mem_long_reg );
11202 %}
11203 
11204 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11205   predicate(UseSSE<=1);
11206   match(Set dst (MoveD2L src));
11207   effect(DEF dst, USE src);
11208 
11209   ins_cost(125);
11210   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11211   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11212   ins_pipe( fpu_mem_reg );
11213 %}
11214 
11215 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11216   predicate(UseSSE>=2);
11217   match(Set dst (MoveD2L src));
11218   effect(DEF dst, USE src);
11219   ins_cost(95);
11220   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11221   ins_encode %{
11222     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11223   %}
11224   ins_pipe( pipe_slow );
11225 %}
11226 
11227 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11228   predicate(UseSSE>=2);
11229   match(Set dst (MoveD2L src));
11230   effect(DEF dst, USE src, TEMP tmp);
11231   ins_cost(85);
11232   format %{ "MOVD   $dst.lo,$src\n\t"
11233             "PSHUFLW $tmp,$src,0x4E\n\t"
11234             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11235   ins_encode %{
11236     __ movdl($dst$$Register, $src$$XMMRegister);
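    // Note: 0x4E selects words {2,3,0,1} of the low quadword, i.e. it swaps the
    // two low dwords so that $src's high 32 bits land in the low dword of $tmp
    // for the following MOVD.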
11237     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11238     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11239   %}
11240   ins_pipe( pipe_slow );
11241 %}
11242 
11243 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11244   match(Set dst (MoveL2D src));
11245   effect(DEF dst, USE src);
11246 
11247   ins_cost(200);
11248   format %{ "MOV    $dst,$src.lo\n\t"
11249             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11250   opcode(0x89, 0x89);
11251   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11252   ins_pipe( ialu_mem_long_reg );
11253 %}
11254 
11255 
11256 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11257   predicate(UseSSE<=1);
11258   match(Set dst (MoveL2D src));
11259   effect(DEF dst, USE src);
11260   ins_cost(125);
11261 
11262   format %{ "FLD_D  $src\n\t"
11263             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11264   opcode(0xDD);               /* DD /0, FLD m64real */
11265   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11266               Pop_Reg_DPR(dst) );
11267   ins_pipe( fpu_reg_mem );
11268 %}
11269 
11270 
11271 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11272   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11273   match(Set dst (MoveL2D src));
11274   effect(DEF dst, USE src);
11275 
11276   ins_cost(95);
11277   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11278   ins_encode %{
11279     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11280   %}
11281   ins_pipe( pipe_slow );
11282 %}
11283 
11284 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11285   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11286   match(Set dst (MoveL2D src));
11287   effect(DEF dst, USE src);
11288 
11289   ins_cost(95);
11290   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11291   ins_encode %{
11292     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11293   %}
11294   ins_pipe( pipe_slow );
11295 %}
11296 
11297 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11298   predicate(UseSSE>=2);
11299   match(Set dst (MoveL2D src));
11300   effect(TEMP dst, USE src, TEMP tmp);
11301   ins_cost(85);
11302   format %{ "MOVD   $dst,$src.lo\n\t"
11303             "MOVD   $tmp,$src.hi\n\t"
11304             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11305   ins_encode %{
11306     __ movdl($dst$$XMMRegister, $src$$Register);
11307     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11308     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11309   %}
11310   ins_pipe( pipe_slow );
11311 %}
11312 
11313 
11314 // =======================================================================
11315 // fast clearing of an array
11316 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11317   predicate(!((ClearArrayNode*)n)->is_large());
11318   match(Set dummy (ClearArray cnt base));
11319   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11320 
11321   format %{ $$template
11322     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11323     $$emit$$"CMP    InitArrayShortSize,ECX\n\t"
11324     $$emit$$"JG     LARGE\n\t"
11325     $$emit$$"SHL    ECX, 1\n\t"
11326     $$emit$$"DEC    ECX\n\t"
11327     $$emit$$"JS     DONE\t# Zero length\n\t"
11328     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11329     $$emit$$"DEC    ECX\n\t"
11330     $$emit$$"JGE    LOOP\n\t"
11331     $$emit$$"JMP    DONE\n\t"
11332     $$emit$$"# LARGE:\n\t"
11333     if (UseFastStosb) {
11334        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11335        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11336     } else {
11337        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11338        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11339     }
11340     $$emit$$"# DONE"
11341   %}
11342   ins_encode %{
11343     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11344   %}
11345   ins_pipe( pipe_slow );
11346 %}
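
// For reference, a minimal scalar sketch of what the ClearArray expansion above
// amounts to -- illustrative only, not the code MacroAssembler::clear_mem emits.
// The count unit is assumed to be 8-byte words, as suggested by the
// "SHL ECX,3 # Convert doublewords to bytes" conversion in the format string.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   static void clear_array_sketch(void* base, size_t cnt_words) {
//     uint32_t* p = (uint32_t*) base;
//     // Two 4-byte stores of zero (EAX) per 8-byte word, counting like ECX does.
//     for (size_t i = 0; i < 2 * cnt_words; i++) {
//       p[i] = 0;
//     }
//   }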
11347 
11348 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11349   predicate(((ClearArrayNode*)n)->is_large());
11350   match(Set dummy (ClearArray cnt base));
11351   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11352   format %{ $$template
11353     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11354     if (UseFastStosb) {
11355        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11356        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11357     } else {
11358        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11359        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11360     }
11361     $$emit$$"# DONE"
11362   %}
11363   ins_encode %{
11364     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11365   %}
11366   ins_pipe( pipe_slow );
11367 %}
11368 
11369 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11370                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11371   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11372   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11373   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11374 
11375   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11376   ins_encode %{
11377     __ string_compare($str1$$Register, $str2$$Register,
11378                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11379                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11380   %}
11381   ins_pipe( pipe_slow );
11382 %}
11383 
11384 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11385                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11386   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11387   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11388   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11389 
11390   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11391   ins_encode %{
11392     __ string_compare($str1$$Register, $str2$$Register,
11393                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11394                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11395   %}
11396   ins_pipe( pipe_slow );
11397 %}
11398 
11399 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11400                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11401   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11402   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11403   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11404 
11405   format %{ "String Compare byte[]/char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11406   ins_encode %{
11407     __ string_compare($str1$$Register, $str2$$Register,
11408                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11409                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11410   %}
11411   ins_pipe( pipe_slow );
11412 %}
11413 
11414 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11415                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11416   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11417   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11418   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11419 
11420   format %{ "String Compare char[]/byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11421   ins_encode %{
11422     __ string_compare($str2$$Register, $str1$$Register,
11423                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11424                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11425   %}
11426   ins_pipe( pipe_slow );
11427 %}
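
// For reference, a scalar sketch of the comparison contract the four
// string_compare variants above implement (an assumption based on Java's
// String.compareTo semantics: the difference of the first mismatching code
// units, otherwise the difference of the lengths).  Shown for the UU
// (char[]/char[]) case; the Latin-1 sides read bytes zero-extended to chars.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   static int string_compare_sketch(const uint16_t* s1, size_t cnt1,
//                                    const uint16_t* s2, size_t cnt2) {
//     size_t min_len = (cnt1 < cnt2) ? cnt1 : cnt2;
//     for (size_t i = 0; i < min_len; i++) {
//       if (s1[i] != s2[i]) return (int) s1[i] - (int) s2[i];
//     }
//     return (int) cnt1 - (int) cnt2;
//   }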
11428 
11429 // fast string equals
11430 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11431                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11432   match(Set result (StrEquals (Binary str1 str2) cnt));
11433   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11434 
11435   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11436   ins_encode %{
11437     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11438                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11439                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11440   %}
11441 
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 // fast search of substring with known size.
11446 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11447                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11448   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11449   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11450   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11451 
11452   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11453   ins_encode %{
11454     int icnt2 = (int)$int_cnt2$$constant;
11455     if (icnt2 >= 16) {
11456       // IndexOf for constant substrings with size >= 16 elements
11457       // which don't need to be loaded through the stack.
11458       __ string_indexofC8($str1$$Register, $str2$$Register,
11459                           $cnt1$$Register, $cnt2$$Register,
11460                           icnt2, $result$$Register,
11461                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11462     } else {
11463       // Small strings are loaded through the stack if they cross a page boundary.
11464       __ string_indexof($str1$$Register, $str2$$Register,
11465                         $cnt1$$Register, $cnt2$$Register,
11466                         icnt2, $result$$Register,
11467                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11468     }
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 // fast search of substring with known size.
11474 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11475                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11476   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11477   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11478   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11479 
11480   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11481   ins_encode %{
11482     int icnt2 = (int)$int_cnt2$$constant;
11483     if (icnt2 >= 8) {
11484       // IndexOf for constant substrings with size >= 8 elements
11485       // which don't need to be loaded through the stack.
11486       __ string_indexofC8($str1$$Register, $str2$$Register,
11487                           $cnt1$$Register, $cnt2$$Register,
11488                           icnt2, $result$$Register,
11489                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11490     } else {
11491       // Small strings are loaded through the stack if they cross a page boundary.
11492       __ string_indexof($str1$$Register, $str2$$Register,
11493                         $cnt1$$Register, $cnt2$$Register,
11494                         icnt2, $result$$Register,
11495                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11496     }
11497   %}
11498   ins_pipe( pipe_slow );
11499 %}
11500 
11501 // fast search of substring with known size.
11502 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11503                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11504   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11505   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11506   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11507 
11508   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11509   ins_encode %{
11510     int icnt2 = (int)$int_cnt2$$constant;
11511     if (icnt2 >= 8) {
11512       // IndexOf for constant substrings with size >= 8 elements
11513       // which don't need to be loaded through the stack.
11514       __ string_indexofC8($str1$$Register, $str2$$Register,
11515                           $cnt1$$Register, $cnt2$$Register,
11516                           icnt2, $result$$Register,
11517                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11518     } else {
11519       // Small strings are loaded through the stack if they cross a page boundary.
11520       __ string_indexof($str1$$Register, $str2$$Register,
11521                         $cnt1$$Register, $cnt2$$Register,
11522                         icnt2, $result$$Register,
11523                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11524     }
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
11528 
11529 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11530                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11531   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11532   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11533   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11534 
11535   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11536   ins_encode %{
11537     __ string_indexof($str1$$Register, $str2$$Register,
11538                       $cnt1$$Register, $cnt2$$Register,
11539                       (-1), $result$$Register,
11540                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11541   %}
11542   ins_pipe( pipe_slow );
11543 %}
11544 
11545 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11546                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11547   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11548   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11549   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11550 
11551   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11552   ins_encode %{
11553     __ string_indexof($str1$$Register, $str2$$Register,
11554                       $cnt1$$Register, $cnt2$$Register,
11555                       (-1), $result$$Register,
11556                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11557   %}
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11562                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11563   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11564   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11565   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11566 
11567   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11568   ins_encode %{
11569     __ string_indexof($str1$$Register, $str2$$Register,
11570                       $cnt1$$Register, $cnt2$$Register,
11571                       (-1), $result$$Register,
11572                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11578                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11579   predicate(UseSSE42Intrinsics);
11580   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11581   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11582   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11583   ins_encode %{
11584     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11585                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11586   %}
11587   ins_pipe( pipe_slow );
11588 %}
11589 
11590 // fast array equals
11591 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11592                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11593 %{
11594   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11595   match(Set result (AryEq ary1 ary2));
11596   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11597   //ins_cost(300);
11598 
11599   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11600   ins_encode %{
11601     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11602                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11603                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11604   %}
11605   ins_pipe( pipe_slow );
11606 %}
11607 
11608 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11609                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11610 %{
11611   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11612   match(Set result (AryEq ary1 ary2));
11613   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11614   //ins_cost(300);
11615 
11616   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11617   ins_encode %{
11618     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11619                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11620                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11621   %}
11622   ins_pipe( pipe_slow );
11623 %}
11624 
11625 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11626                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11627 %{
11628   match(Set result (HasNegatives ary1 len));
11629   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11630 
11631   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11632   ins_encode %{
11633     __ has_negatives($ary1$$Register, $len$$Register,
11634                      $result$$Register, $tmp3$$Register,
11635                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11636   %}
11637   ins_pipe( pipe_slow );
11638 %}
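
// Scalar equivalent of the has_negatives intrinsic above -- a sketch, not the
// vectorized code MacroAssembler::has_negatives emits: report whether any byte
// in the array has its sign bit set.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   static bool has_negatives_sketch(const int8_t* ary, size_t len) {
//     for (size_t i = 0; i < len; i++) {
//       if (ary[i] < 0) return true;   // i.e. (uint8_t) ary[i] >= 0x80
//     }
//     return false;
//   }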
11639 
11640 // fast char[] to byte[] compression
11641 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11642                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11643   match(Set result (StrCompressedCopy src (Binary dst len)));
11644   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11645 
11646   format %{ "String Compress $src,$dst -> $result    // KILL ECX, EDX, ESI, EDI" %}
11647   ins_encode %{
11648     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11649                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11650                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11651   %}
11652   ins_pipe( pipe_slow );
11653 %}
11654 
11655 // fast byte[] to char[] inflation
11656 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11657                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11658   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11659   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11660 
11661   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11662   ins_encode %{
11663     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11664                           $tmp1$$XMMRegister, $tmp2$$Register);
11665   %}
11666   ins_pipe( pipe_slow );
11667 %}
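
// Scalar sketch of the inflation done above: each Latin-1 byte is zero-extended
// to a UTF-16 char.  Illustrative only; byte_array_inflate does this with XMM
// loads and unpacks.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   static void inflate_sketch(const uint8_t* src, uint16_t* dst, size_t len) {
//     for (size_t i = 0; i < len; i++) {
//       dst[i] = (uint16_t) src[i];   // zero-extend byte -> char
//     }
//   }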
11668 
11669 // encode char[] to byte[] in ISO_8859_1
11670 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11671                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11672                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11673   match(Set result (EncodeISOArray src (Binary dst len)));
11674   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11675 
11676   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11677   ins_encode %{
11678     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11679                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11680                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11681   %}
11682   ins_pipe( pipe_slow );
11683 %}
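
// Scalar sketch of the EncodeISOArray intrinsic above.  Assumption about the
// result convention: the count of characters successfully encoded, stopping at
// the first char that does not fit in ISO-8859-1.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   static size_t encode_iso_sketch(const uint16_t* src, uint8_t* dst, size_t len) {
//     size_t i = 0;
//     for (; i < len; i++) {
//       if (src[i] > 0xFF) break;      // not representable in ISO-8859-1
//       dst[i] = (uint8_t) src[i];
//     }
//     return i;                        // the value left in the result register
//   }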
11684 
11685 
11686 //----------Control Flow Instructions------------------------------------------
11687 // Signed compare Instructions
11688 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11689   match(Set cr (CmpI op1 op2));
11690   effect( DEF cr, USE op1, USE op2 );
11691   format %{ "CMP    $op1,$op2" %}
11692   opcode(0x3B);  /* Opcode 3B /r */
11693   ins_encode( OpcP, RegReg( op1, op2) );
11694   ins_pipe( ialu_cr_reg_reg );
11695 %}
11696 
11697 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11698   match(Set cr (CmpI op1 op2));
11699   effect( DEF cr, USE op1 );
11700   format %{ "CMP    $op1,$op2" %}
11701   opcode(0x81,0x07);  /* Opcode 81 /7 */
11702   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11703   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11704   ins_pipe( ialu_cr_reg_imm );
11705 %}
11706 
11707 // Cisc-spilled version of cmpI_eReg
11708 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11709   match(Set cr (CmpI op1 (LoadI op2)));
11710 
11711   format %{ "CMP    $op1,$op2" %}
11712   ins_cost(500);
11713   opcode(0x3B);  /* Opcode 3B /r */
11714   ins_encode( OpcP, RegMem( op1, op2) );
11715   ins_pipe( ialu_cr_reg_mem );
11716 %}
11717 
11718 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11719   match(Set cr (CmpI src zero));
11720   effect( DEF cr, USE src );
11721 
11722   format %{ "TEST   $src,$src" %}
11723   opcode(0x85);
11724   ins_encode( OpcP, RegReg( src, src ) );
11725   ins_pipe( ialu_cr_reg_imm );
11726 %}
11727 
11728 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11729   match(Set cr (CmpI (AndI src con) zero));
11730 
11731   format %{ "TEST   $src,$con" %}
11732   opcode(0xF7,0x00);
11733   ins_encode( OpcP, RegOpc(src), Con32(con) );
11734   ins_pipe( ialu_cr_reg_imm );
11735 %}
11736 
11737 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11738   match(Set cr (CmpI (AndI src mem) zero));
11739 
11740   format %{ "TEST   $src,$mem" %}
11741   opcode(0x85);
11742   ins_encode( OpcP, RegMem( src, mem ) );
11743   ins_pipe( ialu_cr_reg_mem );
11744 %}
11745 
11746 // Unsigned compare Instructions; really, same as signed except they
11747 // produce an eFlagsRegU instead of eFlagsReg.
11748 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11749   match(Set cr (CmpU op1 op2));
11750 
11751   format %{ "CMPu   $op1,$op2" %}
11752   opcode(0x3B);  /* Opcode 3B /r */
11753   ins_encode( OpcP, RegReg( op1, op2) );
11754   ins_pipe( ialu_cr_reg_reg );
11755 %}
11756 
11757 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11758   match(Set cr (CmpU op1 op2));
11759 
11760   format %{ "CMPu   $op1,$op2" %}
11761   opcode(0x81,0x07);  /* Opcode 81 /7 */
11762   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11763   ins_pipe( ialu_cr_reg_imm );
11764 %}
11765 
11766 // // Cisc-spilled version of cmpU_eReg
11767 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11768   match(Set cr (CmpU op1 (LoadI op2)));
11769 
11770   format %{ "CMPu   $op1,$op2" %}
11771   ins_cost(500);
11772   opcode(0x3B);  /* Opcode 3B /r */
11773   ins_encode( OpcP, RegMem( op1, op2) );
11774   ins_pipe( ialu_cr_reg_mem );
11775 %}
11776 
11777 // // Cisc-spilled version of cmpU_eReg
11778 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11779 //  match(Set cr (CmpU (LoadI op1) op2));
11780 //
11781 //  format %{ "CMPu   $op1,$op2" %}
11782 //  ins_cost(500);
11783 //  opcode(0x39);  /* Opcode 39 /r */
11784 //  ins_encode( OpcP, RegMem( op1, op2) );
11785 //%}
11786 
11787 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11788   match(Set cr (CmpU src zero));
11789 
11790   format %{ "TESTu  $src,$src" %}
11791   opcode(0x85);
11792   ins_encode( OpcP, RegReg( src, src ) );
11793   ins_pipe( ialu_cr_reg_imm );
11794 %}
11795 
11796 // Unsigned pointer compare Instructions
11797 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11798   match(Set cr (CmpP op1 op2));
11799 
11800   format %{ "CMPu   $op1,$op2" %}
11801   opcode(0x3B);  /* Opcode 3B /r */
11802   ins_encode( OpcP, RegReg( op1, op2) );
11803   ins_pipe( ialu_cr_reg_reg );
11804 %}
11805 
11806 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11807   match(Set cr (CmpP op1 op2));
11808 
11809   format %{ "CMPu   $op1,$op2" %}
11810   opcode(0x81,0x07);  /* Opcode 81 /7 */
11811   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11812   ins_pipe( ialu_cr_reg_imm );
11813 %}
11814 
11815 // // Cisc-spilled version of cmpP_eReg
11816 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11817   match(Set cr (CmpP op1 (LoadP op2)));
11818 
11819   format %{ "CMPu   $op1,$op2" %}
11820   ins_cost(500);
11821   opcode(0x3B);  /* Opcode 3B /r */
11822   ins_encode( OpcP, RegMem( op1, op2) );
11823   ins_pipe( ialu_cr_reg_mem );
11824 %}
11825 
11826 // // Cisc-spilled version of cmpP_eReg
11827 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11828 //  match(Set cr (CmpP (LoadP op1) op2));
11829 //
11830 //  format %{ "CMPu   $op1,$op2" %}
11831 //  ins_cost(500);
11832 //  opcode(0x39);  /* Opcode 39 /r */
11833 //  ins_encode( OpcP, RegMem( op1, op2) );
11834 //%}
11835 
11836 // Compare raw pointer (used in out-of-heap check).
11837 // Only works because non-oop pointers must be raw pointers
11838 // and raw pointers have no anti-dependencies.
11839 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11840   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11841   match(Set cr (CmpP op1 (LoadP op2)));
11842 
11843   format %{ "CMPu   $op1,$op2" %}
11844   opcode(0x3B);  /* Opcode 3B /r */
11845   ins_encode( OpcP, RegMem( op1, op2) );
11846   ins_pipe( ialu_cr_reg_mem );
11847 %}
11848 
11849 //
11850 // This will generate a signed flags result. This should be ok
11851 // since any compare to a zero should be eq/neq.
11852 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11853   match(Set cr (CmpP src zero));
11854 
11855   format %{ "TEST   $src,$src" %}
11856   opcode(0x85);
11857   ins_encode( OpcP, RegReg( src, src ) );
11858   ins_pipe( ialu_cr_reg_imm );
11859 %}
11860 
11861 // Cisc-spilled version of testP_reg
11862 // This will generate a signed flags result. This should be ok
11863 // since any compare to a zero should be eq/neq.
11864 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11865   match(Set cr (CmpP (LoadP op) zero));
11866 
11867   format %{ "TEST   $op,0xFFFFFFFF" %}
11868   ins_cost(500);
11869   opcode(0xF7);               /* Opcode F7 /0 */
11870   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11871   ins_pipe( ialu_cr_reg_imm );
11872 %}
11873 
11874 // Yanked all unsigned pointer compare operations.
11875 // Pointer compares are done with CmpP which is already unsigned.
11876 
11877 //----------Max and Min--------------------------------------------------------
11878 // Min Instructions
11879 ////
11880 //   *** Min and Max using the conditional move are slower than the
11881 //   *** branch version on a Pentium III.
11882 // // Conditional move for min
11883 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11884 //  effect( USE_DEF op2, USE op1, USE cr );
11885 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11886 //  opcode(0x4C,0x0F);
11887 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11888 //  ins_pipe( pipe_cmov_reg );
11889 //%}
11890 //
11891 //// Min Register with Register (P6 version)
11892 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11893 //  predicate(VM_Version::supports_cmov() );
11894 //  match(Set op2 (MinI op1 op2));
11895 //  ins_cost(200);
11896 //  expand %{
11897 //    eFlagsReg cr;
11898 //    compI_eReg(cr,op1,op2);
11899 //    cmovI_reg_lt(op2,op1,cr);
11900 //  %}
11901 //%}
11902 
11903 // Min Register with Register (generic version)
11904 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11905   match(Set dst (MinI dst src));
11906   effect(KILL flags);
11907   ins_cost(300);
11908 
11909   format %{ "MIN    $dst,$src" %}
11910   opcode(0xCC);
11911   ins_encode( min_enc(dst,src) );
11912   ins_pipe( pipe_slow );
11913 %}
11914 
11915 // Max Register with Register
11916 //   *** Min and Max using the conditional move are slower than the
11917 //   *** branch version on a Pentium III.
11918 // // Conditional move for max
11919 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11920 //  effect( USE_DEF op2, USE op1, USE cr );
11921 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11922 //  opcode(0x4F,0x0F);
11923 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11924 //  ins_pipe( pipe_cmov_reg );
11925 //%}
11926 //
11927 // // Max Register with Register (P6 version)
11928 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11929 //  predicate(VM_Version::supports_cmov() );
11930 //  match(Set op2 (MaxI op1 op2));
11931 //  ins_cost(200);
11932 //  expand %{
11933 //    eFlagsReg cr;
11934 //    compI_eReg(cr,op1,op2);
11935 //    cmovI_reg_gt(op2,op1,cr);
11936 //  %}
11937 //%}
11938 
11939 // Max Register with Register (generic version)
11940 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11941   match(Set dst (MaxI dst src));
11942   effect(KILL flags);
11943   ins_cost(300);
11944 
11945   format %{ "MAX    $dst,$src" %}
11946   opcode(0xCC);
11947   ins_encode( max_enc(dst,src) );
11948   ins_pipe( pipe_slow );
11949 %}
11950 
11951 // ============================================================================
11952 // Counted Loop limit node which represents exact final iterator value.
11953 // Note: the resulting value should fit into integer range since
11954 // counted loops have limit check on overflow.
11955 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11956   match(Set limit (LoopLimit (Binary init limit) stride));
11957   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11958   ins_cost(300);
11959 
11960   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11961   ins_encode %{
11962     int strd = (int)$stride$$constant;
11963     assert(strd != 1 && strd != -1, "sanity");
11964     int m1 = (strd > 0) ? 1 : -1;
11965     // Convert limit to long (EAX:EDX)
11966     __ cdql();
11967     // Convert init to long (init:tmp)
11968     __ movl($tmp$$Register, $init$$Register);
11969     __ sarl($tmp$$Register, 31);
11970     // $limit - $init
11971     __ subl($limit$$Register, $init$$Register);
11972     __ sbbl($limit_hi$$Register, $tmp$$Register);
11973     // + ($stride - 1)
11974     if (strd > 0) {
11975       __ addl($limit$$Register, (strd - 1));
11976       __ adcl($limit_hi$$Register, 0);
11977       __ movl($tmp$$Register, strd);
11978     } else {
11979       __ addl($limit$$Register, (strd + 1));
11980       __ adcl($limit_hi$$Register, -1);
11981       __ lneg($limit_hi$$Register, $limit$$Register);
11982       __ movl($tmp$$Register, -strd);
11983     }
11984     // signed division: (EAX:EDX) / pos_stride
11985     __ idivl($tmp$$Register);
11986     if (strd < 0) {
11987       // restore sign
11988       __ negl($tmp$$Register);
11989     }
11990     // (EAX) * stride
11991     __ mull($tmp$$Register);
11992     // + init (ignore upper bits)
11993     __ addl($limit$$Register, $init$$Register);
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
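
// The loopLimit computation above, written out as scalar C for reference.  This
// is a sketch under the assumption of a well-formed counted loop (the span from
// init to limit runs in the direction of stride); the instruct performs the same
// arithmetic in EDX:EAX so the intermediate difference cannot overflow 32 bits.
//
//   #include <stdint.h>
//
//   static int32_t loop_limit_sketch(int32_t init, int32_t limit, int32_t stride) {
//     // stride is a compile-time constant and never +1/-1 (see the assert above).
//     int64_t span  = (int64_t) limit - init;            // may exceed 32 bits
//     int64_t bump  = (stride > 0) ? stride - 1 : stride + 1;
//     int64_t trips = (span + bump) / stride;            // rounds the trip count up
//     return (int32_t)(init + stride * trips);           // exact final iterator value
//   }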
11997 
11998 // ============================================================================
11999 // Branch Instructions
12000 // Jump Table
12001 instruct jumpXtnd(rRegI switch_val) %{
12002   match(Jump switch_val);
12003   ins_cost(350);
12004   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12005   ins_encode %{
12006     // Jump to Address(table_base + switch_reg)
12007     Address index(noreg, $switch_val$$Register, Address::times_1);
12008     __ jump(ArrayAddress($constantaddress, index));
12009   %}
12010   ins_pipe(pipe_jmp);
12011 %}
12012 
12013 // Jump Direct - Label defines a relative address from JMP+1
12014 instruct jmpDir(label labl) %{
12015   match(Goto);
12016   effect(USE labl);
12017 
12018   ins_cost(300);
12019   format %{ "JMP    $labl" %}
12020   size(5);
12021   ins_encode %{
12022     Label* L = $labl$$label;
12023     __ jmp(*L, false); // Always long jump
12024   %}
12025   ins_pipe( pipe_jmp );
12026 %}
12027 
12028 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12029 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12030   match(If cop cr);
12031   effect(USE labl);
12032 
12033   ins_cost(300);
12034   format %{ "J$cop    $labl" %}
12035   size(6);
12036   ins_encode %{
12037     Label* L = $labl$$label;
12038     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12039   %}
12040   ins_pipe( pipe_jcc );
12041 %}
12042 
12043 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12044 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12045   predicate(!n->has_vector_mask_set());
12046   match(CountedLoopEnd cop cr);
12047   effect(USE labl);
12048 
12049   ins_cost(300);
12050   format %{ "J$cop    $labl\t# Loop end" %}
12051   size(6);
12052   ins_encode %{
12053     Label* L = $labl$$label;
12054     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12055   %}
12056   ins_pipe( pipe_jcc );
12057 %}
12058 
12059 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12060 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12061   predicate(!n->has_vector_mask_set());
12062   match(CountedLoopEnd cop cmp);
12063   effect(USE labl);
12064 
12065   ins_cost(300);
12066   format %{ "J$cop,u  $labl\t# Loop end" %}
12067   size(6);
12068   ins_encode %{
12069     Label* L = $labl$$label;
12070     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12071   %}
12072   ins_pipe( pipe_jcc );
12073 %}
12074 
12075 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12076   predicate(!n->has_vector_mask_set());
12077   match(CountedLoopEnd cop cmp);
12078   effect(USE labl);
12079 
12080   ins_cost(200);
12081   format %{ "J$cop,u  $labl\t# Loop end" %}
12082   size(6);
12083   ins_encode %{
12084     Label* L = $labl$$label;
12085     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12086   %}
12087   ins_pipe( pipe_jcc );
12088 %}
12089 
12090 // mask version
12091 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12092 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12093   predicate(n->has_vector_mask_set());
12094   match(CountedLoopEnd cop cr);
12095   effect(USE labl);
12096 
12097   ins_cost(400);
12098   format %{ "J$cop    $labl\t# Loop end\n\t"
12099             "restorevectmask \t# vector mask restore for loops" %}
12100   size(10);
12101   ins_encode %{
12102     Label* L = $labl$$label;
12103     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12104     __ restorevectmask();
12105   %}
12106   ins_pipe( pipe_jcc );
12107 %}
12108 
12109 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12110 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12111   predicate(n->has_vector_mask_set());
12112   match(CountedLoopEnd cop cmp);
12113   effect(USE labl);
12114 
12115   ins_cost(400);
12116   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12117             "restorevectmask \t# vector mask restore for loops" %}
12118   size(10);
12119   ins_encode %{
12120     Label* L = $labl$$label;
12121     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12122     __ restorevectmask();
12123   %}
12124   ins_pipe( pipe_jcc );
12125 %}
12126 
12127 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12128   predicate(n->has_vector_mask_set());
12129   match(CountedLoopEnd cop cmp);
12130   effect(USE labl);
12131 
12132   ins_cost(300);
12133   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12134             "restorevectmask \t# vector mask restore for loops" %}
12135   size(10);
12136   ins_encode %{
12137     Label* L = $labl$$label;
12138     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12139     __ restorevectmask();
12140   %}
12141   ins_pipe( pipe_jcc );
12142 %}
12143 
12144 // Jump Direct Conditional - using unsigned comparison
12145 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12146   match(If cop cmp);
12147   effect(USE labl);
12148 
12149   ins_cost(300);
12150   format %{ "J$cop,u  $labl" %}
12151   size(6);
12152   ins_encode %{
12153     Label* L = $labl$$label;
12154     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12155   %}
12156   ins_pipe(pipe_jcc);
12157 %}
12158 
12159 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12160   match(If cop cmp);
12161   effect(USE labl);
12162 
12163   ins_cost(200);
12164   format %{ "J$cop,u  $labl" %}
12165   size(6);
12166   ins_encode %{
12167     Label* L = $labl$$label;
12168     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12169   %}
12170   ins_pipe(pipe_jcc);
12171 %}
12172 
12173 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12174   match(If cop cmp);
12175   effect(USE labl);
12176 
12177   ins_cost(200);
12178   format %{ $$template
12179     if ($cop$$cmpcode == Assembler::notEqual) {
12180       $$emit$$"JP,u   $labl\n\t"
12181       $$emit$$"J$cop,u   $labl"
12182     } else {
12183       $$emit$$"JP,u   done\n\t"
12184       $$emit$$"J$cop,u   $labl\n\t"
12185       $$emit$$"done:"
12186     }
12187   %}
12188   ins_encode %{
12189     Label* l = $labl$$label;
12190     if ($cop$$cmpcode == Assembler::notEqual) {
12191       __ jcc(Assembler::parity, *l, false);
12192       __ jcc(Assembler::notEqual, *l, false);
12193     } else if ($cop$$cmpcode == Assembler::equal) {
12194       Label done;
12195       __ jccb(Assembler::parity, done);
12196       __ jcc(Assembler::equal, *l, false);
12197       __ bind(done);
12198     } else {
12199        ShouldNotReachHere();
12200     }
12201   %}
12202   ins_pipe(pipe_jcc);
12203 %}
12204 
12205 // ============================================================================
12206 // The second, slow half of a subtype check.  Scan the subklass's secondary
12207 // superclass array for an instance of the superklass.  Set a hidden internal
12208 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12209 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12210 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12211   match(Set result (PartialSubtypeCheck sub super));
12212   effect( KILL rcx, KILL cr );
12213 
12214   ins_cost(1100);  // slightly larger than the next version
12215   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12216             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12217             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12218             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12219             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12220             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12221             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12222      "miss:\t" %}
12223 
12224   opcode(0x1); // Force a XOR of EDI
12225   ins_encode( enc_PartialSubtypeCheck() );
12226   ins_pipe( pipe_slow );
12227 %}
12228 
12229 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12230   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12231   effect( KILL rcx, KILL result );
12232 
12233   ins_cost(1000);
12234   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12235             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12236             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12237             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12238             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12239             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12240      "miss:\t" %}
12241 
12242   opcode(0x0);  // No need to XOR EDI
12243   ins_encode( enc_PartialSubtypeCheck() );
12244   ins_pipe( pipe_slow );
12245 %}
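
// For reference, a C-style sketch of the secondary-supers scan that
// enc_PartialSubtypeCheck() emits with REPNE SCASD in the two instructs above.
// The struct and field names below are simplified, hypothetical stand-ins for
// Klass::secondary_supers / Klass::secondary_super_cache.
//
//   struct KlassSketch {
//     int           super_count;        // stands in for the length slot scanned via ECX
//     KlassSketch** secondary_supers;   // stands in for Klass::secondary_supers
//     KlassSketch*  secondary_super_cache;
//   };
//
//   // Zero on a hit (and the cache is updated), non-zero on a miss -- matching
//   // the zero-for-hit / NZ-for-miss contract described above.
//   static int partial_subtype_check_sketch(KlassSketch* sub, KlassSketch* super) {
//     for (int i = 0; i < sub->super_count; i++) {
//       if (sub->secondary_supers[i] == super) {
//         sub->secondary_super_cache = super;   // hidden internal cache
//         return 0;                             // hit
//       }
//     }
//     return 1;                                 // miss
//   }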
12246 
12247 // ============================================================================
12248 // Branch Instructions -- short offset versions
12249 //
12250 // These instructions are used to replace jumps of a long offset (the default
12251 // match) with jumps of a shorter offset.  These instructions are all tagged
12252 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12253 // match rules in general matching.  Instead, the ADLC generates a conversion
12254 // method in the MachNode which can be used to do in-place replacement of the
12255 // long variant with the shorter variant.  The compiler decides whether a given
12256 // branch is close enough for the short form via the is_short_branch_offset()
12257 // predicate in the machine-specific code section of the file.
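
// For reference: the property the short forms below rely on is that JMP/Jcc rel8
// take a signed 8-bit displacement, so the in-place replacement is only legal
// when the branch offset fits in [-128, 127].  A sketch of that fit test (the
// actual decision is made by the is_short_branch_offset() predicate mentioned
// above):
//
//   static bool fits_in_rel8(int offset) {
//     return offset >= -128 && offset <= 127;
//   }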
12258 
12259 // Jump Direct - Label defines a relative address from JMP+1
12260 instruct jmpDir_short(label labl) %{
12261   match(Goto);
12262   effect(USE labl);
12263 
12264   ins_cost(300);
12265   format %{ "JMP,s  $labl" %}
12266   size(2);
12267   ins_encode %{
12268     Label* L = $labl$$label;
12269     __ jmpb(*L);
12270   %}
12271   ins_pipe( pipe_jmp );
12272   ins_short_branch(1);
12273 %}
12274 
12275 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12276 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12277   match(If cop cr);
12278   effect(USE labl);
12279 
12280   ins_cost(300);
12281   format %{ "J$cop,s  $labl" %}
12282   size(2);
12283   ins_encode %{
12284     Label* L = $labl$$label;
12285     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12286   %}
12287   ins_pipe( pipe_jcc );
12288   ins_short_branch(1);
12289 %}
12290 
12291 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12292 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12293   match(CountedLoopEnd cop cr);
12294   effect(USE labl);
12295 
12296   ins_cost(300);
12297   format %{ "J$cop,s  $labl\t# Loop end" %}
12298   size(2);
12299   ins_encode %{
12300     Label* L = $labl$$label;
12301     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12302   %}
12303   ins_pipe( pipe_jcc );
12304   ins_short_branch(1);
12305 %}
12306 
12307 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12308 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12309   match(CountedLoopEnd cop cmp);
12310   effect(USE labl);
12311 
12312   ins_cost(300);
12313   format %{ "J$cop,us $labl\t# Loop end" %}
12314   size(2);
12315   ins_encode %{
12316     Label* L = $labl$$label;
12317     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12318   %}
12319   ins_pipe( pipe_jcc );
12320   ins_short_branch(1);
12321 %}
12322 
12323 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12324   match(CountedLoopEnd cop cmp);
12325   effect(USE labl);
12326 
12327   ins_cost(300);
12328   format %{ "J$cop,us $labl\t# Loop end" %}
12329   size(2);
12330   ins_encode %{
12331     Label* L = $labl$$label;
12332     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12333   %}
12334   ins_pipe( pipe_jcc );
12335   ins_short_branch(1);
12336 %}
12337 
12338 // Jump Direct Conditional - using unsigned comparison
12339 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12340   match(If cop cmp);
12341   effect(USE labl);
12342 
12343   ins_cost(300);
12344   format %{ "J$cop,us $labl" %}
12345   size(2);
12346   ins_encode %{
12347     Label* L = $labl$$label;
12348     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12349   %}
12350   ins_pipe( pipe_jcc );
12351   ins_short_branch(1);
12352 %}
12353 
12354 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12355   match(If cop cmp);
12356   effect(USE labl);
12357 
12358   ins_cost(300);
12359   format %{ "J$cop,us $labl" %}
12360   size(2);
12361   ins_encode %{
12362     Label* L = $labl$$label;
12363     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12364   %}
12365   ins_pipe( pipe_jcc );
12366   ins_short_branch(1);
12367 %}
12368 
12369 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12370   match(If cop cmp);
12371   effect(USE labl);
12372 
12373   ins_cost(300);
12374   format %{ $$template
12375     if ($cop$$cmpcode == Assembler::notEqual) {
12376       $$emit$$"JP,u,s   $labl\n\t"
12377       $$emit$$"J$cop,u,s   $labl"
12378     } else {
12379       $$emit$$"JP,u,s   done\n\t"
12380       $$emit$$"J$cop,u,s  $labl\n\t"
12381       $$emit$$"done:"
12382     }
12383   %}
12384   size(4);
12385   ins_encode %{
12386     Label* l = $labl$$label;
12387     if ($cop$$cmpcode == Assembler::notEqual) {
12388       __ jccb(Assembler::parity, *l);
12389       __ jccb(Assembler::notEqual, *l);
12390     } else if ($cop$$cmpcode == Assembler::equal) {
12391       Label done;
12392       __ jccb(Assembler::parity, done);
12393       __ jccb(Assembler::equal, *l);
12394       __ bind(done);
12395     } else {
12396        ShouldNotReachHere();
12397     }
12398   %}
12399   ins_pipe(pipe_jcc);
12400   ins_short_branch(1);
12401 %}
12402 
12403 // ============================================================================
12404 // Long Compare
12405 //
12406 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12407 // is tricky.  The flavor of compare used depends on whether we are testing
12408 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12409 // The GE test is the negated LT test.  The LE test can be had by commuting
12410 // the operands (yielding a GE test); negating that commuted test gives the
12411 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12412 // NE test is negated from that.
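
// A scalar sketch of the two-register compare idioms described above
// (illustrative only): LT needs just the sign of the 64-bit difference, and EQ
// only needs to know whether any bit differs in either half.
//
//   #include <stdint.h>
//
//   static bool long_lt_sketch(int32_t a_hi, uint32_t a_lo,
//                              int32_t b_hi, uint32_t b_lo) {
//     // Same result as the CMP lo / SBB hi sequence used below: signed compare
//     // of the high halves, unsigned compare of the low halves on a tie.
//     return (a_hi < b_hi) || (a_hi == b_hi && a_lo < b_lo);
//   }
//
//   static bool long_eq_sketch(uint32_t a_hi, uint32_t a_lo,
//                              uint32_t b_hi, uint32_t b_lo) {
//     // "ORcc the high and low halves": equal iff no bits differ.
//     return ((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0;
//   }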
12413 
12414 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12415 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12416 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12417 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12418 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12419 // foo match ends up with the wrong leaf.  One fix is to not match both
12420 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12421 // both forms beat the trinary form of long-compare and both are very useful
12422 // on Intel which has so few registers.
12423 
12424 // Manifest a CmpL result in an integer register.  Very painful.
12425 // This is the test to avoid.
12426 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12427   match(Set dst (CmpL3 src1 src2));
12428   effect( KILL flags );
12429   ins_cost(1000);
12430   format %{ "XOR    $dst,$dst\n\t"
12431             "CMP    $src1.hi,$src2.hi\n\t"
12432             "JLT,s  m_one\n\t"
12433             "JGT,s  p_one\n\t"
12434             "CMP    $src1.lo,$src2.lo\n\t"
12435             "JB,s   m_one\n\t"
12436             "JEQ,s  done\n"
12437     "p_one:\tINC    $dst\n\t"
12438             "JMP,s  done\n"
12439     "m_one:\tDEC    $dst\n"
12440      "done:" %}
12441   ins_encode %{
12442     Label p_one, m_one, done;
12443     __ xorptr($dst$$Register, $dst$$Register);
12444     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12445     __ jccb(Assembler::less,    m_one);
12446     __ jccb(Assembler::greater, p_one);
12447     __ cmpl($src1$$Register, $src2$$Register);
12448     __ jccb(Assembler::below,   m_one);
12449     __ jccb(Assembler::equal,   done);
12450     __ bind(p_one);
12451     __ incrementl($dst$$Register);
12452     __ jmpb(done);
12453     __ bind(m_one);
12454     __ decrementl($dst$$Register);
12455     __ bind(done);
12456   %}
12457   ins_pipe( pipe_slow );
12458 %}
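
// A sketch of what cmpL3_reg_reg computes (illustrative C++ only;
// hypothetical helper assuming <cstdint>, not emitted code): the classic
// three-way compare, built above from the 32-bit halves exactly as the
// format string shows.
//
//   static int cmp_long3(int64_t a, int64_t b) {   // CmpL3 semantics
//     if (a < b) return -1;                        // m_one: DEC dst
//     if (a > b) return  1;                        // p_one: INC dst
//     return 0;                                    // dst was XOR'ed to zero
//   }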
12459 
12460 //======
12461 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12462 // compares.  Can be used for LE or GT compares by reversing arguments.
12463 // NOT GOOD FOR EQ/NE tests.
12464 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12465   match( Set flags (CmpL src zero ));
12466   ins_cost(100);
12467   format %{ "TEST   $src.hi,$src.hi" %}
12468   opcode(0x85);
12469   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12470   ins_pipe( ialu_cr_reg_reg );
12471 %}
12472 
12473 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12474 // compares.  Can be used for LE or GT compares by reversing arguments.
12475 // NOT GOOD FOR EQ/NE tests.
12476 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12477   match( Set flags (CmpL src1 src2 ));
12478   effect( TEMP tmp );
12479   ins_cost(300);
12480   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12481             "MOV    $tmp,$src1.hi\n\t"
12482             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12483   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12484   ins_pipe( ialu_cr_reg_reg );
12485 %}
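
// A sketch of why the CMP/SBB pair above answers only LT/GE (illustrative
// C++ only; hypothetical helper assuming <cstdint>, not emitted code): the
// low-half CMP produces a borrow, the high-half SBB folds that borrow in,
// and the sign of the adjusted high difference is the sign of the full
// 64-bit difference.  ZF after the SBB only reflects that adjusted high
// half, so EQ/NE need the separate rules further below.
//
//   static bool long_lt(uint32_t lo1, int32_t hi1, uint32_t lo2, int32_t hi2) {
//     uint32_t borrow = (lo1 < lo2) ? 1 : 0;          // CMP $src1.lo,$src2.lo
//     int64_t  hidiff = (int64_t)hi1 - hi2 - borrow;  // SBB $tmp,$src2.hi
//     return hidiff < 0;     // "less" iff the 64-bit difference is negative
//   }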
12486 
// Long compares reg < zero/reg OR reg >= zero/reg.
12488 // Just a wrapper for a normal branch, plus the predicate test.
12489 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12490   match(If cmp flags);
12491   effect(USE labl);
12492   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12493   expand %{
12494     jmpCon(cmp,flags,labl);    // JLT or JGE...
12495   %}
12496 %}
12497 
12498 // Compare 2 longs and CMOVE longs.
12499 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12500   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12501   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12502   ins_cost(400);
12503   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12504             "CMOV$cmp $dst.hi,$src.hi" %}
12505   opcode(0x0F,0x40);
12506   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12507   ins_pipe( pipe_cmov_reg_long );
12508 %}
12509 
12510 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12511   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12512   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12513   ins_cost(500);
12514   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12515             "CMOV$cmp $dst.hi,$src.hi" %}
12516   opcode(0x0F,0x40);
12517   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12518   ins_pipe( pipe_cmov_reg_long );
12519 %}
12520 
12521 // Compare 2 longs and CMOVE ints.
12522 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12523   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12524   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12525   ins_cost(200);
12526   format %{ "CMOV$cmp $dst,$src" %}
12527   opcode(0x0F,0x40);
12528   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12529   ins_pipe( pipe_cmov_reg );
12530 %}
12531 
12532 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12533   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12534   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12535   ins_cost(250);
12536   format %{ "CMOV$cmp $dst,$src" %}
12537   opcode(0x0F,0x40);
12538   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12539   ins_pipe( pipe_cmov_mem );
12540 %}
12541 
// Compare 2 longs and CMOVE ptrs.
12543 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12544   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12545   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12546   ins_cost(200);
12547   format %{ "CMOV$cmp $dst,$src" %}
12548   opcode(0x0F,0x40);
12549   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12550   ins_pipe( pipe_cmov_reg );
12551 %}
12552 
12553 // Compare 2 longs and CMOVE doubles
12554 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12556   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12557   ins_cost(200);
12558   expand %{
12559     fcmovDPR_regS(cmp,flags,dst,src);
12560   %}
12561 %}
12562 
12563 // Compare 2 longs and CMOVE doubles
12564 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12566   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12567   ins_cost(200);
12568   expand %{
12569     fcmovD_regS(cmp,flags,dst,src);
12570   %}
12571 %}
12572 
12573 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12575   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12576   ins_cost(200);
12577   expand %{
12578     fcmovFPR_regS(cmp,flags,dst,src);
12579   %}
12580 %}
12581 
12582 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12584   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12585   ins_cost(200);
12586   expand %{
12587     fcmovF_regS(cmp,flags,dst,src);
12588   %}
12589 %}
12590 
12591 //======
12592 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12593 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12594   match( Set flags (CmpL src zero ));
12595   effect(TEMP tmp);
12596   ins_cost(200);
12597   format %{ "MOV    $tmp,$src.lo\n\t"
12598             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12599   ins_encode( long_cmp_flags0( src, tmp ) );
12600   ins_pipe( ialu_reg_reg_long );
12601 %}
12602 
12603 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12604 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12605   match( Set flags (CmpL src1 src2 ));
12606   ins_cost(200+300);
12607   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12608             "JNE,s  skip\n\t"
12609             "CMP    $src1.hi,$src2.hi\n\t"
12610      "skip:\t" %}
12611   ins_encode( long_cmp_flags1( src1, src2 ) );
12612   ins_pipe( ialu_cr_reg_reg );
12613 %}
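
// A sketch of the reg-reg EQ/NE reduction above (illustrative C++ only;
// hypothetical helper assuming <cstdint>, not emitted code): the high
// halves are compared only when the low halves already matched, so the
// final ZF is set iff both pairs are equal.
//
//   static bool long_eq(uint32_t lo1, uint32_t hi1, uint32_t lo2, uint32_t hi2) {
//     if (lo1 != lo2) return false;   // CMP $src1.lo,$src2.lo ; JNE,s skip
//     return hi1 == hi2;              // CMP $src1.hi,$src2.hi ; skip:
//   }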
12614 
12615 // Long compare reg == zero/reg OR reg != zero/reg
12616 // Just a wrapper for a normal branch, plus the predicate test.
12617 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12618   match(If cmp flags);
12619   effect(USE labl);
12620   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12621   expand %{
12622     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12623   %}
12624 %}
12625 
12626 // Compare 2 longs and CMOVE longs.
12627 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12628   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12629   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12630   ins_cost(400);
12631   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12632             "CMOV$cmp $dst.hi,$src.hi" %}
12633   opcode(0x0F,0x40);
12634   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12635   ins_pipe( pipe_cmov_reg_long );
12636 %}
12637 
12638 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12639   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12640   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12641   ins_cost(500);
12642   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12643             "CMOV$cmp $dst.hi,$src.hi" %}
12644   opcode(0x0F,0x40);
12645   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12646   ins_pipe( pipe_cmov_reg_long );
12647 %}
12648 
12649 // Compare 2 longs and CMOVE ints.
12650 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12651   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12652   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12653   ins_cost(200);
12654   format %{ "CMOV$cmp $dst,$src" %}
12655   opcode(0x0F,0x40);
12656   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12657   ins_pipe( pipe_cmov_reg );
12658 %}
12659 
12660 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12661   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12662   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12663   ins_cost(250);
12664   format %{ "CMOV$cmp $dst,$src" %}
12665   opcode(0x0F,0x40);
12666   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12667   ins_pipe( pipe_cmov_mem );
12668 %}
12669 
// Compare 2 longs and CMOVE ptrs.
12671 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12672   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12673   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12674   ins_cost(200);
12675   format %{ "CMOV$cmp $dst,$src" %}
12676   opcode(0x0F,0x40);
12677   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12678   ins_pipe( pipe_cmov_reg );
12679 %}
12680 
12681 // Compare 2 longs and CMOVE doubles
12682 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12684   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12685   ins_cost(200);
12686   expand %{
12687     fcmovDPR_regS(cmp,flags,dst,src);
12688   %}
12689 %}
12690 
12691 // Compare 2 longs and CMOVE doubles
12692 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12694   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12695   ins_cost(200);
12696   expand %{
12697     fcmovD_regS(cmp,flags,dst,src);
12698   %}
12699 %}
12700 
12701 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12703   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12704   ins_cost(200);
12705   expand %{
12706     fcmovFPR_regS(cmp,flags,dst,src);
12707   %}
12708 %}
12709 
12710 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12712   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12713   ins_cost(200);
12714   expand %{
12715     fcmovF_regS(cmp,flags,dst,src);
12716   %}
12717 %}
12718 
12719 //======
12720 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12721 // Same as cmpL_reg_flags_LEGT except must negate src
12722 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12723   match( Set flags (CmpL src zero ));
12724   effect( TEMP tmp );
12725   ins_cost(300);
12726   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12727             "CMP    $tmp,$src.lo\n\t"
12728             "SBB    $tmp,$src.hi\n\t" %}
12729   ins_encode( long_cmp_flags3(src, tmp) );
12730   ins_pipe( ialu_reg_reg_long );
12731 %}
12732 
12733 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12734 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12735 // requires a commuted test to get the same result.
12736 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12737   match( Set flags (CmpL src1 src2 ));
12738   effect( TEMP tmp );
12739   ins_cost(300);
12740   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12741             "MOV    $tmp,$src2.hi\n\t"
12742             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12743   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12744   ins_pipe( ialu_cr_reg_reg );
12745 %}
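
// A sketch of the commuted-test idea behind the two LEGT rules above
// (illustrative C++ only; hypothetical helpers, nothing here is emitted):
// the flag-setting sequences can only answer signed LT/GE, so LE and GT
// are rewritten by swapping the operands and asking the GE/LT question of
// the swapped pair; cmpOp_commute supplies the matching condition codes.
//
//   static bool long_le(int64_t a, int64_t b) {
//     return b >= a;                  // LE(a,b) == GE(b,a)
//   }
//   static bool long_gt(int64_t a, int64_t b) {
//     return b < a;                   // GT(a,b) == LT(b,a)
//   }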
12746 
// Long compares reg <= zero/reg OR reg > zero/reg.
12748 // Just a wrapper for a normal branch, plus the predicate test
12749 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12750   match(If cmp flags);
12751   effect(USE labl);
12752   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12753   ins_cost(300);
12754   expand %{
12755     jmpCon(cmp,flags,labl);    // JGT or JLE...
12756   %}
12757 %}
12758 
12759 // Compare 2 longs and CMOVE longs.
12760 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12761   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12762   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12763   ins_cost(400);
12764   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12765             "CMOV$cmp $dst.hi,$src.hi" %}
12766   opcode(0x0F,0x40);
12767   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12768   ins_pipe( pipe_cmov_reg_long );
12769 %}
12770 
12771 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12772   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12773   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12774   ins_cost(500);
12775   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12776             "CMOV$cmp $dst.hi,$src.hi+4" %}
12777   opcode(0x0F,0x40);
12778   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12779   ins_pipe( pipe_cmov_reg_long );
12780 %}
12781 
12782 // Compare 2 longs and CMOVE ints.
12783 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12784   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12785   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12786   ins_cost(200);
12787   format %{ "CMOV$cmp $dst,$src" %}
12788   opcode(0x0F,0x40);
12789   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12790   ins_pipe( pipe_cmov_reg );
12791 %}
12792 
12793 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12794   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12795   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12796   ins_cost(250);
12797   format %{ "CMOV$cmp $dst,$src" %}
12798   opcode(0x0F,0x40);
12799   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12800   ins_pipe( pipe_cmov_mem );
12801 %}
12802 
12803 // Compare 2 longs and CMOVE ptrs.
12804 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12805   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12806   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12807   ins_cost(200);
12808   format %{ "CMOV$cmp $dst,$src" %}
12809   opcode(0x0F,0x40);
12810   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12811   ins_pipe( pipe_cmov_reg );
12812 %}
12813 
12814 // Compare 2 longs and CMOVE doubles
12815 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12817   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12818   ins_cost(200);
12819   expand %{
12820     fcmovDPR_regS(cmp,flags,dst,src);
12821   %}
12822 %}
12823 
12824 // Compare 2 longs and CMOVE doubles
12825 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12827   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12828   ins_cost(200);
12829   expand %{
12830     fcmovD_regS(cmp,flags,dst,src);
12831   %}
12832 %}
12833 
12834 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12836   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12837   ins_cost(200);
12838   expand %{
12839     fcmovFPR_regS(cmp,flags,dst,src);
12840   %}
12841 %}
12842 
12843 
12844 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12846   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12847   ins_cost(200);
12848   expand %{
12849     fcmovF_regS(cmp,flags,dst,src);
12850   %}
12851 %}
12852 
12853 
12854 // ============================================================================
12855 // Procedure Call/Return Instructions
12856 // Call Java Static Instruction
12857 // Note: If this code changes, the corresponding ret_addr_offset() and
12858 //       compute_padding() functions will have to be adjusted.
12859 instruct CallStaticJavaDirect(method meth) %{
12860   match(CallStaticJava);
12861   effect(USE meth);
12862 
12863   ins_cost(300);
12864   format %{ "CALL,static " %}
12865   opcode(0xE8); /* E8 cd */
12866   ins_encode( pre_call_resets,
12867               Java_Static_Call( meth ),
12868               call_epilog,
12869               post_call_FPU );
12870   ins_pipe( pipe_slow );
12871   ins_alignment(4);
12872 %}
12873 
12874 // Call Java Dynamic Instruction
12875 // Note: If this code changes, the corresponding ret_addr_offset() and
12876 //       compute_padding() functions will have to be adjusted.
12877 instruct CallDynamicJavaDirect(method meth) %{
12878   match(CallDynamicJava);
12879   effect(USE meth);
12880 
12881   ins_cost(300);
12882   format %{ "MOV    EAX,(oop)-1\n\t"
12883             "CALL,dynamic" %}
12884   opcode(0xE8); /* E8 cd */
12885   ins_encode( pre_call_resets,
12886               Java_Dynamic_Call( meth ),
12887               call_epilog,
12888               post_call_FPU );
12889   ins_pipe( pipe_slow );
12890   ins_alignment(4);
12891 %}
12892 
12893 // Call Runtime Instruction
12894 instruct CallRuntimeDirect(method meth) %{
12895   match(CallRuntime );
12896   effect(USE meth);
12897 
12898   ins_cost(300);
12899   format %{ "CALL,runtime " %}
12900   opcode(0xE8); /* E8 cd */
12901   // Use FFREEs to clear entries in float stack
12902   ins_encode( pre_call_resets,
12903               FFree_Float_Stack_All,
12904               Java_To_Runtime( meth ),
12905               post_call_FPU );
12906   ins_pipe( pipe_slow );
12907 %}
12908 
12909 // Call runtime without safepoint
12910 instruct CallLeafDirect(method meth) %{
12911   match(CallLeaf);
12912   effect(USE meth);
12913 
12914   ins_cost(300);
12915   format %{ "CALL_LEAF,runtime " %}
12916   opcode(0xE8); /* E8 cd */
12917   ins_encode( pre_call_resets,
12918               FFree_Float_Stack_All,
12919               Java_To_Runtime( meth ),
12920               Verify_FPU_For_Leaf, post_call_FPU );
12921   ins_pipe( pipe_slow );
12922 %}
12923 
12924 instruct CallLeafNoFPDirect(method meth) %{
12925   match(CallLeafNoFP);
12926   effect(USE meth);
12927 
12928   ins_cost(300);
12929   format %{ "CALL_LEAF_NOFP,runtime " %}
12930   opcode(0xE8); /* E8 cd */
12931   ins_encode(Java_To_Runtime(meth));
12932   ins_pipe( pipe_slow );
12933 %}
12934 
12935 
12936 // Return Instruction
12937 // Remove the return address & jump to it.
12938 instruct Ret() %{
12939   match(Return);
12940   format %{ "RET" %}
12941   opcode(0xC3);
12942   ins_encode(OpcP);
12943   ins_pipe( pipe_jmp );
12944 %}
12945 
12946 // Tail Call; Jump from runtime stub to Java code.
12947 // Also known as an 'interprocedural jump'.
12948 // Target of jump will eventually return to caller.
12949 // TailJump below removes the return address.
12950 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12951   match(TailCall jump_target method_oop );
12952   ins_cost(300);
12953   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12954   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12955   ins_encode( OpcP, RegOpc(jump_target) );
12956   ins_pipe( pipe_jmp );
12957 %}
12958 
12959 
12960 // Tail Jump; remove the return address; jump to target.
12961 // TailCall above leaves the return address around.
12962 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12963   match( TailJump jump_target ex_oop );
12964   ins_cost(300);
12965   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12966             "JMP    $jump_target " %}
12967   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12968   ins_encode( enc_pop_rdx,
12969               OpcP, RegOpc(jump_target) );
12970   ins_pipe( pipe_jmp );
12971 %}
12972 
12973 // Create exception oop: created by stack-crawling runtime code.
12974 // Created exception is now available to this handler, and is setup
12975 // just prior to jumping to this handler.  No code emitted.
12976 instruct CreateException( eAXRegP ex_oop )
12977 %{
12978   match(Set ex_oop (CreateEx));
12979 
12980   size(0);
12981   // use the following format syntax
12982   format %{ "# exception oop is in EAX; no code emitted" %}
12983   ins_encode();
12984   ins_pipe( empty );
12985 %}
12986 
12987 
12988 // Rethrow exception:
12989 // The exception oop will come in the first argument position.
12990 // Then JUMP (not call) to the rethrow stub code.
12991 instruct RethrowException()
12992 %{
12993   match(Rethrow);
12994 
12995   // use the following format syntax
12996   format %{ "JMP    rethrow_stub" %}
12997   ins_encode(enc_rethrow);
12998   ins_pipe( pipe_jmp );
12999 %}
13000 
13001 // inlined locking and unlocking
13002 
13003 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13004   predicate(Compile::current()->use_rtm());
13005   match(Set cr (FastLock object box));
13006   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13007   ins_cost(300);
13008   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13009   ins_encode %{
13010     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13011                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13012                  _counters, _rtm_counters, _stack_rtm_counters,
13013                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13014                  true, ra_->C->profile_rtm());
13015   %}
13016   ins_pipe(pipe_slow);
13017 %}
13018 
13019 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13020   predicate(!Compile::current()->use_rtm());
13021   match(Set cr (FastLock object box));
13022   effect(TEMP tmp, TEMP scr, USE_KILL box);
13023   ins_cost(300);
13024   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13025   ins_encode %{
13026     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13027                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13028   %}
13029   ins_pipe(pipe_slow);
13030 %}
13031 
13032 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13033   match(Set cr (FastUnlock object box));
13034   effect(TEMP tmp, USE_KILL box);
13035   ins_cost(300);
13036   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13037   ins_encode %{
13038     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13039   %}
13040   ins_pipe(pipe_slow);
13041 %}
13042 
13043 
13044 
13045 // ============================================================================
13046 // Safepoint Instruction
13047 instruct safePoint_poll(eFlagsReg cr) %{
13048   match(SafePoint);
13049   effect(KILL cr);
13050 
13051   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13052   // On SPARC that might be acceptable as we can generate the address with
13053   // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on index 0 of the D$ (data cache).  Because
  // of alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last cache line in the polling page.
13058 
13059   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13060   ins_cost(125);
  size(6);
13062   ins_encode( Safepoint_Poll() );
13063   ins_pipe( ialu_reg_mem );
13064 %}
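
// A sketch of what the poll above relies on (illustrative C++ model only;
// the page variable and helper are hypothetical, not VM code): the poll is
// a dummy read of a reserved polling page.  To request a safepoint the VM
// protects that page, so the next poll faults and the signal handler
// brings the thread to a stop; while no safepoint is pending the poll is
// just a cheap load.
//
//   static volatile char* polling_page;    // mapped once at startup
//
//   static void safepoint_poll() {
//     (void)*polling_page;   // cheap when unarmed; traps when protected
//   }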
13065 
13066 
13067 // ============================================================================
13068 // This name is KNOWN by the ADLC and cannot be changed.
13069 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13070 // for this guy.
13071 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13072   match(Set dst (ThreadLocal));
13073   effect(DEF dst, KILL cr);
13074 
13075   format %{ "MOV    $dst, Thread::current()" %}
13076   ins_encode %{
13077     Register dstReg = as_Register($dst$$reg);
13078     __ get_thread(dstReg);
13079   %}
13080   ins_pipe( ialu_reg_fat );
13081 %}
13082 
13083 
13084 
13085 //----------PEEPHOLE RULES-----------------------------------------------------
13086 // These must follow all instruction definitions as they use the names
13087 // defined in the instructions definitions.
13088 //
13089 // peepmatch ( root_instr_name [preceding_instruction]* );
13090 //
13091 // peepconstraint %{
13092 // (instruction_number.operand_name relational_op instruction_number.operand_name
13093 //  [, ...] );
13094 // // instruction numbers are zero-based using left to right order in peepmatch
13095 //
13096 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13097 // // provide an instruction_number.operand_name for each operand that appears
13098 // // in the replacement instruction's match rule
13099 //
13100 // ---------VM FLAGS---------------------------------------------------------
13101 //
13102 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13103 //
13104 // Each peephole rule is given an identifying number starting with zero and
13105 // increasing by one in the order seen by the parser.  An individual peephole
13106 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13107 // on the command-line.
13108 //
13109 // ---------CURRENT LIMITATIONS----------------------------------------------
13110 //
13111 // Only match adjacent instructions in same basic block
13112 // Only equality constraints
13113 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13114 // Only one replacement instruction
13115 //
13116 // ---------EXAMPLE----------------------------------------------------------
13117 //
13118 // // pertinent parts of existing instructions in architecture description
13119 // instruct movI(rRegI dst, rRegI src) %{
13120 //   match(Set dst (CopyI src));
13121 // %}
13122 //
13123 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13124 //   match(Set dst (AddI dst src));
13125 //   effect(KILL cr);
13126 // %}
13127 //
13128 // // Change (inc mov) to lea
13129 // peephole %{
//   // increment preceded by register-register move
13131 //   peepmatch ( incI_eReg movI );
13132 //   // require that the destination register of the increment
13133 //   // match the destination register of the move
13134 //   peepconstraint ( 0.dst == 1.dst );
13135 //   // construct a replacement instruction that sets
13136 //   // the destination to ( move's source register + one )
13137 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13138 // %}
13139 //
13140 // Implementation no longer uses movX instructions since
13141 // machine-independent system no longer uses CopyX nodes.
13142 //
13143 // peephole %{
13144 //   peepmatch ( incI_eReg movI );
13145 //   peepconstraint ( 0.dst == 1.dst );
13146 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13147 // %}
13148 //
13149 // peephole %{
13150 //   peepmatch ( decI_eReg movI );
13151 //   peepconstraint ( 0.dst == 1.dst );
13152 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13153 // %}
13154 //
13155 // peephole %{
13156 //   peepmatch ( addI_eReg_imm movI );
13157 //   peepconstraint ( 0.dst == 1.dst );
13158 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13159 // %}
13160 //
13161 // peephole %{
13162 //   peepmatch ( addP_eReg_imm movP );
13163 //   peepconstraint ( 0.dst == 1.dst );
13164 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13165 // %}
13166 
13167 // // Change load of spilled value to only a spill
13168 // instruct storeI(memory mem, rRegI src) %{
13169 //   match(Set mem (StoreI mem src));
13170 // %}
13171 //
13172 // instruct loadI(rRegI dst, memory mem) %{
13173 //   match(Set dst (LoadI mem));
13174 // %}
13175 //
13176 peephole %{
13177   peepmatch ( loadI storeI );
13178   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13179   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13180 %}
13181 
13182 //----------SMARTSPILL RULES---------------------------------------------------
13183 // These must follow all instruction definitions as they use the names
13184 // defined in the instructions definitions.